chore(typos): Add parse tracing

This commit is contained in:
Ed Page 2023-07-14 12:32:07 -05:00
parent a1ad167632
commit e98fc52b0d

View file

@ -133,6 +133,7 @@ mod parser {
use winnow::stream::Stream;
use winnow::stream::StreamIsPartial;
use winnow::token::*;
use winnow::trace::trace;
pub(crate) fn next_identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where
@ -153,7 +154,7 @@ mod parser {
// `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
// or unexpected cases than strip off start characters to a word since we aren't doing a
// proper word boundary parse
take_while(1.., is_xid_continue).parse_next(input)
trace("identifier", take_while(1.., is_xid_continue)).parse_next(input)
}
fn ignore<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -162,23 +163,26 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
take_many0(alt((
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
// - Make sure you always consume it
terminated(uuid_literal, peek(sep1)),
terminated(hash_literal, peek(sep1)),
terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
terminated(ordinal_literal, peek(sep1)),
terminated(hex_literal, peek(sep1)),
terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
terminated(email_literal, peek(sep1)),
terminated(url_literal, peek(sep1)),
terminated(css_color, peek(sep1)),
c_escape,
printf,
other,
)))
trace(
"ignore",
take_many0(alt((
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
// - Make sure you always consume it
terminated(uuid_literal, peek(sep1)),
terminated(hash_literal, peek(sep1)),
terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
terminated(ordinal_literal, peek(sep1)),
terminated(hex_literal, peek(sep1)),
terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
terminated(email_literal, peek(sep1)),
terminated(url_literal, peek(sep1)),
terminated(css_color, peek(sep1)),
c_escape,
printf,
other,
))),
)
.parse_next(input)
}
@ -201,12 +205,15 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
(
one_of(|c| !is_xid_continue(c)),
take_while(0.., is_ignore_char),
trace(
"other",
(
one_of(|c| !is_xid_continue(c)),
take_while(0.., is_ignore_char),
)
.recognize(),
)
.recognize()
.parse_next(input)
.parse_next(input)
}
fn ordinal_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -221,14 +228,17 @@ mod parser {
['_'].contains(&c)
}
(
take_while(0.., is_sep),
take_while(1.., is_dec_digit),
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
take_while(0.., is_sep),
trace(
"ordinal_literal",
(
take_while(0.., is_sep),
take_while(1.., is_dec_digit),
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
take_while(0.., is_sep),
)
.recognize(),
)
.recognize()
.parse_next(input)
.parse_next(input)
}
fn dec_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -237,7 +247,7 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
take_while(1.., is_dec_digit_with_sep).parse_next(input)
trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input)
}
fn hex_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -259,12 +269,15 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
preceded(
'#',
alt((
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
)),
trace(
"color",
preceded(
'#',
alt((
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
)),
),
)
.parse_next(input)
}
@ -275,31 +288,34 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
alt((
(
take_while(8, is_lower_hex_digit),
'-',
take_while(4, is_lower_hex_digit),
'-',
take_while(4, is_lower_hex_digit),
'-',
take_while(4, is_lower_hex_digit),
'-',
take_while(12, is_lower_hex_digit),
),
(
take_while(8, is_upper_hex_digit),
'-',
take_while(4, is_upper_hex_digit),
'-',
take_while(4, is_upper_hex_digit),
'-',
take_while(4, is_upper_hex_digit),
'-',
take_while(12, is_upper_hex_digit),
),
))
.recognize()
trace(
"uuid",
alt((
(
take_while(8, is_lower_hex_digit),
'-',
take_while(4, is_lower_hex_digit),
'-',
take_while(4, is_lower_hex_digit),
'-',
take_while(4, is_lower_hex_digit),
'-',
take_while(12, is_lower_hex_digit),
),
(
take_while(8, is_upper_hex_digit),
'-',
take_while(4, is_upper_hex_digit),
'-',
take_while(4, is_upper_hex_digit),
'-',
take_while(4, is_upper_hex_digit),
'-',
take_while(12, is_upper_hex_digit),
),
))
.recognize(),
)
.parse_next(input)
}
@ -319,10 +335,13 @@ mod parser {
// or more.
const IGNORE_HEX_MIN: usize = 32;
alt((
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
))
trace(
"hash",
alt((
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
)),
)
.parse_next(input)
}
@ -332,32 +351,35 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?;
trace("base64", move |input: T| {
let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?;
const CHUNK: usize = 4;
let padding_offset = input.offset_to(&padding);
let mut padding_len = CHUNK - padding_offset % CHUNK;
if padding_len == CHUNK {
padding_len = 0;
}
const CHUNK: usize = 4;
let padding_offset = input.offset_to(&padding);
let mut padding_len = CHUNK - padding_offset % CHUNK;
if padding_len == CHUNK {
padding_len = 0;
}
if captured.slice_len() < 90
&& padding_len == 0
&& captured
.as_bstr()
.iter()
.all(|c| !['/', '+'].contains(&c.as_char()))
{
return Err(winnow::error::ErrMode::Backtrack(
winnow::error::Error::new(input, winnow::error::ErrorKind::Slice),
));
}
if captured.slice_len() < 90
&& padding_len == 0
&& captured
.as_bstr()
.iter()
.all(|c| !['/', '+'].contains(&c.as_char()))
{
return Err(winnow::error::ErrMode::Backtrack(
winnow::error::Error::new(input, winnow::error::ErrorKind::Slice),
));
}
let (after, _) =
take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
let (after, _) =
take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
let after_offset = input.offset_to(&after);
Ok(input.next_slice(after_offset))
let after_offset = input.offset_to(&after);
Ok(input.next_slice(after_offset))
})
.parse_next(input)
}
fn email_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -366,13 +388,16 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
(
take_while(1.., is_localport_char),
'@',
take_while(1.., is_domain_char),
trace(
"email",
(
take_while(1.., is_localport_char),
'@',
take_while(1.., is_domain_char),
)
.recognize(),
)
.recognize()
.parse_next(input)
.parse_next(input)
}
fn url_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -381,24 +406,27 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
(
opt(terminated(
take_while(1.., is_scheme_char),
// HACK: Technically you can skip `//` if you don't have a domain but that would
// get messy to support.
(':', '/', '/'),
)),
trace(
"url",
(
opt(terminated(url_userinfo, '@')),
take_while(1.., is_domain_char),
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
),
'/',
// HACK: Too lazy to enumerate
take_while(0.., is_path_query_fragment),
opt(terminated(
take_while(1.., is_scheme_char),
// HACK: Technically you can skip `//` if you don't have a domain but that would
// get messy to support.
(':', '/', '/'),
)),
(
opt(terminated(url_userinfo, '@')),
take_while(1.., is_domain_char),
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
),
'/',
// HACK: Too lazy to enumerate
take_while(0.., is_path_query_fragment),
)
.recognize(),
)
.recognize()
.parse_next(input)
.parse_next(input)
}
fn url_userinfo<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -407,12 +435,15 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
(
take_while(1.., is_localport_char),
opt(preceded(':', take_while(0.., is_localport_char))),
trace(
"userinfo",
(
take_while(1.., is_localport_char),
opt(preceded(':', take_while(0.., is_localport_char))),
)
.recognize(),
)
.recognize()
.parse_next(input)
.parse_next(input)
}
fn c_escape<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -425,7 +456,11 @@ mod parser {
// regular string that does escaping. The escaped letter might be part of a word, or it
// might not be. Rather than guess and be wrong part of the time and correct people's words
// incorrectly, we opt for just not evaluating it at all.
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)).parse_next(input)
trace(
"escape",
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)),
)
.parse_next(input)
}
fn printf<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -434,7 +469,7 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
preceded('%', take_while(1.., is_xid_continue)).parse_next(input)
trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input)
}
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, <I as Stream>::Slice, E>