chore(typos): Add parse tracing

This commit is contained in:
Ed Page 2023-07-14 12:32:07 -05:00
parent a1ad167632
commit e98fc52b0d

View file

@ -133,6 +133,7 @@ mod parser {
use winnow::stream::Stream; use winnow::stream::Stream;
use winnow::stream::StreamIsPartial; use winnow::stream::StreamIsPartial;
use winnow::token::*; use winnow::token::*;
use winnow::trace::trace;
pub(crate) fn next_identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice> pub(crate) fn next_identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
@ -153,7 +154,7 @@ mod parser {
// `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd // `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
// or unexpected cases than strip off start characters to a word since we aren't doing a // or unexpected cases than strip off start characters to a word since we aren't doing a
// proper word boundary parse // proper word boundary parse
take_while(1.., is_xid_continue).parse_next(input) trace("identifier", take_while(1.., is_xid_continue)).parse_next(input)
} }
fn ignore<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn ignore<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -162,23 +163,26 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
take_many0(alt(( trace(
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`, "ignore",
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up take_many0(alt((
// - Make sure you always consume it // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
terminated(uuid_literal, peek(sep1)), // - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
terminated(hash_literal, peek(sep1)), // - Make sure you always consume it
terminated(base64_literal, peek(sep1)), // base64 should be quoted or something terminated(uuid_literal, peek(sep1)),
terminated(ordinal_literal, peek(sep1)), terminated(hash_literal, peek(sep1)),
terminated(hex_literal, peek(sep1)), terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words terminated(ordinal_literal, peek(sep1)),
terminated(email_literal, peek(sep1)), terminated(hex_literal, peek(sep1)),
terminated(url_literal, peek(sep1)), terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
terminated(css_color, peek(sep1)), terminated(email_literal, peek(sep1)),
c_escape, terminated(url_literal, peek(sep1)),
printf, terminated(css_color, peek(sep1)),
other, c_escape,
))) printf,
other,
))),
)
.parse_next(input) .parse_next(input)
} }
@ -201,12 +205,15 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
( trace(
one_of(|c| !is_xid_continue(c)), "other",
take_while(0.., is_ignore_char), (
one_of(|c| !is_xid_continue(c)),
take_while(0.., is_ignore_char),
)
.recognize(),
) )
.recognize() .parse_next(input)
.parse_next(input)
} }
fn ordinal_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn ordinal_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -221,14 +228,17 @@ mod parser {
['_'].contains(&c) ['_'].contains(&c)
} }
( trace(
take_while(0.., is_sep), "ordinal_literal",
take_while(1.., is_dec_digit), (
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))), take_while(0.., is_sep),
take_while(0.., is_sep), take_while(1.., is_dec_digit),
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
take_while(0.., is_sep),
)
.recognize(),
) )
.recognize() .parse_next(input)
.parse_next(input)
} }
fn dec_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn dec_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -237,7 +247,7 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
take_while(1.., is_dec_digit_with_sep).parse_next(input) trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input)
} }
fn hex_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn hex_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -259,12 +269,15 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
preceded( trace(
'#', "color",
alt(( preceded(
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)), '#',
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)), alt((
)), terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
)),
),
) )
.parse_next(input) .parse_next(input)
} }
@ -275,31 +288,34 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
alt(( trace(
( "uuid",
take_while(8, is_lower_hex_digit), alt((
'-', (
take_while(4, is_lower_hex_digit), take_while(8, is_lower_hex_digit),
'-', '-',
take_while(4, is_lower_hex_digit), take_while(4, is_lower_hex_digit),
'-', '-',
take_while(4, is_lower_hex_digit), take_while(4, is_lower_hex_digit),
'-', '-',
take_while(12, is_lower_hex_digit), take_while(4, is_lower_hex_digit),
), '-',
( take_while(12, is_lower_hex_digit),
take_while(8, is_upper_hex_digit), ),
'-', (
take_while(4, is_upper_hex_digit), take_while(8, is_upper_hex_digit),
'-', '-',
take_while(4, is_upper_hex_digit), take_while(4, is_upper_hex_digit),
'-', '-',
take_while(4, is_upper_hex_digit), take_while(4, is_upper_hex_digit),
'-', '-',
take_while(12, is_upper_hex_digit), take_while(4, is_upper_hex_digit),
), '-',
)) take_while(12, is_upper_hex_digit),
.recognize() ),
))
.recognize(),
)
.parse_next(input) .parse_next(input)
} }
@ -319,10 +335,13 @@ mod parser {
// or more. // or more.
const IGNORE_HEX_MIN: usize = 32; const IGNORE_HEX_MIN: usize = 32;
alt(( trace(
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit), "hash",
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit), alt((
)) take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
)),
)
.parse_next(input) .parse_next(input)
} }
@ -332,32 +351,35 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?; trace("base64", move |input: T| {
let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?;
const CHUNK: usize = 4; const CHUNK: usize = 4;
let padding_offset = input.offset_to(&padding); let padding_offset = input.offset_to(&padding);
let mut padding_len = CHUNK - padding_offset % CHUNK; let mut padding_len = CHUNK - padding_offset % CHUNK;
if padding_len == CHUNK { if padding_len == CHUNK {
padding_len = 0; padding_len = 0;
} }
if captured.slice_len() < 90 if captured.slice_len() < 90
&& padding_len == 0 && padding_len == 0
&& captured && captured
.as_bstr() .as_bstr()
.iter() .iter()
.all(|c| !['/', '+'].contains(&c.as_char())) .all(|c| !['/', '+'].contains(&c.as_char()))
{ {
return Err(winnow::error::ErrMode::Backtrack( return Err(winnow::error::ErrMode::Backtrack(
winnow::error::Error::new(input, winnow::error::ErrorKind::Slice), winnow::error::Error::new(input, winnow::error::ErrorKind::Slice),
)); ));
} }
let (after, _) = let (after, _) =
take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?; take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
let after_offset = input.offset_to(&after); let after_offset = input.offset_to(&after);
Ok(input.next_slice(after_offset)) Ok(input.next_slice(after_offset))
})
.parse_next(input)
} }
fn email_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn email_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -366,13 +388,16 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
( trace(
take_while(1.., is_localport_char), "email",
'@', (
take_while(1.., is_domain_char), take_while(1.., is_localport_char),
'@',
take_while(1.., is_domain_char),
)
.recognize(),
) )
.recognize() .parse_next(input)
.parse_next(input)
} }
fn url_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn url_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -381,24 +406,27 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
( trace(
opt(terminated( "url",
take_while(1.., is_scheme_char),
// HACK: Technically you can skip `//` if you don't have a domain but that would
// get messy to support.
(':', '/', '/'),
)),
( (
opt(terminated(url_userinfo, '@')), opt(terminated(
take_while(1.., is_domain_char), take_while(1.., is_scheme_char),
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))), // HACK: Technically you can skip `//` if you don't have a domain but that would
), // get messy to support.
'/', (':', '/', '/'),
// HACK: Too lazy to enumerate )),
take_while(0.., is_path_query_fragment), (
opt(terminated(url_userinfo, '@')),
take_while(1.., is_domain_char),
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
),
'/',
// HACK: Too lazy to enumerate
take_while(0.., is_path_query_fragment),
)
.recognize(),
) )
.recognize() .parse_next(input)
.parse_next(input)
} }
fn url_userinfo<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn url_userinfo<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -407,12 +435,15 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
( trace(
take_while(1.., is_localport_char), "userinfo",
opt(preceded(':', take_while(0.., is_localport_char))), (
take_while(1.., is_localport_char),
opt(preceded(':', take_while(0.., is_localport_char))),
)
.recognize(),
) )
.recognize() .parse_next(input)
.parse_next(input)
} }
fn c_escape<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn c_escape<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -425,7 +456,11 @@ mod parser {
// regular string that does escaping. The escaped letter might be part of a word, or it // regular string that does escaping. The escaped letter might be part of a word, or it
// might not be. Rather than guess and be wrong part of the time and correct people's words // might not be. Rather than guess and be wrong part of the time and correct people's words
// incorrectly, we opt for just not evaluating it at all. // incorrectly, we opt for just not evaluating it at all.
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)).parse_next(input) trace(
"escape",
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)),
)
.parse_next(input)
} }
fn printf<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn printf<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@ -434,7 +469,7 @@ mod parser {
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
preceded('%', take_while(1.., is_xid_continue)).parse_next(input) trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input)
} }
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, <I as Stream>::Slice, E> fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, <I as Stream>::Slice, E>