From e98fc52b0df66e7ddb7e08ea5ae537888a5fd3e7 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 14 Jul 2023 12:32:07 -0500 Subject: [PATCH] chore(typos): Add parse tracing --- crates/typos/src/tokens.rs | 269 +++++++++++++++++++++---------------- 1 file changed, 152 insertions(+), 117 deletions(-) diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 6ac6ce1..d36c94c 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -133,6 +133,7 @@ mod parser { use winnow::stream::Stream; use winnow::stream::StreamIsPartial; use winnow::token::*; + use winnow::trace::trace; pub(crate) fn next_identifier(input: T) -> IResult::Slice> where @@ -153,7 +154,7 @@ mod parser { // `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd // or unexpected cases than strip off start characters to a word since we aren't doing a // proper word boundary parse - take_while(1.., is_xid_continue).parse_next(input) + trace("identifier", take_while(1.., is_xid_continue)).parse_next(input) } fn ignore(input: T) -> IResult::Slice> @@ -162,23 +163,26 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - take_many0(alt(( - // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`, - // - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up - // - Make sure you always consume it - terminated(uuid_literal, peek(sep1)), - terminated(hash_literal, peek(sep1)), - terminated(base64_literal, peek(sep1)), // base64 should be quoted or something - terminated(ordinal_literal, peek(sep1)), - terminated(hex_literal, peek(sep1)), - terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words - terminated(email_literal, peek(sep1)), - terminated(url_literal, peek(sep1)), - terminated(css_color, peek(sep1)), - c_escape, - printf, - other, - ))) + trace( + "ignore", + take_many0(alt(( + // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`, + // - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up + // - Make sure you always consume it + terminated(uuid_literal, peek(sep1)), + terminated(hash_literal, peek(sep1)), + terminated(base64_literal, peek(sep1)), // base64 should be quoted or something + terminated(ordinal_literal, peek(sep1)), + terminated(hex_literal, peek(sep1)), + terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words + terminated(email_literal, peek(sep1)), + terminated(url_literal, peek(sep1)), + terminated(css_color, peek(sep1)), + c_escape, + printf, + other, + ))), + ) .parse_next(input) } @@ -201,12 +205,15 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - ( - one_of(|c| !is_xid_continue(c)), - take_while(0.., is_ignore_char), + trace( + "other", + ( + one_of(|c| !is_xid_continue(c)), + take_while(0.., is_ignore_char), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } fn ordinal_literal(input: T) -> IResult::Slice> @@ -221,14 +228,17 @@ mod parser { ['_'].contains(&c) } - ( - take_while(0.., is_sep), - take_while(1.., is_dec_digit), - alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))), - take_while(0.., is_sep), + trace( + "ordinal_literal", + ( + take_while(0.., is_sep), + take_while(1.., is_dec_digit), + alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))), + take_while(0.., is_sep), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } fn dec_literal(input: T) -> IResult::Slice> @@ -237,7 +247,7 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - take_while(1.., is_dec_digit_with_sep).parse_next(input) + trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input) } fn hex_literal(input: T) -> IResult::Slice> @@ -259,12 +269,15 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - preceded( - '#', - alt(( - terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)), - terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)), - )), + trace( + "color", + preceded( + '#', + alt(( + terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)), + terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)), + )), + ), ) .parse_next(input) } @@ -275,31 +288,34 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - alt(( - ( - take_while(8, is_lower_hex_digit), - '-', - take_while(4, is_lower_hex_digit), - '-', - take_while(4, is_lower_hex_digit), - '-', - take_while(4, is_lower_hex_digit), - '-', - take_while(12, is_lower_hex_digit), - ), - ( - take_while(8, is_upper_hex_digit), - '-', - take_while(4, is_upper_hex_digit), - '-', - take_while(4, is_upper_hex_digit), - '-', - take_while(4, is_upper_hex_digit), - '-', - take_while(12, is_upper_hex_digit), - ), - )) - .recognize() + trace( + "uuid", + alt(( + ( + take_while(8, is_lower_hex_digit), + '-', + take_while(4, is_lower_hex_digit), + '-', + take_while(4, is_lower_hex_digit), + '-', + take_while(4, is_lower_hex_digit), + '-', + take_while(12, is_lower_hex_digit), + ), + ( + take_while(8, is_upper_hex_digit), + '-', + take_while(4, is_upper_hex_digit), + '-', + take_while(4, is_upper_hex_digit), + '-', + take_while(4, is_upper_hex_digit), + '-', + take_while(12, is_upper_hex_digit), + ), + )) + .recognize(), + ) .parse_next(input) } @@ -319,10 +335,13 @@ mod parser { // or more. const IGNORE_HEX_MIN: usize = 32; - alt(( - take_while(IGNORE_HEX_MIN.., is_lower_hex_digit), - take_while(IGNORE_HEX_MIN.., is_upper_hex_digit), - )) + trace( + "hash", + alt(( + take_while(IGNORE_HEX_MIN.., is_lower_hex_digit), + take_while(IGNORE_HEX_MIN.., is_upper_hex_digit), + )), + ) .parse_next(input) } @@ -332,32 +351,35 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?; + trace("base64", move |input: T| { + let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?; - const CHUNK: usize = 4; - let padding_offset = input.offset_to(&padding); - let mut padding_len = CHUNK - padding_offset % CHUNK; - if padding_len == CHUNK { - padding_len = 0; - } + const CHUNK: usize = 4; + let padding_offset = input.offset_to(&padding); + let mut padding_len = CHUNK - padding_offset % CHUNK; + if padding_len == CHUNK { + padding_len = 0; + } - if captured.slice_len() < 90 - && padding_len == 0 - && captured - .as_bstr() - .iter() - .all(|c| !['/', '+'].contains(&c.as_char())) - { - return Err(winnow::error::ErrMode::Backtrack( - winnow::error::Error::new(input, winnow::error::ErrorKind::Slice), - )); - } + if captured.slice_len() < 90 + && padding_len == 0 + && captured + .as_bstr() + .iter() + .all(|c| !['/', '+'].contains(&c.as_char())) + { + return Err(winnow::error::ErrMode::Backtrack( + winnow::error::Error::new(input, winnow::error::ErrorKind::Slice), + )); + } - let (after, _) = - take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?; + let (after, _) = + take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?; - let after_offset = input.offset_to(&after); - Ok(input.next_slice(after_offset)) + let after_offset = input.offset_to(&after); + Ok(input.next_slice(after_offset)) + }) + .parse_next(input) } fn email_literal(input: T) -> IResult::Slice> @@ -366,13 +388,16 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - ( - take_while(1.., is_localport_char), - '@', - take_while(1.., is_domain_char), + trace( + "email", + ( + take_while(1.., is_localport_char), + '@', + take_while(1.., is_domain_char), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } fn url_literal(input: T) -> IResult::Slice> @@ -381,24 +406,27 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - ( - opt(terminated( - take_while(1.., is_scheme_char), - // HACK: Technically you can skip `//` if you don't have a domain but that would - // get messy to support. - (':', '/', '/'), - )), + trace( + "url", ( - opt(terminated(url_userinfo, '@')), - take_while(1.., is_domain_char), - opt(preceded(':', take_while(1.., AsChar::is_dec_digit))), - ), - '/', - // HACK: Too lazy to enumerate - take_while(0.., is_path_query_fragment), + opt(terminated( + take_while(1.., is_scheme_char), + // HACK: Technically you can skip `//` if you don't have a domain but that would + // get messy to support. + (':', '/', '/'), + )), + ( + opt(terminated(url_userinfo, '@')), + take_while(1.., is_domain_char), + opt(preceded(':', take_while(1.., AsChar::is_dec_digit))), + ), + '/', + // HACK: Too lazy to enumerate + take_while(0.., is_path_query_fragment), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } fn url_userinfo(input: T) -> IResult::Slice> @@ -407,12 +435,15 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - ( - take_while(1.., is_localport_char), - opt(preceded(':', take_while(0.., is_localport_char))), + trace( + "userinfo", + ( + take_while(1.., is_localport_char), + opt(preceded(':', take_while(0.., is_localport_char))), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } fn c_escape(input: T) -> IResult::Slice> @@ -425,7 +456,11 @@ mod parser { // regular string that does escaping. The escaped letter might be part of a word, or it // might not be. Rather than guess and be wrong part of the time and correct people's words // incorrectly, we opt for just not evaluating it at all. - preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)).parse_next(input) + trace( + "escape", + preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)), + ) + .parse_next(input) } fn printf(input: T) -> IResult::Slice> @@ -434,7 +469,7 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - preceded('%', take_while(1.., is_xid_continue)).parse_next(input) + trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input) } fn take_many0(mut f: F) -> impl FnMut(I) -> IResult::Slice, E>