From e98fc52b0df66e7ddb7e08ea5ae537888a5fd3e7 Mon Sep 17 00:00:00 2001
From: Ed Page <eopage@gmail.com>
Date: Fri, 14 Jul 2023 12:32:07 -0500
Subject: [PATCH] chore(typos): Add parse tracing

---
 crates/typos/src/tokens.rs | 269 +++++++++++++++++++++----------------
 1 file changed, 152 insertions(+), 117 deletions(-)
diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs
index 6ac6ce1..d36c94c 100644
--- a/crates/typos/src/tokens.rs
+++ b/crates/typos/src/tokens.rs
@@ -133,6 +133,7 @@ mod parser {
     use winnow::stream::Stream;
     use winnow::stream::StreamIsPartial;
     use winnow::token::*;
+    use winnow::trace::trace;
 
     pub(crate) fn next_identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
     where
@@ -153,7 +154,7 @@ mod parser {
         // `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
         // or unexpected cases than strip off start characters to a word since we aren't doing a
         // proper word boundary parse
-        take_while(1.., is_xid_continue).parse_next(input)
+        trace("identifier", take_while(1.., is_xid_continue)).parse_next(input)
     }
 
     fn ignore<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@@ -162,23 +163,26 @@ mod parser {
         <T as Stream>::Slice: AsBStr + SliceLen + Default,
         <T as Stream>::Token: AsChar + Copy,
     {
-        take_many0(alt((
-            // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
-            // - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
-            // - Make sure you always consume it
-            terminated(uuid_literal, peek(sep1)),
-            terminated(hash_literal, peek(sep1)),
-            terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
-            terminated(ordinal_literal, peek(sep1)),
-            terminated(hex_literal, peek(sep1)),
-            terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
-            terminated(email_literal, peek(sep1)),
-            terminated(url_literal, peek(sep1)),
-            terminated(css_color, peek(sep1)),
-            c_escape,
-            printf,
-            other,
-        )))
+        trace(
+            "ignore",
+            take_many0(alt((
+                // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
+                // - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
+                // - Make sure you always consume it
+                terminated(uuid_literal, peek(sep1)),
+                terminated(hash_literal, peek(sep1)),
+                terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
+                terminated(ordinal_literal, peek(sep1)),
+                terminated(hex_literal, peek(sep1)),
+                terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
+                terminated(email_literal, peek(sep1)),
+                terminated(url_literal, peek(sep1)),
+                terminated(css_color, peek(sep1)),
+                c_escape,
+                printf,
+                other,
+            ))),
+        )
         .parse_next(input)
     }
 
@@ -201,12 +205,15 @@ mod parser {
         <T as Stream>::Slice: AsBStr + SliceLen + Default,
         <T as Stream>::Token: AsChar + Copy,
     {
-        (
-            one_of(|c| !is_xid_continue(c)),
-            take_while(0.., is_ignore_char),
+        trace(
+            "other",
+            (
+                one_of(|c| !is_xid_continue(c)),
+                take_while(0.., is_ignore_char),
+            )
+                .recognize(),
         )
-            .recognize()
-            .parse_next(input)
+        .parse_next(input)
     }
 
     fn ordinal_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@@ -221,14 +228,17 @@ mod parser {
             ['_'].contains(&c)
         }
 
-        (
-            take_while(0.., is_sep),
-            take_while(1.., is_dec_digit),
-            alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
-            take_while(0.., is_sep),
+        trace(
+            "ordinal_literal",
+            (
+                take_while(0.., is_sep),
+                take_while(1.., is_dec_digit),
+                alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
+                take_while(0.., is_sep),
+            )
+                .recognize(),
         )
-            .recognize()
-            .parse_next(input)
+        .parse_next(input)
     }
 
     fn dec_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@@ -237,7 +247,7 @@ mod parser {
         <T as Stream>::Slice: AsBStr + SliceLen + Default,
         <T as Stream>::Token: AsChar + Copy,
     {
-        take_while(1.., is_dec_digit_with_sep).parse_next(input)
+        trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input)
     }
 
     fn hex_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@@ -259,12 +269,15 @@ mod parser {
         <T as Stream>::Slice: AsBStr + SliceLen + Default,
         <T as Stream>::Token: AsChar + Copy,
     {
-        preceded(
-            '#',
-            alt((
-                terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
-                terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
-            )),
+        trace(
+            "color",
+            preceded(
+                '#',
+                alt((
+                    terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
+                    terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
+                )),
+            ),
         )
         .parse_next(input)
     }
@@ -275,31 +288,34 @@ mod parser {
         <T as Stream>::Slice: AsBStr + SliceLen + Default,
         <T as Stream>::Token: AsChar + Copy,
     {
-        alt((
-            (
-                take_while(8, is_lower_hex_digit),
-                '-',
-                take_while(4, is_lower_hex_digit),
-                '-',
-                take_while(4, is_lower_hex_digit),
-                '-',
-                take_while(4, is_lower_hex_digit),
-                '-',
-                take_while(12, is_lower_hex_digit),
-            ),
-            (
-                take_while(8, is_upper_hex_digit),
-                '-',
-                take_while(4, is_upper_hex_digit),
-                '-',
-                take_while(4, is_upper_hex_digit),
-                '-',
-                take_while(4, is_upper_hex_digit),
-                '-',
-                take_while(12, is_upper_hex_digit),
-            ),
-        ))
-        .recognize()
+        trace(
+            "uuid",
+            alt((
+                (
+                    take_while(8, is_lower_hex_digit),
+                    '-',
+                    take_while(4, is_lower_hex_digit),
+                    '-',
+                    take_while(4, is_lower_hex_digit),
+                    '-',
+                    take_while(4, is_lower_hex_digit),
+                    '-',
+                    take_while(12, is_lower_hex_digit),
+                ),
+                (
+                    take_while(8, is_upper_hex_digit),
+                    '-',
+                    take_while(4, is_upper_hex_digit),
+                    '-',
+                    take_while(4, is_upper_hex_digit),
+                    '-',
+                    take_while(4, is_upper_hex_digit),
+                    '-',
+                    take_while(12, is_upper_hex_digit),
+                ),
+            ))
+            .recognize(),
+        )
         .parse_next(input)
     }
 
@@ -319,10 +335,13 @@ mod parser {
         //     or more.
 
         const IGNORE_HEX_MIN: usize = 32;
-        alt((
-            take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
-            take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
-        ))
+        trace(
+            "hash",
+            alt((
+                take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
+                take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
+            )),
+        )
         .parse_next(input)
     }
 
@@ -332,32 +351,35 @@ mod parser {
         <T as Stream>::Slice: AsBStr + SliceLen + Default,
         <T as Stream>::Token: AsChar + Copy,
     {
-        let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?;
+        trace("base64", move |input: T| {
+            let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?;
 
-        const CHUNK: usize = 4;
-        let padding_offset = input.offset_to(&padding);
-        let mut padding_len = CHUNK - padding_offset % CHUNK;
-        if padding_len == CHUNK {
-            padding_len = 0;
-        }
+            const CHUNK: usize = 4;
+            let padding_offset = input.offset_to(&padding);
+            let mut padding_len = CHUNK - padding_offset % CHUNK;
+            if padding_len == CHUNK {
+                padding_len = 0;
+            }
 
-        if captured.slice_len() < 90
-            && padding_len == 0
-            && captured
-                .as_bstr()
-                .iter()
-                .all(|c| !['/', '+'].contains(&c.as_char()))
-        {
-            return Err(winnow::error::ErrMode::Backtrack(
-                winnow::error::Error::new(input, winnow::error::ErrorKind::Slice),
-            ));
-        }
+            if captured.slice_len() < 90
+                && padding_len == 0
+                && captured
+                    .as_bstr()
+                    .iter()
+                    .all(|c| !['/', '+'].contains(&c.as_char()))
+            {
+                return Err(winnow::error::ErrMode::Backtrack(
+                    winnow::error::Error::new(input, winnow::error::ErrorKind::Slice),
+                ));
+            }
 
-        let (after, _) =
-            take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
+            let (after, _) =
+                take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
 
-        let after_offset = input.offset_to(&after);
-        Ok(input.next_slice(after_offset))
+            let after_offset = input.offset_to(&after);
+            Ok(input.next_slice(after_offset))
+        })
+        .parse_next(input)
     }
 
     fn email_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@@ -366,13 +388,16 @@ mod parser {
         <T as Stream>::Slice: AsBStr + SliceLen + Default,
         <T as Stream>::Token: AsChar + Copy,
     {
-        (
-            take_while(1.., is_localport_char),
-            '@',
-            take_while(1.., is_domain_char),
+        trace(
+            "email",
+            (
+                take_while(1.., is_localport_char),
+                '@',
+                take_while(1.., is_domain_char),
+            )
+                .recognize(),
         )
-            .recognize()
-            .parse_next(input)
+        .parse_next(input)
     }
 
     fn url_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@@ -381,24 +406,27 @@ mod parser {
         <T as Stream>::Slice: AsBStr + SliceLen + Default,
         <T as Stream>::Token: AsChar + Copy,
     {
-        (
-            opt(terminated(
-                take_while(1.., is_scheme_char),
-                // HACK: Technically you can skip `//` if you don't have a domain but that would
-                // get messy to support.
-                (':', '/', '/'),
-            )),
+        trace(
+            "url",
             (
-                opt(terminated(url_userinfo, '@')),
-                take_while(1.., is_domain_char),
-                opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
-            ),
-            '/',
-            // HACK: Too lazy to enumerate
-            take_while(0.., is_path_query_fragment),
+                opt(terminated(
+                    take_while(1.., is_scheme_char),
+                    // HACK: Technically you can skip `//` if you don't have a domain but that would
+                    // get messy to support.
+                    (':', '/', '/'),
+                )),
+                (
+                    opt(terminated(url_userinfo, '@')),
+                    take_while(1.., is_domain_char),
+                    opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
+                ),
+                '/',
+                // HACK: Too lazy to enumerate
+                take_while(0.., is_path_query_fragment),
+            )
+                .recognize(),
         )
-            .recognize()
-            .parse_next(input)
+        .parse_next(input)
     }
 
     fn url_userinfo<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@@ -407,12 +435,15 @@ mod parser {
         <T as Stream>::Slice: AsBStr + SliceLen + Default,
         <T as Stream>::Token: AsChar + Copy,
     {
-        (
-            take_while(1.., is_localport_char),
-            opt(preceded(':', take_while(0.., is_localport_char))),
+        trace(
+            "userinfo",
+            (
+                take_while(1.., is_localport_char),
+                opt(preceded(':', take_while(0.., is_localport_char))),
+            )
+                .recognize(),
         )
-            .recognize()
-            .parse_next(input)
+        .parse_next(input)
     }
 
     fn c_escape<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@@ -425,7 +456,11 @@ mod parser {
         // regular string that does escaping. The escaped letter might be part of a word, or it
         // might not be. Rather than guess and be wrong part of the time and correct people's words
         // incorrectly, we opt for just not evaluating it at all.
-        preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)).parse_next(input)
+        trace(
+            "escape",
+            preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)),
+        )
+        .parse_next(input)
     }
 
     fn printf<T>(input: T) -> IResult<T, <T as Stream>::Slice>
@@ -434,7 +469,7 @@ mod parser {
         <T as Stream>::Slice: AsBStr + SliceLen + Default,
         <T as Stream>::Token: AsChar + Copy,
     {
-        preceded('%', take_while(1.., is_xid_continue)).parse_next(input)
+        trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input)
     }
 
     fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, <I as Stream>::Slice, E>