From bd5048def52fb840f403cae9a9c91c66b6f1ae2c Mon Sep 17 00:00:00 2001 From: Ed Page Date: Tue, 10 May 2022 13:50:59 -0500 Subject: [PATCH] fix(parser): Allow backslashes after ignore items To allow `\\` to start a token, we couldn't let it end a token. By switching the terminator to a peek, we can now make it end a token **and** start a token, allowing us to work better with Windows paths. Fixes #481 --- crates/typos/src/tokens.rs | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 8a6e596..860d5f3 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -183,15 +183,15 @@ mod parser { // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`, // - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up // - Make sure you always consume it - terminated(uuid_literal, sep1), - terminated(hash_literal, sep1), - terminated(base64_literal, sep1), // base64 should be quoted or something - terminated(ordinal_literal, sep1), - terminated(hex_literal, sep1), - terminated(dec_literal, sep1), // Allow digit-prefixed words - terminated(email_literal, sep1), - terminated(url_literal, sep1), - terminated(css_color, sep1), + terminated(uuid_literal, peek(sep1)), + terminated(hash_literal, peek(sep1)), + terminated(base64_literal, peek(sep1)), // base64 should be quoted or something + terminated(ordinal_literal, peek(sep1)), + terminated(hex_literal, peek(sep1)), + terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words + terminated(email_literal, peek(sep1)), + terminated(url_literal, peek(sep1)), + terminated(css_color, peek(sep1)), c_escape, printf, other, @@ -212,8 +212,12 @@ mod parser { + PartialEq + std::fmt::Debug, ::Item: AsChar + Copy, + ::Item: AsChar + Copy, { - alt((take_while1(is_ignore_char), map(eof, |_| T::default())))(input) + alt(( + recognize(satisfy(|c| !is_xid_continue(c))), + map(eof, 
|_| T::default()), + ))(input) } fn other(input: T) -> IResult @@ -1167,9 +1171,7 @@ mod test { let input = " /// at /rustc/c7087fe00d2ba919df1d813c040a5d47e43b0fe7\\/src\\libstd\\rt.rs:51"; let expected: Vec = vec![ Identifier::new_unchecked("at", Case::None, 25), - Identifier::new_unchecked("rustc", Case::None, 29), - Identifier::new_unchecked("c7087fe00d2ba919df1d813c040a5d47e43b0fe7", Case::None, 35), // BUG: This shouldn't be here - Identifier::new_unchecked("src", Case::None, 77), + // `rustc...` looks like the start of a URL Identifier::new_unchecked("rs", Case::None, 91), ]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();