fix: Fix multiple escape sequences

If escape sequences directly follow one another, there is no delimiter between them. In that case, parsing previously stopped, and typos further along in the file were not found.
2024-11-22 17:11:07 -05:00 · 2021-11-15 11:31:40 +01:00 · 2021-11-15 11:31:40 +01:00 · 3fc6089660
commit 3fc6089660
parent 141adb6837
4 changed files with 43 additions and 2 deletions
--- a/crates/typos/src/tokens.rs
+++ b/crates/typos/src/tokens.rs
@ -190,8 +190,8 @@ mod parser {
            terminated(base64_literal, sep1),
            terminated(email_literal, sep1),
            terminated(url_literal, sep1),
-            terminated(c_escape, sep1),
+            c_escape,
-            terminated(printf, sep1),
+            printf,
            sep1,
        )))(input)
    }
@ -410,6 +410,10 @@ mod parser {
        <T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
        <T as nom::InputIter>::Item: AsChar + Copy,
    {
        // We don't know whether the string we are parsing is a literal string (no escaping) or
        // a regular string that does escaping. The escaped letter might be part of a word, or it
        // might not be. Rather than guess and be wrong part of the time and correct people's words
        // incorrectly, we opt for just not evaluating it at all.
        preceded(take_while1(is_escape), take_while(is_xid_continue))(input)
    }
@ -1103,6 +1107,36 @@ mod test {
        assert_eq!(expected, actual);
    }
    /// Two escape sequences placed back to back (`\n\n`) have no separator
    /// between them; the identifier that follows must still be reported, and
    /// at the correct byte offset, by both the byte and the string entry points.
    #[test]
    fn tokenize_double_escape() {
        let tokenizer = TokenizerBuilder::new().build();
        let text = "Hello \\n\\n World";
        let want: Vec<Identifier> = vec![
            Identifier::new_unchecked("Hello", Case::None, 0),
            Identifier::new_unchecked("World", Case::None, 11),
        ];

        // Byte-oriented parsing.
        let from_bytes: Vec<_> = tokenizer.parse_bytes(text.as_bytes()).collect();
        assert_eq!(want, from_bytes);

        // String-oriented parsing must yield the same identifiers.
        let from_str: Vec<_> = tokenizer.parse_str(text).collect();
        assert_eq!(want, from_str);
    }
    /// Word characters glued to an escape sequence (`\nanana`) are skipped
    /// together with the escape rather than being reported as identifiers;
    /// only the surrounding words are expected, from both entry points.
    #[test]
    fn tokenize_ignore_escape() {
        let tokenizer = TokenizerBuilder::new().build();
        let text = "Hello \\nanana\\nanana World";
        let want: Vec<Identifier> = vec![
            Identifier::new_unchecked("Hello", Case::None, 0),
            Identifier::new_unchecked("World", Case::None, 21),
        ];

        // Byte-oriented parsing.
        let from_bytes: Vec<_> = tokenizer.parse_bytes(text.as_bytes()).collect();
        assert_eq!(want, from_bytes);

        // String-oriented parsing must yield the same identifiers.
        let from_str: Vec<_> = tokenizer.parse_str(text).collect();
        assert_eq!(want, from_str);
    }
    #[test]
    fn tokenize_printf() {
        let parser = TokenizerBuilder::new().build();
--- a/tests/cmd/double-escaped.stdin
+++ b/tests/cmd/double-escaped.stdin
@ -0,0 +1,2 @@
 \n\n
 Destory
--- a/tests/cmd/double-escaped.stdout
+++ b/tests/cmd/double-escaped.stdout
@ -0,0 +1,2 @@
 \n\n
 Destroy
--- a/tests/cmd/double-escaped.toml
+++ b/tests/cmd/double-escaped.toml
@ -0,0 +1,3 @@
 bin.name = "typos"
 args = "--write-changes -"
 status.code = 0