fix: Fix multiple escape sequences

If escape sequences directly follow one another, there is no delimiter in between them. In that case, parsing previously stopped, and typos further in the file were not found.
2024-11-22 09:01:04 -05:00 · 2021-11-15 11:31:40 +01:00 · 2021-11-15 11:31:40 +01:00 · 3fc6089660
commit 3fc6089660
parent 141adb6837
4 changed files with 43 additions and 2 deletions
--- a/crates/typos/src/tokens.rs
+++ b/crates/typos/src/tokens.rs
@ -190,8 +190,8 @@ mod parser {
            terminated(base64_literal, sep1),
            terminated(email_literal, sep1),
            terminated(url_literal, sep1),
-            terminated(c_escape, sep1),
-            terminated(printf, sep1),
+            c_escape,
+            printf,
            sep1,
        )))(input)
    }
@ -410,6 +410,10 @@ mod parser {
        <T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
        <T as nom::InputIter>::Item: AsChar + Copy,
    {
+        // We don't know whether the string we are parsing is a literal string (no escaping) or
+        // a regular string that does escaping. The escaped letter might be part of a word, or it
+        // might not be. Rather than guess and be wrong part of the time and correct people's words
+        // incorrectly, we opt for just not evaluating it at all.
        preceded(take_while1(is_escape), take_while(is_xid_continue))(input)
    }

@ -1103,6 +1107,36 @@ mod test {
        assert_eq!(expected, actual);
    }

+    #[test]
+    fn tokenize_double_escape() {
+        let parser = TokenizerBuilder::new().build();
+
+        let input = "Hello \\n\\n World";
+        let expected: Vec<Identifier> = vec![
+            Identifier::new_unchecked("Hello", Case::None, 0),
+            Identifier::new_unchecked("World", Case::None, 11),
+        ];
+        let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
+        assert_eq!(expected, actual);
+        let actual: Vec<_> = parser.parse_str(input).collect();
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn tokenize_ignore_escape() {
+        let parser = TokenizerBuilder::new().build();
+
+        let input = "Hello \\nanana\\nanana World";
+        let expected: Vec<Identifier> = vec![
+            Identifier::new_unchecked("Hello", Case::None, 0),
+            Identifier::new_unchecked("World", Case::None, 21),
+        ];
+        let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
+        assert_eq!(expected, actual);
+        let actual: Vec<_> = parser.parse_str(input).collect();
+        assert_eq!(expected, actual);
+    }
+
    #[test]
    fn tokenize_printf() {
        let parser = TokenizerBuilder::new().build();
--- a/tests/cmd/double-escaped.stdin
+++ b/tests/cmd/double-escaped.stdin
@ -0,0 +1,2 @@
+\n\n
+Destory
--- a/tests/cmd/double-escaped.stdout
+++ b/tests/cmd/double-escaped.stdout
@ -0,0 +1,2 @@
+\n\n
+Destroy
--- a/tests/cmd/double-escaped.toml
+++ b/tests/cmd/double-escaped.toml
@ -0,0 +1,3 @@
+bin.name = "typos"
+args = "--write-changes -"
+status.code = 0