fix: Fix multiple escape sequences

If escape sequences immediately follow one another, there is no
delimiter between them.
In such a case, parsing previously stopped and did not find any
typos further in the file.
This commit is contained in:
Neubauer, Sebastian 2021-11-15 11:31:40 +01:00
parent 141adb6837
commit 3fc6089660
4 changed files with 43 additions and 2 deletions

View file

@ -190,8 +190,8 @@ mod parser {
terminated(base64_literal, sep1),
terminated(email_literal, sep1),
terminated(url_literal, sep1),
terminated(c_escape, sep1),
terminated(printf, sep1),
c_escape,
printf,
sep1,
)))(input)
}
@ -410,6 +410,10 @@ mod parser {
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{
// We don't know whether the string we are parsing is a literal string (no escaping) or
// regular string that does escaping. The escaped letter might be part of a word, or it
// might not be. Rather than guess and be wrong part of the time and correct people's words
// incorrectly, we opt for just not evaluating it at all.
preceded(take_while1(is_escape), take_while(is_xid_continue))(input)
}
@ -1103,6 +1107,36 @@ mod test {
assert_eq!(expected, actual);
}
#[test]
fn tokenize_double_escape() {
    // Two escape sequences back to back ("\n\n") have no separator
    // between them; the tokenizer must still pick up the word that
    // follows instead of aborting the parse.
    let tokenizer = TokenizerBuilder::new().build();
    let text = "Hello \\n\\n World";

    // "World" starts at byte 11: "Hello " (6) + "\n\n" (4) + " " (1).
    let expected = vec![
        Identifier::new_unchecked("Hello", Case::None, 0),
        Identifier::new_unchecked("World", Case::None, 11),
    ];

    let from_bytes: Vec<_> = tokenizer.parse_bytes(text.as_bytes()).collect();
    assert_eq!(expected, from_bytes);

    let from_str: Vec<_> = tokenizer.parse_str(text).collect();
    assert_eq!(expected, from_str);
}
#[test]
fn tokenize_ignore_escape() {
    // An escape sequence glued onto a word ("\nanana") could be either a
    // real escape or part of a literal string; the tokenizer deliberately
    // skips the whole run rather than guess, so "nanana" is not reported.
    let tokenizer = TokenizerBuilder::new().build();
    let text = "Hello \\nanana\\nanana World";

    // "World" starts at byte 21: "Hello " (6) + "\nanana\nanana" (14) + " " (1).
    let expected = vec![
        Identifier::new_unchecked("Hello", Case::None, 0),
        Identifier::new_unchecked("World", Case::None, 21),
    ];

    let from_bytes: Vec<_> = tokenizer.parse_bytes(text.as_bytes()).collect();
    assert_eq!(expected, from_bytes);

    let from_str: Vec<_> = tokenizer.parse_str(text).collect();
    assert_eq!(expected, from_str);
}
#[test]
fn tokenize_printf() {
let parser = TokenizerBuilder::new().build();

View file

@ -0,0 +1,2 @@
\n\n
Destory

View file

@ -0,0 +1,2 @@
\n\n
Destroy

View file

@ -0,0 +1,3 @@
bin.name = "typos"
args = "--write-changes -"
status.code = 0