mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-25 10:31:02 -05:00
fix: Fix multiple escape sequences
If escape sequences immediately follow one another, there is no delimiter in between. In that case, parsing previously stopped and no typos were found in the rest of the file.
This commit is contained in:
parent
141adb6837
commit
3fc6089660
4 changed files with 43 additions and 2 deletions
|
@ -190,8 +190,8 @@ mod parser {
|
||||||
terminated(base64_literal, sep1),
|
terminated(base64_literal, sep1),
|
||||||
terminated(email_literal, sep1),
|
terminated(email_literal, sep1),
|
||||||
terminated(url_literal, sep1),
|
terminated(url_literal, sep1),
|
||||||
terminated(c_escape, sep1),
|
c_escape,
|
||||||
terminated(printf, sep1),
|
printf,
|
||||||
sep1,
|
sep1,
|
||||||
)))(input)
|
)))(input)
|
||||||
}
|
}
|
||||||
|
@ -410,6 +410,10 @@ mod parser {
|
||||||
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
|
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
|
||||||
<T as nom::InputIter>::Item: AsChar + Copy,
|
<T as nom::InputIter>::Item: AsChar + Copy,
|
||||||
{
|
{
|
||||||
|
// We don't know whether the string we are parsing is a literal string (no escaping) or
|
||||||
|
// regular string that does escaping. The escaped letter might be part of a word, or it
|
||||||
|
// might not be. Rather than guess and be wrong part of the time and correct people's words
|
||||||
|
// incorrectly, we opt for just not evaluating it at all.
|
||||||
preceded(take_while1(is_escape), take_while(is_xid_continue))(input)
|
preceded(take_while1(is_escape), take_while(is_xid_continue))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1103,6 +1107,36 @@ mod test {
|
||||||
assert_eq!(expected, actual);
|
assert_eq!(expected, actual);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tokenize_double_escape() {
|
||||||
|
let parser = TokenizerBuilder::new().build();
|
||||||
|
|
||||||
|
let input = "Hello \\n\\n World";
|
||||||
|
let expected: Vec<Identifier> = vec![
|
||||||
|
Identifier::new_unchecked("Hello", Case::None, 0),
|
||||||
|
Identifier::new_unchecked("World", Case::None, 11),
|
||||||
|
];
|
||||||
|
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
|
||||||
|
assert_eq!(expected, actual);
|
||||||
|
let actual: Vec<_> = parser.parse_str(input).collect();
|
||||||
|
assert_eq!(expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tokenize_ignore_escape() {
|
||||||
|
let parser = TokenizerBuilder::new().build();
|
||||||
|
|
||||||
|
let input = "Hello \\nanana\\nanana World";
|
||||||
|
let expected: Vec<Identifier> = vec![
|
||||||
|
Identifier::new_unchecked("Hello", Case::None, 0),
|
||||||
|
Identifier::new_unchecked("World", Case::None, 21),
|
||||||
|
];
|
||||||
|
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
|
||||||
|
assert_eq!(expected, actual);
|
||||||
|
let actual: Vec<_> = parser.parse_str(input).collect();
|
||||||
|
assert_eq!(expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tokenize_printf() {
|
fn tokenize_printf() {
|
||||||
let parser = TokenizerBuilder::new().build();
|
let parser = TokenizerBuilder::new().build();
|
||||||
|
|
2
tests/cmd/double-escaped.stdin
Normal file
2
tests/cmd/double-escaped.stdin
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
\n\n
|
||||||
|
Destory
|
2
tests/cmd/double-escaped.stdout
Normal file
2
tests/cmd/double-escaped.stdout
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
\n\n
|
||||||
|
Destroy
|
3
tests/cmd/double-escaped.toml
Normal file
3
tests/cmd/double-escaped.toml
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
bin.name = "typos"
|
||||||
|
args = "--write-changes -"
|
||||||
|
status.code = 0
|
Loading…
Reference in a new issue