Merge pull request #374 from Flakebi/fix-escape

Fix multiple escape sequences
This commit is contained in:
Ed Page 2021-11-15 08:18:41 -06:00 committed by GitHub
commit 3ca0aed0a7
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 46 additions and 5 deletions

4
Cargo.lock generated
View file

@ -1499,9 +1499,9 @@ checksum = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
[[package]] [[package]]
name = "trycmd" name = "trycmd"
version = "0.4.0" version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b15571a9c85f2dc93e93907813b8b82583a60c1ee738ef5fa5123f4c96863b5" checksum = "2d034538089e906ac14df42c19aae52a55aa1014102d6895d9748080e043cc48"
dependencies = [ dependencies = [
"concolor-control", "concolor-control",
"difflib", "difflib",

View file

@ -94,7 +94,7 @@ maplit = "1.0"
[dev-dependencies] [dev-dependencies]
assert_fs = "1.0" assert_fs = "1.0"
trycmd = "0.4" trycmd = "0.5.1"
criterion = "0.3" criterion = "0.3"
[profile.dev] [profile.dev]

View file

@ -190,8 +190,8 @@ mod parser {
terminated(base64_literal, sep1), terminated(base64_literal, sep1),
terminated(email_literal, sep1), terminated(email_literal, sep1),
terminated(url_literal, sep1), terminated(url_literal, sep1),
terminated(c_escape, sep1), c_escape,
terminated(printf, sep1), printf,
sep1, sep1,
)))(input) )))(input)
} }
@ -410,6 +410,10 @@ mod parser {
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy, <T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy, <T as nom::InputIter>::Item: AsChar + Copy,
{ {
// We don't know whether the string we are parsing is a literal string (no escaping) or a
// regular string that does escaping. The escaped letter might be part of a word, or it
// might not be. Rather than guess, be wrong part of the time, and correct people's words
// incorrectly, we opt for just not evaluating it at all.
preceded(take_while1(is_escape), take_while(is_xid_continue))(input) preceded(take_while1(is_escape), take_while(is_xid_continue))(input)
} }
@ -1103,6 +1107,36 @@ mod test {
assert_eq!(expected, actual); assert_eq!(expected, actual);
} }
#[test]
fn tokenize_double_escape() {
let parser = TokenizerBuilder::new().build();
let input = "Hello \\n\\n World";
let expected: Vec<Identifier> = vec![
Identifier::new_unchecked("Hello", Case::None, 0),
Identifier::new_unchecked("World", Case::None, 11),
];
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
assert_eq!(expected, actual);
let actual: Vec<_> = parser.parse_str(input).collect();
assert_eq!(expected, actual);
}
#[test]
fn tokenize_ignore_escape() {
let parser = TokenizerBuilder::new().build();
let input = "Hello \\nanana\\nanana World";
let expected: Vec<Identifier> = vec![
Identifier::new_unchecked("Hello", Case::None, 0),
Identifier::new_unchecked("World", Case::None, 21),
];
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
assert_eq!(expected, actual);
let actual: Vec<_> = parser.parse_str(input).collect();
assert_eq!(expected, actual);
}
#[test] #[test]
fn tokenize_printf() { fn tokenize_printf() {
let parser = TokenizerBuilder::new().build(); let parser = TokenizerBuilder::new().build();

View file

@ -0,0 +1,2 @@
\n\n
Destory

View file

@ -0,0 +1,2 @@
\n\n
Destroy

View file

@ -0,0 +1,3 @@
bin.name = "typos"
args = "--write-changes -"
status.code = 0