From dc42232bba9c26afb12a6e7e36bf60007cfddb77 Mon Sep 17 00:00:00 2001
From: Ed Page
Date: Wed, 10 Jul 2024 11:47:12 -0500
Subject: [PATCH 1/4] test(tokens): Use snapshot testing
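
Replace the hand-maintained `expected` vectors with snapbox snapshot
assertions: each test renders the parsed identifiers with `to_debug()`
and compares the result against an inline `str![[...]]` snapshot, so a
behavior change shows up as a readable diff instead of a hand-edited
vector.

The pattern, as a rough sketch (snapbox 0.6; `SNAPSHOTS=overwrite` is
snapbox's documented way to regenerate inline snapshots):

    use snapbox::prelude::*; // brings `to_debug()` into scope
    use snapbox::{assert_data_eq, str};

    #[test]
    fn snapshot_example() {
        let actual = vec![1, 2, 3];
        // On mismatch this prints a diff; re-running the tests with
        // `SNAPSHOTS=overwrite` rewrites the snapshot text in place.
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
[
    1,
    2,
    3,
]

"#]]
        );
    }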
input = "A.B"; - let expected: Vec> = vec![ - Identifier::new_unchecked("A", Case::None, 0), - Identifier::new_unchecked("B", Case::None, 2), - ]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "A", + case: None, + offset: 0, + }, + Identifier { + token: "B", + case: None, + offset: 2, + }, +] + +"#]] + ); let actual: Vec<_> = parser.parse_str(input).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "A", + case: None, + offset: 0, + }, + Identifier { + token: "B", + case: None, + offset: 2, + }, +] + +"#]] + ); } #[test] @@ -998,14 +1095,44 @@ mod test { let parser = Tokenizer::new(); let input = "A::B"; - let expected: Vec> = vec![ - Identifier::new_unchecked("A", Case::None, 0), - Identifier::new_unchecked("B", Case::None, 3), - ]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "A", + case: None, + offset: 0, + }, + Identifier { + token: "B", + case: None, + offset: 3, + }, +] + +"#]] + ); let actual: Vec<_> = parser.parse_str(input).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "A", + case: None, + offset: 0, + }, + Identifier { + token: "B", + case: None, + offset: 3, + }, +] + +"#]] + ); } #[test] @@ -1013,11 +1140,34 @@ mod test { let parser = Tokenizer::new(); let input = "A_B"; - let expected: Vec> = vec![Identifier::new_unchecked("A_B", Case::None, 0)]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "A_B", + case: None, + offset: 0, + }, +] + +"#]] + ); let actual: Vec<_> = parser.parse_str(input).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "A_B", + case: None, + offset: 0, + }, +] + +"#]] + ); } #[test] @@ -1025,14 +1175,44 @@ mod test { let parser = TokenizerBuilder::new().build(); let input = "Hello 1st 2nd 3rd 4th __5th__ World"; - let expected: Vec> = vec![ - Identifier::new_unchecked("Hello", Case::None, 0), - Identifier::new_unchecked("World", Case::None, 30), - ]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "Hello", + case: None, + offset: 0, + }, + Identifier { + token: "World", + case: None, + offset: 30, + }, +] + +"#]] + ); let actual: Vec<_> = parser.parse_str(input).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "Hello", + case: None, + offset: 0, + }, + Identifier { + token: "World", + case: None, + offset: 30, + }, +] + +"#]] + ); } #[test] @@ -1040,14 +1220,44 @@ mod test { let parser = TokenizerBuilder::new().build(); let input = "Hello 0xDEADBEEF World"; - let expected: Vec> = vec![ - Identifier::new_unchecked("Hello", Case::None, 0), - Identifier::new_unchecked("World", Case::None, 17), - ]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "Hello", + case: None, + offset: 0, + }, + Identifier { + token: "World", 
+        case: None,
+        offset: 17,
+    },
+]
+
+"#]]
+        );
         let actual: Vec<_> = parser.parse_str(input).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Hello",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "World",
+        case: None,
+        offset: 17,
+    },
+]
+
+"#]]
+        );
     }
 
     #[test]
@@ -1055,14 +1265,44 @@ mod test {
         let parser = TokenizerBuilder::new().build();
 
         let input = "Hello 123e4567-e89b-12d3-a456-426652340000 World";
-        let expected: Vec<Identifier<'_>> = vec![
-            Identifier::new_unchecked("Hello", Case::None, 0),
-            Identifier::new_unchecked("World", Case::None, 43),
-        ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Hello",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "World",
+        case: None,
+        offset: 43,
+    },
+]
+
+"#]]
+        );
         let actual: Vec<_> = parser.parse_str(input).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Hello",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "World",
+        case: None,
+        offset: 43,
+    },
+]
+
+"#]]
+        );
     }
 
     #[test]
@@ -1070,14 +1310,44 @@ mod test {
         let parser = TokenizerBuilder::new().build();
 
         let input = "Hello 123E4567-E89B-12D3-A456-426652340000 World";
-        let expected: Vec<Identifier<'_>> = vec![
-            Identifier::new_unchecked("Hello", Case::None, 0),
-            Identifier::new_unchecked("World", Case::None, 43),
-        ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Hello",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "World",
+        case: None,
+        offset: 43,
+    },
+]
+
+"#]]
+        );
         let actual: Vec<_> = parser.parse_str(input).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Hello",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "World",
+        case: None,
+        offset: 43,
+    },
+]
+
+"#]]
+        );
     }
 
     #[test]
@@ -1107,9 +1377,9 @@ mod test {
                 expected.insert(1, Identifier::new_unchecked(hashlike, Case::None, 6));
             }
             let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
-            assert_eq!(expected, actual);
+            assert_data_eq!(actual.to_debug(), expected.to_debug());
            let actual: Vec<_> = parser.parse_str(&input).collect();
-            assert_eq!(expected, actual);
+            assert_data_eq!(actual.to_debug(), expected.to_debug());
         }
     }
 
@@ -1117,16 +1387,46 @@ mod test {
     fn tokenize_hash_in_mixed_path() {
         let parser = TokenizerBuilder::new().build();
 
+        // `rustc...` looks like the start of a URL
         let input = " /// at /rustc/c7087fe00d2ba919df1d813c040a5d47e43b0fe7\\/src\\libstd\\rt.rs:51";
-        let expected: Vec<Identifier<'_>> = vec![
-            Identifier::new_unchecked("at", Case::None, 25),
-            // `rustc...` looks like the start of a URL
-            Identifier::new_unchecked("rs", Case::None, 91),
-        ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "at",
+        case: None,
+        offset: 25,
+    },
+    Identifier {
+        token: "rs",
+        case: None,
+        offset: 91,
+    },
+]
+
+"#]]
+        );
         let actual: Vec<_> = parser.parse_str(input).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "at",
+        case: None,
+        offset: 25,
+    },
+    Identifier {
+        token: "rs",
+        case: None,
+        offset: 91,
+    },
+]
+ +"#]] + ); } #[test] @@ -1134,14 +1434,44 @@ mod test { let parser = TokenizerBuilder::new().build(); let input = "Good Iy9+btvut+d92V+v84444ziIqJKHK879KJH59//X1Iy9+btvut+d92V+v84444ziIqJKHK879KJH59//X122Iy9+btvut+d92V+v84444ziIqJKHK879KJH59//X12== Bye"; - let expected: Vec> = vec![ - Identifier::new_unchecked("Good", Case::None, 0), - Identifier::new_unchecked("Bye", Case::None, 134), - ]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "Good", + case: None, + offset: 0, + }, + Identifier { + token: "Bye", + case: None, + offset: 134, + }, +] + +"#]] + ); let actual: Vec<_> = parser.parse_str(input).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "Good", + case: None, + offset: 0, + }, + Identifier { + token: "Bye", + case: None, + offset: 134, + }, +] + +"#]] + ); } #[test] @@ -1149,12 +1479,34 @@ mod test { let parser = TokenizerBuilder::new().build(); let input = r#""ed25519:1": "Wm+VzmOUOz08Ds+0NTWb1d4CZrVsJSikkeRxh6aCcUwu6pNC78FunoD7KNWzqFn241eYHYMGCA5McEiVPdhzBA==""#; - let expected: Vec> = - vec![Identifier::new_unchecked("ed25519", Case::None, 1)]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "ed25519", + case: None, + offset: 1, + }, +] + +"#]] + ); let actual: Vec<_> = parser.parse_str(input).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "ed25519", + case: None, + offset: 1, + }, +] + +"#]] + ); } #[test] @@ -1162,14 +1514,44 @@ mod test { let parser = TokenizerBuilder::new().build(); let input = r#" "integrity": "sha512-hCmlUAIlUiav8Xdqw3Io4LcpA1DOt7h3LSTAC4G6JGHFFaWzI6qvFt9oilvl8BmkbBRX1IhM90ZAmpk68zccQA==","#; - let expected: Vec> = vec![ - Identifier::new_unchecked("integrity", Case::None, 8), - Identifier::new_unchecked("sha512", Case::None, 21), - ]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "integrity", + case: None, + offset: 8, + }, + Identifier { + token: "sha512", + case: None, + offset: 21, + }, +] + +"#]] + ); let actual: Vec<_> = parser.parse_str(input).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "integrity", + case: None, + offset: 8, + }, + Identifier { + token: "sha512", + case: None, + offset: 21, + }, +] + +"#]] + ); } #[test] @@ -1177,14 +1559,44 @@ mod test { let parser = TokenizerBuilder::new().build(); let input = "Good example@example.com Bye"; - let expected: Vec> = vec![ - Identifier::new_unchecked("Good", Case::None, 0), - Identifier::new_unchecked("Bye", Case::None, 25), - ]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "Good", + case: None, + offset: 0, + }, + Identifier { + token: "Bye", + case: None, + offset: 25, + }, +] + +"#]] + ); let actual: Vec<_> = parser.parse_str(input).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "Good", + case: None, + offset: 0, + }, + Identifier { + token: "Bye", + case: None, + offset: 
+        offset: 25,
+    },
+]
+
+"#]]
+        );
     }
 
     #[test]
@@ -1192,14 +1604,44 @@ mod test {
         let parser = TokenizerBuilder::new().build();
 
         let input = "Good example.com/hello Bye";
-        let expected: Vec<Identifier<'_>> = vec![
-            Identifier::new_unchecked("Good", Case::None, 0),
-            Identifier::new_unchecked("Bye", Case::None, 23),
-        ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Good",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "Bye",
+        case: None,
+        offset: 23,
+    },
+]
+
+"#]]
+        );
         let actual: Vec<_> = parser.parse_str(input).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Good",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "Bye",
+        case: None,
+        offset: 23,
+    },
+]
+
+"#]]
+        );
     }
 
     #[test]
@@ -1208,14 +1650,44 @@ mod test {
 
         let input =
            "Good http://user:password@example.com:3142/hello?query=value&extra=two#fragment,split Bye";
-        let expected: Vec<Identifier<'_>> = vec![
-            Identifier::new_unchecked("Good", Case::None, 0),
-            Identifier::new_unchecked("Bye", Case::None, 86),
-        ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Good",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "Bye",
+        case: None,
+        offset: 86,
+    },
+]
+
+"#]]
+        );
         let actual: Vec<_> = parser.parse_str(input).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Good",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "Bye",
+        case: None,
+        offset: 86,
+    },
+]
+
+"#]]
+        );
     }
 
     #[test]
@@ -1223,15 +1695,54 @@ mod test {
         let parser = TokenizerBuilder::new().build();
 
         let input = "Hello 0Hello 124 0xDEADBEEF World";
-        let expected: Vec<Identifier<'_>> = vec![
-            Identifier::new_unchecked("Hello", Case::None, 0),
-            Identifier::new_unchecked("0Hello", Case::None, 6),
-            Identifier::new_unchecked("World", Case::None, 28),
-        ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Hello",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "0Hello",
+        case: None,
+        offset: 6,
+    },
+    Identifier {
+        token: "World",
+        case: None,
+        offset: 28,
+    },
+]
+
+"#]]
+        );
         let actual: Vec<_> = parser.parse_str(input).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Hello",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "0Hello",
+        case: None,
+        offset: 6,
+    },
+    Identifier {
+        token: "World",
+        case: None,
+        offset: 28,
+    },
+]
+
+"#]]
        );
     }
 
     #[test]
@@ -1239,14 +1750,44 @@ mod test {
         let parser = TokenizerBuilder::new().build();
 
         let input = "Hello \\Hello \\ \\\\ World";
-        let expected: Vec<Identifier<'_>> = vec![
-            Identifier::new_unchecked("Hello", Case::None, 0),
-            Identifier::new_unchecked("World", Case::None, 18),
-        ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Hello",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "World",
+        case: None,
+        offset: 18,
+    },
+]
+
+"#]]
+        );
         let actual: Vec<_> = parser.parse_str(input).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+ token: "Hello", + case: None, + offset: 0, + }, + Identifier { + token: "World", + case: None, + offset: 18, + }, +] + +"#]] + ); } #[test] @@ -1254,14 +1795,44 @@ mod test { let parser = TokenizerBuilder::new().build(); let input = "Hello \\n\\n World"; - let expected: Vec> = vec![ - Identifier::new_unchecked("Hello", Case::None, 0), - Identifier::new_unchecked("World", Case::None, 11), - ]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "Hello", + case: None, + offset: 0, + }, + Identifier { + token: "World", + case: None, + offset: 11, + }, +] + +"#]] + ); let actual: Vec<_> = parser.parse_str(input).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "Hello", + case: None, + offset: 0, + }, + Identifier { + token: "World", + case: None, + offset: 11, + }, +] + +"#]] + ); } #[test] @@ -1269,14 +1840,44 @@ mod test { let parser = TokenizerBuilder::new().build(); let input = "Hello \\nanana\\nanana World"; - let expected: Vec> = vec![ - Identifier::new_unchecked("Hello", Case::None, 0), - Identifier::new_unchecked("World", Case::None, 21), - ]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "Hello", + case: None, + offset: 0, + }, + Identifier { + token: "World", + case: None, + offset: 21, + }, +] + +"#]] + ); let actual: Vec<_> = parser.parse_str(input).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "Hello", + case: None, + offset: 0, + }, + Identifier { + token: "World", + case: None, + offset: 21, + }, +] + +"#]] + ); } #[test] @@ -1284,14 +1885,44 @@ mod test { let parser = TokenizerBuilder::new().build(); let input = "Hello %Hello World"; - let expected: Vec> = vec![ - Identifier::new_unchecked("Hello", Case::None, 0), - Identifier::new_unchecked("World", Case::None, 13), - ]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "Hello", + case: None, + offset: 0, + }, + Identifier { + token: "World", + case: None, + offset: 13, + }, +] + +"#]] + ); let actual: Vec<_> = parser.parse_str(input).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "Hello", + case: None, + offset: 0, + }, + Identifier { + token: "World", + case: None, + offset: 13, + }, +] + +"#]] + ); } #[test] @@ -1299,16 +1930,64 @@ mod test { let parser = TokenizerBuilder::new().build(); let input = "#[derive(Clone)] #aaa # #111 #AABBCC #hello #AABBCCDD #1175BA World"; - let expected: Vec> = vec![ - Identifier::new_unchecked("derive", Case::None, 2), - Identifier::new_unchecked("Clone", Case::None, 9), - Identifier::new_unchecked("hello", Case::None, 38), - Identifier::new_unchecked("World", Case::None, 62), - ]; let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); - assert_eq!(expected, actual); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "derive", + case: None, + offset: 2, + }, + Identifier { + token: "Clone", + case: None, + offset: 9, + }, + Identifier { + token: "hello", + case: None, + offset: 38, + }, + Identifier { + token: "World", + case: None, + offset: 
+        offset: 62,
+    },
+]
+
+"#]]
+        );
         let actual: Vec<_> = parser.parse_str(input).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "derive",
+        case: None,
+        offset: 2,
+    },
+    Identifier {
+        token: "Clone",
+        case: None,
+        offset: 9,
+    },
+    Identifier {
+        token: "hello",
+        case: None,
+        offset: 38,
+    },
+    Identifier {
+        token: "World",
+        case: None,
+        offset: 62,
+    },
+]
+
+"#]]
+        );
     }
 
     #[test]
@@ -1316,15 +1995,54 @@ mod test {
         let parser = TokenizerBuilder::new().build();
 
         let input = "Hello {{% foo %}} world!";
-        let expected: Vec<Identifier<'_>> = vec![
-            Identifier::new_unchecked("Hello", Case::None, 0),
-            Identifier::new_unchecked("foo", Case::None, 10),
-            Identifier::new_unchecked("world", Case::None, 18),
-        ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Hello",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "foo",
+        case: None,
+        offset: 10,
+    },
+    Identifier {
+        token: "world",
+        case: None,
+        offset: 18,
+    },
+]
+
+"#]]
+        );
         let actual: Vec<_> = parser.parse_str(input).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[
+    Identifier {
+        token: "Hello",
+        case: None,
+        offset: 0,
+    },
+    Identifier {
+        token: "foo",
+        case: None,
+        offset: 10,
+    },
+    Identifier {
+        token: "world",
+        case: None,
+        offset: 18,
+    },
+]
+
+"#]]
+        );
     }
 
     #[test]
@@ -1332,11 +2050,22 @@ mod test {
         let parser = TokenizerBuilder::new().unicode(false).build();
 
         let input = "appliqués";
-        let expected: Vec<Identifier<'_>> = vec![];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[]
+
+"#]]
+        );
         let actual: Vec<_> = parser.parse_str(input).collect();
-        assert_eq!(expected, actual);
+        assert_data_eq!(
+            actual.to_debug(),
+            str![[r#"
+[]
+
+"#]]
+        );
     }
 
     #[test]

From 8c8f52fe6a4abd2f76cf67c4c454ff41c12c0cc7 Mon Sep 17 00:00:00 2001
From: Ed Page
Date: Wed, 10 Jul 2024 11:47:56 -0500
Subject: [PATCH 2/4] test(tokens): Show JWT behavior
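
Add a test that pins down what the tokenizer does with a JSON Web
Token today: the three dot-separated segments get split into
identifier chunks that then reach the dictionary, which is the
behavior a later commit changes.

For background (a sketch; the payload and signature below are
shortened, illustrative values): a JWT is
`<header>.<payload>.<signature>`, where the first two segments are
base64url-encoded JSON objects. Any JSON object starts with `{"`,
which base64-encodes to `ey`, so those segments begin with `eyJ`:

    // jwt.io's canonical example header, {"alg":"HS256","typ":"JWT"},
    // base64url-encodes to the first segment used in the test below.
    let jwt = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3Mi.c2ln";
    let segments: Vec<&str> = jwt.split('.').collect();
    assert_eq!(segments.len(), 3);
    assert!(segments.iter().take(2).all(|s| s.starts_with("ey")));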
+"#]] + ); + let actual: Vec<_> = parser.parse_str(input).collect(); + assert_data_eq!( + actual.to_debug(), + str![[r#" +[ + Identifier { + token: "header", + case: None, + offset: 0, + }, + Identifier { + token: "Authorization", + case: None, + offset: 8, + }, + Identifier { + token: "Bearer", + case: None, + offset: 23, + }, + Identifier { + token: "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9", + case: None, + offset: 30, + }, + Identifier { + token: "eyJpc3MiOiJzdXBhYmFzZSIsInJvbGUiOiJhbm9uIiwiaWF0IjoxNjQ1MTkyODI0LCJleHAiOjE5NjA3Njg4MjR9", + case: None, + offset: 67, + }, + Identifier { + token: "M9jrxyvPLkUxWgOYSf5dNdJ8v_eRrq810ShFRT8N", + case: None, + offset: 156, + }, + Identifier { + token: "6M", + case: None, + offset: 197, + }, +] + "#]] ); } From 5eab324cdd8dc809810482c3f7fcce073c037876 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 10 Jul 2024 11:51:54 -0500 Subject: [PATCH 3/4] refactor(tokens): Simplify parser logic --- crates/typos/src/tokens.rs | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 2a15a3d..d64bd5d 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -271,11 +271,9 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - preceded( - ('0', alt(('x', 'X'))), - take_while(1.., is_hex_digit_with_sep), - ) - .parse_next(input) + ('0', alt(('x', 'X')), take_while(1.., is_hex_digit_with_sep)) + .recognize() + .parse_next(input) } fn css_color(input: &mut T) -> PResult<::Slice, ()> @@ -287,13 +285,14 @@ mod parser { { trace( "color", - preceded( + ( '#', alt(( - terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)), - terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)), + (take_while(3..=8, is_lower_hex_digit), peek(sep1)), + (take_while(3..=8, is_upper_hex_digit), peek(sep1)), )), - ), + ) + .recognize(), ) .parse_next(input) } @@ -430,16 +429,16 @@ mod parser { trace( "url", ( - opt(terminated( + opt(( take_while(1.., is_scheme_char), // HACK: Technically you can skip `//` if you don't have a domain but that would // get messy to support. (':', '/', '/'), )), ( - opt(terminated(url_userinfo, '@')), + opt((url_userinfo, '@')), take_while(1.., is_domain_char), - opt(preceded(':', take_while(1.., AsChar::is_dec_digit))), + opt((':', take_while(1.., AsChar::is_dec_digit))), ), '/', // HACK: Too lazy to enumerate @@ -461,7 +460,7 @@ mod parser { "userinfo", ( take_while(1.., is_localport_char), - opt(preceded(':', take_while(0.., is_localport_char))), + opt((':', take_while(0.., is_localport_char))), ) .recognize(), ) @@ -480,7 +479,7 @@ mod parser { // incorrectly, we opt for just not evaluating it at all. 
---
 crates/typos/src/tokens.rs | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs
index 2a15a3d..d64bd5d 100644
--- a/crates/typos/src/tokens.rs
+++ b/crates/typos/src/tokens.rs
@@ -271,11 +271,9 @@ mod parser {
         <T as Stream>::Slice: AsBStr + SliceLen + Default,
         <T as Stream>::Token: AsChar + Copy,
     {
-        preceded(
-            ('0', alt(('x', 'X'))),
-            take_while(1.., is_hex_digit_with_sep),
-        )
-        .parse_next(input)
+        ('0', alt(('x', 'X')), take_while(1.., is_hex_digit_with_sep))
+            .recognize()
+            .parse_next(input)
     }
 
     fn css_color<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
@@ -287,13 +285,14 @@ mod parser {
     {
         trace(
             "color",
-            preceded(
+            (
                 '#',
                 alt((
-                    terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
-                    terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
+                    (take_while(3..=8, is_lower_hex_digit), peek(sep1)),
+                    (take_while(3..=8, is_upper_hex_digit), peek(sep1)),
                 )),
-            ),
+            )
+                .recognize(),
         )
         .parse_next(input)
     }
@@ -430,16 +429,16 @@ mod parser {
         trace(
             "url",
             (
-                opt(terminated(
+                opt((
                     take_while(1.., is_scheme_char),
                     // HACK: Technically you can skip `//` if you don't have a domain but that would
                     // get messy to support.
                     (':', '/', '/'),
                 )),
                 (
-                    opt(terminated(url_userinfo, '@')),
+                    opt((url_userinfo, '@')),
                     take_while(1.., is_domain_char),
-                    opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
+                    opt((':', take_while(1.., AsChar::is_dec_digit))),
                 ),
                 '/',
                 // HACK: Too lazy to enumerate
@@ -461,7 +460,7 @@ mod parser {
             "userinfo",
             (
                 take_while(1.., is_localport_char),
-                opt(preceded(':', take_while(0.., is_localport_char))),
+                opt((':', take_while(0.., is_localport_char))),
             )
             .recognize(),
         )
@@ -480,7 +479,7 @@ mod parser {
         // incorrectly, we opt for just not evaluating it at all.
         trace(
             "escape",
-            preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)),
+            (take_while(1.., is_escape), take_while(0.., is_xid_continue)).recognize(),
         )
         .parse_next(input)
     }
@@ -492,7 +491,11 @@ mod parser {
         <T as Stream>::Slice: AsBStr + SliceLen + Default,
         <T as Stream>::Token: AsChar + Copy,
     {
-        trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input)
+        trace(
+            "printf",
+            ('%', take_while(1.., is_xid_continue)).recognize(),
+        )
+        .parse_next(input)
     }
 
     fn take_many0<I, E, F>(mut f: F) -> impl Parser<I, <I as Stream>::Slice, E>

From 6047fba1fea39116a05dbd9752dcd367399d66ab Mon Sep 17 00:00:00 2001
From: Ed Page
Date: Wed, 10 Jul 2024 11:57:46 -0500
Subject: [PATCH 4/4] feat(tokens): Ignore JWTs

Fixes #1057
---
 crates/typos/src/tokens.rs | 76 ++++++++++++++++++--------------------
 1 file changed, 36 insertions(+), 40 deletions(-)

diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs
index d64bd5d..a5bcb99 100644
--- a/crates/typos/src/tokens.rs
+++ b/crates/typos/src/tokens.rs
@@ -191,6 +191,7 @@ mod parser {
             terminated(email_literal, peek(sep1)),
             terminated(url_literal, peek(sep1)),
             terminated(css_color, peek(sep1)),
+            terminated(jwt, peek(sep1)),
             c_escape,
             printf,
             other,
@@ -297,6 +298,41 @@ mod parser {
         .parse_next(input)
     }
 
+    fn jwt<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
+    where
+        T: Compare<char>,
+        T: Stream + StreamIsPartial + PartialEq,
+        <T as Stream>::Slice: AsBStr + SliceLen + Default,
+        <T as Stream>::Token: AsChar + Copy,
+    {
+        trace(
+            "jwt",
+            (
+                'e',
+                'y',
+                take_while(20.., is_jwt_token),
+                '.',
+                'e',
+                'y',
+                take_while(20.., is_jwt_token),
+                '.',
+                take_while(20.., is_jwt_token),
+            )
+                .recognize(),
+        )
+        .parse_next(input)
+    }
+
+    #[inline]
+    fn is_jwt_token(i: impl AsChar + Copy) -> bool {
+        let c = i.as_char();
+        c.is_ascii_lowercase()
+            || c.is_ascii_uppercase()
+            || c.is_ascii_digit()
+            || c == '_'
+            || c == '-'
+    }
+
     fn uuid_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
     where
         T: Compare<char>,
@@ -1582,26 +1618,6 @@ mod test {
         case: None,
         offset: 23,
     },
-    Identifier {
-        token: "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9",
-        case: None,
-        offset: 30,
-    },
-    Identifier {
-        token: "eyJpc3MiOiJzdXBhYmFzZSIsInJvbGUiOiJhbm9uIiwiaWF0IjoxNjQ1MTkyODI0LCJleHAiOjE5NjA3Njg4MjR9",
-        case: None,
-        offset: 67,
-    },
-    Identifier {
-        token: "M9jrxyvPLkUxWgOYSf5dNdJ8v_eRrq810ShFRT8N",
-        case: None,
-        offset: 156,
-    },
-    Identifier {
-        token: "6M",
-        case: None,
-        offset: 197,
-    },
 ]
 
 "#]]
         );
         let actual: Vec<_> = parser.parse_str(input).collect();
         assert_data_eq!(
             actual.to_debug(),
             str![[r#"
 [
     Identifier {
         token: "header",
         case: None,
         offset: 0,
     },
     Identifier {
         token: "Authorization",
         case: None,
         offset: 8,
     },
     Identifier {
         token: "Bearer",
         case: None,
         offset: 23,
     },
 ]
 
 "#]]