Mirror of https://github.com/crate-ci/typos.git (synced 2024-12-22 23:52:12 -05:00)
Commit 1c392c2606
17 changed files with 1161 additions and 1207 deletions

Cargo.lock (generated): 144 lines changed

@@ -2,9 +2,9 @@
# It is not intended for manual editing.
[[package]]
name = "addr2line"
-version = "0.14.0"
+version = "0.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c0929d69e78dd9bf5408269919fcbcaeb2e35e5d43e5815517cdc6a8e11a423"
+checksum = "a55f82cfe485775d02112886f4169bde0c5894d75e79ead7eafe7e40a25e45f7"
dependencies = [
 "gimli",
]
@@ -17,9 +17,9 @@ checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e"

[[package]]
name = "ahash"
-version = "0.6.1"
+version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "865f8b0b3fced577b7df82e9b0eb7609595d7209c0b39e78d0646672e244b1b1"
+checksum = "a75b7e6a93ecd6dbd2c225154d0fa7f86205574ecaa6c87429fb5f66ee677c44"
dependencies = [
 "getrandom 0.2.0",
 "lazy_static",
@@ -46,9 +46,9 @@ dependencies = [

[[package]]
name = "anyhow"
-version = "1.0.34"
+version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf8dcb5b4bbaa28653b647d8c77bd4ed40183b48882e130c1f1ffb73de069fd7"
+checksum = "ee67c11feeac938fae061b232e38e0b6d94f97a9df10e6271319325ac4c56a86"

[[package]]
name = "arrayvec"
@@ -207,12 +207,6 @@ dependencies = [
 "unicase",
]

-[[package]]
-name = "const_fn"
-version = "0.4.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c478836e029dcef17fb47c89023448c64f781a046e0300e257ad8225ae59afab"
-
[[package]]
name = "content_inspector"
version = "0.2.4"
@@ -224,13 +218,12 @@ dependencies = [

[[package]]
name = "crossbeam-utils"
-version = "0.8.0"
+version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec91540d98355f690a86367e566ecad2e9e579f230230eb7c21398372be73ea5"
+checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d"
dependencies = [
 "autocfg",
 "cfg-if 1.0.0",
-"const_fn",
 "lazy_static",
]

@@ -275,9 +268,9 @@ dependencies = [
 "fnv",
 "ident_case",
 "proc-macro2 1.0.24",
-"quote 1.0.7",
+"quote 1.0.8",
 "strsim 0.9.3",
-"syn 1.0.50",
+"syn 1.0.57",
]

[[package]]
@@ -287,8 +280,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72"
dependencies = [
 "darling_core",
-"quote 1.0.7",
-"syn 1.0.50",
+"quote 1.0.8",
+"syn 1.0.57",
]

[[package]]
@@ -312,8 +305,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41cb0e6161ad61ed084a36ba71fbba9e3ac5aee3606fb607fe08da6acbcf3d8c"
dependencies = [
 "proc-macro2 1.0.24",
-"quote 1.0.7",
-"syn 1.0.50",
+"quote 1.0.8",
+"syn 1.0.57",
]

[[package]]
@@ -324,8 +317,8 @@ checksum = "6604612c19dd3bb353650b715b61f09bcb089dd17bdca1a9a42637079bf5e428"
dependencies = [
 "darling",
 "proc-macro2 1.0.24",
-"quote 1.0.7",
-"syn 1.0.50",
+"quote 1.0.8",
+"syn 1.0.57",
]

[[package]]
@@ -374,8 +367,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "946ee94e3dbf58fdd324f9ce245c7b238d46a66f00e86a020b71996349e46cce"
dependencies = [
 "proc-macro2 1.0.24",
-"quote 1.0.7",
-"syn 1.0.50",
+"quote 1.0.8",
+"syn 1.0.57",
]

[[package]]
@@ -421,17 +414,17 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"

[[package]]
name = "funty"
-version = "1.0.1"
+version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ba62103ce691c2fd80fbae2213dfdda9ce60804973ac6b6e97de818ea7f52c8"
+checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7"

[[package]]
name = "getrandom"
-version = "0.1.15"
+version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6"
+checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
dependencies = [
-"cfg-if 0.1.10",
+"cfg-if 1.0.0",
 "libc",
 "wasi",
]
@@ -478,9 +471,9 @@ dependencies = [

[[package]]
name = "heck"
-version = "0.3.1"
+version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
+checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac"
dependencies = [
 "unicode-segmentation",
]
@@ -559,9 +552,9 @@ dependencies = [

[[package]]
name = "itoa"
-version = "0.4.6"
+version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6"
+checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"

[[package]]
name = "lazy_static"
@@ -584,9 +577,9 @@ dependencies = [

[[package]]
name = "libc"
-version = "0.2.80"
+version = "0.2.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614"
+checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb"

[[package]]
name = "log"
@@ -730,9 +723,9 @@ checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"

[[package]]
name = "predicates"
-version = "1.0.5"
+version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96bfead12e90dccead362d62bb2c90a5f6fc4584963645bc7f71a735e0b0735a"
+checksum = "73dd9b7b200044694dfede9edf907c1ca19630908443e9447e624993700c6932"
dependencies = [
 "difference",
 "float-cmp",
@@ -743,15 +736,15 @@ dependencies = [

[[package]]
name = "predicates-core"
-version = "1.0.0"
+version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "06075c3a3e92559ff8929e7a280684489ea27fe44805174c3ebd9328dcb37178"
+checksum = "fb3dbeaaf793584e29c58c7e3a82bbb3c7c06b63cea68d13b0e3cddc124104dc"

[[package]]
name = "predicates-tree"
-version = "1.0.0"
+version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e63c4859013b38a76eca2414c64911fba30def9e3202ac461a2d22831220124"
+checksum = "aee95d988ee893cb35c06b148c80ed2cd52c8eea927f50ba7a0be1a786aeab73"
dependencies = [
 "predicates-core",
 "treeline",
@@ -771,8 +764,8 @@ checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
 "proc-macro-error-attr",
 "proc-macro2 1.0.24",
-"quote 1.0.7",
-"syn 1.0.50",
+"quote 1.0.8",
+"syn 1.0.57",
 "version_check",
]

@@ -783,7 +776,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
 "proc-macro2 1.0.24",
-"quote 1.0.7",
+"quote 1.0.8",
 "version_check",
]

@@ -822,9 +815,9 @@ dependencies = [

[[package]]
name = "quote"
-version = "1.0.7"
+version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
+checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df"
dependencies = [
 "proc-macro2 1.0.24",
]
@@ -841,7 +834,7 @@ version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
dependencies = [
-"getrandom 0.1.15",
+"getrandom 0.1.16",
 "libc",
 "rand_chacha",
 "rand_core",
@@ -865,7 +858,7 @@ version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
dependencies = [
-"getrandom 0.1.15",
+"getrandom 0.1.16",
]

[[package]]
@@ -975,29 +968,29 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"

[[package]]
name = "serde"
-version = "1.0.117"
+version = "1.0.118"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a"
+checksum = "06c64263859d87aa2eb554587e2d23183398d617427327cf2b3d0ed8c69e4800"
dependencies = [
 "serde_derive",
]

[[package]]
name = "serde_derive"
-version = "1.0.117"
+version = "1.0.118"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e"
+checksum = "c84d3526699cd55261af4b941e4e725444df67aa4f9e6a3564f18030d12672df"
dependencies = [
 "proc-macro2 1.0.24",
-"quote 1.0.7",
-"syn 1.0.50",
+"quote 1.0.8",
+"syn 1.0.57",
]

[[package]]
name = "serde_json"
-version = "1.0.59"
+version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dcac07dbffa1c65e7f816ab9eba78eb142c6d44410f4eeba1e26e4f5dfa56b95"
+checksum = "4fceb2595057b6891a4ee808f70054bd2d12f0e97f1cbb78689b59f676df325a"
dependencies = [
 "itoa",
 "ryu",
@@ -1048,8 +1041,8 @@ dependencies = [
 "heck",
 "proc-macro-error",
 "proc-macro2 1.0.24",
-"quote 1.0.7",
-"syn 1.0.50",
+"quote 1.0.8",
+"syn 1.0.57",
]

[[package]]
@@ -1065,12 +1058,12 @@ dependencies = [

[[package]]
name = "syn"
-version = "1.0.50"
+version = "1.0.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "443b4178719c5a851e1bde36ce12da21d74a0e60b4d982ec3385a933c812f0f6"
+checksum = "4211ce9909eb971f111059df92c45640aad50a619cf55cd76476be803c4c68e6"
dependencies = [
 "proc-macro2 1.0.24",
-"quote 1.0.7",
+"quote 1.0.8",
 "unicode-xid 0.2.1",
]

@@ -1114,22 +1107,22 @@ dependencies = [

[[package]]
name = "thiserror"
-version = "1.0.22"
+version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e9ae34b84616eedaaf1e9dd6026dbe00dcafa92aa0c8077cb69df1fcfe5e53e"
+checksum = "76cc616c6abf8c8928e2fdcc0dbfab37175edd8fb49a4641066ad1364fdab146"
dependencies = [
 "thiserror-impl",
]

[[package]]
name = "thiserror-impl"
-version = "1.0.22"
+version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ba20f23e85b10754cd195504aebf6a27e2e6cbe28c17778a0c930724628dd56"
+checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1"
dependencies = [
 "proc-macro2 1.0.24",
-"quote 1.0.7",
-"syn 1.0.50",
+"quote 1.0.8",
+"syn 1.0.57",
]

[[package]]
@@ -1143,9 +1136,9 @@ dependencies = [

[[package]]
name = "toml"
-version = "0.5.7"
+version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "75cf45bb0bef80604d001caaec0d09da99611b3c0fd39d3080468875cdb65645"
+checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa"
dependencies = [
 "serde",
]
@@ -1161,16 +1154,11 @@ name = "typos"
version = "0.3.0"
dependencies = [
 "anyhow",
 "bstr",
-"content_inspector",
-"derive_more 0.99.11",
-"derive_setters",
-"itertools",
 "log",
 "once_cell",
 "regex",
 "serde",
-"serde_json",
 "thiserror",
 "unicode-segmentation",
]
@@ -1185,21 +1173,27 @@ dependencies = [
 "bstr",
 "clap",
 "clap-verbosity-flag",
+"content_inspector",
+"derive_more 0.99.11",
+"derive_setters",
 "difflib",
 "env_logger 0.8.2",
 "human-panic",
 "ignore",
+"itertools",
 "log",
 "phf",
 "predicates",
 "proc-exit",
 "serde",
+"serde_json",
 "structopt",
 "toml",
 "typos",
 "typos-dict",
 "typos-vars",
 "unicase",
+"unicode-segmentation",
]

[[package]]

@@ -50,6 +50,12 @@ ahash = "0.6.1"
difflib = "0.4"
proc-exit = "1.0"
human-panic = "1.0.3"
+content_inspector = "0.2.4"
+unicode-segmentation = "1.6.0"
+derive_more = "0.99.11"
+derive_setters = "0.1"
+itertools = "0.9"
+serde_json = "1.0"

[dev-dependencies]
assert_fs = "1.0"

@@ -5,131 +5,180 @@ extern crate test;
mod data;

use assert_fs::prelude::*;
-use typos::checks::Check;
+use typos_cli::checks::FileChecker;

+fn bench_files(data: &str, b: &mut test::Bencher) {
+    let temp = assert_fs::TempDir::new().unwrap();
+    let sample_path = temp.child("sample");
+    sample_path.write_str(data).unwrap();
+
-fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) {
-    let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
-    let checks = typos::checks::TyposSettings::new().build_identifier_parser();
-    b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
-}
-
-#[bench]
-fn parse_idents_empty_str(b: &mut test::Bencher) {
-    bench_parse_ident_str(data::EMPTY, b);
-}
-
-#[bench]
-fn parse_idents_no_tokens_str(b: &mut test::Bencher) {
-    bench_parse_ident_str(data::NO_TOKENS, b);
-}
-
-#[bench]
-fn parse_idents_single_token_str(b: &mut test::Bencher) {
-    bench_parse_ident_str(data::SINGLE_TOKEN, b);
-}
-
-#[bench]
-fn parse_idents_sherlock_str(b: &mut test::Bencher) {
-    bench_parse_ident_str(data::SHERLOCK, b);
-}
-
-#[bench]
-fn parse_idents_code_str(b: &mut test::Bencher) {
-    bench_parse_ident_str(data::CODE, b);
-}
-
-#[bench]
-fn parse_idents_corpus_str(b: &mut test::Bencher) {
-    bench_parse_ident_str(data::CORPUS, b);
-}
-
-fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) {
    let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
-    let checks = typos::checks::TyposSettings::new().build_identifier_parser();
+    let parser = typos::tokens::Tokenizer::new();
+    let checks = typos_cli::checks::TyposSettings::new().build_files();
    b.iter(|| {
-        checks.check_bytes(
-            data.as_bytes(),
+        checks.check_file(
+            sample_path.path(),
+            true,
            &parser,
            &corrections,
-            &typos::report::PrintSilent,
+            &typos_cli::report::PrintSilent,
        )
    });
+
+    temp.close().unwrap();
}

#[bench]
-fn parse_idents_empty_bytes(b: &mut test::Bencher) {
-    bench_parse_ident_bytes(data::EMPTY, b);
+fn files_empty(b: &mut test::Bencher) {
+    bench_files(data::EMPTY, b);
}

#[bench]
-fn parse_idents_no_tokens_bytes(b: &mut test::Bencher) {
-    bench_parse_ident_bytes(data::NO_TOKENS, b);
+fn files_no_tokens(b: &mut test::Bencher) {
+    bench_files(data::NO_TOKENS, b);
}

#[bench]
-fn parse_idents_single_token_bytes(b: &mut test::Bencher) {
-    bench_parse_ident_bytes(data::SINGLE_TOKEN, b);
+fn files_single_token(b: &mut test::Bencher) {
+    bench_files(data::SINGLE_TOKEN, b);
}

#[bench]
-fn parse_idents_sherlock_bytes(b: &mut test::Bencher) {
-    bench_parse_ident_bytes(data::SHERLOCK, b);
+fn files_sherlock(b: &mut test::Bencher) {
+    bench_files(data::SHERLOCK, b);
}

#[bench]
-fn parse_idents_code_bytes(b: &mut test::Bencher) {
-    bench_parse_ident_bytes(data::CODE, b);
+fn files_code(b: &mut test::Bencher) {
+    bench_files(data::CODE, b);
}

#[bench]
-fn parse_idents_corpus_bytes(b: &mut test::Bencher) {
-    bench_parse_ident_bytes(data::CORPUS, b);
+fn files_corpus(b: &mut test::Bencher) {
+    bench_files(data::CORPUS, b);
}

-fn bench_parse_word_str(data: &str, b: &mut test::Bencher) {
+fn bench_identifiers(data: &str, b: &mut test::Bencher) {
+    let temp = assert_fs::TempDir::new().unwrap();
+    let sample_path = temp.child("sample");
+    sample_path.write_str(data).unwrap();
+
    let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
-    let checks = typos::checks::TyposSettings::new().build_word_parser();
-    b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
+    let parser = typos::tokens::Tokenizer::new();
+    let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser();
+    b.iter(|| {
+        checks.check_file(
+            sample_path.path(),
+            true,
+            &parser,
+            &corrections,
+            &typos_cli::report::PrintSilent,
+        )
+    });
+
+    temp.close().unwrap();
}

#[bench]
-fn parse_words_empty(b: &mut test::Bencher) {
-    bench_parse_word_str(data::EMPTY, b);
+fn identifiers_empty(b: &mut test::Bencher) {
+    bench_identifiers(data::EMPTY, b);
}

#[bench]
-fn parse_words_no_tokens(b: &mut test::Bencher) {
-    bench_parse_word_str(data::NO_TOKENS, b);
+fn identifiers_no_tokens(b: &mut test::Bencher) {
+    bench_identifiers(data::NO_TOKENS, b);
}

#[bench]
-fn parse_words_single_token(b: &mut test::Bencher) {
-    bench_parse_word_str(data::SINGLE_TOKEN, b);
+fn identifiers_single_token(b: &mut test::Bencher) {
+    bench_identifiers(data::SINGLE_TOKEN, b);
}

#[bench]
-fn parse_words_sherlock(b: &mut test::Bencher) {
-    bench_parse_word_str(data::SHERLOCK, b);
+fn identifiers_sherlock(b: &mut test::Bencher) {
+    bench_identifiers(data::SHERLOCK, b);
}

#[bench]
-fn parse_words_code(b: &mut test::Bencher) {
-    bench_parse_word_str(data::CODE, b);
+fn identifiers_code(b: &mut test::Bencher) {
+    bench_identifiers(data::CODE, b);
}

#[bench]
-fn parse_words_corpus(b: &mut test::Bencher) {
-    bench_parse_word_str(data::CORPUS, b);
+fn identifiers_corpus(b: &mut test::Bencher) {
+    bench_identifiers(data::CORPUS, b);
}

+fn bench_words(data: &str, b: &mut test::Bencher) {
+    let temp = assert_fs::TempDir::new().unwrap();
+    let sample_path = temp.child("sample");
+    sample_path.write_str(data).unwrap();
+
+    let corrections = typos_cli::dict::BuiltIn::new(Default::default());
+    let parser = typos::tokens::Tokenizer::new();
+    let checks = typos_cli::checks::TyposSettings::new().build_word_parser();
+    b.iter(|| {
+        checks.check_file(
+            sample_path.path(),
+            true,
+            &parser,
+            &corrections,
+            &typos_cli::report::PrintSilent,
+        )
+    });
+
+    temp.close().unwrap();
+}
+
+#[bench]
+fn words_empty(b: &mut test::Bencher) {
+    bench_words(data::EMPTY, b);
+}
+
+#[bench]
+fn words_no_tokens(b: &mut test::Bencher) {
+    bench_words(data::NO_TOKENS, b);
+}
+
+#[bench]
+fn words_single_token(b: &mut test::Bencher) {
+    bench_words(data::SINGLE_TOKEN, b);
+}
+
+#[bench]
+fn words_sherlock(b: &mut test::Bencher) {
+    bench_words(data::SHERLOCK, b);
+}
+
+#[bench]
+fn words_code(b: &mut test::Bencher) {
+    bench_words(data::CODE, b);
+}
+
+#[bench]
+fn words_corpus(b: &mut test::Bencher) {
+    bench_words(data::CORPUS, b);
+}
+
fn bench_typos(data: &str, b: &mut test::Bencher) {
+    let temp = assert_fs::TempDir::new().unwrap();
+    let sample_path = temp.child("sample");
+    sample_path.write_str(data).unwrap();
+
    let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
-    let checks = typos::checks::TyposSettings::new().build_typos();
-    b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
+    let parser = typos::tokens::Tokenizer::new();
+    let checks = typos_cli::checks::TyposSettings::new().build_typos();
+    b.iter(|| {
+        checks.check_file(
+            sample_path.path(),
+            true,
+            &parser,
+            &corrections,
+            &typos_cli::report::PrintSilent,
+        )
+    });
+
+    temp.close().unwrap();
}

#[bench]
@@ -161,54 +210,3 @@ fn typos_code(b: &mut test::Bencher) {
fn typos_corpus(b: &mut test::Bencher) {
    bench_typos(data::CORPUS, b);
}
-
-fn bench_check_file(data: &str, b: &mut test::Bencher) {
-    let temp = assert_fs::TempDir::new().unwrap();
-    let sample_path = temp.child("sample");
-    sample_path.write_str(data).unwrap();
-
-    let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
-    let checks = typos::checks::TyposSettings::new().build_typos();
-    b.iter(|| {
-        checks.check_file(
-            sample_path.path(),
-            true,
-            &parser,
-            &corrections,
-            &typos::report::PrintSilent,
-        )
-    });
-
-    temp.close().unwrap();
-}
-
-#[bench]
-fn check_file_empty(b: &mut test::Bencher) {
-    bench_check_file(data::EMPTY, b);
-}
-
-#[bench]
-fn check_file_no_tokens(b: &mut test::Bencher) {
-    bench_check_file(data::NO_TOKENS, b);
-}
-
-#[bench]
-fn check_file_single_token(b: &mut test::Bencher) {
-    bench_check_file(data::SINGLE_TOKEN, b);
-}
-
-#[bench]
-fn check_file_sherlock(b: &mut test::Bencher) {
-    bench_check_file(data::SHERLOCK, b);
-}
-
-#[bench]
-fn check_file_code(b: &mut test::Bencher) {
-    bench_check_file(data::CODE, b);
-}
-
-#[bench]
-fn check_file_corpus(b: &mut test::Bencher) {
-    bench_check_file(data::CORPUS, b);
-}

@@ -6,19 +6,19 @@ mod data;

#[bench]
fn ident_parse_empty(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
    b.iter(|| parser.parse_bytes(data::EMPTY.as_bytes()).last());
}

#[bench]
fn ident_parse_no_tokens(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
    b.iter(|| parser.parse_bytes(data::NO_TOKENS.as_bytes()).last());
}

#[bench]
fn ident_parse_single_token(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
    b.iter(|| {
        parser.parse_bytes(data::SINGLE_TOKEN.as_bytes()).last();
    });
@@ -26,19 +26,19 @@ fn ident_parse_single_token(b: &mut test::Bencher) {

#[bench]
fn ident_parse_sherlock(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
    b.iter(|| parser.parse_bytes(data::SHERLOCK.as_bytes()).last());
}

#[bench]
fn ident_parse_code(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
    b.iter(|| parser.parse_bytes(data::CODE.as_bytes()).last());
}

#[bench]
fn ident_parse_corpus(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
    b.iter(|| parser.parse_bytes(data::CORPUS.as_bytes()).last());
}

@@ -20,11 +20,6 @@ thiserror = "1.0"
regex = "1.3"
once_cell = "1.2.0"
serde = { version = "1.0", features = ["derive"] }
-serde_json = "1.0"
-itertools = "0.9"
bstr = "0.2"
log = "0.4"
unicode-segmentation = "1.7.1"
-derive_more = "0.99.11"
-derive_setters = "0.1"
-content_inspector = "0.2.4"

@@ -1,489 +0,0 @@
-use bstr::ByteSlice;
-
-use crate::report;
-use crate::tokens;
-use crate::Dictionary;
-use crate::Status;
-
-pub trait Check: Send + Sync {
-    fn check_str(
-        &self,
-        buffer: &str,
-        parser: &tokens::Parser,
-        dictionary: &dyn Dictionary,
-        reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error>;
-
-    fn check_bytes(
-        &self,
-        buffer: &[u8],
-        parser: &tokens::Parser,
-        dictionary: &dyn Dictionary,
-        reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error>;
-
-    fn check_filenames(&self) -> bool;
-
-    fn check_files(&self) -> bool;
-
-    fn binary(&self) -> bool;
-
-    fn check_filename(
-        &self,
-        path: &std::path::Path,
-        parser: &tokens::Parser,
-        dictionary: &dyn Dictionary,
-        reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error> {
-        if !self.check_filenames() {
-            return Ok(());
-        }
-
-        if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
-            let context_reporter = ReportContext {
-                reporter,
-                context: report::PathContext { path }.into(),
-            };
-            self.check_str(file_name, parser, dictionary, &context_reporter)?;
-        }
-
-        Ok(())
-    }
-
-    fn check_file(
-        &self,
-        path: &std::path::Path,
-        explicit: bool,
-        parser: &tokens::Parser,
-        dictionary: &dyn Dictionary,
-        reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error> {
-        if !self.check_files() {
-            return Ok(());
-        }
-
-        let buffer = read_file(path, reporter)?;
-        let (buffer, content_type) = massage_data(buffer)?;
-        if !explicit && !self.binary() && content_type.is_binary() {
-            let msg = report::BinaryFile { path };
-            reporter.report(msg.into())?;
-            return Ok(());
-        }
-
-        for (line_idx, line) in buffer.lines().enumerate() {
-            let line_num = line_idx + 1;
-            let context_reporter = ReportContext {
-                reporter,
-                context: report::FileContext { path, line_num }.into(),
-            };
-            self.check_bytes(line, parser, dictionary, &context_reporter)?;
-        }
-
-        Ok(())
-    }
-}
-
-struct ReportContext<'m, 'r> {
-    reporter: &'r dyn report::Report,
-    context: report::Context<'m>,
-}
-
-impl<'m, 'r> report::Report for ReportContext<'m, 'r> {
-    fn report(&self, msg: report::Message) -> Result<(), std::io::Error> {
-        let msg = msg.context(Some(self.context.clone()));
-        self.reporter.report(msg)
-    }
-}
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct TyposSettings {
-    check_filenames: bool,
-    check_files: bool,
-    binary: bool,
-}
-
-impl TyposSettings {
-    pub fn new() -> Self {
-        Default::default()
-    }
-
-    pub fn check_filenames(&mut self, yes: bool) -> &mut Self {
-        self.check_filenames = yes;
-        self
-    }
-
-    pub fn check_files(&mut self, yes: bool) -> &mut Self {
-        self.check_files = yes;
-        self
-    }
-
-    pub fn binary(&mut self, yes: bool) -> &mut Self {
-        self.binary = yes;
-        self
-    }
-
-    pub fn build_typos(&self) -> Typos {
-        Typos {
-            check_filenames: self.check_filenames,
-            check_files: self.check_files,
-            binary: self.binary,
-        }
-    }
-
-    pub fn build_identifier_parser(&self) -> ParseIdentifiers {
-        ParseIdentifiers {
-            check_filenames: self.check_filenames,
-            check_files: self.check_files,
-            binary: self.binary,
-        }
-    }
-
-    pub fn build_word_parser(&self) -> ParseWords {
-        ParseWords {
-            check_filenames: self.check_filenames,
-            check_files: self.check_files,
-            binary: self.binary,
-        }
-    }
-
-    pub fn build_files(&self) -> Files {
-        Files {}
-    }
-}
-
-impl Default for TyposSettings {
-    fn default() -> Self {
-        Self {
-            check_filenames: true,
-            check_files: true,
-            binary: false,
-        }
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct Typos {
-    check_filenames: bool,
-    check_files: bool,
-    binary: bool,
-}
-
-impl Check for Typos {
-    fn check_str(
-        &self,
-        buffer: &str,
-        parser: &tokens::Parser,
-        dictionary: &dyn Dictionary,
-        reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error> {
-        for ident in parser.parse_str(buffer) {
-            match dictionary.correct_ident(ident) {
-                Some(Status::Valid) => {}
-                Some(corrections) => {
-                    let byte_offset = ident.offset();
-                    let msg = report::Typo {
-                        context: None,
-                        buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()),
-                        byte_offset,
-                        typo: ident.token(),
-                        corrections,
-                    };
-                    reporter.report(msg.into())?;
-                }
-                None => {
-                    for word in ident.split() {
-                        match dictionary.correct_word(word) {
-                            Some(Status::Valid) => {}
-                            Some(corrections) => {
-                                let byte_offset = word.offset();
-                                let msg = report::Typo {
-                                    context: None,
-                                    buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()),
-                                    byte_offset,
-                                    typo: word.token(),
-                                    corrections,
-                                };
-                                reporter.report(msg.into())?;
-                            }
-                            None => {}
-                        }
-                    }
-                }
-            }
-        }
-        Ok(())
-    }
-
-    fn check_bytes(
-        &self,
-        buffer: &[u8],
-        parser: &tokens::Parser,
-        dictionary: &dyn Dictionary,
-        reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error> {
-        for ident in parser.parse_bytes(buffer) {
-            match dictionary.correct_ident(ident) {
-                Some(Status::Valid) => {}
-                Some(corrections) => {
-                    let byte_offset = ident.offset();
-                    let msg = report::Typo {
-                        context: None,
-                        buffer: std::borrow::Cow::Borrowed(buffer),
-                        byte_offset,
-                        typo: ident.token(),
-                        corrections,
-                    };
-                    reporter.report(msg.into())?;
-                }
-                None => {
-                    for word in ident.split() {
-                        match dictionary.correct_word(word) {
-                            Some(Status::Valid) => {}
-                            Some(corrections) => {
-                                let byte_offset = word.offset();
-                                let msg = report::Typo {
-                                    context: None,
-                                    buffer: std::borrow::Cow::Borrowed(buffer),
-                                    byte_offset,
-                                    typo: word.token(),
-                                    corrections,
-                                };
-                                reporter.report(msg.into())?;
-                            }
-                            None => {}
-                        }
-                    }
-                }
-            }
-        }
-
-        Ok(())
-    }
-
-    fn check_filenames(&self) -> bool {
-        self.check_filenames
-    }
-
-    fn check_files(&self) -> bool {
-        self.check_files
-    }
-
-    fn binary(&self) -> bool {
-        self.binary
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct ParseIdentifiers {
-    check_filenames: bool,
-    check_files: bool,
-    binary: bool,
-}
-
-impl Check for ParseIdentifiers {
-    fn check_str(
-        &self,
-        buffer: &str,
-        parser: &tokens::Parser,
-        _dictionary: &dyn Dictionary,
-        reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error> {
-        let msg = report::Parse {
-            context: None,
-            kind: report::ParseKind::Identifier,
-            data: parser.parse_str(buffer).map(|i| i.token()).collect(),
-        };
-        if !msg.data.is_empty() {
-            reporter.report(msg.into())?;
-        }
-
-        Ok(())
-    }
-
-    fn check_bytes(
-        &self,
-        buffer: &[u8],
-        parser: &tokens::Parser,
-        _dictionary: &dyn Dictionary,
-        reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error> {
-        let msg = report::Parse {
-            context: None,
-            kind: report::ParseKind::Identifier,
-            data: parser.parse_bytes(buffer).map(|i| i.token()).collect(),
-        };
-        if !msg.data.is_empty() {
-            reporter.report(msg.into())?;
-        }
-
-        Ok(())
-    }
-
-    fn check_filenames(&self) -> bool {
-        self.check_filenames
-    }
-
-    fn check_files(&self) -> bool {
-        self.check_files
-    }
-
-    fn binary(&self) -> bool {
-        self.binary
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct ParseWords {
-    check_filenames: bool,
-    check_files: bool,
-    binary: bool,
-}
-
-impl Check for ParseWords {
-    fn check_str(
-        &self,
-        buffer: &str,
-        parser: &tokens::Parser,
-        _dictionary: &dyn Dictionary,
-        reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error> {
-        let msg = report::Parse {
-            context: None,
-            kind: report::ParseKind::Word,
-            data: parser
-                .parse_str(buffer)
-                .flat_map(|ident| ident.split().map(|i| i.token()))
-                .collect(),
-        };
-        if !msg.data.is_empty() {
-            reporter.report(msg.into())?;
-        }
-
-        Ok(())
-    }
-
-    fn check_bytes(
-        &self,
-        buffer: &[u8],
-        parser: &tokens::Parser,
-        _dictionary: &dyn Dictionary,
-        reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error> {
-        let msg = report::Parse {
-            context: None,
-            kind: report::ParseKind::Word,
-            data: parser
-                .parse_bytes(buffer)
-                .flat_map(|ident| ident.split().map(|i| i.token()))
-                .collect(),
-        };
-        if !msg.data.is_empty() {
-            reporter.report(msg.into())?;
-        }
-
-        Ok(())
-    }
-
-    fn check_filenames(&self) -> bool {
-        self.check_filenames
-    }
-
-    fn check_files(&self) -> bool {
-        self.check_files
-    }
-
-    fn binary(&self) -> bool {
-        self.binary
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct Files {}
-
-impl Check for Files {
-    fn check_str(
-        &self,
-        _buffer: &str,
-        _parser: &tokens::Parser,
-        _dictionary: &dyn Dictionary,
-        _reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error> {
-        Ok(())
-    }
-
-    fn check_bytes(
-        &self,
-        _buffer: &[u8],
-        _parser: &tokens::Parser,
-        _dictionary: &dyn Dictionary,
-        _reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error> {
-        Ok(())
-    }
-
-    fn check_filenames(&self) -> bool {
-        true
-    }
-
-    fn check_files(&self) -> bool {
-        true
-    }
-
-    fn binary(&self) -> bool {
-        true
-    }
-
-    fn check_filename(
-        &self,
-        _path: &std::path::Path,
-        _parser: &tokens::Parser,
-        _dictionary: &dyn Dictionary,
-        _reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error> {
-        Ok(())
-    }
-
-    fn check_file(
-        &self,
-        path: &std::path::Path,
-        _explicit: bool,
-        _parser: &tokens::Parser,
-        _dictionary: &dyn Dictionary,
-        reporter: &dyn report::Report,
-    ) -> Result<(), std::io::Error> {
-        let msg = report::File::new(path);
-        reporter.report(msg.into())?;
-
-        Ok(())
-    }
-}
-
-fn read_file(
-    path: &std::path::Path,
-    reporter: &dyn report::Report,
-) -> Result<Vec<u8>, std::io::Error> {
-    let buffer = match std::fs::read(path) {
-        Ok(buffer) => buffer,
-        Err(err) => {
-            let msg = report::Error::new(err.to_string());
-            reporter.report(msg.into())?;
-            Vec::new()
-        }
-    };
-    Ok(buffer)
-}
-
-fn massage_data(
-    buffer: Vec<u8>,
-) -> Result<(Vec<u8>, content_inspector::ContentType), std::io::Error> {
-    let mut content_type = content_inspector::inspect(&buffer);
-
-    // HACK: We only support UTF-8 at the moment
-    if content_type != content_inspector::ContentType::UTF_8_BOM
-        && content_type != content_inspector::ContentType::UTF_8
-    {
-        content_type = content_inspector::ContentType::BINARY;
-    }
-
-    Ok((buffer, content_type))
-}

@@ -1,6 +1,35 @@
use std::borrow::Cow;

-#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize, derive_more::From)]
+/// Look up the validity of a term.
+pub trait Dictionary: Send + Sync {
+    /// Look up the validity of an Identifier.
+    ///
+    /// `None` if the status is unknown.
+    fn correct_ident<'s, 'w>(&'s self, ident: crate::tokens::Identifier<'w>) -> Option<Status<'s>>;
+
+    /// Look up the validity of a Word.
+    ///
+    /// `None` if the status is unknown.
+    fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
+}
+
+pub(crate) struct NullDictionary;
+
+impl Dictionary for NullDictionary {
+    fn correct_ident<'s, 'w>(
+        &'s self,
+        _ident: crate::tokens::Identifier<'w>,
+    ) -> Option<Status<'s>> {
+        None
+    }
+
+    fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option<Status<'s>> {
+        None
+    }
+}
+
+/// Validity of a term in a Dictionary.
+#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)]
#[serde(rename_all = "snake_case")]
#[serde(untagged)]
pub enum Status<'c> {
@@ -27,6 +56,20 @@ impl<'c> Status<'c> {
        }
    }

+    pub fn into_owned(self) -> Status<'static> {
+        match self {
+            Status::Valid => Status::Valid,
+            Status::Invalid => Status::Invalid,
+            Status::Corrections(corrections) => {
+                let corrections = corrections
+                    .into_iter()
+                    .map(|c| Cow::Owned(c.into_owned()))
+                    .collect();
+                Status::Corrections(corrections)
+            }
+        }
+    }
+
    pub fn borrow(&self) -> Status<'_> {
        match self {
            Status::Corrections(corrections) => {
@@ -40,10 +83,3 @@ impl<'c> Status<'c> {
        }
    }
}
-
-pub trait Dictionary: Send + Sync {
-    fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>)
-        -> Option<Status<'s>>;
-
-    fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
-}
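
With this change, Dictionary becomes the typos crate's public extension point rather than a trailing definition at the end of the module. An illustrative caller-side implementation (not from the commit; it assumes Status::Corrections holds a Vec of Cow<str>, as the into_owned body above suggests):

    // Hypothetical single-entry dictionary against the relocated trait.
    struct OneWordDict;

    impl typos::Dictionary for OneWordDict {
        fn correct_ident<'s, 'w>(
            &'s self,
            _ident: typos::tokens::Identifier<'w>,
        ) -> Option<typos::Status<'s>> {
            None // unknown at the identifier level; fall through to per-word checks
        }

        fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<typos::Status<'s>> {
            if word.token() == "teh" {
                Some(typos::Status::Corrections(vec![std::borrow::Cow::Borrowed("the")]))
            } else {
                None
            }
        }
    }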

@@ -1,7 +1,7 @@
mod dict;
+mod parser;

-pub mod checks;
-pub mod report;
pub mod tokens;

-pub use crate::dict::*;
+pub use dict::*;
+pub use parser::*;

crates/typos/src/parser.rs (new file, 147 lines)

@@ -0,0 +1,147 @@
+use crate::tokens;
+use crate::Dictionary;
+use std::borrow::Cow;
+
+/// Extract typos from the buffer.
+#[derive(Clone)]
+pub struct ParserBuilder<'p, 'd> {
+    tokenizer: Option<&'p tokens::Tokenizer>,
+    dictionary: &'d dyn Dictionary,
+}
+
+impl<'p> ParserBuilder<'p, 'static> {
+    pub fn new() -> Self {
+        Default::default()
+    }
+}
+
+impl<'p, 'd> ParserBuilder<'p, 'd> {
+    /// Set the Tokenizer used when parsing.
+    pub fn tokenizer(mut self, tokenizer: &'p tokens::Tokenizer) -> Self {
+        self.tokenizer = Some(tokenizer);
+        self
+    }
+
+    /// Set the dictionary used when parsing.
+    pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> {
+        ParserBuilder {
+            tokenizer: self.tokenizer,
+            dictionary,
+        }
+    }
+
+    /// Extract typos from the buffer.
+    pub fn build(&self) -> TyposParser<'p, 'd> {
+        TyposParser {
+            tokenizer: self.tokenizer.unwrap_or(&DEFAULT_TOKENIZER),
+            dictionary: self.dictionary,
+        }
+    }
+}
+
+impl<'p> Default for ParserBuilder<'p, 'static> {
+    fn default() -> Self {
+        Self {
+            tokenizer: None,
+            dictionary: &crate::NullDictionary,
+        }
+    }
+}
+
+static DEFAULT_TOKENIZER: once_cell::sync::Lazy<tokens::Tokenizer> =
+    once_cell::sync::Lazy::new(tokens::Tokenizer::new);
+
+/// Extract typos from the buffer.
+#[derive(Clone)]
+pub struct TyposParser<'p, 'd> {
+    tokenizer: &'p tokens::Tokenizer,
+    dictionary: &'d dyn Dictionary,
+}
+
+impl<'p, 'd> TyposParser<'p, 'd> {
+    pub fn parse_str<'b, 's: 'b>(&'s self, buffer: &'b str) -> impl Iterator<Item = Typo<'b>> {
+        self.tokenizer
+            .parse_str(buffer)
+            .flat_map(move |ident| self.process_ident(ident))
+    }
+
+    pub fn parse_bytes<'b, 's: 'b>(&'s self, buffer: &'b [u8]) -> impl Iterator<Item = Typo<'b>> {
+        self.tokenizer
+            .parse_bytes(buffer)
+            .flat_map(move |ident| self.process_ident(ident))
+    }
+
+    fn process_ident<'i, 's: 'i>(
+        &'s self,
+        ident: tokens::Identifier<'i>,
+    ) -> impl Iterator<Item = Typo<'i>> {
+        match self.dictionary.correct_ident(ident) {
+            Some(crate::Status::Valid) => itertools::Either::Left(None.into_iter()),
+            Some(corrections) => {
+                let typo = Typo {
+                    byte_offset: ident.offset(),
+                    typo: ident.token().into(),
+                    corrections,
+                };
+                itertools::Either::Left(Some(typo).into_iter())
+            }
+            None => itertools::Either::Right(
+                ident
+                    .split()
+                    .filter_map(move |word| self.process_word(word)),
+            ),
+        }
+    }
+
+    fn process_word<'w, 's: 'w>(&'s self, word: tokens::Word<'w>) -> Option<Typo<'w>> {
+        match self.dictionary.correct_word(word) {
+            Some(crate::Status::Valid) => None,
+            Some(corrections) => {
+                let typo = Typo {
+                    byte_offset: word.offset(),
+                    typo: word.token().into(),
+                    corrections,
+                };
+                Some(typo)
+            }
+            None => None,
+        }
+    }
+}
+
+/// An invalid term found in the buffer.
+#[derive(Clone, Debug)]
+#[non_exhaustive]
+pub struct Typo<'m> {
+    pub byte_offset: usize,
+    pub typo: Cow<'m, str>,
+    pub corrections: crate::Status<'m>,
+}
+
+impl<'m> Typo<'m> {
+    pub fn into_owned(self) -> Typo<'static> {
+        Typo {
+            byte_offset: self.byte_offset,
+            typo: Cow::Owned(self.typo.into_owned()),
+            corrections: self.corrections.into_owned(),
+        }
+    }
+
+    pub fn borrow(&self) -> Typo<'_> {
+        Typo {
+            byte_offset: self.byte_offset,
+            typo: Cow::Borrowed(self.typo.as_ref()),
+            corrections: self.corrections.borrow(),
+        }
+    }
+}
+
+impl<'m> Default for Typo<'m> {
+    fn default() -> Self {
+        Self {
+            byte_offset: 0,
+            typo: "".into(),
+            corrections: crate::Status::Invalid,
+        }
+    }
+}
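
The new parser.rs above exposes typo extraction as a plain iterator instead of the reporter-callback style of the deleted Check trait. A minimal caller-side sketch (not part of the commit; `my_dict` stands in for any value implementing `typos::Dictionary`, since `NullDictionary` stays crate-private):

    // Hypothetical usage of the API introduced above.
    let tokenizer = typos::tokens::Tokenizer::new();
    let parser = typos::ParserBuilder::new()
        .tokenizer(&tokenizer) // optional; a lazily initialized default Tokenizer is used otherwise
        .dictionary(&my_dict)  // assumption: some caller-supplied &dyn Dictionary
        .build();
    for typo in parser.parse_str("some buffer") {
        // Each Typo carries byte_offset, the offending text, and its Status.
        println!("{}: {:?} {:?}", typo.byte_offset, typo.typo, typo.corrections);
    }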

@@ -1,13 +1,6 @@
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum Case {
-    Title,
-    Lower,
-    Scream,
-    None,
-}
-
+/// Define rules for tokenizaing a buffer.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct ParserBuilder {
+pub struct TokenizerBuilder {
    ignore_hex: bool,
    leading_digits: bool,
    leading_chars: String,
@@ -15,37 +8,42 @@ pub struct ParserBuilder {
    include_chars: String,
}

-impl ParserBuilder {
+impl TokenizerBuilder {
    pub fn new() -> Self {
        Default::default()
    }

+    /// Specify that hexadecimal numbers should be ignored.
    pub fn ignore_hex(&mut self, yes: bool) -> &mut Self {
        self.ignore_hex = yes;
        self
    }

+    /// Specify that leading digits are allowed for Identifiers.
    pub fn leading_digits(&mut self, yes: bool) -> &mut Self {
        self.leading_digits = yes;
        self
    }

+    /// Extend accepted leading characters for Identifiers.
    pub fn leading_chars(&mut self, chars: String) -> &mut Self {
        self.leading_chars = chars;
        self
    }

+    /// Specify that digits can be included in Identifiers.
    pub fn include_digits(&mut self, yes: bool) -> &mut Self {
        self.include_digits = yes;
        self
    }

+    /// Extend accepted characters for Identifiers.
    pub fn include_chars(&mut self, chars: String) -> &mut Self {
        self.include_chars = chars;
        self
    }

-    pub fn build(&self) -> Parser {
+    pub fn build(&self) -> Tokenizer {
        let mut pattern = r#"\b("#.to_owned();
        Self::push_pattern(&mut pattern, self.leading_digits, &self.leading_chars);
        Self::push_pattern(&mut pattern, self.include_digits, &self.include_chars);
@@ -54,7 +52,7 @@ impl ParserBuilder {
        let words_str = regex::Regex::new(&pattern).unwrap();
        let words_bytes = regex::bytes::Regex::new(&pattern).unwrap();

-        Parser {
+        Tokenizer {
            words_str,
            words_bytes,
            // `leading_digits` let's us bypass the regexes since you can't have a decimal or
@@ -77,7 +75,7 @@ impl ParserBuilder {
    }
}

-impl Default for ParserBuilder {
+impl Default for TokenizerBuilder {
    fn default() -> Self {
        Self {
            ignore_hex: true,
@@ -89,17 +87,18 @@ impl Default for ParserBuilder {
    }
}

+/// Extract Identifiers from a buffer.
#[derive(Debug, Clone)]
-pub struct Parser {
+pub struct Tokenizer {
    words_str: regex::Regex,
    words_bytes: regex::bytes::Regex,
    ignore_numbers: bool,
    ignore_hex: bool,
}

-impl Parser {
+impl Tokenizer {
    pub fn new() -> Self {
-        ParserBuilder::default().build()
+        TokenizerBuilder::default().build()
    }

    pub fn parse_str<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
@@ -132,7 +131,7 @@ impl Parser {
    }
}

-impl Default for Parser {
+impl Default for Tokenizer {
    fn default() -> Self {
        Self::new()
    }
@@ -156,6 +155,7 @@ fn is_hex(ident: &[u8]) -> bool {
    HEX.is_match(ident)
}

+/// A term composed of Words.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Identifier<'t> {
    token: &'t str,
@@ -179,11 +179,13 @@ impl<'t> Identifier<'t> {
        self.offset
    }

+    /// Split into individual Words.
    pub fn split(&self) -> impl Iterator<Item = Word<'t>> {
        split_ident(self.token, self.offset)
    }
}

+/// An indivisible term.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Word<'t> {
    token: &'t str,
@@ -237,52 +239,8 @@ impl<'t> Word<'t> {
    }
}

-/// Tracks the current 'mode' of the transformation algorithm as it scans the input string.
-///
-/// The mode is a tri-state which tracks the case of the last cased character of the current
-/// word. If there is no cased character (either lowercase or uppercase) since the previous
-/// word boundary, than the mode is `Boundary`. If the last cased character is lowercase, then
-/// the mode is `Lowercase`. Otherrwise, the mode is `Uppercase`.
-#[derive(Clone, Copy, PartialEq, Debug)]
-enum WordMode {
-    /// There have been no lowercase or uppercase characters in the current word.
-    Boundary,
-    /// The previous cased character in the current word is lowercase.
-    Lowercase,
-    /// The previous cased character in the current word is uppercase.
-    Uppercase,
-    Number,
-}
-
-impl WordMode {
-    fn classify(c: char) -> Self {
-        if c.is_lowercase() {
-            WordMode::Lowercase
-        } else if c.is_uppercase() {
-            WordMode::Uppercase
-        } else if c.is_ascii_digit() {
-            WordMode::Number
-        } else {
-            // This assumes all characters are either lower or upper case.
-            WordMode::Boundary
-        }
-    }
-
-    fn case(self, last: WordMode) -> Case {
-        match (self, last) {
-            (WordMode::Uppercase, WordMode::Uppercase) => Case::Scream,
-            (WordMode::Uppercase, WordMode::Lowercase) => Case::Title,
-            (WordMode::Lowercase, WordMode::Lowercase) => Case::Lower,
-            (WordMode::Number, WordMode::Number) => Case::None,
-            (WordMode::Number, _)
-            | (_, WordMode::Number)
-            | (WordMode::Boundary, _)
-            | (_, WordMode::Boundary)
-            | (WordMode::Lowercase, WordMode::Uppercase) => {
-                unreachable!("Invalid case combination: ({:?}, {:?})", self, last)
-            }
-        }
-    }
-}
+fn split_ident(ident: &str, offset: usize) -> impl Iterator<Item = Word<'_>> {
+    SplitIdent::new(ident, offset)
+}

struct SplitIdent<'s> {
@@ -377,8 +335,61 @@ impl<'s> Iterator for SplitIdent<'s> {
    }
}

-fn split_ident(ident: &str, offset: usize) -> impl Iterator<Item = Word<'_>> {
-    SplitIdent::new(ident, offset)
-}
+/// Format of the term.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Case {
+    Title,
+    Lower,
+    Scream,
+    None,
+}
+
+/// Tracks the current 'mode' of the transformation algorithm as it scans the input string.
+///
+/// The mode is a tri-state which tracks the case of the last cased character of the current
+/// word. If there is no cased character (either lowercase or uppercase) since the previous
+/// word boundary, than the mode is `Boundary`. If the last cased character is lowercase, then
+/// the mode is `Lowercase`. Otherrwise, the mode is `Uppercase`.
+#[derive(Clone, Copy, PartialEq, Debug)]
+enum WordMode {
+    /// There have been no lowercase or uppercase characters in the current word.
+    Boundary,
+    /// The previous cased character in the current word is lowercase.
+    Lowercase,
+    /// The previous cased character in the current word is uppercase.
+    Uppercase,
+    Number,
+}
+
+impl WordMode {
+    fn classify(c: char) -> Self {
+        if c.is_lowercase() {
+            WordMode::Lowercase
+        } else if c.is_uppercase() {
+            WordMode::Uppercase
+        } else if c.is_ascii_digit() {
+            WordMode::Number
+        } else {
+            // This assumes all characters are either lower or upper case.
+            WordMode::Boundary
+        }
+    }
+
+    fn case(self, last: WordMode) -> Case {
+        match (self, last) {
+            (WordMode::Uppercase, WordMode::Uppercase) => Case::Scream,
+            (WordMode::Uppercase, WordMode::Lowercase) => Case::Title,
+            (WordMode::Lowercase, WordMode::Lowercase) => Case::Lower,
+            (WordMode::Number, WordMode::Number) => Case::None,
+            (WordMode::Number, _)
+            | (_, WordMode::Number)
+            | (WordMode::Boundary, _)
+            | (_, WordMode::Boundary)
+            | (WordMode::Lowercase, WordMode::Uppercase) => {
+                unreachable!("Invalid case combination: ({:?}, {:?})", self, last)
+            }
+        }
+    }
+}

#[cfg(test)]
@@ -387,7 +398,7 @@ mod test {

    #[test]
    fn tokenize_empty_is_empty() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();

        let input = "";
        let expected: Vec<Identifier> = vec![];
@@ -399,7 +410,7 @@ mod test {

    #[test]
    fn tokenize_word_is_word() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();

        let input = "word";
        let expected: Vec<Identifier> = vec![Identifier::new_unchecked("word", 0)];
@@ -411,7 +422,7 @@ mod test {

    #[test]
    fn tokenize_space_separated_words() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();

        let input = "A B";
        let expected: Vec<Identifier> = vec![
@@ -426,7 +437,7 @@ mod test {

    #[test]
    fn tokenize_dot_separated_words() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();

        let input = "A.B";
        let expected: Vec<Identifier> = vec![
@@ -441,7 +452,7 @@ mod test {

    #[test]
    fn tokenize_namespace_separated_words() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();

        let input = "A::B";
        let expected: Vec<Identifier> = vec![
@@ -456,7 +467,7 @@ mod test {

    #[test]
    fn tokenize_underscore_doesnt_separate() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();

        let input = "A_B";
        let expected: Vec<Identifier> = vec![Identifier::new_unchecked("A_B", 0)];
@@ -468,7 +479,7 @@ mod test {

    #[test]
    fn tokenize_ignore_hex_enabled() {
-        let parser = ParserBuilder::new().ignore_hex(true).build();
+        let parser = TokenizerBuilder::new().ignore_hex(true).build();

        let input = "Hello 0xDEADBEEF World";
        let expected: Vec<Identifier> = vec![
@@ -483,7 +494,7 @@ mod test {

    #[test]
    fn tokenize_ignore_hex_disabled() {
-        let parser = ParserBuilder::new()
+        let parser = TokenizerBuilder::new()
            .ignore_hex(false)
            .leading_digits(true)
            .build();
@@ -523,11 +534,11 @@ mod test {
            &[("A", Case::Scream, 0), ("String", Case::Title, 1)],
        ),
        (
-            "SimpleXMLParser",
+            "SimpleXMLTokenizer",
            &[
                ("Simple", Case::Title, 0),
                ("XML", Case::Scream, 6),
-                ("Parser", Case::Title, 9),
+                ("Tokenizer", Case::Title, 9),
            ],
        ),
        (
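
For reference, a short sketch of the renamed tokenizer in use, mirroring the updated tests above (illustrative only; the expected output follows the ("Simple", Case::Title, 0) table in the test data):

    // Hypothetical driver for the TokenizerBuilder/Tokenizer API shown above.
    let tokenizer = typos::tokens::TokenizerBuilder::new()
        .ignore_hex(true)
        .build();
    for ident in tokenizer.parse_str("SimpleXMLTokenizer") {
        for word in ident.split() {
            // Yields "Simple" at offset 0, "XML" at 6, "Tokenizer" at 9.
            println!("{} @ {}", word.token(), word.offset());
        }
    }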
10
src/args.rs
10
src/args.rs
|
@@ -12,13 +12,13 @@ arg_enum! {
    }
}

pub const PRINT_SILENT: typos::report::PrintSilent = typos::report::PrintSilent;
pub const PRINT_BRIEF: typos::report::PrintBrief = typos::report::PrintBrief;
pub const PRINT_LONG: typos::report::PrintLong = typos::report::PrintLong;
pub const PRINT_JSON: typos::report::PrintJson = typos::report::PrintJson;
pub const PRINT_SILENT: typos_cli::report::PrintSilent = typos_cli::report::PrintSilent;
pub const PRINT_BRIEF: typos_cli::report::PrintBrief = typos_cli::report::PrintBrief;
pub const PRINT_LONG: typos_cli::report::PrintLong = typos_cli::report::PrintLong;
pub const PRINT_JSON: typos_cli::report::PrintJson = typos_cli::report::PrintJson;

impl Format {
    pub(crate) fn reporter(self) -> &'static dyn typos::report::Report {
    pub(crate) fn reporter(self) -> &'static dyn typos_cli::report::Report {
        match self {
            Format::Silent => &PRINT_SILENT,
            Format::Brief => &PRINT_BRIEF,

639
src/checks.rs
@@ -1,27 +1,637 @@
pub(crate) fn check_path(
use bstr::ByteSlice;

use crate::report;
use typos::tokens;
use typos::Dictionary;

pub trait FileChecker: Send + Sync {
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        parser: &tokens::Tokenizer,
        dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error>;
}
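
The new `FileChecker` trait is the seam between directory walking and per-file behavior: `walk_entry` at the bottom of this file calls `check_file` once per discovered file, and each CLI mode below is just a different implementor. Not part of this commit, a minimal sketch of a custom implementor, assuming the crate layout introduced here (the `CountFiles` name is hypothetical):

use typos::{tokens, Dictionary};
use typos_cli::{checks, report};

/// Hypothetical checker that only counts the files it is handed.
struct CountFiles(std::sync::atomic::AtomicUsize);

impl checks::FileChecker for CountFiles {
    fn check_file(
        &self,
        _path: &std::path::Path,
        _explicit: bool,
        _parser: &tokens::Tokenizer,
        _dictionary: &dyn Dictionary,
        _reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        self.0.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
        Ok(())
    }
}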

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TyposSettings {
    check_filenames: bool,
    check_files: bool,
    binary: bool,
}

impl TyposSettings {
    pub fn new() -> Self {
        Default::default()
    }

    pub fn check_filenames(&mut self, yes: bool) -> &mut Self {
        self.check_filenames = yes;
        self
    }

    pub fn check_files(&mut self, yes: bool) -> &mut Self {
        self.check_files = yes;
        self
    }

    pub fn binary(&mut self, yes: bool) -> &mut Self {
        self.binary = yes;
        self
    }

    pub fn build_typos(&self) -> Typos {
        Typos {
            check_filenames: self.check_filenames,
            check_files: self.check_files,
            binary: self.binary,
        }
    }

    pub fn build_fix_typos(&self) -> FixTypos {
        FixTypos {
            check_filenames: self.check_filenames,
            check_files: self.check_files,
            binary: self.binary,
        }
    }

    pub fn build_diff_typos(&self) -> DiffTypos {
        DiffTypos {
            check_filenames: self.check_filenames,
            check_files: self.check_files,
            binary: self.binary,
        }
    }

    pub fn build_identifier_parser(&self) -> Identifiers {
        Identifiers {
            check_filenames: self.check_filenames,
            check_files: self.check_files,
            binary: self.binary,
        }
    }

    pub fn build_word_parser(&self) -> Words {
        Words {
            check_filenames: self.check_filenames,
            check_files: self.check_files,
            binary: self.binary,
        }
    }

    pub fn build_files(&self) -> FoundFiles {
        FoundFiles {
            binary: self.binary,
        }
    }
}

impl Default for TyposSettings {
    fn default() -> Self {
        Self {
            check_filenames: true,
            check_files: true,
            binary: false,
        }
    }
}
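
`TyposSettings` is a plain builder: the setters return `&mut Self` so they chain, and each `build_*` method snapshots the three flags into one of the checker types below. A minimal usage sketch, mirroring what `src/main.rs` does later in this commit:

let mut settings = typos_cli::checks::TyposSettings::new();
settings.check_filenames(true).check_files(true).binary(false);
let checker = settings.build_typos(); // or build_fix_typos(), build_files(), ...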

#[derive(Debug, Clone)]
pub struct Typos {
    check_filenames: bool,
    check_files: bool,
    binary: bool,
}

impl FileChecker for Typos {
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        tokenizer: &tokens::Tokenizer,
        dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        let parser = typos::ParserBuilder::new()
            .tokenizer(tokenizer)
            .dictionary(dictionary)
            .build();

        if self.check_filenames {
            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
                for typo in parser.parse_str(file_name) {
                    let msg = report::Typo {
                        context: Some(report::PathContext { path }.into()),
                        buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
                        byte_offset: typo.byte_offset,
                        typo: typo.typo.as_ref(),
                        corrections: typo.corrections,
                    };
                    reporter.report(msg.into())?;
                }
            }
        }

        if self.check_files {
            let (buffer, content_type) = read_file(path, reporter)?;
            if !explicit && !self.binary && content_type.is_binary() {
                let msg = report::BinaryFile { path };
                reporter.report(msg.into())?;
            } else {
                let mut accum_line_num = AccumulateLineNum::new();
                for typo in parser.parse_bytes(&buffer) {
                    let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
                    let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
                    let msg = report::Typo {
                        context: Some(report::FileContext { path, line_num }.into()),
                        buffer: std::borrow::Cow::Borrowed(line),
                        byte_offset: line_offset,
                        typo: typo.typo.as_ref(),
                        corrections: typo.corrections,
                    };
                    reporter.report(msg.into())?;
                }
            }
        }

        Ok(())
    }
}

#[derive(Debug, Clone)]
pub struct FixTypos {
    check_filenames: bool,
    check_files: bool,
    binary: bool,
}

impl FileChecker for FixTypos {
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        tokenizer: &tokens::Tokenizer,
        dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        let parser = typos::ParserBuilder::new()
            .tokenizer(tokenizer)
            .dictionary(dictionary)
            .build();

        if self.check_files {
            let (buffer, content_type) = read_file(path, reporter)?;
            if !explicit && !self.binary && content_type.is_binary() {
                let msg = report::BinaryFile { path };
                reporter.report(msg.into())?;
            } else {
                let mut fixes = Vec::new();
                let mut accum_line_num = AccumulateLineNum::new();
                for typo in parser.parse_bytes(&buffer) {
                    if is_fixable(&typo) {
                        fixes.push(typo.into_owned());
                    } else {
                        let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
                        let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
                        let msg = report::Typo {
                            context: Some(report::FileContext { path, line_num }.into()),
                            buffer: std::borrow::Cow::Borrowed(line),
                            byte_offset: line_offset,
                            typo: typo.typo.as_ref(),
                            corrections: typo.corrections,
                        };
                        reporter.report(msg.into())?;
                    }
                }
                if !fixes.is_empty() {
                    let buffer = fix_buffer(buffer, fixes.into_iter());
                    write_file(path, content_type, &buffer, reporter)?;
                }
            }
        }

        // Ensure the above write can happen before renaming the file.
        if self.check_filenames {
            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
                let mut fixes = Vec::new();
                for typo in parser.parse_str(file_name) {
                    if is_fixable(&typo) {
                        fixes.push(typo.into_owned());
                    } else {
                        let msg = report::Typo {
                            context: Some(report::PathContext { path }.into()),
                            buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
                            byte_offset: typo.byte_offset,
                            typo: typo.typo.as_ref(),
                            corrections: typo.corrections,
                        };
                        reporter.report(msg.into())?;
                    }
                }
                if !fixes.is_empty() {
                    let file_name = file_name.to_owned().into_bytes();
                    let new_name = fix_buffer(file_name, fixes.into_iter());
                    let new_name =
                        String::from_utf8(new_name).expect("corrections are valid utf-8");
                    let new_path = path.with_file_name(new_name);
                    std::fs::rename(path, new_path)?;
                }
            }
        }

        Ok(())
    }
}

#[derive(Debug, Clone)]
pub struct DiffTypos {
    check_filenames: bool,
    check_files: bool,
    binary: bool,
}

impl FileChecker for DiffTypos {
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        tokenizer: &tokens::Tokenizer,
        dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        let parser = typos::ParserBuilder::new()
            .tokenizer(tokenizer)
            .dictionary(dictionary)
            .build();

        let mut content = Vec::new();
        let mut new_content = Vec::new();
        if self.check_files {
            let (buffer, content_type) = read_file(path, reporter)?;
            if !explicit && !self.binary && content_type.is_binary() {
                let msg = report::BinaryFile { path };
                reporter.report(msg.into())?;
            } else {
                let mut fixes = Vec::new();
                let mut accum_line_num = AccumulateLineNum::new();
                for typo in parser.parse_bytes(&buffer) {
                    if is_fixable(&typo) {
                        fixes.push(typo.into_owned());
                    } else {
                        let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
                        let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
                        let msg = report::Typo {
                            context: Some(report::FileContext { path, line_num }.into()),
                            buffer: std::borrow::Cow::Borrowed(line),
                            byte_offset: line_offset,
                            typo: typo.typo.as_ref(),
                            corrections: typo.corrections,
                        };
                        reporter.report(msg.into())?;
                    }
                }
                if !fixes.is_empty() {
                    new_content = fix_buffer(buffer.clone(), fixes.into_iter());
                    content = buffer
                }
            }
        }

        // Match FixTypos ordering for easy diffing.
        let mut new_path = None;
        if self.check_filenames {
            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
                let mut fixes = Vec::new();
                for typo in parser.parse_str(file_name) {
                    if is_fixable(&typo) {
                        fixes.push(typo.into_owned());
                    } else {
                        let msg = report::Typo {
                            context: Some(report::PathContext { path }.into()),
                            buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
                            byte_offset: typo.byte_offset,
                            typo: typo.typo.as_ref(),
                            corrections: typo.corrections,
                        };
                        reporter.report(msg.into())?;
                    }
                }
                if !fixes.is_empty() {
                    let file_name = file_name.to_owned().into_bytes();
                    let new_name = fix_buffer(file_name, fixes.into_iter());
                    let new_name =
                        String::from_utf8(new_name).expect("corrections are valid utf-8");
                    new_path = Some(path.with_file_name(new_name));
                }
            }
        }

        if new_path.is_some() || !content.is_empty() {
            let original_path = path.display().to_string();
            let fixed_path = new_path.as_deref().unwrap_or(path).display().to_string();
            let original_content: Vec<_> = content
                .lines_with_terminator()
                .map(|s| String::from_utf8_lossy(s).into_owned())
                .collect();
            let fixed_content: Vec<_> = new_content
                .lines_with_terminator()
                .map(|s| String::from_utf8_lossy(s).into_owned())
                .collect();
            let diff = difflib::unified_diff(
                &original_content,
                &fixed_content,
                original_path.as_str(),
                fixed_path.as_str(),
                "original",
                "fixed",
                0,
            );
            for line in diff {
                print!("{}", line);
            }
        }

        Ok(())
    }
}

#[derive(Debug, Clone)]
pub struct Identifiers {
    check_filenames: bool,
    check_files: bool,
    binary: bool,
}

impl FileChecker for Identifiers {
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        tokenizer: &tokens::Tokenizer,
        _dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        if self.check_filenames {
            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
                for word in tokenizer.parse_str(file_name) {
                    let msg = report::Parse {
                        context: Some(report::PathContext { path }.into()),
                        kind: report::ParseKind::Identifier,
                        data: word.token(),
                    };
                    reporter.report(msg.into())?;
                }
            }
        }

        if self.check_files {
            let (buffer, content_type) = read_file(path, reporter)?;
            if !explicit && !self.binary && content_type.is_binary() {
                let msg = report::BinaryFile { path };
                reporter.report(msg.into())?;
            } else {
                for word in tokenizer.parse_bytes(&buffer) {
                    // HACK: Don't look up the line_num per entry to better match the performance
                    // of Typos for comparison purposes. We don't really get much out of it
                    // anyway.
                    let line_num = 0;
                    let msg = report::Parse {
                        context: Some(report::FileContext { path, line_num }.into()),
                        kind: report::ParseKind::Identifier,
                        data: word.token(),
                    };
                    reporter.report(msg.into())?;
                }
            }
        }

        Ok(())
    }
}

#[derive(Debug, Clone)]
pub struct Words {
    check_filenames: bool,
    check_files: bool,
    binary: bool,
}

impl FileChecker for Words {
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        tokenizer: &tokens::Tokenizer,
        _dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        if self.check_filenames {
            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
                for word in tokenizer.parse_str(file_name).flat_map(|i| i.split()) {
                    let msg = report::Parse {
                        context: Some(report::PathContext { path }.into()),
                        kind: report::ParseKind::Word,
                        data: word.token(),
                    };
                    reporter.report(msg.into())?;
                }
            }
        }

        if self.check_files {
            let (buffer, content_type) = read_file(path, reporter)?;
            if !explicit && !self.binary && content_type.is_binary() {
                let msg = report::BinaryFile { path };
                reporter.report(msg.into())?;
            } else {
                for word in tokenizer.parse_bytes(&buffer).flat_map(|i| i.split()) {
                    // HACK: Don't look up the line_num per entry to better match the performance
                    // of Typos for comparison purposes. We don't really get much out of it
                    // anyway.
                    let line_num = 0;
                    let msg = report::Parse {
                        context: Some(report::FileContext { path, line_num }.into()),
                        kind: report::ParseKind::Word,
                        data: word.token(),
                    };
                    reporter.report(msg.into())?;
                }
            }
        }

        Ok(())
    }
}

#[derive(Debug, Clone)]
pub struct FoundFiles {
    binary: bool,
}

impl FileChecker for FoundFiles {
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        _parser: &tokens::Tokenizer,
        _dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        // Check `self.binary` first so we can easily check performance of walking vs reading
        if self.binary {
            let msg = report::File::new(path);
            reporter.report(msg.into())?;
        } else {
            let (_buffer, content_type) = read_file(path, reporter)?;
            if !explicit && content_type.is_binary() {
                let msg = report::BinaryFile { path };
                reporter.report(msg.into())?;
            } else {
                let msg = report::File::new(path);
                reporter.report(msg.into())?;
            }
        }

        Ok(())
    }
}

pub fn read_file(
    path: &std::path::Path,
    reporter: &dyn report::Report,
) -> Result<(Vec<u8>, content_inspector::ContentType), std::io::Error> {
    let buffer = match std::fs::read(path) {
        Ok(buffer) => buffer,
        Err(err) => {
            let msg = report::Error::new(err.to_string());
            reporter.report(msg.into())?;
            Vec::new()
        }
    };

    let mut content_type = content_inspector::inspect(&buffer);
    // HACK: We only support UTF-8 at the moment
    if content_type != content_inspector::ContentType::UTF_8_BOM
        && content_type != content_inspector::ContentType::UTF_8
    {
        content_type = content_inspector::ContentType::BINARY;
    }

    Ok((buffer, content_type))
}

pub fn write_file(
    path: &std::path::Path,
    content_type: content_inspector::ContentType,
    buffer: &[u8],
    reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
    assert!(
        content_type == content_inspector::ContentType::UTF_8_BOM
            || content_type == content_inspector::ContentType::UTF_8
            || content_type == content_inspector::ContentType::BINARY
    );
    match std::fs::write(path, buffer) {
        Ok(()) => (),
        Err(err) => {
            let msg = report::Error::new(err.to_string());
            reporter.report(msg.into())?;
        }
    };
    Ok(())
}

struct AccumulateLineNum {
    line_num: usize,
    last_offset: usize,
}

impl AccumulateLineNum {
    fn new() -> Self {
        Self {
            // 1-indexed
            line_num: 1,
            last_offset: 0,
        }
    }

    fn line_num(&mut self, buffer: &[u8], byte_offset: usize) -> usize {
        assert!(self.last_offset <= byte_offset);
        let slice = &buffer[self.last_offset..byte_offset];
        let newlines = slice.lines().count();
        let line_num = self.line_num + newlines;
        self.line_num = line_num;
        self.last_offset = byte_offset;
        line_num
    }
}
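
`AccumulateLineNum` works because typos are yielded in ascending byte order, so each lookup only has to count newlines between the previous typo and the current one instead of re-scanning the whole file prefix every time. A rough sketch of the access pattern (offsets invented for illustration; the type is private to this module):

let buffer = b"one\ntwo\nthree\n";
let mut accum = AccumulateLineNum::new();
let line_a = accum.line_num(buffer, 5); // scans buffer[0..5]
let line_b = accum.line_num(buffer, 9); // scans only buffer[5..9]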

fn extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) {
    let line_start = buffer[0..byte_offset]
        .rfind_byte(b'\n')
        // Skip the newline
        .map(|s| s + 1)
        .unwrap_or(0);
    let line = buffer[line_start..]
        .lines()
        .next()
        .expect("should always be at least a line");
    let line_offset = byte_offset - line_start;
    (line, line_offset)
}

fn extract_fix<'t>(typo: &'t typos::Typo<'t>) -> Option<&'t str> {
    match &typo.corrections {
        typos::Status::Corrections(c) if c.len() == 1 => Some(c[0].as_ref()),
        _ => None,
    }
}

fn is_fixable(typo: &typos::Typo<'_>) -> bool {
    extract_fix(typo).is_some()
}

fn fix_buffer(mut buffer: Vec<u8>, typos: impl Iterator<Item = typos::Typo<'static>>) -> Vec<u8> {
    let mut offset = 0isize;
    for typo in typos {
        let fix = extract_fix(&typo).expect("Caller only provides fixable typos");
        let start = ((typo.byte_offset as isize) + offset) as usize;
        let end = start + typo.typo.len();

        buffer.splice(start..end, fix.as_bytes().iter().copied());

        offset += (fix.len() as isize) - (typo.typo.len() as isize);
    }
    buffer
}
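
`fix_buffer` applies corrections in ascending offset order while carrying a signed `offset`, so earlier replacements that grow or shrink the buffer do not invalidate later byte offsets, which were computed against the original content. A self-contained illustration of the same splice-plus-offset arithmetic, independent of the `Typo` type:

fn apply(mut buffer: Vec<u8>, fixes: &[(usize, &str, &str)]) -> Vec<u8> {
    let mut offset = 0isize; // net growth/shrink applied so far
    for (byte_offset, typo, fix) in fixes {
        let start = ((*byte_offset as isize) + offset) as usize;
        let end = start + typo.len();
        buffer.splice(start..end, fix.bytes());
        offset += (fix.len() as isize) - (typo.len() as isize);
    }
    buffer
}

// "foo" at offset 0 grows to "happy" (+2 bytes), so the fix recorded at
// original offset 4 must land at 4 + 2 = 6 in the edited buffer.
let out = apply(b"foo bar".to_vec(), &[(0, "foo", "happy"), (4, "bar", "ok")]);
assert_eq!(out, b"happy ok");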

pub fn walk_path(
    walk: ignore::Walk,
    checks: &dyn typos::checks::Check,
    parser: &typos::tokens::Parser,
    checks: &dyn FileChecker,
    parser: &typos::tokens::Tokenizer,
    dictionary: &dyn typos::Dictionary,
    reporter: &dyn typos::report::Report,
    reporter: &dyn report::Report,
) -> Result<(), ignore::Error> {
    for entry in walk {
        check_entry(entry, checks, parser, dictionary, reporter)?;
        walk_entry(entry, checks, parser, dictionary, reporter)?;
    }
    Ok(())
}

pub(crate) fn check_path_parallel(
pub fn walk_path_parallel(
    walk: ignore::WalkParallel,
    checks: &dyn typos::checks::Check,
    parser: &typos::tokens::Parser,
    checks: &dyn FileChecker,
    parser: &typos::tokens::Tokenizer,
    dictionary: &dyn typos::Dictionary,
    reporter: &dyn typos::report::Report,
    reporter: &dyn report::Report,
) -> Result<(), ignore::Error> {
    let error: std::sync::Mutex<Result<(), ignore::Error>> = std::sync::Mutex::new(Ok(()));
    walk.run(|| {
        Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
            match check_entry(entry, checks, parser, dictionary, reporter) {
            match walk_entry(entry, checks, parser, dictionary, reporter) {
                Ok(()) => ignore::WalkState::Continue,
                Err(err) => {
                    *error.lock().unwrap() = Err(err);

@@ -34,17 +644,16 @@ pub(crate) fn check_path_parallel(
    error.into_inner().unwrap()
}

fn check_entry(
fn walk_entry(
    entry: Result<ignore::DirEntry, ignore::Error>,
    checks: &dyn typos::checks::Check,
    parser: &typos::tokens::Parser,
    checks: &dyn FileChecker,
    parser: &typos::tokens::Tokenizer,
    dictionary: &dyn typos::Dictionary,
    reporter: &dyn typos::report::Report,
    reporter: &dyn report::Report,
) -> Result<(), ignore::Error> {
    let entry = entry?;
    if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
        let explicit = entry.depth() == 0;
        checks.check_filename(entry.path(), parser, dictionary, reporter)?;
        checks.check_file(entry.path(), explicit, parser, dictionary, reporter)?;
    }
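
`walk_entry` treats `entry.depth() == 0` as `explicit`: a path the user named directly on the command line, which is why the checkers above skip their binary-file bail-out for it. Not part of the commit, a sketch of driving the serial walker, assuming the `ignore` crate's builder API (`check_tree` and the `"src/"` root are hypothetical):

fn check_tree(
    tokenizer: &typos::tokens::Tokenizer,
    dictionary: &dyn typos::Dictionary,
    reporter: &dyn typos_cli::report::Report,
) -> Result<(), ignore::Error> {
    let walk = ignore::WalkBuilder::new("src/").build();
    let checker = typos_cli::checks::TyposSettings::new().build_typos();
    typos_cli::checks::walk_path(walk, &checker, tokenizer, dictionary, reporter)
}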

93
src/diff.rs
@@ -1,93 +0,0 @@
use std::collections::BTreeMap;
use std::sync;

use bstr::ByteSlice;

pub struct Diff<'r> {
    reporter: &'r dyn typos::report::Report,
    deferred: sync::Mutex<crate::replace::Deferred>,
}

impl<'r> Diff<'r> {
    pub(crate) fn new(reporter: &'r dyn typos::report::Report) -> Self {
        Self {
            reporter,
            deferred: sync::Mutex::new(crate::replace::Deferred::default()),
        }
    }

    pub fn show(&self) -> Result<(), std::io::Error> {
        let deferred = self.deferred.lock().unwrap();

        for (path, corrections) in deferred.content.iter() {
            let buffer = std::fs::read(path)?;

            let mut original = Vec::new();
            let mut corrected = Vec::new();
            for (line_idx, line) in buffer.lines_with_terminator().enumerate() {
                original.push(String::from_utf8_lossy(line).into_owned());

                let line_num = line_idx + 1;
                let line = if let Some(corrections) = corrections.get(&line_num) {
                    let line = line.to_vec();
                    crate::replace::correct(line, &corrections)
                } else {
                    line.to_owned()
                };
                corrected.push(String::from_utf8_lossy(&line).into_owned())
            }

            let display_path = path.display().to_string();
            let diff = difflib::unified_diff(
                &original,
                &corrected,
                display_path.as_str(),
                display_path.as_str(),
                "original",
                "corrected",
                0,
            );
            for line in diff {
                print!("{}", line);
            }
        }

        Ok(())
    }
}

impl<'r> typos::report::Report for Diff<'r> {
    fn report(&self, msg: typos::report::Message<'_>) -> Result<(), std::io::Error> {
        let typo = match &msg {
            typos::report::Message::Typo(typo) => typo,
            _ => return self.reporter.report(msg),
        };

        let corrections = match &typo.corrections {
            typos::Status::Corrections(corrections) if corrections.len() == 1 => corrections,
            _ => return self.reporter.report(msg),
        };

        match &typo.context {
            Some(typos::report::Context::File(file)) => {
                let path = file.path.to_owned();
                let line_num = file.line_num;
                let correction = crate::replace::Correction::new(
                    typo.byte_offset,
                    typo.typo,
                    corrections[0].as_ref(),
                );
                let mut deferred = self.deferred.lock().unwrap();
                let content = deferred
                    .content
                    .entry(path)
                    .or_insert_with(BTreeMap::new)
                    .entry(line_num)
                    .or_insert_with(Vec::new);
                content.push(correction);
                Ok(())
            }
            _ => self.reporter.report(msg),
        }
    }
}

@@ -1,2 +1,4 @@
pub mod checks;
pub mod config;
pub mod dict;
pub mod report;

46
src/main.rs
@@ -7,11 +7,10 @@ use std::io::Write;
use structopt::StructOpt;

mod args;
mod checks;
mod config;
mod dict;
mod diff;
mod replace;
use typos_cli::checks;
use typos_cli::config;
use typos_cli::dict;
use typos_cli::report;

use proc_exit::WithCodeResultExt;

@@ -61,7 +60,7 @@ fn run() -> proc_exit::ExitResult {
    config.default.update(&args.overrides);
    let config = config;

    let parser = typos::tokens::ParserBuilder::new()
    let parser = typos::tokens::TokenizerBuilder::new()
        .ignore_hex(config.default.ignore_hex())
        .leading_digits(config.default.identifier_leading_digits())
        .leading_chars(config.default.identifier_leading_chars().to_owned())

@@ -74,7 +73,7 @@ fn run() -> proc_exit::ExitResult {
    dictionary.identifiers(config.default.extend_identifiers());
    dictionary.words(config.default.extend_words());

    let mut settings = typos::checks::TyposSettings::new();
    let mut settings = checks::TyposSettings::new();
    settings
        .check_filenames(config.default.check_filename())
        .check_files(config.default.check_file())

@@ -98,18 +97,11 @@ fn run() -> proc_exit::ExitResult {
    } else {
        args.format.reporter()
    };
    let status_reporter = typos::report::MessageStatus::new(output_reporter);
    let mut reporter: &dyn typos::report::Report = &status_reporter;
    let replace_reporter = replace::Replace::new(reporter);
    let diff_reporter = diff::Diff::new(reporter);
    if args.diff {
        reporter = &diff_reporter;
    } else if args.write_changes {
        reporter = &replace_reporter;
    }
    let status_reporter = report::MessageStatus::new(output_reporter);
    let reporter: &dyn report::Report = &status_reporter;

    let (files, identifier_parser, word_parser, checks);
    let selected_checks: &dyn typos::checks::Check = if args.files {
    let (files, identifier_parser, word_parser, checks, fixer, differ);
    let selected_checks: &dyn checks::FileChecker = if args.files {
        files = settings.build_files();
        &files
    } else if args.identifiers {

@@ -118,13 +110,19 @@ fn run() -> proc_exit::ExitResult {
    } else if args.words {
        word_parser = settings.build_word_parser();
        &word_parser
    } else if args.write_changes {
        fixer = settings.build_fix_typos();
        &fixer
    } else if args.diff {
        differ = settings.build_diff_typos();
        &differ
    } else {
        checks = settings.build_typos();
        &checks
    };

    if single_threaded {
        checks::check_path(
        checks::walk_path(
            walk.build(),
            selected_checks,
            &parser,

@@ -132,7 +130,7 @@ fn run() -> proc_exit::ExitResult {
            reporter,
        )
    } else {
        checks::check_path_parallel(
        checks::walk_path_parallel(
            walk.build_parallel(),
            selected_checks,
            &parser,

@@ -152,14 +150,6 @@ fn run() -> proc_exit::ExitResult {
        if status_reporter.errors_found() {
            errors_found = true;
        }

        if args.diff {
            diff_reporter.show().with_code(proc_exit::Code::FAILURE)?;
        } else if args.write_changes {
            replace_reporter
                .write()
                .with_code(proc_exit::Code::FAILURE)?;
        }
    }

    if errors_found {

263
src/replace.rs
@@ -1,263 +0,0 @@
use std::collections::BTreeMap;
use std::io::Write;
use std::path;
use std::sync;

use bstr::ByteSlice;

pub struct Replace<'r> {
    reporter: &'r dyn typos::report::Report,
    deferred: sync::Mutex<Deferred>,
}

impl<'r> Replace<'r> {
    pub(crate) fn new(reporter: &'r dyn typos::report::Report) -> Self {
        Self {
            reporter,
            deferred: sync::Mutex::new(Deferred::default()),
        }
    }

    pub fn write(&self) -> Result<(), std::io::Error> {
        let deferred = self.deferred.lock().unwrap();

        for (path, corrections) in deferred.content.iter() {
            let buffer = std::fs::read(path)?;

            let mut file = std::fs::File::create(path)?;
            for (line_idx, line) in buffer.lines_with_terminator().enumerate() {
                let line_num = line_idx + 1;
                if let Some(corrections) = corrections.get(&line_num) {
                    let line = line.to_vec();
                    let line = correct(line, &corrections);
                    file.write_all(&line)?;
                } else {
                    file.write_all(&line)?;
                }
            }
        }

        for (path, corrections) in deferred.paths.iter() {
            let orig_name = path
                .file_name()
                .and_then(|s| s.to_str())
                .expect("generating a correction requires the filename to be valid.")
                .to_owned()
                .into_bytes();
            let new_name = correct(orig_name, &corrections);
            let new_name = String::from_utf8(new_name).expect("corrections are valid utf-8");
            let new_path = path.with_file_name(new_name);
            std::fs::rename(path, new_path)?;
        }

        Ok(())
    }
}

impl<'r> typos::report::Report for Replace<'r> {
    fn report(&self, msg: typos::report::Message<'_>) -> Result<(), std::io::Error> {
        let typo = match &msg {
            typos::report::Message::Typo(typo) => typo,
            _ => return self.reporter.report(msg),
        };

        let corrections = match &typo.corrections {
            typos::Status::Corrections(corrections) if corrections.len() == 1 => corrections,
            _ => return self.reporter.report(msg),
        };

        match &typo.context {
            Some(typos::report::Context::File(file)) => {
                let path = file.path.to_owned();
                let line_num = file.line_num;
                let correction =
                    Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref());
                let mut deferred = self.deferred.lock().unwrap();
                let content = deferred
                    .content
                    .entry(path)
                    .or_insert_with(BTreeMap::new)
                    .entry(line_num)
                    .or_insert_with(Vec::new);
                content.push(correction);
                Ok(())
            }
            Some(typos::report::Context::Path(path)) => {
                let path = path.path.to_owned();
                let correction =
                    Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref());
                let mut deferred = self.deferred.lock().unwrap();
                let content = deferred.paths.entry(path).or_insert_with(Vec::new);
                content.push(correction);
                Ok(())
            }
            _ => self.reporter.report(msg),
        }
    }
}

#[derive(Clone, Debug, Default)]
pub(crate) struct Deferred {
    pub(crate) content: BTreeMap<path::PathBuf, BTreeMap<usize, Vec<Correction>>>,
    pub(crate) paths: BTreeMap<path::PathBuf, Vec<Correction>>,
}

#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)]
pub(crate) struct Correction {
    pub byte_offset: usize,
    pub typo: Vec<u8>,
    pub correction: Vec<u8>,
}

impl Correction {
    pub(crate) fn new(byte_offset: usize, typo: &str, correction: &str) -> Self {
        Self {
            byte_offset,
            typo: typo.as_bytes().to_vec(),
            correction: correction.as_bytes().to_vec(),
        }
    }
}

pub(crate) fn correct(mut line: Vec<u8>, corrections: &[Correction]) -> Vec<u8> {
    let mut corrections: Vec<_> = corrections.iter().collect();
    corrections.sort_unstable();
    corrections.reverse();

    for correction in corrections {
        let start = correction.byte_offset;
        let end = start + correction.typo.len();
        line.splice(start..end, correction.correction.iter().copied());
    }

    line
}

#[cfg(test)]
mod test {
    use super::*;

    use assert_fs::prelude::*;
    use typos::report::Report;

    fn simple_correct(line: &str, corrections: Vec<(usize, &str, &str)>) -> String {
        let line = line.as_bytes().to_vec();
        let corrections: Vec<_> = corrections
            .into_iter()
            .map(|(byte_offset, typo, correction)| Correction {
                byte_offset,
                typo: typo.as_bytes().to_vec(),
                correction: correction.as_bytes().to_vec(),
            })
            .collect();
        let actual = correct(line, &corrections);
        String::from_utf8(actual).unwrap()
    }

    #[test]
    fn test_correct_single() {
        let actual = simple_correct("foo foo foo", vec![(4, "foo", "bar")]);
        assert_eq!(actual, "foo bar foo");
    }

    #[test]
    fn test_correct_single_grow() {
        let actual = simple_correct("foo foo foo", vec![(4, "foo", "happy")]);
        assert_eq!(actual, "foo happy foo");
    }

    #[test]
    fn test_correct_single_shrink() {
        let actual = simple_correct("foo foo foo", vec![(4, "foo", "if")]);
        assert_eq!(actual, "foo if foo");
    }

    #[test]
    fn test_correct_start() {
        let actual = simple_correct("foo foo foo", vec![(0, "foo", "bar")]);
        assert_eq!(actual, "bar foo foo");
    }

    #[test]
    fn test_correct_end() {
        let actual = simple_correct("foo foo foo", vec![(8, "foo", "bar")]);
        assert_eq!(actual, "foo foo bar");
    }

    #[test]
    fn test_correct_end_grow() {
        let actual = simple_correct("foo foo foo", vec![(8, "foo", "happy")]);
        assert_eq!(actual, "foo foo happy");
    }

    #[test]
    fn test_correct_multiple() {
        let actual = simple_correct(
            "foo foo foo",
            vec![(4, "foo", "happy"), (8, "foo", "world")],
        );
        assert_eq!(actual, "foo happy world");
    }

    #[test]
    fn test_replace_content() {
        let temp = assert_fs::TempDir::new().unwrap();
        let input_file = temp.child("foo.txt");
        input_file.write_str("1 foo 2\n3 4 5").unwrap();

        let primary = typos::report::PrintSilent;
        let replace = Replace::new(&primary);
        replace
            .report(
                typos::report::Typo::default()
                    .context(Some(
                        typos::report::FileContext::default()
                            .path(input_file.path())
                            .line_num(1)
                            .into(),
                    ))
                    .buffer(std::borrow::Cow::Borrowed(b"1 foo 2\n3 4 5"))
                    .byte_offset(2)
                    .typo("foo")
                    .corrections(typos::Status::Corrections(vec![
                        std::borrow::Cow::Borrowed("bar"),
                    ]))
                    .into(),
            )
            .unwrap();
        replace.write().unwrap();

        input_file.assert("1 bar 2\n3 4 5");
    }

    #[test]
    fn test_replace_path() {
        let temp = assert_fs::TempDir::new().unwrap();
        let input_file = temp.child("foo.txt");
        input_file.write_str("foo foo foo").unwrap();

        let primary = typos::report::PrintSilent;
        let replace = Replace::new(&primary);
        replace
            .report(
                typos::report::Typo::default()
                    .context(Some(
                        typos::report::PathContext::default()
                            .path(input_file.path())
                            .into(),
                    ))
                    .buffer(std::borrow::Cow::Borrowed(b"foo.txt"))
                    .byte_offset(0)
                    .typo("foo")
                    .corrections(typos::Status::Corrections(vec![
                        std::borrow::Cow::Borrowed("bar"),
                    ]))
                    .into(),
            )
            .unwrap();
        replace.write().unwrap();

        input_file.assert(predicates::path::missing());
        temp.child("bar.txt").assert("foo foo foo");
    }
}

@@ -72,7 +72,7 @@ pub struct Typo<'m> {
    pub buffer: Cow<'m, [u8]>,
    pub byte_offset: usize,
    pub typo: &'m str,
    pub corrections: crate::Status<'m>,
    pub corrections: typos::Status<'m>,
}

impl<'m> Default for Typo<'m> {

@@ -82,7 +82,7 @@ impl<'m> Default for Typo<'m> {
            buffer: Cow::Borrowed(&[]),
            byte_offset: 0,
            typo: "",
            corrections: crate::Status::Invalid,
            corrections: typos::Status::Invalid,
        }
    }
}

@@ -168,7 +168,7 @@ pub struct Parse<'m> {
    #[serde(flatten)]
    pub context: Option<Context<'m>>,
    pub kind: ParseKind,
    pub data: Vec<&'m str>,
    pub data: &'m str,
}

impl<'m> Default for Parse<'m> {

@@ -176,7 +176,7 @@ impl<'m> Default for Parse<'m> {
        Self {
            context: None,
            kind: ParseKind::Identifier,
            data: vec![],
            data: "",
        }
    }
}

@@ -234,10 +234,21 @@ impl<'r> MessageStatus<'r> {

impl<'r> Report for MessageStatus<'r> {
    fn report(&self, msg: Message) -> Result<(), std::io::Error> {
        self.typos_found
            .compare_and_swap(false, msg.is_correction(), atomic::Ordering::Relaxed);
        self.errors_found
            .compare_and_swap(false, msg.is_error(), atomic::Ordering::Relaxed);
        let _ = self.typos_found.compare_exchange(
            false,
            msg.is_correction(),
            atomic::Ordering::Relaxed,
            atomic::Ordering::Relaxed,
        );
        let _ = self
            .errors_found
            .compare_exchange(
                false,
                msg.is_error(),
                atomic::Ordering::Relaxed,
                atomic::Ordering::Relaxed,
            )
            .unwrap();
        self.reporter.report(msg)
    }
}
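
This hunk also migrates off `AtomicBool::compare_and_swap`, which Rust 1.50 deprecates in favor of `compare_exchange`. The replacement expresses the same latch-to-true-once pattern: the store happens only while the flag is still `false`, and a failed exchange (an `Err` carrying the current value) simply means the flag was already set. A minimal sketch of the semantics:

use std::sync::atomic::{AtomicBool, Ordering};

let flag = AtomicBool::new(false);
// The first exchange succeeds and latches the flag; any later one fails
// with Err(true) and leaves the value untouched.
let _ = flag.compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed);
assert!(flag.load(Ordering::Relaxed));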

@@ -265,7 +276,7 @@ impl Report for PrintBrief {
            writeln!(io::stdout(), "{}", msg.path.display())?;
        }
        Message::Parse(msg) => {
            writeln!(io::stdout(), "{}", itertools::join(msg.data.iter(), " "))?;
            writeln!(io::stdout(), "{}", msg.data)?;
        }
        Message::Error(msg) => {
            log::error!("{}: {}", context_display(&msg.context), msg.msg);

@@ -289,7 +300,7 @@ impl Report for PrintLong {
            writeln!(io::stdout(), "{}", msg.path.display())?;
        }
        Message::Parse(msg) => {
            writeln!(io::stdout(), "{}", itertools::join(msg.data.iter(), " "))?;
            writeln!(io::stdout(), "{}", msg.data)?;
        }
        Message::Error(msg) => {
            log::error!("{}: {}", context_display(&msg.context), msg.msg);

@@ -308,8 +319,8 @@ fn print_brief_correction(msg: &Typo) -> Result<(), std::io::Error> {
    )
    .count();
    match &msg.corrections {
        crate::Status::Valid => {}
        crate::Status::Invalid => {
        typos::Status::Valid => {}
        typos::Status::Invalid => {
            writeln!(
                io::stdout(),
                "{}:{}: `{}` is disallowed",

@@ -318,7 +329,7 @@ fn print_brief_correction(msg: &Typo) -> Result<(), std::io::Error> {
                msg.typo,
            )?;
        }
        crate::Status::Corrections(corrections) => {
        typos::Status::Corrections(corrections) => {
            writeln!(
                io::stdout(),
                "{}:{}: `{}` -> {}",

@@ -345,11 +356,11 @@ fn print_long_correction(msg: &Typo) -> Result<(), std::io::Error> {
    )
    .count();
    match &msg.corrections {
        crate::Status::Valid => {}
        crate::Status::Invalid => {
        typos::Status::Valid => {}
        typos::Status::Invalid => {
            writeln!(handle, "error: `{}` is disallowed`", msg.typo,)?;
        }
        crate::Status::Corrections(corrections) => {
        typos::Status::Corrections(corrections) => {
            writeln!(
                handle,
                "error: `{}` should be {}",