mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-23 09:30:57 -05:00
feat: Support English dialects
The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest English dialect. If someone wants to opt in, they can have typos corrected to a specific English dialect. Fixes #52 Fixes #22
This commit is contained in:
parent
f1cf48b6be
commit
ab4a5bbdaf
22 changed files with 236208 additions and 37310 deletions
4
.ignore
4
.ignore
|
@ -1,3 +1,3 @@
|
||||||
*_codegen.rs
|
*codegen.rs
|
||||||
assets/
|
assets/
|
||||||
typos/benches/corrections.rs
|
benches/corrections.rs
|
||||||
|
|
268
Cargo.lock
generated
268
Cargo.lock
generated
|
@ -2,9 +2,9 @@
|
||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aho-corasick"
|
name = "aho-corasick"
|
||||||
version = "0.7.10"
|
version = "0.7.13"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada"
|
checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"memchr",
|
"memchr",
|
||||||
]
|
]
|
||||||
|
@ -20,18 +20,15 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anyhow"
|
name = "anyhow"
|
||||||
version = "1.0.31"
|
version = "1.0.32"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f"
|
checksum = "6b602bfe940d21c130f3895acd65221e8a61270debe89d628b9cb4e3ccb8569b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "arrayvec"
|
name = "arrayvec"
|
||||||
version = "0.4.12"
|
version = "0.5.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
|
checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8"
|
||||||
dependencies = [
|
|
||||||
"nodrop",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "assert_fs"
|
name = "assert_fs"
|
||||||
|
@ -90,15 +87,15 @@ checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cfg-if"
|
name = "cfg-if"
|
||||||
version = "0.1.9"
|
version = "0.1.10"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
|
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "2.33.1"
|
version = "2.33.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
|
checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ansi_term",
|
"ansi_term",
|
||||||
"atty",
|
"atty",
|
||||||
|
@ -203,10 +200,10 @@ checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"fnv",
|
"fnv",
|
||||||
"ident_case",
|
"ident_case",
|
||||||
"proc-macro2 1.0.18",
|
"proc-macro2 1.0.19",
|
||||||
"quote 1.0.6",
|
"quote 1.0.7",
|
||||||
"strsim 0.9.3",
|
"strsim 0.9.3",
|
||||||
"syn 1.0.33",
|
"syn 1.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -216,8 +213,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72"
|
checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"darling_core",
|
"darling_core",
|
||||||
"quote 1.0.6",
|
"quote 1.0.7",
|
||||||
"syn 1.0.33",
|
"syn 1.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -240,9 +237,9 @@ version = "0.99.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "298998b1cf6b5b2c8a7b023dfd45821825ce3ba8a8af55c921a0e734e4653f76"
|
checksum = "298998b1cf6b5b2c8a7b023dfd45821825ce3ba8a8af55c921a0e734e4653f76"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2 1.0.18",
|
"proc-macro2 1.0.19",
|
||||||
"quote 1.0.6",
|
"quote 1.0.7",
|
||||||
"syn 1.0.33",
|
"syn 1.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -252,9 +249,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6604612c19dd3bb353650b715b61f09bcb089dd17bdca1a9a42637079bf5e428"
|
checksum = "6604612c19dd3bb353650b715b61f09bcb089dd17bdca1a9a42637079bf5e428"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"darling",
|
"darling",
|
||||||
"proc-macro2 1.0.18",
|
"proc-macro2 1.0.19",
|
||||||
"quote 1.0.6",
|
"quote 1.0.7",
|
||||||
"syn 1.0.33",
|
"syn 1.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -270,10 +267,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
|
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "either"
|
name = "edit-distance"
|
||||||
version = "1.5.3"
|
version = "2.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
|
checksum = "bbbaaaf38131deb9ca518a274a45bfdb8771f139517b073b16c2d3d32ae5037b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cd56b59865bce947ac5958779cfa508f6c3b9497cc762b7e24a12d11ccde2c4f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "enumflags2"
|
name = "enumflags2"
|
||||||
|
@ -290,9 +293,9 @@ version = "0.6.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "946ee94e3dbf58fdd324f9ce245c7b238d46a66f00e86a020b71996349e46cce"
|
checksum = "946ee94e3dbf58fdd324f9ce245c7b238d46a66f00e86a020b71996349e46cce"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2 1.0.18",
|
"proc-macro2 1.0.19",
|
||||||
"quote 1.0.6",
|
"quote 1.0.7",
|
||||||
"syn 1.0.33",
|
"syn 1.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -310,9 +313,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "float-cmp"
|
name = "float-cmp"
|
||||||
version = "0.6.0"
|
version = "0.8.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "da62c4f1b81918835a8c6a484a397775fff5953fe83529afd51b05f5c6a6617d"
|
checksum = "e1267f4ac4f343772758f7b1bdcbe767c218bbab93bb432acbf5162bbf85a6c4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"num-traits",
|
"num-traits",
|
||||||
]
|
]
|
||||||
|
@ -368,9 +371,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hermit-abi"
|
name = "hermit-abi"
|
||||||
version = "0.1.13"
|
version = "0.1.15"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "91780f809e750b0a89f5544be56617ff6b1227ee485bcb06ebe10cdf89bd3b71"
|
checksum = "3deed196b6e7f9e44a2ae8d94225d80302d81208b1bb673fd21fe634645c85a9"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
@ -419,9 +422,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itoa"
|
name = "itoa"
|
||||||
version = "0.4.5"
|
version = "0.4.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e"
|
checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lazy_static"
|
name = "lazy_static"
|
||||||
|
@ -431,29 +434,28 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lexical-core"
|
name = "lexical-core"
|
||||||
version = "0.6.7"
|
version = "0.7.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f86d66d380c9c5a685aaac7a11818bdfa1f733198dfd9ec09c70b762cd12ad6f"
|
checksum = "db65c6da02e61f55dae90a0ae427b2a5f6b3e8db09f58d10efab23af92592616"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrayvec",
|
"arrayvec",
|
||||||
"bitflags",
|
"bitflags",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"rustc_version",
|
|
||||||
"ryu",
|
"ryu",
|
||||||
"static_assertions",
|
"static_assertions",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libc"
|
name = "libc"
|
||||||
version = "0.2.71"
|
version = "0.2.74"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49"
|
checksum = "a2f02823cf78b754822df5f7f268fb59822e7296276d3e069d8e8cb26a14bd10"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "log"
|
name = "log"
|
||||||
version = "0.4.8"
|
version = "0.4.11"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7"
|
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
]
|
]
|
||||||
|
@ -486,12 +488,6 @@ dependencies = [
|
||||||
"unicase",
|
"unicase",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "nodrop"
|
|
||||||
version = "0.1.14"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nom"
|
name = "nom"
|
||||||
version = "5.1.2"
|
version = "5.1.2"
|
||||||
|
@ -511,9 +507,9 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num-traits"
|
name = "num-traits"
|
||||||
version = "0.2.11"
|
version = "0.2.12"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096"
|
checksum = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
@ -559,15 +555,15 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ppv-lite86"
|
name = "ppv-lite86"
|
||||||
version = "0.2.8"
|
version = "0.2.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea"
|
checksum = "c36fa947111f5c62a733b652544dd0016a43ce89619538a8ef92724a6f501a20"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "predicates"
|
name = "predicates"
|
||||||
version = "1.0.4"
|
version = "1.0.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "347a1b6f0b21e636bc9872fb60b83b8e185f6f5516298b8238699f7f9a531030"
|
checksum = "96bfead12e90dccead362d62bb2c90a5f6fc4584963645bc7f71a735e0b0735a"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"difference",
|
"difference",
|
||||||
"float-cmp",
|
"float-cmp",
|
||||||
|
@ -594,27 +590,25 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro-error"
|
name = "proc-macro-error"
|
||||||
version = "1.0.2"
|
version = "1.0.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "98e9e4b82e0ef281812565ea4751049f1bdcdfccda7d3f459f2e138a40c08678"
|
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro-error-attr",
|
"proc-macro-error-attr",
|
||||||
"proc-macro2 1.0.18",
|
"proc-macro2 1.0.19",
|
||||||
"quote 1.0.6",
|
"quote 1.0.7",
|
||||||
"syn 1.0.33",
|
"syn 1.0.38",
|
||||||
"version_check",
|
"version_check",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro-error-attr"
|
name = "proc-macro-error-attr"
|
||||||
version = "1.0.2"
|
version = "1.0.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4f5444ead4e9935abd7f27dc51f7e852a0569ac888096d5ec2499470794e2e53"
|
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2 1.0.18",
|
"proc-macro2 1.0.19",
|
||||||
"quote 1.0.6",
|
"quote 1.0.7",
|
||||||
"syn 1.0.33",
|
|
||||||
"syn-mid",
|
|
||||||
"version_check",
|
"version_check",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -629,11 +623,11 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.18"
|
version = "1.0.19"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa"
|
checksum = "04f5f085b5d71e2188cb8271e5da0161ad52c3f227a661a3c135fdf28e258b12"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"unicode-xid 0.2.0",
|
"unicode-xid 0.2.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -653,11 +647,11 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quote"
|
name = "quote"
|
||||||
version = "1.0.6"
|
version = "1.0.7"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "54a21852a652ad6f610c9510194f398ff6f8692e334fd1145fed931f7fbe44ea"
|
checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2 1.0.18",
|
"proc-macro2 1.0.19",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -713,9 +707,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "redox_syscall"
|
name = "redox_syscall"
|
||||||
version = "0.1.56"
|
version = "0.1.57"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
|
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex"
|
name = "regex"
|
||||||
|
@ -746,9 +740,9 @@ checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "remove_dir_all"
|
name = "remove_dir_all"
|
||||||
version = "0.5.2"
|
version = "0.5.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e"
|
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
@ -794,29 +788,29 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde"
|
name = "serde"
|
||||||
version = "1.0.114"
|
version = "1.0.115"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5317f7588f0a5078ee60ef675ef96735a1442132dc645eb1d12c018620ed8cd3"
|
checksum = "e54c9a88f2da7238af84b5101443f0c0d0a3bbdc455e34a5c9497b1903ed55d5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"serde_derive",
|
"serde_derive",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_derive"
|
name = "serde_derive"
|
||||||
version = "1.0.114"
|
version = "1.0.115"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2a0be94b04690fbaed37cddffc5c134bf537c8e3329d53e982fe04c374978f8e"
|
checksum = "609feed1d0a73cc36a0182a840a9b37b4a82f0b1150369f0536a9e3f2a31dc48"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2 1.0.18",
|
"proc-macro2 1.0.19",
|
||||||
"quote 1.0.6",
|
"quote 1.0.7",
|
||||||
"syn 1.0.33",
|
"syn 1.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_json"
|
name = "serde_json"
|
||||||
version = "1.0.56"
|
version = "1.0.57"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3433e879a558dde8b5e8feb2a04899cf34fdde1fafb894687e52105fc1162ac3"
|
checksum = "164eacbdb13512ec2745fb09d51fd5b22b0d65ed294a1dcf7285a360c80a675c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"itoa",
|
"itoa",
|
||||||
"ryu",
|
"ryu",
|
||||||
|
@ -831,9 +825,9 @@ checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "static_assertions"
|
name = "static_assertions"
|
||||||
version = "0.3.4"
|
version = "1.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3"
|
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
|
@ -849,9 +843,9 @@ checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "structopt"
|
name = "structopt"
|
||||||
version = "0.3.15"
|
version = "0.3.16"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "de2f5e239ee807089b62adce73e48c625e0ed80df02c7ab3f068f5db5281065c"
|
checksum = "de5472fb24d7e80ae84a7801b7978f95a19ec32cb1876faea59ab711eb901976"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
|
@ -860,15 +854,15 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "structopt-derive"
|
name = "structopt-derive"
|
||||||
version = "0.4.8"
|
version = "0.4.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "510413f9de616762a4fbeab62509bf15c729603b72d7cd71280fbca431b1c118"
|
checksum = "1e0eb37335aeeebe51be42e2dc07f031163fbabfa6ac67d7ea68b5c2f68d5f99"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"heck",
|
"heck",
|
||||||
"proc-macro-error",
|
"proc-macro-error",
|
||||||
"proc-macro2 1.0.18",
|
"proc-macro2 1.0.19",
|
||||||
"quote 1.0.6",
|
"quote 1.0.7",
|
||||||
"syn 1.0.33",
|
"syn 1.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -884,24 +878,13 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "1.0.33"
|
version = "1.0.38"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e8d5d96e8cbb005d6959f119f773bfaebb5684296108fb32600c00cde305b2cd"
|
checksum = "e69abc24912995b3038597a7a593be5053eb0fb44f3cc5beec0deb421790c1f4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2 1.0.18",
|
"proc-macro2 1.0.19",
|
||||||
"quote 1.0.6",
|
"quote 1.0.7",
|
||||||
"unicode-xid 0.2.0",
|
"unicode-xid 0.2.1",
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "syn-mid"
|
|
||||||
version = "0.5.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "7be3539f6c128a931cf19dcee741c1af532c7fd387baa739c03dd2e96479338a"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2 1.0.18",
|
|
||||||
"quote 1.0.6",
|
|
||||||
"syn 1.0.33",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -951,9 +934,9 @@ version = "1.0.20"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793"
|
checksum = "bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2 1.0.18",
|
"proc-macro2 1.0.19",
|
||||||
"quote 1.0.6",
|
"quote 1.0.7",
|
||||||
"syn 1.0.33",
|
"syn 1.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1017,11 +1000,21 @@ dependencies = [
|
||||||
"toml",
|
"toml",
|
||||||
"typos",
|
"typos",
|
||||||
"typos-dict",
|
"typos-dict",
|
||||||
|
"typos-vars",
|
||||||
"unicase",
|
"unicase",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "typos-codegen"
|
name = "typos-dict"
|
||||||
|
version = "0.2.1"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"phf",
|
||||||
|
"unicase",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typos-dict-codegen"
|
||||||
version = "1.0.2"
|
version = "1.0.2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"codegenrs",
|
"codegenrs",
|
||||||
|
@ -1033,12 +1026,45 @@ dependencies = [
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "typos-dict"
|
name = "typos-dict-verify"
|
||||||
|
version = "1.0.2"
|
||||||
|
dependencies = [
|
||||||
|
"codegenrs",
|
||||||
|
"csv",
|
||||||
|
"edit-distance",
|
||||||
|
"itertools",
|
||||||
|
"structopt",
|
||||||
|
"unicase",
|
||||||
|
"varcon",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typos-vars"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"phf",
|
"phf",
|
||||||
"unicase",
|
"unicase",
|
||||||
|
"varcon-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typos-vars-codegen"
|
||||||
|
version = "1.0.2"
|
||||||
|
dependencies = [
|
||||||
|
"clap",
|
||||||
|
"clap-verbosity-flag",
|
||||||
|
"codegenrs",
|
||||||
|
"env_logger",
|
||||||
|
"itertools",
|
||||||
|
"log",
|
||||||
|
"phf",
|
||||||
|
"phf_codegen",
|
||||||
|
"structopt",
|
||||||
|
"typos",
|
||||||
|
"unicase",
|
||||||
|
"varcon",
|
||||||
|
"varcon-core",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1058,9 +1084,9 @@ checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-width"
|
name = "unicode-width"
|
||||||
version = "0.1.7"
|
version = "0.1.8"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
|
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-xid"
|
name = "unicode-xid"
|
||||||
|
@ -1070,9 +1096,9 @@ checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-xid"
|
name = "unicode-xid"
|
||||||
version = "0.2.0"
|
version = "0.2.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
|
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "varcon"
|
name = "varcon"
|
||||||
|
@ -1150,9 +1176,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi"
|
name = "winapi"
|
||||||
version = "0.3.8"
|
version = "0.3.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
|
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"winapi-i686-pc-windows-gnu",
|
"winapi-i686-pc-windows-gnu",
|
||||||
"winapi-x86_64-pc-windows-gnu",
|
"winapi-x86_64-pc-windows-gnu",
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
[workspace]
|
[workspace]
|
||||||
members = [
|
members = [
|
||||||
"crates/typos",
|
"crates/typos",
|
||||||
"crates/typos-dict", "crates/typos-dict/codegen",
|
"crates/typos-dict", "crates/typos-dict/codegen", "crates/typos-dict/verify",
|
||||||
|
"crates/typos-vars", "crates/typos-vars/codegen",
|
||||||
"crates/codespell-dict", "crates/codespell-dict/codegen",
|
"crates/codespell-dict", "crates/codespell-dict/codegen",
|
||||||
"crates/misspell-dict", "crates/misspell-dict/codegen",
|
"crates/misspell-dict", "crates/misspell-dict/codegen",
|
||||||
"crates/wikipedia-dict", "crates/wikipedia-dict/codegen",
|
"crates/wikipedia-dict", "crates/wikipedia-dict/codegen",
|
||||||
|
@ -32,6 +33,7 @@ codecov = { repository = "crate-ci/typos" }
|
||||||
[dependencies]
|
[dependencies]
|
||||||
typos = { version = "^0.3", path = "crates/typos" }
|
typos = { version = "^0.3", path = "crates/typos" }
|
||||||
typos-dict = { version = "^0.2", path = "crates/typos-dict" }
|
typos-dict = { version = "^0.2", path = "crates/typos-dict" }
|
||||||
|
typos-vars = { version = "^0.2", path = "crates/typos-vars" }
|
||||||
phf = { version = "0.8", features = ["unicase"] }
|
phf = { version = "0.8", features = ["unicase"] }
|
||||||
unicase = "2.5"
|
unicase = "2.5"
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
|
|
|
@ -154,7 +154,13 @@ stages:
|
||||||
steps:
|
steps:
|
||||||
- template: install-rust.yml@templates
|
- template: install-rust.yml@templates
|
||||||
- script: |
|
- script: |
|
||||||
cargo run --package typos-codegen -- --output crates/typos-dict/src/dict_codegen.rs --check
|
cargo run --package typos-dict-codegen -- --output crates/typos-dict/src/dict_codegen.rs --check
|
||||||
|
displayName: Verify typos-dict
|
||||||
|
- script: |
|
||||||
|
cargo run --package typos-vars-codegen -- --output crates/typos-vars/src/vars_codegen.rs --check
|
||||||
|
displayName: Verify typos-vars
|
||||||
|
- script: |
|
||||||
|
cargo run --package typos-dict-verify -- --input crates/typos-dict/assets/words.csv --output crates/typos-dict/assets/words.csv --check
|
||||||
displayName: Verify typos-dict
|
displayName: Verify typos-dict
|
||||||
- script: |
|
- script: |
|
||||||
cargo run --package codespell-codegen -- --output crates/codespell-dict/src/dict_codegen.rs --check
|
cargo run --package codespell-codegen -- --output crates/codespell-dict/src/dict_codegen.rs --check
|
||||||
|
|
|
@ -4,12 +4,12 @@ extern crate test;
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn load_corrections(b: &mut test::Bencher) {
|
fn load_corrections(b: &mut test::Bencher) {
|
||||||
b.iter(|| typos_cli::dict::BuiltIn::new());
|
b.iter(|| typos_cli::dict::BuiltIn::new(Default::default()));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn correct_word_hit(b: &mut test::Bencher) {
|
fn correct_word_hit(b: &mut test::Bencher) {
|
||||||
let corrections = typos_cli::dict::BuiltIn::new();
|
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||||
let input = typos::tokens::Word::new("successs", 0).unwrap();
|
let input = typos::tokens::Word::new("successs", 0).unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
corrections.correct_word(input),
|
corrections.correct_word(input),
|
||||||
|
@ -20,7 +20,7 @@ fn correct_word_hit(b: &mut test::Bencher) {
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn correct_word_miss(b: &mut test::Bencher) {
|
fn correct_word_miss(b: &mut test::Bencher) {
|
||||||
let corrections = typos_cli::dict::BuiltIn::new();
|
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||||
let input = typos::tokens::Word::new("success", 0).unwrap();
|
let input = typos::tokens::Word::new("success", 0).unwrap();
|
||||||
assert!(corrections.correct_word(input).is_empty());
|
assert!(corrections.correct_word(input).is_empty());
|
||||||
b.iter(|| corrections.correct_word(input));
|
b.iter(|| corrections.correct_word(input));
|
||||||
|
|
|
@ -184,7 +184,7 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) {
|
||||||
let sample_path = temp.child("sample");
|
let sample_path = temp.child("sample");
|
||||||
sample_path.write_str(data).unwrap();
|
sample_path.write_str(data).unwrap();
|
||||||
|
|
||||||
let corrections = typos_cli::dict::BuiltIn::new();
|
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||||
let parser = typos::tokens::Parser::new();
|
let parser = typos::tokens::Parser::new();
|
||||||
let checks = typos::checks::TyposSettings::new().build_checks();
|
let checks = typos::checks::TyposSettings::new().build_checks();
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
|
|
1794
crates/typos-dict/assets/words.csv
vendored
1794
crates/typos-dict/assets/words.csv
vendored
File diff suppressed because it is too large
Load diff
|
@ -1,5 +1,5 @@
|
||||||
[package]
|
[package]
|
||||||
name = "typos-codegen"
|
name = "typos-dict-codegen"
|
||||||
version = "1.0.2"
|
version = "1.0.2"
|
||||||
authors = ["Ed Page <eopage@gmail.com>"]
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
description = "Source Code Spelling Correction"
|
description = "Source Code Spelling Correction"
|
||||||
|
|
File diff suppressed because it is too large
Load diff
24
crates/typos-dict/verify/Cargo.toml
Normal file
24
crates/typos-dict/verify/Cargo.toml
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
[package]
|
||||||
|
name = "typos-dict-verify"
|
||||||
|
version = "1.0.2"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Source Code Spelling Correction"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "../../../README.md"
|
||||||
|
categories = ["text-processing"]
|
||||||
|
license = "MIT"
|
||||||
|
edition = "2018"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
csv = "1.0"
|
||||||
|
unicase = "2.5"
|
||||||
|
codegenrs = "0.1"
|
||||||
|
structopt = "0.3"
|
||||||
|
varcon = { version = "0.2", path = "../../varcon" }
|
||||||
|
itertools = "0.9"
|
||||||
|
edit-distance = "2.1"
|
99
crates/typos-dict/verify/src/main.rs
Normal file
99
crates/typos-dict/verify/src/main.rs
Normal file
|
@ -0,0 +1,99 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
use structopt::StructOpt;
|
||||||
|
|
||||||
|
fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
||||||
|
let mut wtr = csv::Writer::from_writer(file);
|
||||||
|
|
||||||
|
let disallowed_typos = disallowed_typos();
|
||||||
|
let related_words = related_words();
|
||||||
|
|
||||||
|
let mut reader = csv::ReaderBuilder::new()
|
||||||
|
.has_headers(false)
|
||||||
|
.from_reader(dict);
|
||||||
|
for record in reader.records() {
|
||||||
|
let record = record.unwrap();
|
||||||
|
let typo = &record[0];
|
||||||
|
let correction = &record[1];
|
||||||
|
if disallowed_typos.contains(&unicase::UniCase::new(typo)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let correction = related_words
|
||||||
|
.get(correction)
|
||||||
|
.and_then(|words| find_best_match(typo, correction, words))
|
||||||
|
.unwrap_or(correction);
|
||||||
|
wtr.write_record(&[typo, correction]).unwrap();
|
||||||
|
}
|
||||||
|
wtr.flush().unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn disallowed_typos() -> HashSet<unicase::UniCase<&'static str>> {
|
||||||
|
varcon::VARCON
|
||||||
|
.iter()
|
||||||
|
.flat_map(|c| c.entries.iter())
|
||||||
|
.flat_map(|e| e.variants.iter())
|
||||||
|
.map(|v| unicase::UniCase::new(v.word))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn related_words() -> HashMap<&'static str, HashSet<&'static str>> {
|
||||||
|
let mut words: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();
|
||||||
|
for entry in varcon::VARCON.iter().flat_map(|c| c.entries.iter()) {
|
||||||
|
let variants: HashSet<_> = entry
|
||||||
|
.variants
|
||||||
|
.iter()
|
||||||
|
.filter(|v| v.types.iter().any(|t| t.tag != Some(varcon::Tag::Improper)))
|
||||||
|
.map(|v| v.word)
|
||||||
|
.collect();
|
||||||
|
for variant in variants.iter() {
|
||||||
|
let set = words.entry(variant).or_insert_with(HashSet::new);
|
||||||
|
set.extend(variants.iter().filter(|v| *v != variant));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
words
|
||||||
|
}
|
||||||
|
|
||||||
|
fn find_best_match<'c>(
|
||||||
|
typo: &'c str,
|
||||||
|
correction: &'c str,
|
||||||
|
related_words: &HashSet<&'static str>,
|
||||||
|
) -> Option<&'c str> {
|
||||||
|
assert!(!related_words.contains(correction));
|
||||||
|
let current = edit_distance::edit_distance(typo, correction);
|
||||||
|
let mut matches: Vec<_> = related_words
|
||||||
|
.iter()
|
||||||
|
.map(|r| (edit_distance::edit_distance(typo, r), *r))
|
||||||
|
.filter(|(d, _)| *d < current)
|
||||||
|
.collect();
|
||||||
|
matches.sort_unstable();
|
||||||
|
matches.into_iter().next().map(|(_, r)| r)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Command-line arguments of the dictionary verification tool.
// Plain `//` comments are used on purpose: doc comments on a StructOpt struct
// or its fields would be picked up as help text.
#[derive(Debug, StructOpt)]
|
||||||
|
#[structopt(rename_all = "kebab-case")]
|
||||||
|
struct Options {
|
||||||
|
// Path of the CSV dictionary that `run` reads and passes to `generate`.
#[structopt(short("-i"), long, parse(from_os_str))]
|
||||||
|
input: std::path::PathBuf,
|
||||||
|
// Output handling shared with the other code generators; `run` writes the
// result through `codegen.write_str`.
#[structopt(flatten)]
|
||||||
|
codegen: codegenrs::CodeGenArgs,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run() -> Result<i32, Box<dyn std::error::Error>> {
|
||||||
|
let options = Options::from_args();
|
||||||
|
|
||||||
|
let data = std::fs::read(&options.input).unwrap();
|
||||||
|
|
||||||
|
let mut content = vec![];
|
||||||
|
generate(&mut content, &data);
|
||||||
|
|
||||||
|
let content = String::from_utf8(content)?;
|
||||||
|
options.codegen.write_str(&content)?;
|
||||||
|
|
||||||
|
Ok(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let code = run().unwrap();
|
||||||
|
std::process::exit(code);
|
||||||
|
}
|
21
crates/typos-vars/Cargo.toml
Normal file
21
crates/typos-vars/Cargo.toml
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
[package]
|
||||||
|
name = "typos-vars"
|
||||||
|
version = "0.2.1"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Source Code Spelling Correction"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "../../README.md"
|
||||||
|
categories = ["development-tools", "text-processing"]
|
||||||
|
keywords = ["development", "spelling"]
|
||||||
|
license = "MIT"
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
phf = { version = "0.8", features = ["unicase"] }
|
||||||
|
unicase = "2.5"
|
||||||
|
log = "0.4"
|
||||||
|
varcon-core = { version = "1.0", path = "../varcon-core", features = ["flags"] }
|
30
crates/typos-vars/codegen/Cargo.toml
Normal file
30
crates/typos-vars/codegen/Cargo.toml
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
[package]
|
||||||
|
name = "typos-vars-codegen"
|
||||||
|
version = "1.0.2"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Source Code Spelling Correction"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "../../../README.md"
|
||||||
|
categories = ["text-processing"]
|
||||||
|
license = "MIT"
|
||||||
|
edition = "2018"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
phf = { version = "0.8", features = ["unicase"] }
|
||||||
|
phf_codegen = "0.8"
|
||||||
|
varcon = { version = "0.2", path = "../../varcon", features = ["flags"] }
|
||||||
|
varcon-core = { version = "1.0", path = "../../varcon-core", features = ["flags"] }
|
||||||
|
typos = { version = "^0.3", path = "../../typos" }
|
||||||
|
unicase = "2.5"
|
||||||
|
codegenrs = "0.1"
|
||||||
|
structopt = "0.3"
|
||||||
|
clap = "2"
|
||||||
|
log = "0.4"
|
||||||
|
env_logger = "0.7"
|
||||||
|
clap-verbosity-flag = "0.3"
|
||||||
|
itertools = "0.9"
|
314
crates/typos-vars/codegen/src/main.rs
Normal file
314
crates/typos-vars/codegen/src/main.rs
Normal file
|
@ -0,0 +1,314 @@
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use std::io::Write;
|
||||||
|
|
||||||
|
use structopt::StructOpt;
|
||||||
|
|
||||||
|
// Dialect categories the generated table covers. The position of a category
// in this array is also the index of its slot in each generated `VariantsMap`.
static CATEGORIES: [varcon::Category; 4] = [
|
||||||
|
varcon::Category::American,
|
||||||
|
varcon::Category::BritishIse,
|
||||||
|
// For now, only want to support one form of British, so going with -ise as it seems more
|
||||||
|
// popular.
|
||||||
|
varcon::Category::Canadian,
|
||||||
|
varcon::Category::Australian,
|
||||||
|
// `Other` is deliberately not listed: it basically means "all categories".
|
||||||
|
];
|
||||||
|
|
||||||
|
fn generate_variations<W: std::io::Write>(file: &mut W) {
|
||||||
|
let entries = entries();
|
||||||
|
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"// This file is code-genned by {}",
|
||||||
|
env!("CARGO_PKG_NAME")
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
|
writeln!(file, "use unicase::UniCase;").unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
|
writeln!(file, "pub type Variants = &'static [&'static str];",).unwrap();
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"pub type VariantsMap = [Variants; {}];",
|
||||||
|
CATEGORIES.len()
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
|
writeln!(file, "pub fn all_categories() -> crate::CategorySet {{",).unwrap();
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
" {}",
|
||||||
|
itertools::join(
|
||||||
|
CATEGORIES
|
||||||
|
.iter()
|
||||||
|
.map(|c| format!("crate::Category::{:?}", c)),
|
||||||
|
" | "
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(file, "}}",).unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"pub fn corrections(category: crate::Category, options: VariantsMap) -> &'static [&'static str] {{",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(file, " match category {{").unwrap();
|
||||||
|
for (index, category) in CATEGORIES.iter().enumerate() {
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
" crate::Category::{:?} => options[{}],",
|
||||||
|
category, index
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
" crate::Category::BritishIze | crate::Category::Other => unreachable!(\"{{:?}} is unused\", category),",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(file, " }}").unwrap();
|
||||||
|
writeln!(file, "}}").unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"pub static VARS_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)]> = "
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let entry_sets = entry_sets(entries.iter());
|
||||||
|
let mut referenced_symbols: HashSet<&str> = HashSet::new();
|
||||||
|
let mut builder = phf_codegen::Map::new();
|
||||||
|
for (word, data) in entry_sets.iter() {
|
||||||
|
if is_always_valid(data) {
|
||||||
|
// No need to convert from current form to target form
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
referenced_symbols.extend(data.iter().map(|(s, _)| s));
|
||||||
|
let value = generate_link(&data);
|
||||||
|
builder.entry(unicase::UniCase::new(word), &value);
|
||||||
|
}
|
||||||
|
let codegenned = builder.build();
|
||||||
|
writeln!(file, "{}", codegenned).unwrap();
|
||||||
|
writeln!(file, ";").unwrap();
|
||||||
|
|
||||||
|
for (symbol, entry) in entries.iter() {
|
||||||
|
if !referenced_symbols.contains(symbol.as_str()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
generate_entry(file, symbol, entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn generate_entry(file: &mut impl std::io::Write, symbol: &str, entry: &varcon_core::Entry) {
|
||||||
|
writeln!(file, "pub(crate) static {}: VariantsMap = [", symbol).unwrap();
|
||||||
|
for category in &CATEGORIES {
|
||||||
|
let corrections = collect_correct(entry, *category);
|
||||||
|
let mut corrections: Vec<_> = corrections.iter().collect();
|
||||||
|
corrections.sort_unstable();
|
||||||
|
writeln!(file, " &[").unwrap();
|
||||||
|
for correction in &corrections {
|
||||||
|
writeln!(file, " {:?},", correction).unwrap();
|
||||||
|
}
|
||||||
|
writeln!(file, " ],").unwrap();
|
||||||
|
}
|
||||||
|
writeln!(file, "];").unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn generate_link(data: &[(&str, varcon::CategorySet)]) -> String {
|
||||||
|
let mut output = Vec::new();
|
||||||
|
|
||||||
|
write!(output, "&[").unwrap();
|
||||||
|
for (symbol, set) in data.iter() {
|
||||||
|
write!(output, "(0b{:05b}, &{}), ", set.bits(), symbol).unwrap();
|
||||||
|
}
|
||||||
|
write!(output, "]").unwrap();
|
||||||
|
|
||||||
|
String::from_utf8(output).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_always_valid(data: &[(&str, varcon::CategorySet)]) -> bool {
|
||||||
|
let valid_categories = valid_categories();
|
||||||
|
for (_symbol, set) in data.iter() {
|
||||||
|
if *set == valid_categories {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn entries() -> BTreeMap<String, varcon_core::Entry> {
|
||||||
|
varcon::VARCON
|
||||||
|
.iter()
|
||||||
|
.flat_map(|c| c.entries.iter())
|
||||||
|
.filter(|e| {
|
||||||
|
e.variants
|
||||||
|
.iter()
|
||||||
|
.all(|v| typos::tokens::Word::new(&v.word, 0).is_ok())
|
||||||
|
})
|
||||||
|
.map(|e| {
|
||||||
|
let mut e = e.into_owned();
|
||||||
|
for variant in e.variants.iter_mut() {
|
||||||
|
variant.word.make_ascii_lowercase();
|
||||||
|
}
|
||||||
|
(entry_symbol(&e), e)
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn entry_symbol(entry: &varcon_core::Entry) -> String {
|
||||||
|
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
||||||
|
std::hash::Hash::hash(entry, &mut hasher);
|
||||||
|
let hash = std::hash::Hasher::finish(&hasher);
|
||||||
|
format!(
|
||||||
|
"ENTRY_{}_{}",
|
||||||
|
entry.variants[0].word.to_ascii_uppercase(),
|
||||||
|
hash
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn entry_sets<'e>(
|
||||||
|
entries: impl Iterator<Item = (&'e String, &'e varcon_core::Entry)>,
|
||||||
|
) -> BTreeMap<&'e str, Vec<(&'e str, varcon::CategorySet)>> {
|
||||||
|
let mut sets = BTreeMap::new();
|
||||||
|
for (symbol, entry) in entries {
|
||||||
|
for (word, set) in entry_set(entry).iter() {
|
||||||
|
let v = sets.entry(*word).or_insert_with(Vec::new);
|
||||||
|
v.push((symbol.as_str(), *set));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sets
|
||||||
|
}
|
||||||
|
|
||||||
|
fn entry_set(entry: &varcon_core::Entry) -> BTreeMap<&str, varcon::CategorySet> {
|
||||||
|
let mut sets = BTreeMap::new();
|
||||||
|
let valid_categories = valid_categories();
|
||||||
|
for variant in entry.variants.iter() {
|
||||||
|
let set = sets
|
||||||
|
.entry(variant.word.as_str())
|
||||||
|
.or_insert_with(varcon::CategorySet::empty);
|
||||||
|
for t in variant.types.iter() {
|
||||||
|
match t.category {
|
||||||
|
varcon::Category::Other => *set |= valid_categories,
|
||||||
|
varcon::Category::BritishIze => (),
|
||||||
|
_ => set.insert(t.category),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sets
|
||||||
|
}
|
||||||
|
|
||||||
|
fn valid_categories() -> varcon::CategorySet {
|
||||||
|
let mut c = varcon::CategorySet::empty();
|
||||||
|
for cat in CATEGORIES.iter() {
|
||||||
|
c.insert(*cat);
|
||||||
|
}
|
||||||
|
c
|
||||||
|
}
|
||||||
|
|
||||||
|
fn collect_correct(entry: &varcon_core::Entry, category: varcon::Category) -> HashSet<&str> {
|
||||||
|
// If there is ambiguity, collect all potential options.
|
||||||
|
let mut primary = HashSet::new();
|
||||||
|
let mut backup = HashSet::new();
|
||||||
|
for variant in entry.variants.iter().filter(|v| !ignore_variant(v)) {
|
||||||
|
for t in variant
|
||||||
|
.types
|
||||||
|
.iter()
|
||||||
|
.filter(|t| t.category == category || t.category == varcon::Category::Other)
|
||||||
|
{
|
||||||
|
let tag = t.tag.unwrap_or(varcon::Tag::Eq);
|
||||||
|
if tag == varcon::Tag::Eq {
|
||||||
|
primary.insert(variant.word.as_str());
|
||||||
|
}
|
||||||
|
if tag != varcon::Tag::Improper {
|
||||||
|
backup.insert(variant.word.as_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if primary.len() == 1 {
|
||||||
|
primary
|
||||||
|
} else {
|
||||||
|
backup
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ignore_variant(variant: &varcon_core::Variant) -> bool {
|
||||||
|
if variant.word == "anesthetisation"
|
||||||
|
&& variant.types.len() == 1
|
||||||
|
&& variant.types[0].category == varcon::Category::Australian
|
||||||
|
&& (variant.types[0].tag == Some(varcon::Tag::Variant)
|
||||||
|
|| variant.types[0].tag == Some(varcon::Tag::Seldom))
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
// dict needs
|
||||||
|
// all words, with bitflags, pointing to list of entry names
|
||||||
|
//
|
||||||
|
// varcon needs
|
||||||
|
// all entries by name
|
||||||
|
|
||||||
|
// Command-line arguments of the variations code generator.
// Plain `//` comments are used on purpose: doc comments on a StructOpt struct
// or its fields would be picked up as help text.
#[derive(Debug, StructOpt)]
|
||||||
|
#[structopt(rename_all = "kebab-case")]
|
||||||
|
struct Options {
|
||||||
|
// Output handling; `run` writes the final text through `codegen.write_str`.
#[structopt(flatten)]
|
||||||
|
codegen: codegenrs::CodeGenArgs,
|
||||||
|
// Formatting options; `run` passes the generated source through
// `rustmft.reformat` before writing it.
// NOTE(review): the field name looks like a typo of `rustfmt`; renaming it
// requires updating `run` as well.
#[structopt(flatten)]
|
||||||
|
rustmft: codegenrs::RustfmtArgs,
|
||||||
|
|
||||||
|
// Verbosity flags; `run` defaults them to `Info` and feeds the resulting level
// to `init_logging`.
#[structopt(flatten)]
|
||||||
|
pub(crate) verbose: clap_verbosity_flag::Verbosity,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn init_logging(level: Option<log::Level>) {
|
||||||
|
if let Some(level) = level {
|
||||||
|
let mut builder = env_logger::Builder::new();
|
||||||
|
|
||||||
|
builder.filter(None, level.to_level_filter());
|
||||||
|
|
||||||
|
if level == log::LevelFilter::Trace {
|
||||||
|
builder.format_timestamp_secs();
|
||||||
|
} else {
|
||||||
|
builder.format(|f, record| {
|
||||||
|
writeln!(
|
||||||
|
f,
|
||||||
|
"[{}] {}",
|
||||||
|
record.level().to_string().to_lowercase(),
|
||||||
|
record.args()
|
||||||
|
)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
builder.init();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run() -> Result<i32, Box<dyn std::error::Error>> {
|
||||||
|
let mut options = Options::from_args();
|
||||||
|
options.verbose.set_default(Some(log::Level::Info));
|
||||||
|
init_logging(options.verbose.log_level());
|
||||||
|
|
||||||
|
let mut content = vec![];
|
||||||
|
generate_variations(&mut content);
|
||||||
|
|
||||||
|
let content = String::from_utf8(content)?;
|
||||||
|
let content = options.rustmft.reformat(&content)?;
|
||||||
|
options.codegen.write_str(&content)?;
|
||||||
|
|
||||||
|
Ok(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let code = run().unwrap();
|
||||||
|
std::process::exit(code);
|
||||||
|
}
|
6
crates/typos-vars/src/lib.rs
Normal file
6
crates/typos-vars/src/lib.rs
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
mod vars_codegen;
|
||||||
|
|
||||||
|
pub use crate::vars_codegen::*;
|
||||||
|
|
||||||
|
pub use varcon_core::Category;
|
||||||
|
pub use varcon_core::CategorySet;
|
201827
crates/typos-vars/src/vars_codegen.rs
Normal file
201827
crates/typos-vars/src/vars_codegen.rs
Normal file
File diff suppressed because it is too large
Load diff
|
@ -14,15 +14,18 @@ Configuration is read from the following (in precedence order)
|
||||||
|
|
||||||
| Field | Argument | Format | Description |
|
| Field | Argument | Format | Description |
|
||||||
|------------------------|-------------------|--------|-------------|
|
|------------------------|-------------------|--------|-------------|
|
||||||
| files.binary | --binary | bool | |
|
| files.binary | --binary | bool | Check binary files as text |
|
||||||
| files.ignore-hidden | --hidden | bool | |
|
| files.ignore-hidden | --hidden | bool | Skip hidden files and directories. |
|
||||||
| files.ignore-files | --ignore | bool | |
|
| files.ignore-files | --ignore | bool | Respect ignore files. |
|
||||||
| files.ignore-dot | --ignore-dot | bool | |
|
| files.ignore-dot | --ignore-dot | bool | Respect .ignore files. |
|
||||||
| files.ignore-vcs | --ignore-vcs | bool | |
|
| files.ignore-vcs | --ignore-vcs | bool | Respect ignore files in vcs directories. |
|
||||||
| files.ignore-global | --ignore-global | bool | |
|
| files.ignore-global | --ignore-global | bool | Respect global ignore files. |
|
||||||
| files.ignore-parent | --ignore-parent | bool | |
|
| files.ignore-parent | --ignore-parent | bool | Respect ignore files in parent directories. |
|
||||||
| default.check-filename | \- | bool | |
|
| default.check-filename | \- | bool | Verify spelling in file names. |
|
||||||
| default.check-file | \- | bool | |
|
| default.check-file | \- | bool | Verify spelling in files. |
|
||||||
| default.ignore-hex | \- | bool | |
|
| default.ignore-hex | \- | bool | Do not check identifiers that appear to be hexadecimal values. |
|
||||||
| default.identifier-include-digits | \- | bool | |
|
| default.identifier-leading-digits | \- | bool | Allow identifiers to start with digits, in addition to letters. |
|
||||||
| default.identifier-include-chars | \- | string | |
|
| default.identifier-include-digits | \- | bool | Allow identifiers to include digits, in addition to letters. |
|
||||||
|
| default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. |
|
||||||
|
| default.identifier-include-chars | \- | string | Allow identifiers to include these characters. |
|
||||||
|
| default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
|
||||||
|
|
10
src/args.rs
10
src/args.rs
|
@ -112,6 +112,12 @@ pub(crate) struct FileArgs {
|
||||||
no_hex: bool,
|
no_hex: bool,
|
||||||
#[structopt(long, overrides_with("no-hex"), hidden(true))]
|
#[structopt(long, overrides_with("no-hex"), hidden(true))]
|
||||||
hex: bool,
|
hex: bool,
|
||||||
|
|
||||||
|
#[structopt(
|
||||||
|
long,
|
||||||
|
possible_values(&config::Locale::variants()),
|
||||||
|
)]
|
||||||
|
pub(crate) locale: Option<config::Locale>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl config::FileSource for FileArgs {
|
impl config::FileSource for FileArgs {
|
||||||
|
@ -141,6 +147,10 @@ impl config::FileSource for FileArgs {
|
||||||
(_, _) => unreachable!("StructOpt should make this impossible"),
|
(_, _) => unreachable!("StructOpt should make this impossible"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn locale(&self) -> Option<config::Locale> {
|
||||||
|
self.locale
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
|
|
|
@ -82,6 +82,10 @@ pub trait FileSource {
|
||||||
fn identifier_include_chars(&self) -> Option<&str> {
|
fn identifier_include_chars(&self) -> Option<&str> {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn locale(&self) -> Option<Locale> {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
|
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
|
||||||
|
@ -247,6 +251,7 @@ pub struct FileConfig {
|
||||||
pub identifier_leading_chars: Option<String>,
|
pub identifier_leading_chars: Option<String>,
|
||||||
pub identifier_include_digits: Option<bool>,
|
pub identifier_include_digits: Option<bool>,
|
||||||
pub identifier_include_chars: Option<String>,
|
pub identifier_include_chars: Option<String>,
|
||||||
|
pub locale: Option<Locale>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FileConfig {
|
impl FileConfig {
|
||||||
|
@ -272,6 +277,9 @@ impl FileConfig {
|
||||||
if let Some(source) = source.identifier_include_chars() {
|
if let Some(source) = source.identifier_include_chars() {
|
||||||
self.identifier_include_chars = Some(source.to_owned());
|
self.identifier_include_chars = Some(source.to_owned());
|
||||||
}
|
}
|
||||||
|
if let Some(source) = source.locale() {
|
||||||
|
self.locale = Some(source);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn check_filename(&self) -> bool {
|
pub fn check_filename(&self) -> bool {
|
||||||
|
@ -301,6 +309,10 @@ impl FileConfig {
|
||||||
pub fn identifier_include_chars(&self) -> &str {
|
pub fn identifier_include_chars(&self) -> &str {
|
||||||
self.identifier_include_chars.as_deref().unwrap_or("_'")
|
self.identifier_include_chars.as_deref().unwrap_or("_'")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn locale(&self) -> Locale {
|
||||||
|
self.locale.unwrap_or_default()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FileSource for FileConfig {
|
impl FileSource for FileConfig {
|
||||||
|
@ -331,6 +343,10 @@ impl FileSource for FileConfig {
|
||||||
fn identifier_include_chars(&self) -> Option<&str> {
|
fn identifier_include_chars(&self) -> Option<&str> {
|
||||||
self.identifier_include_chars.as_deref()
|
self.identifier_include_chars.as_deref()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn locale(&self) -> Option<Locale> {
|
||||||
|
self.locale
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn find_project_file(dir: std::path::PathBuf, name: &str) -> Option<std::path::PathBuf> {
|
fn find_project_file(dir: std::path::PathBuf, name: &str) -> Option<std::path::PathBuf> {
|
||||||
|
@ -346,3 +362,62 @@ fn find_project_file(dir: std::path::PathBuf, name: &str) -> Option<std::path::P
|
||||||
}
|
}
|
||||||
Some(file_path)
|
Some(file_path)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone, serde::Serialize, serde::Deserialize)]
|
||||||
|
#[serde(rename_all = "kebab-case")]
|
||||||
|
pub enum Locale {
|
||||||
|
En,
|
||||||
|
EnUs,
|
||||||
|
EnGb,
|
||||||
|
EnCa,
|
||||||
|
EnAu,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Locale {
|
||||||
|
pub fn category(self) -> Option<typos_vars::Category> {
|
||||||
|
match self {
|
||||||
|
Locale::En => None,
|
||||||
|
Locale::EnUs => Some(typos_vars::Category::American),
|
||||||
|
Locale::EnGb => Some(typos_vars::Category::BritishIse),
|
||||||
|
Locale::EnCa => Some(typos_vars::Category::Canadian),
|
||||||
|
Locale::EnAu => Some(typos_vars::Category::Australian),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn variants() -> [&'static str; 5] {
|
||||||
|
["en", "en-us", "en-gb", "en-ca", "en-au"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Locale {
|
||||||
|
fn default() -> Self {
|
||||||
|
Locale::En
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::str::FromStr for Locale {
|
||||||
|
type Err = String;
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
|
||||||
|
match s {
|
||||||
|
"en" => Ok(Locale::En),
|
||||||
|
"en-us" => Ok(Locale::EnUs),
|
||||||
|
"en-gb" => Ok(Locale::EnGb),
|
||||||
|
"en-ca" => Ok(Locale::EnCa),
|
||||||
|
"en-au" => Ok(Locale::EnAu),
|
||||||
|
_ => Err("valid values: en, en-us, en-gb, en-ca, en-au".to_owned()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for Locale {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||||
|
match *self {
|
||||||
|
Locale::En => write!(f, "en"),
|
||||||
|
Locale::EnUs => write!(f, "en-us"),
|
||||||
|
Locale::EnGb => write!(f, "en-gb"),
|
||||||
|
Locale::EnCa => write!(f, "en-ca"),
|
||||||
|
Locale::EnAu => write!(f, "en-au"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
75
src/dict.rs
75
src/dict.rs
|
@ -5,11 +5,15 @@ use unicase::UniCase;
|
||||||
use typos::tokens::Case;
|
use typos::tokens::Case;
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct BuiltIn {}
|
pub struct BuiltIn {
|
||||||
|
locale: Option<typos_vars::Category>,
|
||||||
|
}
|
||||||
|
|
||||||
impl BuiltIn {
|
impl BuiltIn {
|
||||||
pub fn new() -> Self {
|
pub fn new(locale: crate::config::Locale) -> Self {
|
||||||
Self {}
|
Self {
|
||||||
|
locale: locale.category(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn correct_ident<'s, 'w>(
|
pub fn correct_ident<'s, 'w>(
|
||||||
|
@ -19,12 +23,66 @@ impl BuiltIn {
|
||||||
Vec::new()
|
Vec::new()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Vec<Cow<'s, str>> {
|
pub fn correct_word<'s, 'w>(
|
||||||
map_lookup(&typos_dict::WORD_DICTIONARY, word.token())
|
&'s self,
|
||||||
.map(|s| case_correct(s, word.case()))
|
word_token: typos::tokens::Word<'w>,
|
||||||
|
) -> Vec<Cow<'s, str>> {
|
||||||
|
let word = word_token.token();
|
||||||
|
let corrections = if let Some(correction) = self.correct_with_dict(word) {
|
||||||
|
self.correct_with_vars(word)
|
||||||
|
.unwrap_or_else(|| vec![correction])
|
||||||
|
} else {
|
||||||
|
self.correct_with_vars(word).unwrap_or_else(Vec::new)
|
||||||
|
};
|
||||||
|
corrections
|
||||||
.into_iter()
|
.into_iter()
|
||||||
|
.map(|s| case_correct(s, word_token.case()))
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
|
||||||
|
map_lookup(&typos_dict::WORD_DICTIONARY, word)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn correct_with_vars(&self, word: &str) -> Option<Vec<&'static str>> {
|
||||||
|
let variants = map_lookup(&typos_vars::VARS_DICTIONARY, word)?;
|
||||||
|
self.select_variant(variants)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn select_variant(
|
||||||
|
&self,
|
||||||
|
vars: &'static [(u8, &'static typos_vars::VariantsMap)],
|
||||||
|
) -> Option<Vec<&'static str>> {
|
||||||
|
let var = vars[0];
|
||||||
|
let var_categories = unsafe {
|
||||||
|
// Code-genned from a checked category-set, so known to be safe
|
||||||
|
typos_vars::CategorySet::new(var.0)
|
||||||
|
};
|
||||||
|
if let Some(locale) = self.locale {
|
||||||
|
if var_categories.contains(locale) {
|
||||||
|
// Already valid for the current locale.
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(
|
||||||
|
typos_vars::corrections(locale, *var.1)
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.collect(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// All locales are valid
|
||||||
|
if var_categories.is_empty() {
|
||||||
|
// But the word is never valid.
|
||||||
|
let mut unique: Vec<_> = var.1.iter().flat_map(|v| v.iter()).copied().collect();
|
||||||
|
unique.sort_unstable();
|
||||||
|
unique.dedup();
|
||||||
|
Some(unique)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl typos::Dictionary for BuiltIn {
|
impl typos::Dictionary for BuiltIn {
|
||||||
|
@ -37,10 +95,7 @@ impl typos::Dictionary for BuiltIn {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn map_lookup(
|
fn map_lookup<V: Clone>(map: &'static phf::Map<UniCase<&'static str>, V>, key: &str) -> Option<V> {
|
||||||
map: &'static phf::Map<UniCase<&'static str>, &'static str>,
|
|
||||||
key: &str,
|
|
||||||
) -> Option<&'static str> {
|
|
||||||
// This transmute should be safe as `get` will not store the reference with
|
// This transmute should be safe as `get` will not store the reference with
|
||||||
// the expanded lifetime. This is due to `Borrow` being overly strict and
|
// the expanded lifetime. This is due to `Borrow` being overly strict and
|
||||||
// can't have an impl for `&'static str` to `Borrow<&'a str>`.
|
// can't have an impl for `&'static str` to `Borrow<&'a str>`.
|
||||||
|
|
|
@ -1 +1,2 @@
|
||||||
|
pub mod config;
|
||||||
pub mod dict;
|
pub mod dict;
|
||||||
|
|
|
@ -56,7 +56,7 @@ fn run() -> Result<i32, anyhow::Error> {
|
||||||
.include_chars(config.default.identifier_include_chars().to_owned())
|
.include_chars(config.default.identifier_include_chars().to_owned())
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
let dictionary = crate::dict::BuiltIn::new();
|
let dictionary = crate::dict::BuiltIn::new(config.default.locale());
|
||||||
|
|
||||||
let mut settings = typos::checks::TyposSettings::new();
|
let mut settings = typos::checks::TyposSettings::new();
|
||||||
settings
|
settings
|
||||||
|
|
Loading…
Reference in a new issue