mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-24 00:22:15 -05:00
feat: Support English dialects
The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest English dialect. If someone wants to opt in, they can have typos corrected to a specific English dialect. Fixes #52 Fixes #22
This commit is contained in:
parent
f1cf48b6be
commit
ab4a5bbdaf
22 changed files with 236208 additions and 37310 deletions
4
.ignore
4
.ignore
|
@ -1,3 +1,3 @@
|
|||
*_codegen.rs
|
||||
*codegen.rs
|
||||
assets/
|
||||
typos/benches/corrections.rs
|
||||
benches/corrections.rs
|
||||
|
|
268
Cargo.lock
generated
268
Cargo.lock
generated
|
@ -2,9 +2,9 @@
|
|||
# It is not intended for manual editing.
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.10"
|
||||
version = "0.7.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada"
|
||||
checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
@ -20,18 +20,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.31"
|
||||
version = "1.0.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f"
|
||||
checksum = "6b602bfe940d21c130f3895acd65221e8a61270debe89d628b9cb4e3ccb8569b"
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.4.12"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
|
||||
dependencies = [
|
||||
"nodrop",
|
||||
]
|
||||
checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8"
|
||||
|
||||
[[package]]
|
||||
name = "assert_fs"
|
||||
|
@ -90,15 +87,15 @@ checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
|
|||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "0.1.9"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
|
||||
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "2.33.1"
|
||||
version = "2.33.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
|
||||
checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
|
||||
dependencies = [
|
||||
"ansi_term",
|
||||
"atty",
|
||||
|
@ -203,10 +200,10 @@ checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b"
|
|||
dependencies = [
|
||||
"fnv",
|
||||
"ident_case",
|
||||
"proc-macro2 1.0.18",
|
||||
"quote 1.0.6",
|
||||
"proc-macro2 1.0.19",
|
||||
"quote 1.0.7",
|
||||
"strsim 0.9.3",
|
||||
"syn 1.0.33",
|
||||
"syn 1.0.38",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -216,8 +213,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72"
|
||||
dependencies = [
|
||||
"darling_core",
|
||||
"quote 1.0.6",
|
||||
"syn 1.0.33",
|
||||
"quote 1.0.7",
|
||||
"syn 1.0.38",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -240,9 +237,9 @@ version = "0.99.9"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "298998b1cf6b5b2c8a7b023dfd45821825ce3ba8a8af55c921a0e734e4653f76"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.18",
|
||||
"quote 1.0.6",
|
||||
"syn 1.0.33",
|
||||
"proc-macro2 1.0.19",
|
||||
"quote 1.0.7",
|
||||
"syn 1.0.38",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -252,9 +249,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "6604612c19dd3bb353650b715b61f09bcb089dd17bdca1a9a42637079bf5e428"
|
||||
dependencies = [
|
||||
"darling",
|
||||
"proc-macro2 1.0.18",
|
||||
"quote 1.0.6",
|
||||
"syn 1.0.33",
|
||||
"proc-macro2 1.0.19",
|
||||
"quote 1.0.7",
|
||||
"syn 1.0.38",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -270,10 +267,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.5.3"
|
||||
name = "edit-distance"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
|
||||
checksum = "bbbaaaf38131deb9ca518a274a45bfdb8771f139517b073b16c2d3d32ae5037b"
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd56b59865bce947ac5958779cfa508f6c3b9497cc762b7e24a12d11ccde2c4f"
|
||||
|
||||
[[package]]
|
||||
name = "enumflags2"
|
||||
|
@ -290,9 +293,9 @@ version = "0.6.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "946ee94e3dbf58fdd324f9ce245c7b238d46a66f00e86a020b71996349e46cce"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.18",
|
||||
"quote 1.0.6",
|
||||
"syn 1.0.33",
|
||||
"proc-macro2 1.0.19",
|
||||
"quote 1.0.7",
|
||||
"syn 1.0.38",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -310,9 +313,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "float-cmp"
|
||||
version = "0.6.0"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da62c4f1b81918835a8c6a484a397775fff5953fe83529afd51b05f5c6a6617d"
|
||||
checksum = "e1267f4ac4f343772758f7b1bdcbe767c218bbab93bb432acbf5162bbf85a6c4"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
@ -368,9 +371,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.13"
|
||||
version = "0.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91780f809e750b0a89f5544be56617ff6b1227ee485bcb06ebe10cdf89bd3b71"
|
||||
checksum = "3deed196b6e7f9e44a2ae8d94225d80302d81208b1bb673fd21fe634645c85a9"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
@ -419,9 +422,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.5"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e"
|
||||
checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
|
@ -431,29 +434,28 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
|||
|
||||
[[package]]
|
||||
name = "lexical-core"
|
||||
version = "0.6.7"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f86d66d380c9c5a685aaac7a11818bdfa1f733198dfd9ec09c70b762cd12ad6f"
|
||||
checksum = "db65c6da02e61f55dae90a0ae427b2a5f6b3e8db09f58d10efab23af92592616"
|
||||
dependencies = [
|
||||
"arrayvec",
|
||||
"bitflags",
|
||||
"cfg-if",
|
||||
"rustc_version",
|
||||
"ryu",
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.71"
|
||||
version = "0.2.74"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49"
|
||||
checksum = "a2f02823cf78b754822df5f7f268fb59822e7296276d3e069d8e8cb26a14bd10"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.8"
|
||||
version = "0.4.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7"
|
||||
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
@ -486,12 +488,6 @@ dependencies = [
|
|||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nodrop"
|
||||
version = "0.1.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "5.1.2"
|
||||
|
@ -511,9 +507,9 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
|
|||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.11"
|
||||
version = "0.2.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096"
|
||||
checksum = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
@ -559,15 +555,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.8"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea"
|
||||
checksum = "c36fa947111f5c62a733b652544dd0016a43ce89619538a8ef92724a6f501a20"
|
||||
|
||||
[[package]]
|
||||
name = "predicates"
|
||||
version = "1.0.4"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "347a1b6f0b21e636bc9872fb60b83b8e185f6f5516298b8238699f7f9a531030"
|
||||
checksum = "96bfead12e90dccead362d62bb2c90a5f6fc4584963645bc7f71a735e0b0735a"
|
||||
dependencies = [
|
||||
"difference",
|
||||
"float-cmp",
|
||||
|
@ -594,27 +590,25 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "proc-macro-error"
|
||||
version = "1.0.2"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "98e9e4b82e0ef281812565ea4751049f1bdcdfccda7d3f459f2e138a40c08678"
|
||||
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
|
||||
dependencies = [
|
||||
"proc-macro-error-attr",
|
||||
"proc-macro2 1.0.18",
|
||||
"quote 1.0.6",
|
||||
"syn 1.0.33",
|
||||
"proc-macro2 1.0.19",
|
||||
"quote 1.0.7",
|
||||
"syn 1.0.38",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-error-attr"
|
||||
version = "1.0.2"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4f5444ead4e9935abd7f27dc51f7e852a0569ac888096d5ec2499470794e2e53"
|
||||
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.18",
|
||||
"quote 1.0.6",
|
||||
"syn 1.0.33",
|
||||
"syn-mid",
|
||||
"proc-macro2 1.0.19",
|
||||
"quote 1.0.7",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
|
@ -629,11 +623,11 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.18"
|
||||
version = "1.0.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa"
|
||||
checksum = "04f5f085b5d71e2188cb8271e5da0161ad52c3f227a661a3c135fdf28e258b12"
|
||||
dependencies = [
|
||||
"unicode-xid 0.2.0",
|
||||
"unicode-xid 0.2.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -653,11 +647,11 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.6"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "54a21852a652ad6f610c9510194f398ff6f8692e334fd1145fed931f7fbe44ea"
|
||||
checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.18",
|
||||
"proc-macro2 1.0.19",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -713,9 +707,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.1.56"
|
||||
version = "0.1.57"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
|
||||
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
|
@ -746,9 +740,9 @@ checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
|
|||
|
||||
[[package]]
|
||||
name = "remove_dir_all"
|
||||
version = "0.5.2"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e"
|
||||
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
@ -794,29 +788,29 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
|
|||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.114"
|
||||
version = "1.0.115"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5317f7588f0a5078ee60ef675ef96735a1442132dc645eb1d12c018620ed8cd3"
|
||||
checksum = "e54c9a88f2da7238af84b5101443f0c0d0a3bbdc455e34a5c9497b1903ed55d5"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.114"
|
||||
version = "1.0.115"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2a0be94b04690fbaed37cddffc5c134bf537c8e3329d53e982fe04c374978f8e"
|
||||
checksum = "609feed1d0a73cc36a0182a840a9b37b4a82f0b1150369f0536a9e3f2a31dc48"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.18",
|
||||
"quote 1.0.6",
|
||||
"syn 1.0.33",
|
||||
"proc-macro2 1.0.19",
|
||||
"quote 1.0.7",
|
||||
"syn 1.0.38",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.56"
|
||||
version = "1.0.57"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3433e879a558dde8b5e8feb2a04899cf34fdde1fafb894687e52105fc1162ac3"
|
||||
checksum = "164eacbdb13512ec2745fb09d51fd5b22b0d65ed294a1dcf7285a360c80a675c"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
|
@ -831,9 +825,9 @@ checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
|
|||
|
||||
[[package]]
|
||||
name = "static_assertions"
|
||||
version = "0.3.4"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3"
|
||||
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
|
@ -849,9 +843,9 @@ checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c"
|
|||
|
||||
[[package]]
|
||||
name = "structopt"
|
||||
version = "0.3.15"
|
||||
version = "0.3.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "de2f5e239ee807089b62adce73e48c625e0ed80df02c7ab3f068f5db5281065c"
|
||||
checksum = "de5472fb24d7e80ae84a7801b7978f95a19ec32cb1876faea59ab711eb901976"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"lazy_static",
|
||||
|
@ -860,15 +854,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "structopt-derive"
|
||||
version = "0.4.8"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "510413f9de616762a4fbeab62509bf15c729603b72d7cd71280fbca431b1c118"
|
||||
checksum = "1e0eb37335aeeebe51be42e2dc07f031163fbabfa6ac67d7ea68b5c2f68d5f99"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro-error",
|
||||
"proc-macro2 1.0.18",
|
||||
"quote 1.0.6",
|
||||
"syn 1.0.33",
|
||||
"proc-macro2 1.0.19",
|
||||
"quote 1.0.7",
|
||||
"syn 1.0.38",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -884,24 +878,13 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.33"
|
||||
version = "1.0.38"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8d5d96e8cbb005d6959f119f773bfaebb5684296108fb32600c00cde305b2cd"
|
||||
checksum = "e69abc24912995b3038597a7a593be5053eb0fb44f3cc5beec0deb421790c1f4"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.18",
|
||||
"quote 1.0.6",
|
||||
"unicode-xid 0.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn-mid"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7be3539f6c128a931cf19dcee741c1af532c7fd387baa739c03dd2e96479338a"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.18",
|
||||
"quote 1.0.6",
|
||||
"syn 1.0.33",
|
||||
"proc-macro2 1.0.19",
|
||||
"quote 1.0.7",
|
||||
"unicode-xid 0.2.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -951,9 +934,9 @@ version = "1.0.20"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.18",
|
||||
"quote 1.0.6",
|
||||
"syn 1.0.33",
|
||||
"proc-macro2 1.0.19",
|
||||
"quote 1.0.7",
|
||||
"syn 1.0.38",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1017,11 +1000,21 @@ dependencies = [
|
|||
"toml",
|
||||
"typos",
|
||||
"typos-dict",
|
||||
"typos-vars",
|
||||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typos-codegen"
|
||||
name = "typos-dict"
|
||||
version = "0.2.1"
|
||||
dependencies = [
|
||||
"log",
|
||||
"phf",
|
||||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typos-dict-codegen"
|
||||
version = "1.0.2"
|
||||
dependencies = [
|
||||
"codegenrs",
|
||||
|
@ -1033,12 +1026,45 @@ dependencies = [
|
|||
]
|
||||
|
||||
[[package]]
|
||||
name = "typos-dict"
|
||||
name = "typos-dict-verify"
|
||||
version = "1.0.2"
|
||||
dependencies = [
|
||||
"codegenrs",
|
||||
"csv",
|
||||
"edit-distance",
|
||||
"itertools",
|
||||
"structopt",
|
||||
"unicase",
|
||||
"varcon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typos-vars"
|
||||
version = "0.2.1"
|
||||
dependencies = [
|
||||
"log",
|
||||
"phf",
|
||||
"unicase",
|
||||
"varcon-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typos-vars-codegen"
|
||||
version = "1.0.2"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"clap-verbosity-flag",
|
||||
"codegenrs",
|
||||
"env_logger",
|
||||
"itertools",
|
||||
"log",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
"structopt",
|
||||
"typos",
|
||||
"unicase",
|
||||
"varcon",
|
||||
"varcon-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1058,9 +1084,9 @@ checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0"
|
|||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.7"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
|
||||
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
|
@ -1070,9 +1096,9 @@ checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
|||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.2.0"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
|
||||
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
|
||||
|
||||
[[package]]
|
||||
name = "varcon"
|
||||
|
@ -1150,9 +1176,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.8"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
[workspace]
|
||||
members = [
|
||||
"crates/typos",
|
||||
"crates/typos-dict", "crates/typos-dict/codegen",
|
||||
"crates/typos-dict", "crates/typos-dict/codegen", "crates/typos-dict/verify",
|
||||
"crates/typos-vars", "crates/typos-vars/codegen",
|
||||
"crates/codespell-dict", "crates/codespell-dict/codegen",
|
||||
"crates/misspell-dict", "crates/misspell-dict/codegen",
|
||||
"crates/wikipedia-dict", "crates/wikipedia-dict/codegen",
|
||||
|
@ -32,6 +33,7 @@ codecov = { repository = "crate-ci/typos" }
|
|||
[dependencies]
|
||||
typos = { version = "^0.3", path = "crates/typos" }
|
||||
typos-dict = { version = "^0.2", path = "crates/typos-dict" }
|
||||
typos-vars = { version = "^0.2", path = "crates/typos-vars" }
|
||||
phf = { version = "0.8", features = ["unicase"] }
|
||||
unicase = "2.5"
|
||||
anyhow = "1.0"
|
||||
|
|
|
@ -154,7 +154,13 @@ stages:
|
|||
steps:
|
||||
- template: install-rust.yml@templates
|
||||
- script: |
|
||||
cargo run --package typos-codegen -- --output crates/typos-dict/src/dict_codegen.rs --check
|
||||
cargo run --package typos-dict-codegen -- --output crates/typos-dict/src/dict_codegen.rs --check
|
||||
displayName: Verify typos-dict
|
||||
# Fails the build when the checked-in vars_codegen.rs differs from what
# typos-vars-codegen would generate.
- script: |
    cargo run --package typos-vars-codegen -- --output crates/typos-vars/src/vars_codegen.rs --check
  displayName: Verify typos-vars
|
||||
- script: |
|
||||
cargo run --package typos-dict-verify -- --input crates/typos-dict/assets/words.csv --output crates/typos-dict/assets/words.csv --check
|
||||
displayName: Verify typos-dict
|
||||
- script: |
|
||||
cargo run --package codespell-codegen -- --output crates/codespell-dict/src/dict_codegen.rs --check
|
||||
|
|
|
@ -4,12 +4,12 @@ extern crate test;
|
|||
|
||||
#[bench]
|
||||
fn load_corrections(b: &mut test::Bencher) {
|
||||
b.iter(|| typos_cli::dict::BuiltIn::new());
|
||||
b.iter(|| typos_cli::dict::BuiltIn::new(Default::default()));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn correct_word_hit(b: &mut test::Bencher) {
|
||||
let corrections = typos_cli::dict::BuiltIn::new();
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let input = typos::tokens::Word::new("successs", 0).unwrap();
|
||||
assert_eq!(
|
||||
corrections.correct_word(input),
|
||||
|
@ -20,7 +20,7 @@ fn correct_word_hit(b: &mut test::Bencher) {
|
|||
|
||||
#[bench]
|
||||
fn correct_word_miss(b: &mut test::Bencher) {
|
||||
let corrections = typos_cli::dict::BuiltIn::new();
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let input = typos::tokens::Word::new("success", 0).unwrap();
|
||||
assert!(corrections.correct_word(input).is_empty());
|
||||
b.iter(|| corrections.correct_word(input));
|
||||
|
|
|
@ -184,7 +184,7 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) {
|
|||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data).unwrap();
|
||||
|
||||
let corrections = typos_cli::dict::BuiltIn::new();
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_checks();
|
||||
b.iter(|| {
|
||||
|
|
1794
crates/typos-dict/assets/words.csv
vendored
1794
crates/typos-dict/assets/words.csv
vendored
File diff suppressed because it is too large
Load diff
|
@ -1,5 +1,5 @@
|
|||
[package]
|
||||
name = "typos-codegen"
|
||||
name = "typos-dict-codegen"
|
||||
version = "1.0.2"
|
||||
authors = ["Ed Page <eopage@gmail.com>"]
|
||||
description = "Source Code Spelling Correction"
|
||||
|
|
File diff suppressed because it is too large
Load diff
24
crates/typos-dict/verify/Cargo.toml
Normal file
24
crates/typos-dict/verify/Cargo.toml
Normal file
|
@ -0,0 +1,24 @@
|
|||
[package]
|
||||
name = "typos-dict-verify"
|
||||
version = "1.0.2"
|
||||
authors = ["Ed Page <eopage@gmail.com>"]
|
||||
description = "Source Code Spelling Correction"
|
||||
repository = "https://github.com/crate-ci/typos"
|
||||
readme = "../../../README.md"
|
||||
categories = ["text-processing"]
|
||||
license = "MIT"
|
||||
edition = "2018"
|
||||
publish = false
|
||||
|
||||
[badges]
|
||||
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||
codecov = { repository = "crate-ci/typos" }
|
||||
|
||||
[dependencies]
|
||||
csv = "1.0"
|
||||
unicase = "2.5"
|
||||
codegenrs = "0.1"
|
||||
structopt = "0.3"
|
||||
varcon = { version = "0.2", path = "../../varcon" }
|
||||
itertools = "0.9"
|
||||
edit-distance = "2.1"
|
99
crates/typos-dict/verify/src/main.rs
Normal file
99
crates/typos-dict/verify/src/main.rs
Normal file
|
@ -0,0 +1,99 @@
|
|||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
|
||||
use structopt::StructOpt;
|
||||
|
||||
fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
||||
let mut wtr = csv::Writer::from_writer(file);
|
||||
|
||||
let disallowed_typos = disallowed_typos();
|
||||
let related_words = related_words();
|
||||
|
||||
let mut reader = csv::ReaderBuilder::new()
|
||||
.has_headers(false)
|
||||
.from_reader(dict);
|
||||
for record in reader.records() {
|
||||
let record = record.unwrap();
|
||||
let typo = &record[0];
|
||||
let correction = &record[1];
|
||||
if disallowed_typos.contains(&unicase::UniCase::new(typo)) {
|
||||
continue;
|
||||
}
|
||||
let correction = related_words
|
||||
.get(correction)
|
||||
.and_then(|words| find_best_match(typo, correction, words))
|
||||
.unwrap_or(correction);
|
||||
wtr.write_record(&[typo, correction]).unwrap();
|
||||
}
|
||||
wtr.flush().unwrap();
|
||||
}
|
||||
|
||||
fn disallowed_typos() -> HashSet<unicase::UniCase<&'static str>> {
|
||||
varcon::VARCON
|
||||
.iter()
|
||||
.flat_map(|c| c.entries.iter())
|
||||
.flat_map(|e| e.variants.iter())
|
||||
.map(|v| unicase::UniCase::new(v.word))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn related_words() -> HashMap<&'static str, HashSet<&'static str>> {
|
||||
let mut words: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();
|
||||
for entry in varcon::VARCON.iter().flat_map(|c| c.entries.iter()) {
|
||||
let variants: HashSet<_> = entry
|
||||
.variants
|
||||
.iter()
|
||||
.filter(|v| v.types.iter().any(|t| t.tag != Some(varcon::Tag::Improper)))
|
||||
.map(|v| v.word)
|
||||
.collect();
|
||||
for variant in variants.iter() {
|
||||
let set = words.entry(variant).or_insert_with(HashSet::new);
|
||||
set.extend(variants.iter().filter(|v| *v != variant));
|
||||
}
|
||||
}
|
||||
words
|
||||
}
|
||||
|
||||
fn find_best_match<'c>(
|
||||
typo: &'c str,
|
||||
correction: &'c str,
|
||||
related_words: &HashSet<&'static str>,
|
||||
) -> Option<&'c str> {
|
||||
assert!(!related_words.contains(correction));
|
||||
let current = edit_distance::edit_distance(typo, correction);
|
||||
let mut matches: Vec<_> = related_words
|
||||
.iter()
|
||||
.map(|r| (edit_distance::edit_distance(typo, r), *r))
|
||||
.filter(|(d, _)| *d < current)
|
||||
.collect();
|
||||
matches.sort_unstable();
|
||||
matches.into_iter().next().map(|(_, r)| r)
|
||||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
#[structopt(rename_all = "kebab-case")]
|
||||
struct Options {
|
||||
#[structopt(short("-i"), long, parse(from_os_str))]
|
||||
input: std::path::PathBuf,
|
||||
#[structopt(flatten)]
|
||||
codegen: codegenrs::CodeGenArgs,
|
||||
}
|
||||
|
||||
fn run() -> Result<i32, Box<dyn std::error::Error>> {
|
||||
let options = Options::from_args();
|
||||
|
||||
let data = std::fs::read(&options.input).unwrap();
|
||||
|
||||
let mut content = vec![];
|
||||
generate(&mut content, &data);
|
||||
|
||||
let content = String::from_utf8(content)?;
|
||||
options.codegen.write_str(&content)?;
|
||||
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let code = run().unwrap();
|
||||
std::process::exit(code);
|
||||
}
|
21
crates/typos-vars/Cargo.toml
Normal file
21
crates/typos-vars/Cargo.toml
Normal file
|
@ -0,0 +1,21 @@
|
|||
[package]
|
||||
name = "typos-vars"
|
||||
version = "0.2.1"
|
||||
authors = ["Ed Page <eopage@gmail.com>"]
|
||||
description = "Source Code Spelling Correction"
|
||||
repository = "https://github.com/crate-ci/typos"
|
||||
readme = "../../README.md"
|
||||
categories = ["development-tools", "text-processing"]
|
||||
keywords = ["development", "spelling"]
|
||||
license = "MIT"
|
||||
edition = "2018"
|
||||
|
||||
[badges]
|
||||
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||
codecov = { repository = "crate-ci/typos" }
|
||||
|
||||
[dependencies]
|
||||
phf = { version = "0.8", features = ["unicase"] }
|
||||
unicase = "2.5"
|
||||
log = "0.4"
|
||||
varcon-core = { version = "1.0", path = "../varcon-core", features = ["flags"] }
|
30
crates/typos-vars/codegen/Cargo.toml
Normal file
30
crates/typos-vars/codegen/Cargo.toml
Normal file
|
@ -0,0 +1,30 @@
|
|||
[package]
|
||||
name = "typos-vars-codegen"
|
||||
version = "1.0.2"
|
||||
authors = ["Ed Page <eopage@gmail.com>"]
|
||||
description = "Source Code Spelling Correction"
|
||||
repository = "https://github.com/crate-ci/typos"
|
||||
readme = "../../../README.md"
|
||||
categories = ["text-processing"]
|
||||
license = "MIT"
|
||||
edition = "2018"
|
||||
publish = false
|
||||
|
||||
[badges]
|
||||
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||
codecov = { repository = "crate-ci/typos" }
|
||||
|
||||
[dependencies]
|
||||
phf = { version = "0.8", features = ["unicase"] }
|
||||
phf_codegen = "0.8"
|
||||
varcon = { version = "0.2", path = "../../varcon", features = ["flags"] }
|
||||
varcon-core = { version = "1.0", path = "../../varcon-core", features = ["flags"] }
|
||||
typos = { version = "^0.3", path = "../../typos" }
|
||||
unicase = "2.5"
|
||||
codegenrs = "0.1"
|
||||
structopt = "0.3"
|
||||
clap = "2"
|
||||
log = "0.4"
|
||||
env_logger = "0.7"
|
||||
clap-verbosity-flag = "0.3"
|
||||
itertools = "0.9"
|
314
crates/typos-vars/codegen/src/main.rs
Normal file
314
crates/typos-vars/codegen/src/main.rs
Normal file
|
@ -0,0 +1,314 @@
|
|||
use std::collections::BTreeMap;
|
||||
use std::collections::HashSet;
|
||||
use std::io::Write;
|
||||
|
||||
use structopt::StructOpt;
|
||||
|
||||
static CATEGORIES: [varcon::Category; 4] = [
|
||||
varcon::Category::American,
|
||||
varcon::Category::BritishIse,
|
||||
// For now, only want to support one form of British, so going with -ise as it seems more
|
||||
// popular.
|
||||
varcon::Category::Canadian,
|
||||
varcon::Category::Australian,
|
||||
// Other basically means all
|
||||
];
|
||||
|
||||
fn generate_variations<W: std::io::Write>(file: &mut W) {
|
||||
let entries = entries();
|
||||
|
||||
writeln!(
|
||||
file,
|
||||
"// This file is code-genned by {}",
|
||||
env!("CARGO_PKG_NAME")
|
||||
)
|
||||
.unwrap();
|
||||
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
||||
writeln!(file).unwrap();
|
||||
|
||||
writeln!(file, "use unicase::UniCase;").unwrap();
|
||||
writeln!(file).unwrap();
|
||||
|
||||
writeln!(file, "pub type Variants = &'static [&'static str];",).unwrap();
|
||||
writeln!(
|
||||
file,
|
||||
"pub type VariantsMap = [Variants; {}];",
|
||||
CATEGORIES.len()
|
||||
)
|
||||
.unwrap();
|
||||
writeln!(file).unwrap();
|
||||
|
||||
writeln!(file, "pub fn all_categories() -> crate::CategorySet {{",).unwrap();
|
||||
writeln!(
|
||||
file,
|
||||
" {}",
|
||||
itertools::join(
|
||||
CATEGORIES
|
||||
.iter()
|
||||
.map(|c| format!("crate::Category::{:?}", c)),
|
||||
" | "
|
||||
)
|
||||
)
|
||||
.unwrap();
|
||||
writeln!(file, "}}",).unwrap();
|
||||
writeln!(file).unwrap();
|
||||
|
||||
writeln!(
|
||||
file,
|
||||
"pub fn corrections(category: crate::Category, options: VariantsMap) -> &'static [&'static str] {{",
|
||||
)
|
||||
.unwrap();
|
||||
writeln!(file, " match category {{").unwrap();
|
||||
for (index, category) in CATEGORIES.iter().enumerate() {
|
||||
writeln!(
|
||||
file,
|
||||
" crate::Category::{:?} => options[{}],",
|
||||
category, index
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
writeln!(
|
||||
file,
|
||||
" crate::Category::BritishIze | crate::Category::Other => unreachable!(\"{{:?}} is unused\", category),",
|
||||
)
|
||||
.unwrap();
|
||||
writeln!(file, " }}").unwrap();
|
||||
writeln!(file, "}}").unwrap();
|
||||
writeln!(file).unwrap();
|
||||
|
||||
writeln!(
|
||||
file,
|
||||
"pub static VARS_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)]> = "
|
||||
)
|
||||
.unwrap();
|
||||
let entry_sets = entry_sets(entries.iter());
|
||||
let mut referenced_symbols: HashSet<&str> = HashSet::new();
|
||||
let mut builder = phf_codegen::Map::new();
|
||||
for (word, data) in entry_sets.iter() {
|
||||
if is_always_valid(data) {
|
||||
// No need to convert from current form to target form
|
||||
continue;
|
||||
}
|
||||
referenced_symbols.extend(data.iter().map(|(s, _)| s));
|
||||
let value = generate_link(&data);
|
||||
builder.entry(unicase::UniCase::new(word), &value);
|
||||
}
|
||||
let codegenned = builder.build();
|
||||
writeln!(file, "{}", codegenned).unwrap();
|
||||
writeln!(file, ";").unwrap();
|
||||
|
||||
for (symbol, entry) in entries.iter() {
|
||||
if !referenced_symbols.contains(symbol.as_str()) {
|
||||
continue;
|
||||
}
|
||||
generate_entry(file, symbol, entry);
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_entry(file: &mut impl std::io::Write, symbol: &str, entry: &varcon_core::Entry) {
|
||||
writeln!(file, "pub(crate) static {}: VariantsMap = [", symbol).unwrap();
|
||||
for category in &CATEGORIES {
|
||||
let corrections = collect_correct(entry, *category);
|
||||
let mut corrections: Vec<_> = corrections.iter().collect();
|
||||
corrections.sort_unstable();
|
||||
writeln!(file, " &[").unwrap();
|
||||
for correction in &corrections {
|
||||
writeln!(file, " {:?},", correction).unwrap();
|
||||
}
|
||||
writeln!(file, " ],").unwrap();
|
||||
}
|
||||
writeln!(file, "];").unwrap();
|
||||
writeln!(file).unwrap();
|
||||
}
|
||||
|
||||
fn generate_link(data: &[(&str, varcon::CategorySet)]) -> String {
|
||||
let mut output = Vec::new();
|
||||
|
||||
write!(output, "&[").unwrap();
|
||||
for (symbol, set) in data.iter() {
|
||||
write!(output, "(0b{:05b}, &{}), ", set.bits(), symbol).unwrap();
|
||||
}
|
||||
write!(output, "]").unwrap();
|
||||
|
||||
String::from_utf8(output).unwrap()
|
||||
}
|
||||
|
||||
fn is_always_valid(data: &[(&str, varcon::CategorySet)]) -> bool {
|
||||
let valid_categories = valid_categories();
|
||||
for (_symbol, set) in data.iter() {
|
||||
if *set == valid_categories {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn entries() -> BTreeMap<String, varcon_core::Entry> {
|
||||
varcon::VARCON
|
||||
.iter()
|
||||
.flat_map(|c| c.entries.iter())
|
||||
.filter(|e| {
|
||||
e.variants
|
||||
.iter()
|
||||
.all(|v| typos::tokens::Word::new(&v.word, 0).is_ok())
|
||||
})
|
||||
.map(|e| {
|
||||
let mut e = e.into_owned();
|
||||
for variant in e.variants.iter_mut() {
|
||||
variant.word.make_ascii_lowercase();
|
||||
}
|
||||
(entry_symbol(&e), e)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn entry_symbol(entry: &varcon_core::Entry) -> String {
|
||||
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
||||
std::hash::Hash::hash(entry, &mut hasher);
|
||||
let hash = std::hash::Hasher::finish(&hasher);
|
||||
format!(
|
||||
"ENTRY_{}_{}",
|
||||
entry.variants[0].word.to_ascii_uppercase(),
|
||||
hash
|
||||
)
|
||||
}
|
||||
|
||||
fn entry_sets<'e>(
|
||||
entries: impl Iterator<Item = (&'e String, &'e varcon_core::Entry)>,
|
||||
) -> BTreeMap<&'e str, Vec<(&'e str, varcon::CategorySet)>> {
|
||||
let mut sets = BTreeMap::new();
|
||||
for (symbol, entry) in entries {
|
||||
for (word, set) in entry_set(entry).iter() {
|
||||
let v = sets.entry(*word).or_insert_with(Vec::new);
|
||||
v.push((symbol.as_str(), *set));
|
||||
}
|
||||
}
|
||||
sets
|
||||
}
|
||||
|
||||
fn entry_set(entry: &varcon_core::Entry) -> BTreeMap<&str, varcon::CategorySet> {
|
||||
let mut sets = BTreeMap::new();
|
||||
let valid_categories = valid_categories();
|
||||
for variant in entry.variants.iter() {
|
||||
let set = sets
|
||||
.entry(variant.word.as_str())
|
||||
.or_insert_with(varcon::CategorySet::empty);
|
||||
for t in variant.types.iter() {
|
||||
match t.category {
|
||||
varcon::Category::Other => *set |= valid_categories,
|
||||
varcon::Category::BritishIze => (),
|
||||
_ => set.insert(t.category),
|
||||
}
|
||||
}
|
||||
}
|
||||
sets
|
||||
}
|
||||
|
||||
fn valid_categories() -> varcon::CategorySet {
|
||||
let mut c = varcon::CategorySet::empty();
|
||||
for cat in CATEGORIES.iter() {
|
||||
c.insert(*cat);
|
||||
}
|
||||
c
|
||||
}
|
||||
|
||||
fn collect_correct(entry: &varcon_core::Entry, category: varcon::Category) -> HashSet<&str> {
|
||||
// If there is ambiguity, collect all potential options.
|
||||
let mut primary = HashSet::new();
|
||||
let mut backup = HashSet::new();
|
||||
for variant in entry.variants.iter().filter(|v| !ignore_variant(v)) {
|
||||
for t in variant
|
||||
.types
|
||||
.iter()
|
||||
.filter(|t| t.category == category || t.category == varcon::Category::Other)
|
||||
{
|
||||
let tag = t.tag.unwrap_or(varcon::Tag::Eq);
|
||||
if tag == varcon::Tag::Eq {
|
||||
primary.insert(variant.word.as_str());
|
||||
}
|
||||
if tag != varcon::Tag::Improper {
|
||||
backup.insert(variant.word.as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if primary.len() == 1 {
|
||||
primary
|
||||
} else {
|
||||
backup
|
||||
}
|
||||
}
|
||||
|
||||
fn ignore_variant(variant: &varcon_core::Variant) -> bool {
|
||||
if variant.word == "anesthetisation"
|
||||
&& variant.types.len() == 1
|
||||
&& variant.types[0].category == varcon::Category::Australian
|
||||
&& (variant.types[0].tag == Some(varcon::Tag::Variant)
|
||||
|| variant.types[0].tag == Some(varcon::Tag::Seldom))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
// dict needs
|
||||
// all words, with bitflags, pointing to list of entry names
|
||||
//
|
||||
// varcon needs
|
||||
// all entries by name
|
||||
|
||||
// Command-line options for this code generator.
//
// Plain `//` comments are used on purpose: this struct derives `StructOpt`,
// which reads `///` doc comments.
#[derive(Debug, StructOpt)]
#[structopt(rename_all = "kebab-case")]
struct Options {
    // Arguments from `codegenrs`; `run` writes the generated text through this.
    #[structopt(flatten)]
    codegen: codegenrs::CodeGenArgs,
    // Arguments from `codegenrs`; `run` reformats the generated text through this.
    // NOTE(review): the name looks like a transposition of `rustfmt`; `run`
    // refers to it by this spelling.
    #[structopt(flatten)]
    rustmft: codegenrs::RustfmtArgs,

    // Verbosity flags; `run` derives the log level from these.
    #[structopt(flatten)]
    pub(crate) verbose: clap_verbosity_flag::Verbosity,
}
|
||||
|
||||
fn init_logging(level: Option<log::Level>) {
|
||||
if let Some(level) = level {
|
||||
let mut builder = env_logger::Builder::new();
|
||||
|
||||
builder.filter(None, level.to_level_filter());
|
||||
|
||||
if level == log::LevelFilter::Trace {
|
||||
builder.format_timestamp_secs();
|
||||
} else {
|
||||
builder.format(|f, record| {
|
||||
writeln!(
|
||||
f,
|
||||
"[{}] {}",
|
||||
record.level().to_string().to_lowercase(),
|
||||
record.args()
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
builder.init();
|
||||
}
|
||||
}
|
||||
|
||||
fn run() -> Result<i32, Box<dyn std::error::Error>> {
|
||||
let mut options = Options::from_args();
|
||||
options.verbose.set_default(Some(log::Level::Info));
|
||||
init_logging(options.verbose.log_level());
|
||||
|
||||
let mut content = vec![];
|
||||
generate_variations(&mut content);
|
||||
|
||||
let content = String::from_utf8(content)?;
|
||||
let content = options.rustmft.reformat(&content)?;
|
||||
options.codegen.write_str(&content)?;
|
||||
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let code = run().unwrap();
|
||||
std::process::exit(code);
|
||||
}
|
6
crates/typos-vars/src/lib.rs
Normal file
6
crates/typos-vars/src/lib.rs
Normal file
|
@ -0,0 +1,6 @@
|
|||
mod vars_codegen;
|
||||
|
||||
pub use crate::vars_codegen::*;
|
||||
|
||||
pub use varcon_core::Category;
|
||||
pub use varcon_core::CategorySet;
|
201827
crates/typos-vars/src/vars_codegen.rs
Normal file
201827
crates/typos-vars/src/vars_codegen.rs
Normal file
File diff suppressed because it is too large
Load diff
|
@ -14,15 +14,18 @@ Configuration is read from the following (in precedence order)
|
|||
|
||||
| Field | Argument | Format | Description |
|
||||
|------------------------|-------------------|--------|-------------|
|
||||
| files.binary | --binary | bool | |
|
||||
| files.ignore-hidden | --hidden | bool | |
|
||||
| files.ignore-files | --ignore | bool | |
|
||||
| files.ignore-dot | --ignore-dot | bool | |
|
||||
| files.ignore-vcs | --ignore-vcs | bool | |
|
||||
| files.ignore-global | --ignore-global | bool | |
|
||||
| files.ignore-parent | --ignore-parent | bool | |
|
||||
| default.check-filename | \- | bool | |
|
||||
| default.check-file | \- | bool | |
|
||||
| default.ignore-hex | \- | bool | |
|
||||
| default.identifier-include-digits | \- | bool | |
|
||||
| default.identifier-include-chars | \- | string | |
|
||||
| files.binary | --binary | bool | Check binary files as text |
|
||||
| files.ignore-hidden | --hidden | bool | Skip hidden files and directories. |
|
||||
| files.ignore-files | --ignore | bool | Respect ignore files. |
|
||||
| files.ignore-dot | --ignore-dot | bool | Respect .ignore files. |
|
||||
| files.ignore-vcs | --ignore-vcs | bool | Respect ignore files in vcs directories. |
|
||||
| files.ignore-global | --ignore-global | bool | Respect global ignore files. |
|
||||
| files.ignore-parent | --ignore-parent | bool | Respect ignore files in parent directories. |
|
||||
| default.check-filename | \- | bool | Verify spelling in file names. |
|
||||
| default.check-file | \- | bool | Verify spelling in files. |
|
||||
| default.ignore-hex | \- | bool | Do not check identifiers that appear to be hexadecimal values. |
|
||||
| default.identifier-leading-digits | \- | bool | Allow identifiers to start with digits, in addition to letters. |
|
||||
| default.identifier-include-digits | \- | bool | Allow identifiers to include digits, in addition to letters. |
|
||||
| default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. |
|
||||
| default.identifier-include-chars | \- | string | Allow identifiers to include these characters. |
|
||||
| default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
|
||||
|
|
10
src/args.rs
10
src/args.rs
|
@ -112,6 +112,12 @@ pub(crate) struct FileArgs {
|
|||
no_hex: bool,
|
||||
#[structopt(long, overrides_with("no-hex"), hidden(true))]
|
||||
hex: bool,
|
||||
|
||||
#[structopt(
|
||||
long,
|
||||
possible_values(&config::Locale::variants()),
|
||||
)]
|
||||
pub(crate) locale: Option<config::Locale>,
|
||||
}
|
||||
|
||||
impl config::FileSource for FileArgs {
|
||||
|
@ -141,6 +147,10 @@ impl config::FileSource for FileArgs {
|
|||
(_, _) => unreachable!("StructOpt should make this impossible"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the locale stored in these arguments, if one was given.
fn locale(&self) -> Option<config::Locale> {
    self.locale
}
|
||||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
|
|
|
@ -82,6 +82,10 @@ pub trait FileSource {
|
|||
fn identifier_include_chars(&self) -> Option<&str> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Locale supplied by this source; the default implementation supplies none.
fn locale(&self) -> Option<Locale> {
    None
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
|
||||
|
@ -247,6 +251,7 @@ pub struct FileConfig {
|
|||
pub identifier_leading_chars: Option<String>,
|
||||
pub identifier_include_digits: Option<bool>,
|
||||
pub identifier_include_chars: Option<String>,
|
||||
pub locale: Option<Locale>,
|
||||
}
|
||||
|
||||
impl FileConfig {
|
||||
|
@ -272,6 +277,9 @@ impl FileConfig {
|
|||
if let Some(source) = source.identifier_include_chars() {
|
||||
self.identifier_include_chars = Some(source.to_owned());
|
||||
}
|
||||
if let Some(source) = source.locale() {
|
||||
self.locale = Some(source);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_filename(&self) -> bool {
|
||||
|
@ -301,6 +309,10 @@ impl FileConfig {
|
|||
pub fn identifier_include_chars(&self) -> &str {
|
||||
self.identifier_include_chars.as_deref().unwrap_or("_'")
|
||||
}
|
||||
|
||||
/// Effective locale: the configured value, or `Locale::default()` when unset.
pub fn locale(&self) -> Locale {
    self.locale.unwrap_or_default()
}
|
||||
}
|
||||
|
||||
impl FileSource for FileConfig {
|
||||
|
@ -331,6 +343,10 @@ impl FileSource for FileConfig {
|
|||
fn identifier_include_chars(&self) -> Option<&str> {
|
||||
self.identifier_include_chars.as_deref()
|
||||
}
|
||||
|
||||
/// Returns the locale stored in this config, if any, without applying a default.
fn locale(&self) -> Option<Locale> {
    self.locale
}
|
||||
}
|
||||
|
||||
fn find_project_file(dir: std::path::PathBuf, name: &str) -> Option<std::path::PathBuf> {
|
||||
|
@ -346,3 +362,62 @@ fn find_project_file(dir: std::path::PathBuf, name: &str) -> Option<std::path::P
|
|||
}
|
||||
Some(file_path)
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum Locale {
|
||||
En,
|
||||
EnUs,
|
||||
EnGb,
|
||||
EnCa,
|
||||
EnAu,
|
||||
}
|
||||
|
||||
impl Locale {
|
||||
pub fn category(self) -> Option<typos_vars::Category> {
|
||||
match self {
|
||||
Locale::En => None,
|
||||
Locale::EnUs => Some(typos_vars::Category::American),
|
||||
Locale::EnGb => Some(typos_vars::Category::BritishIse),
|
||||
Locale::EnCa => Some(typos_vars::Category::Canadian),
|
||||
Locale::EnAu => Some(typos_vars::Category::Australian),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn variants() -> [&'static str; 5] {
|
||||
["en", "en-us", "en-gb", "en-ca", "en-au"]
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Locale {
|
||||
fn default() -> Self {
|
||||
Locale::En
|
||||
}
|
||||
}
|
||||
|
||||
impl std::str::FromStr for Locale {
|
||||
type Err = String;
|
||||
|
||||
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
|
||||
match s {
|
||||
"en" => Ok(Locale::En),
|
||||
"en-us" => Ok(Locale::EnUs),
|
||||
"en-gb" => Ok(Locale::EnGb),
|
||||
"en-ca" => Ok(Locale::EnCa),
|
||||
"en-au" => Ok(Locale::EnAu),
|
||||
_ => Err("valid values: en, en-us, en-gb, en-ca, en-au".to_owned()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Locale {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match *self {
|
||||
Locale::En => write!(f, "en"),
|
||||
Locale::EnUs => write!(f, "en-us"),
|
||||
Locale::EnGb => write!(f, "en-gb"),
|
||||
Locale::EnCa => write!(f, "en-ca"),
|
||||
Locale::EnAu => write!(f, "en-au"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
75
src/dict.rs
75
src/dict.rs
|
@ -5,11 +5,15 @@ use unicase::UniCase;
|
|||
use typos::tokens::Case;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct BuiltIn {}
|
||||
pub struct BuiltIn {
|
||||
locale: Option<typos_vars::Category>,
|
||||
}
|
||||
|
||||
impl BuiltIn {
|
||||
pub fn new() -> Self {
|
||||
Self {}
|
||||
pub fn new(locale: crate::config::Locale) -> Self {
|
||||
Self {
|
||||
locale: locale.category(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn correct_ident<'s, 'w>(
|
||||
|
@ -19,12 +23,66 @@ impl BuiltIn {
|
|||
Vec::new()
|
||||
}
|
||||
|
||||
pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Vec<Cow<'s, str>> {
|
||||
map_lookup(&typos_dict::WORD_DICTIONARY, word.token())
|
||||
.map(|s| case_correct(s, word.case()))
|
||||
pub fn correct_word<'s, 'w>(
|
||||
&'s self,
|
||||
word_token: typos::tokens::Word<'w>,
|
||||
) -> Vec<Cow<'s, str>> {
|
||||
let word = word_token.token();
|
||||
let corrections = if let Some(correction) = self.correct_with_dict(word) {
|
||||
self.correct_with_vars(word)
|
||||
.unwrap_or_else(|| vec![correction])
|
||||
} else {
|
||||
self.correct_with_vars(word).unwrap_or_else(Vec::new)
|
||||
};
|
||||
corrections
|
||||
.into_iter()
|
||||
.map(|s| case_correct(s, word_token.case()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Looks `word` up in the built-in typo dictionary (`WORD_DICTIONARY`).
fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
    map_lookup(&typos_dict::WORD_DICTIONARY, word)
}
|
||||
|
||||
fn correct_with_vars(&self, word: &str) -> Option<Vec<&'static str>> {
|
||||
let variants = map_lookup(&typos_vars::VARS_DICTIONARY, word)?;
|
||||
self.select_variant(variants)
|
||||
}
|
||||
|
||||
fn select_variant(
|
||||
&self,
|
||||
vars: &'static [(u8, &'static typos_vars::VariantsMap)],
|
||||
) -> Option<Vec<&'static str>> {
|
||||
let var = vars[0];
|
||||
let var_categories = unsafe {
|
||||
// Code-genned from a checked category-set, so known to be safe
|
||||
typos_vars::CategorySet::new(var.0)
|
||||
};
|
||||
if let Some(locale) = self.locale {
|
||||
if var_categories.contains(locale) {
|
||||
// Already valid for the current locale.
|
||||
None
|
||||
} else {
|
||||
Some(
|
||||
typos_vars::corrections(locale, *var.1)
|
||||
.iter()
|
||||
.copied()
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
// All locales are valid
|
||||
if var_categories.is_empty() {
|
||||
// But the word is never valid.
|
||||
let mut unique: Vec<_> = var.1.iter().flat_map(|v| v.iter()).copied().collect();
|
||||
unique.sort_unstable();
|
||||
unique.dedup();
|
||||
Some(unique)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl typos::Dictionary for BuiltIn {
|
||||
|
@ -37,10 +95,7 @@ impl typos::Dictionary for BuiltIn {
|
|||
}
|
||||
}
|
||||
|
||||
fn map_lookup(
|
||||
map: &'static phf::Map<UniCase<&'static str>, &'static str>,
|
||||
key: &str,
|
||||
) -> Option<&'static str> {
|
||||
fn map_lookup<V: Clone>(map: &'static phf::Map<UniCase<&'static str>, V>, key: &str) -> Option<V> {
|
||||
// This transmute should be safe as `get` will not store the reference with
|
||||
// the expanded lifetime. This is due to `Borrow` being overly strict and
|
||||
// can't have an impl for `&'static str` to `Borrow<&'a str>`.
|
||||
|
|
|
@ -1 +1,2 @@
|
|||
pub mod config;
|
||||
pub mod dict;
|
||||
|
|
|
@ -56,7 +56,7 @@ fn run() -> Result<i32, anyhow::Error> {
|
|||
.include_chars(config.default.identifier_include_chars().to_owned())
|
||||
.build();
|
||||
|
||||
let dictionary = crate::dict::BuiltIn::new();
|
||||
let dictionary = crate::dict::BuiltIn::new(config.default.locale());
|
||||
|
||||
let mut settings = typos::checks::TyposSettings::new();
|
||||
settings
|
||||
|
|
Loading…
Reference in a new issue