Merge pull request #128 from epage/varcon

feat: Support english dialects
This commit is contained in:
Ed Page 2020-08-21 10:46:34 -05:00 committed by GitHub
commit 443aa5c4fe
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 236208 additions and 37310 deletions

View file

@ -1,3 +1,3 @@
*_codegen.rs *codegen.rs
assets/ assets/
typos/benches/corrections.rs benches/corrections.rs

268
Cargo.lock generated
View file

@ -2,9 +2,9 @@
# It is not intended for manual editing. # It is not intended for manual editing.
[[package]] [[package]]
name = "aho-corasick" name = "aho-corasick"
version = "0.7.10" version = "0.7.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86"
dependencies = [ dependencies = [
"memchr", "memchr",
] ]
@ -20,18 +20,15 @@ dependencies = [
[[package]] [[package]]
name = "anyhow" name = "anyhow"
version = "1.0.31" version = "1.0.32"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f" checksum = "6b602bfe940d21c130f3895acd65221e8a61270debe89d628b9cb4e3ccb8569b"
[[package]] [[package]]
name = "arrayvec" name = "arrayvec"
version = "0.4.12" version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8"
dependencies = [
"nodrop",
]
[[package]] [[package]]
name = "assert_fs" name = "assert_fs"
@ -90,15 +87,15 @@ checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "0.1.9" version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33" checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]] [[package]]
name = "clap" name = "clap"
version = "2.33.1" version = "2.33.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129" checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
dependencies = [ dependencies = [
"ansi_term", "ansi_term",
"atty", "atty",
@ -203,10 +200,10 @@ checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b"
dependencies = [ dependencies = [
"fnv", "fnv",
"ident_case", "ident_case",
"proc-macro2 1.0.18", "proc-macro2 1.0.19",
"quote 1.0.6", "quote 1.0.7",
"strsim 0.9.3", "strsim 0.9.3",
"syn 1.0.33", "syn 1.0.38",
] ]
[[package]] [[package]]
@ -216,8 +213,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72"
dependencies = [ dependencies = [
"darling_core", "darling_core",
"quote 1.0.6", "quote 1.0.7",
"syn 1.0.33", "syn 1.0.38",
] ]
[[package]] [[package]]
@ -240,9 +237,9 @@ version = "0.99.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "298998b1cf6b5b2c8a7b023dfd45821825ce3ba8a8af55c921a0e734e4653f76" checksum = "298998b1cf6b5b2c8a7b023dfd45821825ce3ba8a8af55c921a0e734e4653f76"
dependencies = [ dependencies = [
"proc-macro2 1.0.18", "proc-macro2 1.0.19",
"quote 1.0.6", "quote 1.0.7",
"syn 1.0.33", "syn 1.0.38",
] ]
[[package]] [[package]]
@ -252,9 +249,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6604612c19dd3bb353650b715b61f09bcb089dd17bdca1a9a42637079bf5e428" checksum = "6604612c19dd3bb353650b715b61f09bcb089dd17bdca1a9a42637079bf5e428"
dependencies = [ dependencies = [
"darling", "darling",
"proc-macro2 1.0.18", "proc-macro2 1.0.19",
"quote 1.0.6", "quote 1.0.7",
"syn 1.0.33", "syn 1.0.38",
] ]
[[package]] [[package]]
@ -270,10 +267,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]] [[package]]
name = "either" name = "edit-distance"
version = "1.5.3" version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" checksum = "bbbaaaf38131deb9ca518a274a45bfdb8771f139517b073b16c2d3d32ae5037b"
[[package]]
name = "either"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd56b59865bce947ac5958779cfa508f6c3b9497cc762b7e24a12d11ccde2c4f"
[[package]] [[package]]
name = "enumflags2" name = "enumflags2"
@ -290,9 +293,9 @@ version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "946ee94e3dbf58fdd324f9ce245c7b238d46a66f00e86a020b71996349e46cce" checksum = "946ee94e3dbf58fdd324f9ce245c7b238d46a66f00e86a020b71996349e46cce"
dependencies = [ dependencies = [
"proc-macro2 1.0.18", "proc-macro2 1.0.19",
"quote 1.0.6", "quote 1.0.7",
"syn 1.0.33", "syn 1.0.38",
] ]
[[package]] [[package]]
@ -310,9 +313,9 @@ dependencies = [
[[package]] [[package]]
name = "float-cmp" name = "float-cmp"
version = "0.6.0" version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da62c4f1b81918835a8c6a484a397775fff5953fe83529afd51b05f5c6a6617d" checksum = "e1267f4ac4f343772758f7b1bdcbe767c218bbab93bb432acbf5162bbf85a6c4"
dependencies = [ dependencies = [
"num-traits", "num-traits",
] ]
@ -368,9 +371,9 @@ dependencies = [
[[package]] [[package]]
name = "hermit-abi" name = "hermit-abi"
version = "0.1.13" version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91780f809e750b0a89f5544be56617ff6b1227ee485bcb06ebe10cdf89bd3b71" checksum = "3deed196b6e7f9e44a2ae8d94225d80302d81208b1bb673fd21fe634645c85a9"
dependencies = [ dependencies = [
"libc", "libc",
] ]
@ -419,9 +422,9 @@ dependencies = [
[[package]] [[package]]
name = "itoa" name = "itoa"
version = "0.4.5" version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e" checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6"
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
@ -431,29 +434,28 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]] [[package]]
name = "lexical-core" name = "lexical-core"
version = "0.6.7" version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f86d66d380c9c5a685aaac7a11818bdfa1f733198dfd9ec09c70b762cd12ad6f" checksum = "db65c6da02e61f55dae90a0ae427b2a5f6b3e8db09f58d10efab23af92592616"
dependencies = [ dependencies = [
"arrayvec", "arrayvec",
"bitflags", "bitflags",
"cfg-if", "cfg-if",
"rustc_version",
"ryu", "ryu",
"static_assertions", "static_assertions",
] ]
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.71" version = "0.2.74"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49" checksum = "a2f02823cf78b754822df5f7f268fb59822e7296276d3e069d8e8cb26a14bd10"
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.8" version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
] ]
@ -486,12 +488,6 @@ dependencies = [
"unicase", "unicase",
] ]
[[package]]
name = "nodrop"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
[[package]] [[package]]
name = "nom" name = "nom"
version = "5.1.2" version = "5.1.2"
@ -511,9 +507,9 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
[[package]] [[package]]
name = "num-traits" name = "num-traits"
version = "0.2.11" version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096" checksum = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611"
dependencies = [ dependencies = [
"autocfg", "autocfg",
] ]
@ -559,15 +555,15 @@ dependencies = [
[[package]] [[package]]
name = "ppv-lite86" name = "ppv-lite86"
version = "0.2.8" version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea" checksum = "c36fa947111f5c62a733b652544dd0016a43ce89619538a8ef92724a6f501a20"
[[package]] [[package]]
name = "predicates" name = "predicates"
version = "1.0.4" version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "347a1b6f0b21e636bc9872fb60b83b8e185f6f5516298b8238699f7f9a531030" checksum = "96bfead12e90dccead362d62bb2c90a5f6fc4584963645bc7f71a735e0b0735a"
dependencies = [ dependencies = [
"difference", "difference",
"float-cmp", "float-cmp",
@ -594,27 +590,25 @@ dependencies = [
[[package]] [[package]]
name = "proc-macro-error" name = "proc-macro-error"
version = "1.0.2" version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98e9e4b82e0ef281812565ea4751049f1bdcdfccda7d3f459f2e138a40c08678" checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [ dependencies = [
"proc-macro-error-attr", "proc-macro-error-attr",
"proc-macro2 1.0.18", "proc-macro2 1.0.19",
"quote 1.0.6", "quote 1.0.7",
"syn 1.0.33", "syn 1.0.38",
"version_check", "version_check",
] ]
[[package]] [[package]]
name = "proc-macro-error-attr" name = "proc-macro-error-attr"
version = "1.0.2" version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f5444ead4e9935abd7f27dc51f7e852a0569ac888096d5ec2499470794e2e53" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [ dependencies = [
"proc-macro2 1.0.18", "proc-macro2 1.0.19",
"quote 1.0.6", "quote 1.0.7",
"syn 1.0.33",
"syn-mid",
"version_check", "version_check",
] ]
@ -629,11 +623,11 @@ dependencies = [
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.18" version = "1.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa" checksum = "04f5f085b5d71e2188cb8271e5da0161ad52c3f227a661a3c135fdf28e258b12"
dependencies = [ dependencies = [
"unicode-xid 0.2.0", "unicode-xid 0.2.1",
] ]
[[package]] [[package]]
@ -653,11 +647,11 @@ dependencies = [
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.6" version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54a21852a652ad6f610c9510194f398ff6f8692e334fd1145fed931f7fbe44ea" checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
dependencies = [ dependencies = [
"proc-macro2 1.0.18", "proc-macro2 1.0.19",
] ]
[[package]] [[package]]
@ -713,9 +707,9 @@ dependencies = [
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.1.56" version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
[[package]] [[package]]
name = "regex" name = "regex"
@ -746,9 +740,9 @@ checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
[[package]] [[package]]
name = "remove_dir_all" name = "remove_dir_all"
version = "0.5.2" version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [ dependencies = [
"winapi", "winapi",
] ]
@ -794,29 +788,29 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.114" version = "1.0.115"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5317f7588f0a5078ee60ef675ef96735a1442132dc645eb1d12c018620ed8cd3" checksum = "e54c9a88f2da7238af84b5101443f0c0d0a3bbdc455e34a5c9497b1903ed55d5"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.114" version = "1.0.115"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a0be94b04690fbaed37cddffc5c134bf537c8e3329d53e982fe04c374978f8e" checksum = "609feed1d0a73cc36a0182a840a9b37b4a82f0b1150369f0536a9e3f2a31dc48"
dependencies = [ dependencies = [
"proc-macro2 1.0.18", "proc-macro2 1.0.19",
"quote 1.0.6", "quote 1.0.7",
"syn 1.0.33", "syn 1.0.38",
] ]
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.56" version = "1.0.57"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3433e879a558dde8b5e8feb2a04899cf34fdde1fafb894687e52105fc1162ac3" checksum = "164eacbdb13512ec2745fb09d51fd5b22b0d65ed294a1dcf7285a360c80a675c"
dependencies = [ dependencies = [
"itoa", "itoa",
"ryu", "ryu",
@ -831,9 +825,9 @@ checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
[[package]] [[package]]
name = "static_assertions" name = "static_assertions"
version = "0.3.4" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]] [[package]]
name = "strsim" name = "strsim"
@ -849,9 +843,9 @@ checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c"
[[package]] [[package]]
name = "structopt" name = "structopt"
version = "0.3.15" version = "0.3.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de2f5e239ee807089b62adce73e48c625e0ed80df02c7ab3f068f5db5281065c" checksum = "de5472fb24d7e80ae84a7801b7978f95a19ec32cb1876faea59ab711eb901976"
dependencies = [ dependencies = [
"clap", "clap",
"lazy_static", "lazy_static",
@ -860,15 +854,15 @@ dependencies = [
[[package]] [[package]]
name = "structopt-derive" name = "structopt-derive"
version = "0.4.8" version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "510413f9de616762a4fbeab62509bf15c729603b72d7cd71280fbca431b1c118" checksum = "1e0eb37335aeeebe51be42e2dc07f031163fbabfa6ac67d7ea68b5c2f68d5f99"
dependencies = [ dependencies = [
"heck", "heck",
"proc-macro-error", "proc-macro-error",
"proc-macro2 1.0.18", "proc-macro2 1.0.19",
"quote 1.0.6", "quote 1.0.7",
"syn 1.0.33", "syn 1.0.38",
] ]
[[package]] [[package]]
@ -884,24 +878,13 @@ dependencies = [
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.33" version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8d5d96e8cbb005d6959f119f773bfaebb5684296108fb32600c00cde305b2cd" checksum = "e69abc24912995b3038597a7a593be5053eb0fb44f3cc5beec0deb421790c1f4"
dependencies = [ dependencies = [
"proc-macro2 1.0.18", "proc-macro2 1.0.19",
"quote 1.0.6", "quote 1.0.7",
"unicode-xid 0.2.0", "unicode-xid 0.2.1",
]
[[package]]
name = "syn-mid"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7be3539f6c128a931cf19dcee741c1af532c7fd387baa739c03dd2e96479338a"
dependencies = [
"proc-macro2 1.0.18",
"quote 1.0.6",
"syn 1.0.33",
] ]
[[package]] [[package]]
@ -951,9 +934,9 @@ version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793" checksum = "bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793"
dependencies = [ dependencies = [
"proc-macro2 1.0.18", "proc-macro2 1.0.19",
"quote 1.0.6", "quote 1.0.7",
"syn 1.0.33", "syn 1.0.38",
] ]
[[package]] [[package]]
@ -1017,11 +1000,21 @@ dependencies = [
"toml", "toml",
"typos", "typos",
"typos-dict", "typos-dict",
"typos-vars",
"unicase", "unicase",
] ]
[[package]] [[package]]
name = "typos-codegen" name = "typos-dict"
version = "0.2.1"
dependencies = [
"log",
"phf",
"unicase",
]
[[package]]
name = "typos-dict-codegen"
version = "1.0.2" version = "1.0.2"
dependencies = [ dependencies = [
"codegenrs", "codegenrs",
@ -1033,12 +1026,45 @@ dependencies = [
] ]
[[package]] [[package]]
name = "typos-dict" name = "typos-dict-verify"
version = "1.0.2"
dependencies = [
"codegenrs",
"csv",
"edit-distance",
"itertools",
"structopt",
"unicase",
"varcon",
]
[[package]]
name = "typos-vars"
version = "0.2.1" version = "0.2.1"
dependencies = [ dependencies = [
"log", "log",
"phf", "phf",
"unicase", "unicase",
"varcon-core",
]
[[package]]
name = "typos-vars-codegen"
version = "1.0.2"
dependencies = [
"clap",
"clap-verbosity-flag",
"codegenrs",
"env_logger",
"itertools",
"log",
"phf",
"phf_codegen",
"structopt",
"typos",
"unicase",
"varcon",
"varcon-core",
] ]
[[package]] [[package]]
@ -1058,9 +1084,9 @@ checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0"
[[package]] [[package]]
name = "unicode-width" name = "unicode-width"
version = "0.1.7" version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
[[package]] [[package]]
name = "unicode-xid" name = "unicode-xid"
@ -1070,9 +1096,9 @@ checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
[[package]] [[package]]
name = "unicode-xid" name = "unicode-xid"
version = "0.2.0" version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
[[package]] [[package]]
name = "varcon" name = "varcon"
@ -1150,9 +1176,9 @@ dependencies = [
[[package]] [[package]]
name = "winapi" name = "winapi"
version = "0.3.8" version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [ dependencies = [
"winapi-i686-pc-windows-gnu", "winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu",

View file

@ -1,7 +1,8 @@
[workspace] [workspace]
members = [ members = [
"crates/typos", "crates/typos",
"crates/typos-dict", "crates/typos-dict/codegen", "crates/typos-dict", "crates/typos-dict/codegen", "crates/typos-dict/verify",
"crates/typos-vars", "crates/typos-vars/codegen",
"crates/codespell-dict", "crates/codespell-dict/codegen", "crates/codespell-dict", "crates/codespell-dict/codegen",
"crates/misspell-dict", "crates/misspell-dict/codegen", "crates/misspell-dict", "crates/misspell-dict/codegen",
"crates/wikipedia-dict", "crates/wikipedia-dict/codegen", "crates/wikipedia-dict", "crates/wikipedia-dict/codegen",
@ -32,6 +33,7 @@ codecov = { repository = "crate-ci/typos" }
[dependencies] [dependencies]
typos = { version = "^0.3", path = "crates/typos" } typos = { version = "^0.3", path = "crates/typos" }
typos-dict = { version = "^0.2", path = "crates/typos-dict" } typos-dict = { version = "^0.2", path = "crates/typos-dict" }
typos-vars = { version = "^0.2", path = "crates/typos-vars" }
phf = { version = "0.8", features = ["unicase"] } phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5" unicase = "2.5"
anyhow = "1.0" anyhow = "1.0"

View file

@ -154,7 +154,13 @@ stages:
steps: steps:
- template: install-rust.yml@templates - template: install-rust.yml@templates
- script: | - script: |
cargo run --package typos-codegen -- --output crates/typos-dict/src/dict_codegen.rs --check cargo run --package typos-dict-codegen -- --output crates/typos-dict/src/dict_codegen.rs --check
displayName: Verify typos-dict
- script: |
cargo run --package typos-vars-codegen -- --output crates/typos-vars/src/vars_codegen.rs --check
displayName: Verify typos-vars
- script: |
cargo run --package typos-dict-verify -- --input crates/typos-dict/assets/words.csv --output crates/typos-dict/assets/words.csv --check
displayName: Verify typos-dict displayName: Verify typos-dict
- script: | - script: |
cargo run --package codespell-codegen -- --output crates/codespell-dict/src/dict_codegen.rs --check cargo run --package codespell-codegen -- --output crates/codespell-dict/src/dict_codegen.rs --check

View file

@ -4,12 +4,12 @@ extern crate test;
#[bench] #[bench]
fn load_corrections(b: &mut test::Bencher) { fn load_corrections(b: &mut test::Bencher) {
b.iter(|| typos_cli::dict::BuiltIn::new()); b.iter(|| typos_cli::dict::BuiltIn::new(Default::default()));
} }
#[bench] #[bench]
fn correct_word_hit(b: &mut test::Bencher) { fn correct_word_hit(b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(); let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let input = typos::tokens::Word::new("successs", 0).unwrap(); let input = typos::tokens::Word::new("successs", 0).unwrap();
assert_eq!( assert_eq!(
corrections.correct_word(input), corrections.correct_word(input),
@ -20,7 +20,7 @@ fn correct_word_hit(b: &mut test::Bencher) {
#[bench] #[bench]
fn correct_word_miss(b: &mut test::Bencher) { fn correct_word_miss(b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(); let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let input = typos::tokens::Word::new("success", 0).unwrap(); let input = typos::tokens::Word::new("success", 0).unwrap();
assert!(corrections.correct_word(input).is_empty()); assert!(corrections.correct_word(input).is_empty());
b.iter(|| corrections.correct_word(input)); b.iter(|| corrections.correct_word(input));

View file

@ -184,7 +184,7 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) {
let sample_path = temp.child("sample"); let sample_path = temp.child("sample");
sample_path.write_str(data).unwrap(); sample_path.write_str(data).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(); let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_checks(); let checks = typos::checks::TyposSettings::new().build_checks();
b.iter(|| { b.iter(|| {

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
[package] [package]
name = "typos-codegen" name = "typos-dict-codegen"
version = "1.0.2" version = "1.0.2"
authors = ["Ed Page <eopage@gmail.com>"] authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction" description = "Source Code Spelling Correction"

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,24 @@
[package]
name = "typos-dict-verify"
version = "1.0.2"
authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction"
repository = "https://github.com/crate-ci/typos"
readme = "../../../README.md"
categories = ["text-processing"]
license = "MIT"
edition = "2018"
publish = false
[badges]
azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
csv = "1.0"
unicase = "2.5"
codegenrs = "0.1"
structopt = "0.3"
varcon = { version = "0.2", path = "../../varcon" }
itertools = "0.9"
edit-distance = "2.1"

View file

@ -0,0 +1,99 @@
use std::collections::HashMap;
use std::collections::HashSet;
use structopt::StructOpt;
/// Re-emits the typo dictionary in `dict` as CSV into `file`, cleaned against varcon.
///
/// `dict` is parsed as header-less CSV where column 0 is the typo and column 1 its
/// correction. Rows whose typo is a known varcon variant (compared case-insensitively)
/// are dropped. For the remaining rows the correction is swapped for a related variant
/// when `find_best_match` finds one that is closer to the typo.
///
/// Panics if a record cannot be parsed, has fewer than two fields, or cannot be written.
fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
    let banned = disallowed_typos();
    let relatives = related_words();

    let mut rows = csv::ReaderBuilder::new()
        .has_headers(false)
        .from_reader(dict);
    let mut out = csv::Writer::from_writer(file);

    for row in rows.records() {
        let row = row.unwrap();
        let (typo, original) = (&row[0], &row[1]);

        // A word that is valid in some dialect must not be reported as a typo.
        if banned.contains(&unicase::UniCase::new(typo)) {
            continue;
        }

        // Keep the original correction unless a related variant is strictly closer.
        let chosen = match relatives.get(original) {
            Some(words) => find_best_match(typo, original, words).unwrap_or(original),
            None => original,
        };
        out.write_record(&[typo, chosen]).unwrap();
    }

    out.flush().unwrap();
}
/// Builds the case-insensitive set of every variant word listed in `varcon::VARCON`.
///
/// `generate` uses this set to drop dictionary rows whose "typo" is one of these words.
fn disallowed_typos() -> HashSet<unicase::UniCase<&'static str>> {
    let mut banned = HashSet::new();
    for cluster in varcon::VARCON.iter() {
        for entry in cluster.entries.iter() {
            for variant in entry.variants.iter() {
                banned.insert(unicase::UniCase::new(variant.word));
            }
        }
    }
    banned
}
/// Maps each varcon variant word to the other words that share an entry with it.
///
/// Only variants that have at least one type whose tag is not `Improper` take part.
/// A word never appears in its own set, and sets are merged when the same word shows
/// up in several entries.
fn related_words() -> HashMap<&'static str, HashSet<&'static str>> {
    let mut related: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();

    for cluster in varcon::VARCON.iter() {
        for entry in cluster.entries.iter() {
            // Gather the acceptable spellings of this entry.
            let mut group: HashSet<&'static str> = HashSet::new();
            for variant in entry.variants.iter() {
                let acceptable = variant
                    .types
                    .iter()
                    .any(|t| t.tag != Some(varcon::Tag::Improper));
                if acceptable {
                    group.insert(variant.word);
                }
            }

            // Link every spelling to all of its siblings.
            for &word in group.iter() {
                let siblings = group.iter().copied().filter(|other| *other != word);
                related
                    .entry(word)
                    .or_insert_with(HashSet::new)
                    .extend(siblings);
            }
        }
    }

    related
}
/// Picks the related word that is strictly closer to `typo` than `correction` is.
///
/// Returns `None` when no word in `related_words` has a smaller edit distance to `typo`
/// than the current `correction`. When several candidates qualify, the one with the
/// smallest distance wins; equal distances are resolved by the smaller word.
///
/// Panics if `correction` is itself a member of `related_words`.
fn find_best_match<'c>(
    typo: &'c str,
    correction: &'c str,
    related_words: &HashSet<&'static str>,
) -> Option<&'c str> {
    assert!(!related_words.contains(correction));

    let baseline = edit_distance::edit_distance(typo, correction);
    related_words
        .iter()
        .map(|&candidate| (edit_distance::edit_distance(typo, candidate), candidate))
        .filter(|&(distance, _)| distance < baseline)
        // Tuples compare by distance first and by word second, so the minimum is the
        // same element that would come first after sorting.
        .min()
        .map(|(_, candidate)| candidate)
}
// Command-line arguments of the dictionary verifier.
// Plain `//` comments are used deliberately: structopt turns `///` doc comments into
// help text, which would change the program's `--help` output.
#[derive(Debug, StructOpt)]
#[structopt(rename_all = "kebab-case")]
struct Options {
// Path of the CSV dictionary to verify (`-i` / `--input`); `run` reads the whole file.
#[structopt(short("-i"), long, parse(from_os_str))]
input: std::path::PathBuf,
// Arguments defined by `codegenrs`, flattened into this command's own flags;
// `run` hands the regenerated CSV text to `codegen.write_str`.
#[structopt(flatten)]
codegen: codegenrs::CodeGenArgs,
}
/// Parses the command line, regenerates the dictionary and hands it to `codegenrs`.
///
/// Returns the process exit code (always `0` on success).
///
/// # Errors
///
/// Returns an error when the input file cannot be read, when the regenerated content is
/// not valid UTF-8, or when `codegenrs` fails to write the result.
fn run() -> Result<i32, Box<dyn std::error::Error>> {
    let options = Options::from_args();

    // Propagate I/O failures like the other fallible steps instead of panicking here.
    let data = std::fs::read(&options.input)?;

    let mut content = vec![];
    generate(&mut content, &data);

    let content = String::from_utf8(content)?;
    options.codegen.write_str(&content)?;

    Ok(0)
}
/// Entry point: exits with the code returned by `run`, panicking if `run` fails.
fn main() {
    std::process::exit(run().unwrap());
}

View file

@ -0,0 +1,21 @@
[package]
name = "typos-vars"
version = "0.2.1"
authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction"
repository = "https://github.com/crate-ci/typos"
readme = "../../README.md"
categories = ["development-tools", "text-processing"]
keywords = ["development", "spelling"]
license = "MIT"
edition = "2018"
[badges]
azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5"
log = "0.4"
varcon-core = { version = "1.0", path = "../varcon-core", features = ["flags"] }

View file

@ -0,0 +1,30 @@
[package]
name = "typos-vars-codegen"
version = "1.0.2"
authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction"
repository = "https://github.com/crate-ci/typos"
readme = "../../../README.md"
categories = ["text-processing"]
license = "MIT"
edition = "2018"
publish = false
[badges]
azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
phf = { version = "0.8", features = ["unicase"] }
phf_codegen = "0.8"
varcon = { version = "0.2", path = "../../varcon", features = ["flags"] }
varcon-core = { version = "1.0", path = "../../varcon-core", features = ["flags"] }
typos = { version = "^0.3", path = "../../typos" }
unicase = "2.5"
codegenrs = "0.1"
structopt = "0.3"
clap = "2"
log = "0.4"
env_logger = "0.7"
clap-verbosity-flag = "0.3"
itertools = "0.9"

View file

@ -0,0 +1,314 @@
use std::collections::BTreeMap;
use std::collections::HashSet;
use std::io::Write;
use structopt::StructOpt;
// Dialect categories that get their own slot in every generated `VariantsMap`.
// The position of a category in this array is the index that `generate_variations`
// emits in the generated `corrections` match, and the order in which `generate_entry`
// writes the per-category slices, so reordering it changes the generated table layout.
static CATEGORIES: [varcon::Category; 4] = [
varcon::Category::American,
varcon::Category::BritishIse,
// For now, only want to support one form of British, so going with -ise as it seems more
// popular.
varcon::Category::Canadian,
varcon::Category::Australian,
// Other basically means all
];
// Writes the generated Rust source for the dialect-variants table into `file`.
//
// Output order: a header comment and lint allowance, the `Variants` / `VariantsMap`
// type aliases, `all_categories()`, `corrections()`, the `VARS_DICTIONARY` phf map, and
// finally one static per entry symbol that the map actually references.
// Every write is unwrapped, so any I/O failure panics.
fn generate_variations<W: std::io::Write>(file: &mut W) {
let entries = entries();
// File header: records which package produced the file.
writeln!(
file,
"// This file is code-genned by {}",
env!("CARGO_PKG_NAME")
)
.unwrap();
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
writeln!(file).unwrap();
writeln!(file, "use unicase::UniCase;").unwrap();
writeln!(file).unwrap();
// Type aliases; `VariantsMap` has one slot per supported category.
writeln!(file, "pub type Variants = &'static [&'static str];",).unwrap();
writeln!(
file,
"pub type VariantsMap = [Variants; {}];",
CATEGORIES.len()
)
.unwrap();
writeln!(file).unwrap();
// `all_categories()`: the supported categories joined with `|`.
writeln!(file, "pub fn all_categories() -> crate::CategorySet {{",).unwrap();
writeln!(
file,
" {}",
itertools::join(
CATEGORIES
.iter()
.map(|c| format!("crate::Category::{:?}", c)),
" | "
)
)
.unwrap();
writeln!(file, "}}",).unwrap();
writeln!(file).unwrap();
// `corrections()`: maps a category to its slot, using the category's index in `CATEGORIES`.
writeln!(
file,
"pub fn corrections(category: crate::Category, options: VariantsMap) -> &'static [&'static str] {{",
)
.unwrap();
writeln!(file, " match category {{").unwrap();
for (index, category) in CATEGORIES.iter().enumerate() {
writeln!(
file,
" crate::Category::{:?} => options[{}],",
category, index
)
.unwrap();
}
// Categories that are not in `CATEGORIES` have no slot; the generated code treats them as unreachable.
writeln!(
file,
" crate::Category::BritishIze | crate::Category::Other => unreachable!(\"{{:?}} is unused\", category),",
)
.unwrap();
writeln!(file, " }}").unwrap();
writeln!(file, "}}").unwrap();
writeln!(file).unwrap();
// `VARS_DICTIONARY`: case-insensitive word -> list of (category bits, entry symbol) links.
writeln!(
file,
"pub static VARS_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)]> = "
)
.unwrap();
let entry_sets = entry_sets(entries.iter());
// Tracks which entry symbols the map refers to, so only those statics are emitted below.
let mut referenced_symbols: HashSet<&str> = HashSet::new();
let mut builder = phf_codegen::Map::new();
for (word, data) in entry_sets.iter() {
if is_always_valid(data) {
// No need to convert from current form to target form
continue;
}
referenced_symbols.extend(data.iter().map(|(s, _)| s));
let value = generate_link(&data);
builder.entry(unicase::UniCase::new(word), &value);
}
let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap();
writeln!(file, ";").unwrap();
// Emit the per-entry statics, skipping entries nothing in the map points at.
for (symbol, entry) in entries.iter() {
if !referenced_symbols.contains(symbol.as_str()) {
continue;
}
generate_entry(file, symbol, entry);
}
}
/// Emits the `VariantsMap` static named `symbol` for `entry`.
///
/// One slice is written per element of `CATEGORIES`, in array order, containing the spellings
/// `collect_correct` reports for that category. The words are sorted so the output is
/// deterministic.
///
/// # Panics
///
/// Panics if writing to `file` fails.
fn generate_entry(file: &mut impl std::io::Write, symbol: &str, entry: &varcon_core::Entry) {
    writeln!(file, "pub(crate) static {}: VariantsMap = [", symbol).unwrap();
    for &category in CATEGORIES.iter() {
        // `collect_correct` hands back a hash set, so impose an order before emitting.
        let mut words: Vec<&str> = collect_correct(entry, category).into_iter().collect();
        words.sort_unstable();

        writeln!(file, " &[").unwrap();
        for word in words.iter() {
            writeln!(file, " {:?},", word).unwrap();
        }
        writeln!(file, " ],").unwrap();
    }
    writeln!(file, "];").unwrap();
    writeln!(file).unwrap();
}
fn generate_link(data: &[(&str, varcon::CategorySet)]) -> String {
let mut output = Vec::new();
write!(output, "&[").unwrap();
for (symbol, set) in data.iter() {
write!(output, "(0b{:05b}, &{}), ", set.bits(), symbol).unwrap();
}
write!(output, "]").unwrap();
String::from_utf8(output).unwrap()
}
/// Reports whether any element of `data` carries a category set equal to the full set of
/// supported categories (see `valid_categories`).
///
/// Such a word is accepted by every supported dialect, so the caller does not emit it.
fn is_always_valid(data: &[(&str, varcon::CategorySet)]) -> bool {
    let everything = valid_categories();
    data.iter().any(|(_symbol, set)| *set == everything)
}
/// Loads every usable varcon entry, keyed by its generated symbol name.
///
/// An entry is kept only when `typos::tokens::Word::new` succeeds for each of its variants.
/// Kept entries are converted to owned data and their words are ASCII-lowercased before the
/// symbol is computed. When two entries produce the same symbol, the later one wins.
fn entries() -> BTreeMap<String, varcon_core::Entry> {
    let mut by_symbol = BTreeMap::new();
    for cluster in varcon::VARCON.iter() {
        for entry in cluster.entries.iter() {
            let tokenizable = entry
                .variants
                .iter()
                .all(|v| typos::tokens::Word::new(&v.word, 0).is_ok());
            if !tokenizable {
                continue;
            }

            let mut owned = entry.into_owned();
            for variant in owned.variants.iter_mut() {
                variant.word.make_ascii_lowercase();
            }
            by_symbol.insert(entry_symbol(&owned), owned);
        }
    }
    by_symbol
}
/// Builds the name of the generated static for `entry`.
///
/// The name is `ENTRY_<FIRST WORD, ASCII-UPPERCASED>_<hash of the whole entry>`.
///
/// NOTE(review): the standard library does not specify `DefaultHasher`'s algorithm across
/// releases, so the numeric suffix may change with the toolchain.
///
/// # Panics
///
/// Panics if `entry` has no variants.
fn entry_symbol(entry: &varcon_core::Entry) -> String {
    use std::hash::{Hash, Hasher};

    let mut state = std::collections::hash_map::DefaultHasher::new();
    entry.hash(&mut state);
    let digest = state.finish();

    let first_word = entry.variants[0].word.to_ascii_uppercase();
    format!("ENTRY_{}_{}", first_word, digest)
}
/// Inverts the symbol -> entry mapping into a word -> entries mapping.
///
/// For every word that appears in some entry, the result lists each `(symbol, categories)`
/// pair, where `categories` is the set `entry_set` computed for that word within that entry.
/// Pairs are listed in the order the entries are yielded.
fn entry_sets<'e>(
    entries: impl Iterator<Item = (&'e String, &'e varcon_core::Entry)>,
) -> BTreeMap<&'e str, Vec<(&'e str, varcon::CategorySet)>> {
    let mut by_word: BTreeMap<&'e str, Vec<(&'e str, varcon::CategorySet)>> = BTreeMap::new();
    for (symbol, entry) in entries {
        for (word, categories) in entry_set(entry) {
            by_word
                .entry(word)
                .or_default()
                .push((symbol.as_str(), categories));
        }
    }
    by_word
}
/// Computes, for each word in `entry`, the set of supported categories that list it.
///
/// A type in `Category::Other` counts as every supported category, and `BritishIze` types are
/// ignored. A word that has only ignored types still appears, with an empty set.
fn entry_set(entry: &varcon_core::Entry) -> BTreeMap<&str, varcon::CategorySet> {
    let everything = valid_categories();
    let mut by_word = BTreeMap::new();
    for variant in entry.variants.iter() {
        let accepted = by_word
            .entry(variant.word.as_str())
            .or_insert_with(varcon::CategorySet::empty);
        for kind in variant.types.iter() {
            let category = kind.category;
            if category == varcon::Category::Other {
                *accepted |= everything;
            } else if category != varcon::Category::BritishIze {
                accepted.insert(category);
            }
        }
    }
    by_word
}
/// Returns the set containing exactly the categories listed in `CATEGORIES`.
fn valid_categories() -> varcon::CategorySet {
    CATEGORIES
        .iter()
        .fold(varcon::CategorySet::empty(), |mut set, category| {
            set.insert(*category);
            set
        })
}
/// Gathers the spellings of `entry` that are acceptable for `category`.
///
/// Only types tagged with `category` or with `Category::Other` are considered, and variants
/// rejected by `ignore_variant` are skipped. A missing tag is treated as `Tag::Eq`.
///
/// If exactly one word is an `Eq` match, that word alone is returned. Otherwise the choice is
/// ambiguous and every considered word not tagged `Improper` is returned.
fn collect_correct(entry: &varcon_core::Entry, category: varcon::Category) -> HashSet<&str> {
    let mut exact = HashSet::new();
    let mut acceptable = HashSet::new();
    for variant in entry.variants.iter() {
        if ignore_variant(variant) {
            continue;
        }
        let word = variant.word.as_str();
        for kind in variant.types.iter() {
            if kind.category != category && kind.category != varcon::Category::Other {
                continue;
            }
            match kind.tag {
                None | Some(varcon::Tag::Eq) => {
                    exact.insert(word);
                    acceptable.insert(word);
                }
                Some(varcon::Tag::Improper) => {}
                Some(_) => {
                    acceptable.insert(word);
                }
            }
        }
    }

    if exact.len() == 1 {
        exact
    } else {
        acceptable
    }
}
/// Returns `true` for the single variant that is deliberately excluded from the corrections:
/// the word "anesthetisation" when its only type is Australian and tagged `Variant` or
/// `Seldom`.
fn ignore_variant(variant: &varcon_core::Variant) -> bool {
    if variant.word != "anesthetisation" || variant.types.len() != 1 {
        return false;
    }

    let only = &variant.types[0];
    only.category == varcon::Category::Australian
        && match only.tag {
            Some(varcon::Tag::Variant) | Some(varcon::Tag::Seldom) => true,
            _ => false,
        }
}
// The generated dictionary needs:
// every word, with its category bitflags, pointing to a list of entry names.
//
// The generated varcon data needs:
// every entry, addressable by name.
// Command-line options of the code generator.
//
// Plain `//` comments are used here on purpose: StructOpt can turn `///` doc comments into
// help text, which would change the program's output.
#[derive(Debug, StructOpt)]
#[structopt(rename_all = "kebab-case")]
struct Options {
// Arguments used by `run` to write out the generated source.
#[structopt(flatten)]
codegen: codegenrs::CodeGenArgs,
// Arguments used by `run` to reformat the generated source.
// NOTE(review): the field name looks like a misspelling of `rustfmt`; `run` refers to it by
// this name, so both must be renamed together.
#[structopt(flatten)]
rustmft: codegenrs::RustfmtArgs,
// Verbosity flags; `run` passes the resulting log level to `init_logging`.
#[structopt(flatten)]
pub(crate) verbose: clap_verbosity_flag::Verbosity,
}
/// Installs the global `env_logger` logger at `level`; does nothing when `level` is `None`.
///
/// At trace level the logger's own format is kept, with timestamps in seconds. At every other
/// level each record is printed as `[level] message`, with the level name in lowercase.
fn init_logging(level: Option<log::Level>) {
    let level = match level {
        Some(level) => level,
        None => return,
    };

    let tracing = level == log::LevelFilter::Trace;
    let mut logger = env_logger::Builder::new();
    logger.filter(None, level.to_level_filter());
    if !tracing {
        logger.format(|out, record| {
            let label = record.level().to_string().to_lowercase();
            writeln!(out, "[{}] {}", label, record.args())
        });
    } else {
        logger.format_timestamp_secs();
    }
    logger.init();
}
/// Parses the command line, generates the variations module in memory, reformats it and
/// writes it out.
///
/// Logging defaults to the `Info` level unless the verbosity flags say otherwise.
///
/// Returns the process exit code (`0`) on success. An error is returned when the generated
/// bytes are not valid UTF-8, or when reformatting or writing the output fails.
fn run() -> Result<i32, Box<dyn std::error::Error>> {
    let mut opts = Options::from_args();
    opts.verbose.set_default(Some(log::Level::Info));
    init_logging(opts.verbose.log_level());

    let mut raw: Vec<u8> = Vec::new();
    generate_variations(&mut raw);

    let generated = String::from_utf8(raw)?;
    let formatted = opts.rustmft.reformat(&generated)?;
    opts.codegen.write_str(&formatted)?;

    Ok(0)
}
/// Entry point: runs the generator and exits with the code it returns.
///
/// Panics (via `unwrap`) when `run` reports an error.
fn main() {
    std::process::exit(run().unwrap());
}

View file

@ -0,0 +1,6 @@
mod vars_codegen;
pub use crate::vars_codegen::*;
pub use varcon_core::Category;
pub use varcon_core::CategorySet;

File diff suppressed because it is too large Load diff

View file

@ -14,15 +14,18 @@ Configuration is read from the following (in precedence order)
| Field | Argument | Format | Description | | Field | Argument | Format | Description |
|------------------------|-------------------|--------|-------------| |------------------------|-------------------|--------|-------------|
| files.binary | --binary | bool | | | files.binary | --binary | bool | Check binary files as text |
| files.ignore-hidden | --hidden | bool | | | files.ignore-hidden | --hidden | bool | Skip hidden files and directories. |
| files.ignore-files | --ignore | bool | | | files.ignore-files | --ignore | bool | Respect ignore files. |
| files.ignore-dot | --ignore-dot | bool | | | files.ignore-dot | --ignore-dot | bool | Respect .ignore files. |
| files.ignore-vcs | --ignore-vcs | bool | | | files.ignore-vcs | --ignore-vcs | bool | Respect ignore files in vcs directories. |
| files.ignore-global | --ignore-global | bool | | | files.ignore-global | --ignore-global | bool | Respect global ignore files. |
| files.ignore-parent | --ignore-parent | bool | | | files.ignore-parent | --ignore-parent | bool | Respect ignore files in parent directories. |
| default.check-filename | \- | bool | | | default.check-filename | \- | bool | Verifying spelling in file names. |
| default.check-file | \- | bool | | | default.check-file | \- | bool | Verifying spelling in files. |
| default.ignore-hex | \- | bool | | | default.ignore-hex | \- | bool | Do not check identifiers that appear to be hexadecimal values. |
| default.identifier-include-digits | \- | bool | | | default.identifier-leading-digits | \- | bool | Allow identifiers to start with digits, in addition to letters. |
| default.identifier-include-chars | \- | string | | | default.identifier-include-digits | \- | bool | Allow identifiers to include digits, in addition to letters. |
| default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. |
| default.identifier-include-chars | \- | string | Allow identifiers to include these characters. |
| default.locale | --locale | en, en-us, en-gb, en-ca, en-au | English dialect to correct for (`en` accepts all dialects). |

View file

@ -112,6 +112,12 @@ pub(crate) struct FileArgs {
no_hex: bool, no_hex: bool,
#[structopt(long, overrides_with("no-hex"), hidden(true))] #[structopt(long, overrides_with("no-hex"), hidden(true))]
hex: bool, hex: bool,
#[structopt(
long,
possible_values(&config::Locale::variants()),
)]
pub(crate) locale: Option<config::Locale>,
} }
impl config::FileSource for FileArgs { impl config::FileSource for FileArgs {
@ -141,6 +147,10 @@ impl config::FileSource for FileArgs {
(_, _) => unreachable!("StructOpt should make this impossible"), (_, _) => unreachable!("StructOpt should make this impossible"),
} }
} }
/// Returns the locale stored in these arguments, if one was set.
fn locale(&self) -> Option<config::Locale> {
self.locale
}
} }
#[derive(Debug, StructOpt)] #[derive(Debug, StructOpt)]

View file

@ -82,6 +82,10 @@ pub trait FileSource {
fn identifier_include_chars(&self) -> Option<&str> { fn identifier_include_chars(&self) -> Option<&str> {
None None
} }
/// Returns the locale supplied by this source; the default implementation supplies none.
fn locale(&self) -> Option<Locale> {
None
}
} }
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] #[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
@ -247,6 +251,7 @@ pub struct FileConfig {
pub identifier_leading_chars: Option<String>, pub identifier_leading_chars: Option<String>,
pub identifier_include_digits: Option<bool>, pub identifier_include_digits: Option<bool>,
pub identifier_include_chars: Option<String>, pub identifier_include_chars: Option<String>,
pub locale: Option<Locale>,
} }
impl FileConfig { impl FileConfig {
@ -272,6 +277,9 @@ impl FileConfig {
if let Some(source) = source.identifier_include_chars() { if let Some(source) = source.identifier_include_chars() {
self.identifier_include_chars = Some(source.to_owned()); self.identifier_include_chars = Some(source.to_owned());
} }
if let Some(source) = source.locale() {
self.locale = Some(source);
}
} }
pub fn check_filename(&self) -> bool { pub fn check_filename(&self) -> bool {
@ -301,6 +309,10 @@ impl FileConfig {
pub fn identifier_include_chars(&self) -> &str { pub fn identifier_include_chars(&self) -> &str {
self.identifier_include_chars.as_deref().unwrap_or("_'") self.identifier_include_chars.as_deref().unwrap_or("_'")
} }
/// Returns the effective locale: the configured value, or `Locale::default()` when unset.
pub fn locale(&self) -> Locale {
self.locale.unwrap_or_default()
}
} }
impl FileSource for FileConfig { impl FileSource for FileConfig {
@ -331,6 +343,10 @@ impl FileSource for FileConfig {
fn identifier_include_chars(&self) -> Option<&str> { fn identifier_include_chars(&self) -> Option<&str> {
self.identifier_include_chars.as_deref() self.identifier_include_chars.as_deref()
} }
/// Returns the locale stored in this config, or `None` when it has not been set.
fn locale(&self) -> Option<Locale> {
self.locale
}
} }
fn find_project_file(dir: std::path::PathBuf, name: &str) -> Option<std::path::PathBuf> { fn find_project_file(dir: std::path::PathBuf, name: &str) -> Option<std::path::PathBuf> {
@ -346,3 +362,62 @@ fn find_project_file(dir: std::path::PathBuf, name: &str) -> Option<std::path::P
} }
Some(file_path) Some(file_path)
} }
/// English dialect that spelling corrections are targeted at.
///
/// Serialized in kebab-case (`en`, `en-us`, `en-gb`, `en-ca`, `en-au`), matching the strings
/// accepted by `FromStr` and produced by `Display`.
///
/// `PartialEq` and `Eq` are derived so that locales can be compared and asserted on directly.
#[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum Locale {
    /// No specific dialect; maps to no category.
    En,
    /// American English.
    EnUs,
    /// British English (mapped to the `-ise` category).
    EnGb,
    /// Canadian English.
    EnCa,
    /// Australian English.
    EnAu,
}
impl Locale {
    /// Returns the variant category this locale is checked against.
    ///
    /// `None` is returned for the generic `en` locale, which is not tied to one dialect.
    pub fn category(self) -> Option<typos_vars::Category> {
        let category = match self {
            Self::En => return None,
            Self::EnUs => typos_vars::Category::American,
            Self::EnGb => typos_vars::Category::BritishIse,
            Self::EnCa => typos_vars::Category::Canadian,
            Self::EnAu => typos_vars::Category::Australian,
        };
        Some(category)
    }

    /// Returns the textual form of every locale, in declaration order.
    pub fn variants() -> [&'static str; 5] {
        let names = ["en", "en-us", "en-gb", "en-ca", "en-au"];
        names
    }
}
impl Default for Locale {
fn default() -> Self {
Locale::En
}
}
impl std::str::FromStr for Locale {
type Err = String;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
match s {
"en" => Ok(Locale::En),
"en-us" => Ok(Locale::EnUs),
"en-gb" => Ok(Locale::EnGb),
"en-ca" => Ok(Locale::EnCa),
"en-au" => Ok(Locale::EnAu),
_ => Err("valid values: en, en-us, en-gb, en-ca, en-au".to_owned()),
}
}
}
impl std::fmt::Display for Locale {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match *self {
Locale::En => write!(f, "en"),
Locale::EnUs => write!(f, "en-us"),
Locale::EnGb => write!(f, "en-gb"),
Locale::EnCa => write!(f, "en-ca"),
Locale::EnAu => write!(f, "en-au"),
}
}
}

View file

@ -5,11 +5,15 @@ use unicase::UniCase;
use typos::tokens::Case; use typos::tokens::Case;
#[derive(Default)] #[derive(Default)]
pub struct BuiltIn {} pub struct BuiltIn {
locale: Option<typos_vars::Category>,
}
impl BuiltIn { impl BuiltIn {
pub fn new() -> Self { pub fn new(locale: crate::config::Locale) -> Self {
Self {} Self {
locale: locale.category(),
}
} }
pub fn correct_ident<'s, 'w>( pub fn correct_ident<'s, 'w>(
@ -19,12 +23,66 @@ impl BuiltIn {
Vec::new() Vec::new()
} }
pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Vec<Cow<'s, str>> { pub fn correct_word<'s, 'w>(
map_lookup(&typos_dict::WORD_DICTIONARY, word.token()) &'s self,
.map(|s| case_correct(s, word.case())) word_token: typos::tokens::Word<'w>,
) -> Vec<Cow<'s, str>> {
let word = word_token.token();
let corrections = if let Some(correction) = self.correct_with_dict(word) {
self.correct_with_vars(word)
.unwrap_or_else(|| vec![correction])
} else {
self.correct_with_vars(word).unwrap_or_else(Vec::new)
};
corrections
.into_iter() .into_iter()
.map(|s| case_correct(s, word_token.case()))
.collect() .collect()
} }
/// Looks `word` up in the built-in `typos_dict::WORD_DICTIONARY` and returns its correction,
/// or `None` when the word is not listed there.
fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
map_lookup(&typos_dict::WORD_DICTIONARY, word)
}
/// Looks `word` up in the dialect table `typos_vars::VARS_DICTIONARY` and, when it is listed,
/// lets `select_variant` decide which corrections (if any) apply to the configured locale.
///
/// Returns `None` when the word is not in the table or needs no correction.
fn correct_with_vars(&self, word: &str) -> Option<Vec<&'static str>> {
    map_lookup(&typos_vars::VARS_DICTIONARY, word)
        .and_then(|variants| self.select_variant(variants))
}
/// Chooses the corrections for a word from its dialect-table value.
///
/// With a locale configured, `None` is returned when the word is already valid for that
/// locale; otherwise the locale's accepted spellings are returned. Without a locale, every
/// dialect is acceptable, so corrections (the sorted, de-duplicated spellings of all
/// categories) are returned only for a word that is valid in no category at all.
///
/// NOTE(review): only the first element of `vars` is consulted; any further elements are
/// ignored. Indexing panics if `vars` is empty.
fn select_variant(
    &self,
    vars: &'static [(u8, &'static typos_vars::VariantsMap)],
) -> Option<Vec<&'static str>> {
    let (bits, map) = vars[0];
    // SAFETY: the bits were code-genned from a checked category-set, so they are known to be
    // a valid `CategorySet` representation.
    let accepted = unsafe { typos_vars::CategorySet::new(bits) };

    match self.locale {
        // Already valid for the current locale.
        Some(locale) if accepted.contains(locale) => None,
        Some(locale) => Some(typos_vars::corrections(locale, *map).to_vec()),
        // All locales are valid, and the word is valid in at least one of them.
        None if !accepted.is_empty() => None,
        // All locales are valid, but the word is never valid.
        None => {
            let mut unique: Vec<_> = map.iter().flat_map(|v| v.iter()).copied().collect();
            unique.sort_unstable();
            unique.dedup();
            Some(unique)
        }
    }
}
} }
impl typos::Dictionary for BuiltIn { impl typos::Dictionary for BuiltIn {
@ -37,10 +95,7 @@ impl typos::Dictionary for BuiltIn {
} }
} }
fn map_lookup( fn map_lookup<V: Clone>(map: &'static phf::Map<UniCase<&'static str>, V>, key: &str) -> Option<V> {
map: &'static phf::Map<UniCase<&'static str>, &'static str>,
key: &str,
) -> Option<&'static str> {
// This transmute should be safe as `get` will not store the reference with // This transmute should be safe as `get` will not store the reference with
// the expanded lifetime. This is due to `Borrow` being overly strict and // the expanded lifetime. This is due to `Borrow` being overly strict and
// can't have an impl for `&'static str` to `Borrow<&'a str>`. // can't have an impl for `&'static str` to `Borrow<&'a str>`.

View file

@ -1 +1,2 @@
pub mod config;
pub mod dict; pub mod dict;

View file

@ -56,7 +56,7 @@ fn run() -> Result<i32, anyhow::Error> {
.include_chars(config.default.identifier_include_chars().to_owned()) .include_chars(config.default.identifier_include_chars().to_owned())
.build(); .build();
let dictionary = crate::dict::BuiltIn::new(); let dictionary = crate::dict::BuiltIn::new(config.default.locale());
let mut settings = typos::checks::TyposSettings::new(); let mut settings = typos::checks::TyposSettings::new();
settings settings