mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-28 20:11:05 -05:00
commit
fc05aa9633
11 changed files with 489 additions and 283 deletions
170
Cargo.lock
generated
170
Cargo.lock
generated
|
@ -4,9 +4,9 @@ version = 3
|
|||
|
||||
[[package]]
|
||||
name = "addr2line"
|
||||
version = "0.15.1"
|
||||
version = "0.15.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03345e98af8f3d786b6d9f656ccfa6ac316d954e92bc4841f0bba20789d5fb5a"
|
||||
checksum = "e7a2e47a1fbe209ee101dd6d61285226744c6c8d3c21c8dc878ba6cb9f467f3a"
|
||||
dependencies = [
|
||||
"gimli",
|
||||
]
|
||||
|
@ -48,9 +48,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.40"
|
||||
version = "1.0.41"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "28b2cd92db5cbd74e8e5028f7e27dd7aa3090e89e4f2a197cc7c8dfb69c7063b"
|
||||
checksum = "15af2628f6890fe2609a3b91bef4c83450512802e59489f9c1cb1fa5df064a61"
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
|
@ -60,9 +60,9 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
|
|||
|
||||
[[package]]
|
||||
name = "assert_cmd"
|
||||
version = "1.0.4"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f57fec1ac7e4de72dcc69811795f1a7172ed06012f80a5d1ee651b62484f588"
|
||||
checksum = "a88b6bd5df287567ffdf4ddf4d33060048e1068308e5f62d81c6f9824a045a48"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"doc-comment",
|
||||
|
@ -105,9 +105,9 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
|
|||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.59"
|
||||
version = "0.3.60"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4717cfcbfaa661a0fd48f8453951837ae7e8f81e481fbb136e3202d72805a744"
|
||||
checksum = "b7815ea54e4d821e791162e078acbebfd6d8c8939cd559c9335dceb1c8ca7282"
|
||||
dependencies = [
|
||||
"addr2line",
|
||||
"cc",
|
||||
|
@ -154,12 +154,6 @@ version = "3.7.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c59e7af012c713f529e7a3ee57ce9b31ddd858d4b512923602f74608b009631"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
||||
|
||||
[[package]]
|
||||
name = "cast"
|
||||
version = "0.2.6"
|
||||
|
@ -224,7 +218,7 @@ version = "0.4.0"
|
|||
dependencies = [
|
||||
"codegenrs",
|
||||
"dictgen",
|
||||
"itertools 0.10.0",
|
||||
"itertools 0.10.1",
|
||||
"structopt",
|
||||
"unicase",
|
||||
]
|
||||
|
@ -263,7 +257,7 @@ dependencies = [
|
|||
"clap",
|
||||
"criterion-plot",
|
||||
"csv",
|
||||
"itertools 0.10.0",
|
||||
"itertools 0.10.1",
|
||||
"lazy_static",
|
||||
"num-traits",
|
||||
"oorandom",
|
||||
|
@ -417,6 +411,9 @@ dependencies = [
|
|||
name = "dictgen"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"phf 0.9.0",
|
||||
"phf_codegen",
|
||||
"phf_shared 0.9.0",
|
||||
"unicase",
|
||||
]
|
||||
|
||||
|
@ -549,9 +546,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.8.3"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17392a012ea30ef05a610aa97dfb49496e71c9f676b27879922ea5bdf60d9d3f"
|
||||
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
|
||||
dependencies = [
|
||||
"log",
|
||||
"termcolor",
|
||||
|
@ -597,9 +594,9 @@ checksum = "0e4075386626662786ddb0ec9081e7c7eeb1ba31951f447ca780ef9f5d568189"
|
|||
|
||||
[[package]]
|
||||
name = "globset"
|
||||
version = "0.4.6"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c152169ef1e421390738366d2f796655fec62621dabbd0fd476f905934061e4a"
|
||||
checksum = "10463d9ff00a2a068db14231982f5132edebad0d7660cd956a1c30292dbcbfbd"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"bstr",
|
||||
|
@ -627,18 +624,18 @@ checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3"
|
|||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.3.2"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac"
|
||||
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
|
||||
dependencies = [
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.18"
|
||||
version = "0.1.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c"
|
||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
@ -675,9 +672,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
|||
|
||||
[[package]]
|
||||
name = "ignore"
|
||||
version = "0.4.17"
|
||||
version = "0.4.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b287fb45c60bb826a0dc68ff08742b9d88a2fea13d6e0c286b3172065aaf878c"
|
||||
checksum = "713f1b139373f96a2e0ce3ac931cd01ee973c3c5dd7c40c0c2efe96ad2b6751d"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
"globset",
|
||||
|
@ -702,9 +699,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.0"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37d572918e350e82412fe766d24b15e6682fb2ed2bbe018280caa810397cb319"
|
||||
checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
@ -754,9 +751,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.95"
|
||||
version = "0.2.97"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "789da6d93f1b866ffe175afc5322a4d76c038605a1c3319bb57b06967ca98a36"
|
||||
checksum = "12b8adadd720df158f4d70dfe7ccc6adb0472d7c55ca83445f6a5ab3e36f8fb6"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
|
@ -804,7 +801,7 @@ version = "0.4.0"
|
|||
dependencies = [
|
||||
"codegenrs",
|
||||
"dictgen",
|
||||
"itertools 0.10.0",
|
||||
"itertools 0.10.1",
|
||||
"regex",
|
||||
"structopt",
|
||||
"unicase",
|
||||
|
@ -859,15 +856,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.24.0"
|
||||
version = "0.25.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a5b3dd1c072ee7963717671d1ca129f1048fda25edea6b752bfc71ac8854170"
|
||||
checksum = "a38f2be3697a57b4060074ff41b44c16870d916ad7877c17696e063257482bc7"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.7.2"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3"
|
||||
checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56"
|
||||
|
||||
[[package]]
|
||||
name = "oorandom"
|
||||
|
@ -899,7 +899,36 @@ version = "0.8.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
|
||||
dependencies = [
|
||||
"phf_shared",
|
||||
"phf_shared 0.8.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2ac8b67553a7ca9457ce0e526948cad581819238f4a9d1ea74545851fa24f37"
|
||||
dependencies = [
|
||||
"phf_shared 0.9.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "963adb11cf22ee65dfd401cf75577c1aa0eca58c0b97f9337d2da61d3e640503"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared 0.9.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fc1437ada0f3a97d538f0bb608137bf53c53969028cab74c89893e1e9a12f0e"
|
||||
dependencies = [
|
||||
"phf_shared 0.9.0",
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -912,6 +941,16 @@ dependencies = [
|
|||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a68318426de33640f02be62b4ae8eb1261be2efbc337b60c54d845bf4484e0d9"
|
||||
dependencies = [
|
||||
"siphasher",
|
||||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "plotters"
|
||||
version = "0.3.1"
|
||||
|
@ -927,15 +966,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "plotters-backend"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b07fffcddc1cb3a1de753caa4e4df03b79922ba43cf882acc1bdd7e8df9f4590"
|
||||
checksum = "fd8be10f7485c8a323ea100b20d6052c27cf5968f08f8e3a56ee9f0cf38ebd3d"
|
||||
|
||||
[[package]]
|
||||
name = "plotters-svg"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b38a02e23bd9604b842a812063aec4ef702b57989c37b655254bb61c471ad211"
|
||||
checksum = "521fa9638fa597e1dc53e9412a4f9cefb01187ee1f7413076f9e6749e2885ba9"
|
||||
dependencies = [
|
||||
"plotters-backend",
|
||||
]
|
||||
|
@ -1037,9 +1076,9 @@ checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8"
|
|||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.8.3"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e"
|
||||
checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"rand_chacha",
|
||||
|
@ -1049,9 +1088,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d"
|
||||
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core",
|
||||
|
@ -1059,18 +1098,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.6.2"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7"
|
||||
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_hc"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73"
|
||||
checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7"
|
||||
dependencies = [
|
||||
"rand_core",
|
||||
]
|
||||
|
@ -1102,9 +1141,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.2.8"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "742739e41cd49414de871ea5e549afb7e2a3ac77b589bcbebe8c82fab37147fc"
|
||||
checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
@ -1122,12 +1161,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.1.9"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
|
@ -1146,9 +1182,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.19"
|
||||
version = "0.1.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "410f7acf3cb3a44527c5d9546bad4bf4e6c460915d5f9f2fc524498bfe8f70ce"
|
||||
checksum = "dead70b0b5e03e9c814bcb6b01e03e68f7c57a80aa48c72ec92152ab3e818d49"
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
|
@ -1295,9 +1331,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.72"
|
||||
version = "1.0.73"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1e8cdbefb79a9a5a65e0db8b47b723ee907b7c7f8496c76a1770b5c310bab82"
|
||||
checksum = "f71489ff30030d2ae598524f61326b902466f72a0fb1a8564c001cc63425bcc7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
@ -1408,7 +1444,7 @@ version = "0.7.0"
|
|||
dependencies = [
|
||||
"anyhow",
|
||||
"bstr",
|
||||
"itertools 0.10.0",
|
||||
"itertools 0.10.1",
|
||||
"log",
|
||||
"nom",
|
||||
"once_cell",
|
||||
|
@ -1437,15 +1473,15 @@ dependencies = [
|
|||
"derive_setters",
|
||||
"difflib",
|
||||
"encoding",
|
||||
"env_logger 0.8.3",
|
||||
"env_logger 0.8.4",
|
||||
"human-panic",
|
||||
"ignore",
|
||||
"itertools 0.10.0",
|
||||
"itertools 0.10.1",
|
||||
"kstring",
|
||||
"log",
|
||||
"maplit",
|
||||
"once_cell",
|
||||
"phf",
|
||||
"phf 0.8.0",
|
||||
"predicates",
|
||||
"proc-exit",
|
||||
"serde",
|
||||
|
@ -1478,7 +1514,7 @@ dependencies = [
|
|||
"codegenrs",
|
||||
"csv",
|
||||
"dictgen",
|
||||
"itertools 0.10.0",
|
||||
"itertools 0.10.1",
|
||||
"structopt",
|
||||
"unicase",
|
||||
]
|
||||
|
@ -1490,7 +1526,7 @@ dependencies = [
|
|||
"codegenrs",
|
||||
"csv",
|
||||
"edit-distance",
|
||||
"itertools 0.10.0",
|
||||
"itertools 0.10.1",
|
||||
"structopt",
|
||||
"unicase",
|
||||
"varcon",
|
||||
|
@ -1515,7 +1551,7 @@ dependencies = [
|
|||
"codegenrs",
|
||||
"dictgen",
|
||||
"env_logger 0.7.1",
|
||||
"itertools 0.10.0",
|
||||
"itertools 0.10.1",
|
||||
"log",
|
||||
"structopt",
|
||||
"typos",
|
||||
|
@ -1698,7 +1734,7 @@ version = "0.4.0"
|
|||
dependencies = [
|
||||
"codegenrs",
|
||||
"dictgen",
|
||||
"itertools 0.10.0",
|
||||
"itertools 0.10.1",
|
||||
"structopt",
|
||||
"unicase",
|
||||
]
|
||||
|
|
|
@ -22,4 +22,4 @@ unicase = "2.5"
|
|||
itertools = "0.10"
|
||||
codegenrs = "1.0"
|
||||
structopt = "0.3"
|
||||
dictgen = { version = "0.1", path = "../../dictgen" }
|
||||
dictgen = { version = "0.1", path = "../../dictgen", features = ["codegen"] }
|
||||
|
|
|
@ -1,7 +1,21 @@
|
|||
[package]
|
||||
name = "dictgen"
|
||||
version = "0.1.0"
|
||||
description = "Compile-time case-insensitive map"
|
||||
repository = "https://github.com/crate-ci/typos"
|
||||
categories = ["development-tools", "text-processing"]
|
||||
keywords = ["development", "spelling", "no_std"]
|
||||
license = "MIT"
|
||||
edition = "2018"
|
||||
|
||||
[features]
|
||||
default = ["std"]
|
||||
std = []
|
||||
codegen = ["std", "phf_codegen"]
|
||||
map = ["phf", "phf_shared"]
|
||||
|
||||
[dependencies]
|
||||
unicase = "2.5"
|
||||
phf = { version = "0.9", features = ["unicase"], optional = true }
|
||||
phf_codegen = { version = "0.9", optional = true }
|
||||
phf_shared = { version = "0.9", optional = true }
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
#[cfg(feature = "map")]
|
||||
mod map;
|
||||
mod table;
|
||||
mod trie;
|
||||
|
||||
#[cfg(feature = "map")]
|
||||
pub use map::*;
|
||||
pub use table::*;
|
||||
pub use trie::*;
|
||||
|
|
91
crates/dictgen/src/map.rs
Normal file
91
crates/dictgen/src/map.rs
Normal file
|
@ -0,0 +1,91 @@
|
|||
/// Writes a `pub static` dictionary literal named `name` to `file`.
///
/// The entries from `data` are sorted case-insensitively by key, then emitted
/// as a `keys` array, a parallel `values` array (each value written with its
/// `Display` form) and the inclusive `range` of key byte lengths.
///
/// NOTE(review): despite living in the map module, the emitted literal is a
/// `dictgen::DictTable`, not a `DictMap` — confirm whether this is intended.
///
/// # Errors
///
/// Returns any I/O error raised while writing to `file`.
#[cfg(feature = "codegen")]
pub fn generate_map<'d, W: std::io::Write, V: std::fmt::Display>(
    file: &mut W,
    name: &str,
    value_type: &str,
    data: impl Iterator<Item = (&'d str, V)>,
) -> Result<(), std::io::Error> {
    let mut entries: Vec<(&'d str, V)> = data.collect();
    entries.sort_unstable_by_key(|entry| unicase::UniCase::new(entry.0));

    // With no entries both bounds collapse to 0.
    let shortest = entries.iter().map(|(key, _)| key.len()).min().unwrap_or(0);
    let longest = entries.iter().map(|(key, _)| key.len()).max().unwrap_or(0);

    writeln!(
        file,
        "pub static {}: dictgen::DictTable<{}> = dictgen::DictTable {{",
        name, value_type
    )?;

    writeln!(file, " keys: &[")?;
    for (key, _) in &entries {
        // Record up front whether case folding can take the ASCII fast path.
        let variant = if key.is_ascii() { "Ascii" } else { "Unicode" };
        writeln!(file, " dictgen::InsensitiveStr::{}({:?}),", variant, key)?;
    }
    writeln!(file, " ],")?;

    writeln!(file, " values: &[")?;
    for (_, value) in &entries {
        writeln!(file, " {},", value)?;
    }
    writeln!(file, " ],")?;

    writeln!(file, " range: {}..={},", shortest, longest)?;
    writeln!(file, "}};")?;

    Ok(())
}
|
||||
|
||||
pub struct DictMap<V: 'static> {
|
||||
pub map: phf::Map<crate::InsensitiveStr<'static>, V>,
|
||||
pub range: std::ops::RangeInclusive<usize>,
|
||||
}
|
||||
|
||||
impl<V> DictMap<V> {
|
||||
pub fn find(&self, word: &'_ unicase::UniCase<&str>) -> Option<&V> {
|
||||
if self.range.contains(&word.len()) {
|
||||
self.map.get(&(*word).into())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> impl Iterator<Item = (unicase::UniCase<&str>, &V)> + '_ {
|
||||
self.map.entries().map(|(k, v)| (k.convert(), v))
|
||||
}
|
||||
}
|
||||
|
||||
/// Hashes for `phf` exactly as the type's own `Hash` implementation does.
impl<'s> phf_shared::PhfHash for crate::InsensitiveStr<'s> {
    #[inline]
    fn phf_hash<H: core::hash::Hasher>(&self, state: &mut H) {
        <Self as core::hash::Hash>::hash(self, state)
    }
}
|
||||
|
||||
impl<'s> phf_shared::FmtConst for crate::InsensitiveStr<'s> {
|
||||
fn fmt_const(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
match self {
|
||||
crate::InsensitiveStr::Ascii(_) => f.write_str("dictgen::InsensitiveStr::Ascii(")?,
|
||||
crate::InsensitiveStr::Unicode(_) => {
|
||||
f.write_str("dictgen::InsensitiveStr::Unicode(")?
|
||||
}
|
||||
}
|
||||
|
||||
self.into_inner().fmt_const(f)?;
|
||||
f.write_str(")")
|
||||
}
|
||||
}
|
||||
|
||||
/// Allows an `InsensitiveStr<'a>` key to be borrowed as an `InsensitiveStr<'b>`
/// for any lifetime `'b` that `'a` outlives.
impl<'b, 'a: 'b> phf_shared::PhfBorrow<crate::InsensitiveStr<'b>> for crate::InsensitiveStr<'a> {
    fn borrow(&self) -> &crate::InsensitiveStr<'b> {
        // Plain lifetime shortening; the value itself is returned unchanged.
        self
    }
}
|
|
@ -1,3 +1,4 @@
|
|||
#[cfg(feature = "codegen")]
|
||||
pub fn generate_table<'d, W: std::io::Write, V: std::fmt::Display>(
|
||||
file: &mut W,
|
||||
name: &str,
|
||||
|
@ -44,9 +45,9 @@ pub fn generate_table<'d, W: std::io::Write, V: std::fmt::Display>(
|
|||
}
|
||||
|
||||
pub struct DictTable<V: 'static> {
|
||||
pub keys: &'static [InsensitiveStr],
|
||||
pub keys: &'static [InsensitiveStr<'static>],
|
||||
pub values: &'static [V],
|
||||
pub range: std::ops::RangeInclusive<usize>,
|
||||
pub range: core::ops::RangeInclusive<usize>,
|
||||
}
|
||||
|
||||
impl<V> DictTable<V> {
|
||||
|
@ -66,18 +67,64 @@ impl<V> DictTable<V> {
|
|||
}
|
||||
}
|
||||
|
||||
// Avoid unicase's use of const-fn so large tables don't OOM
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub enum InsensitiveStr {
|
||||
Unicode(&'static str),
|
||||
Ascii(&'static str),
|
||||
/// UniCase look-alike that avoids const-fn so large tables don't OOM
|
||||
#[derive(Copy, Clone)]
|
||||
pub enum InsensitiveStr<'s> {
|
||||
Unicode(&'s str),
|
||||
Ascii(&'s str),
|
||||
}
|
||||
|
||||
impl InsensitiveStr {
|
||||
fn convert(self) -> unicase::UniCase<&'static str> {
|
||||
impl<'s> InsensitiveStr<'s> {
|
||||
pub fn convert(self) -> unicase::UniCase<&'s str> {
|
||||
match self {
|
||||
InsensitiveStr::Unicode(s) => unicase::UniCase::unicode(s),
|
||||
InsensitiveStr::Ascii(s) => unicase::UniCase::ascii(s),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> &'s str {
|
||||
match self {
|
||||
InsensitiveStr::Unicode(s) | InsensitiveStr::Ascii(s) => s,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> From<unicase::UniCase<&'s str>> for InsensitiveStr<'s> {
|
||||
fn from(other: unicase::UniCase<&'s str>) -> Self {
|
||||
if other.is_ascii() {
|
||||
InsensitiveStr::Ascii(other.into_inner())
|
||||
} else {
|
||||
InsensitiveStr::Unicode(other.into_inner())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s1, 's2> PartialEq<InsensitiveStr<'s2>> for InsensitiveStr<'s1> {
|
||||
#[inline]
|
||||
fn eq(&self, other: &InsensitiveStr<'s2>) -> bool {
|
||||
self.convert() == other.convert()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Eq for InsensitiveStr<'s> {}
|
||||
|
||||
impl<'s> core::hash::Hash for InsensitiveStr<'s> {
|
||||
#[inline]
|
||||
fn hash<H: core::hash::Hasher>(&self, hasher: &mut H) {
|
||||
self.convert().hash(hasher)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> core::fmt::Debug for InsensitiveStr<'s> {
|
||||
#[inline]
|
||||
fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
core::fmt::Debug::fmt(self.into_inner(), fmt)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> core::fmt::Display for InsensitiveStr<'s> {
|
||||
#[inline]
|
||||
fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
core::fmt::Display::fmt(self.into_inner(), fmt)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,12 +1,87 @@
|
|||
/// Public entry point for trie code generation; forwards every argument to
/// the private `codegen` module, which does the actual work.
///
/// # Panics
///
/// - On duplicate entry
///
/// # Errors
///
/// Propagates the `std::io::Error` returned by `codegen::generate_trie`.
#[cfg(feature = "codegen")]
pub fn generate_trie<'d, W: std::io::Write, V: std::fmt::Display>(
    file: &mut W,
    prefix: &str,
    value_type: &str,
    data: impl Iterator<Item = (&'d str, V)>,
    limit: usize,
) -> Result<(), std::io::Error> {
    // Kept as a thin wrapper so the implementation can live behind the
    // `codegen` feature gate.
    codegen::generate_trie(file, prefix, value_type, data, limit)
}
|
||||
|
||||
pub struct DictTrie<V: 'static> {
|
||||
pub root: &'static DictTrieNode<V>,
|
||||
pub unicode: &'static crate::DictTable<V>,
|
||||
pub range: core::ops::RangeInclusive<usize>,
|
||||
}
|
||||
|
||||
impl<V> DictTrie<V> {
|
||||
pub fn find(&self, word: &'_ unicase::UniCase<&str>) -> Option<&'static V> {
|
||||
if self.range.contains(&word.len()) {
|
||||
let bytes = word.as_bytes();
|
||||
|
||||
let mut child = &self.root;
|
||||
for i in 0..bytes.len() {
|
||||
match child.children {
|
||||
DictTrieChild::Nested(n) => {
|
||||
let byte = bytes[i];
|
||||
let index = if (b'a'..b'z').contains(&byte) {
|
||||
byte - b'a'
|
||||
} else if (b'A'..b'Z').contains(&byte) {
|
||||
byte - b'A'
|
||||
} else {
|
||||
return self.unicode.find(word);
|
||||
};
|
||||
debug_assert!(index < 26);
|
||||
if let Some(next) = n[index as usize].as_ref() {
|
||||
child = next;
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
DictTrieChild::Flat(t) => {
|
||||
let remaining = &bytes[i..bytes.len()];
|
||||
// Unsafe: Everything before has been proven to be ASCII, so this should be
|
||||
// safe.
|
||||
let remaining = unsafe { core::str::from_utf8_unchecked(remaining) };
|
||||
// Reuse the prior ascii check, rather than doing it again
|
||||
let remaining = if word.is_ascii() {
|
||||
unicase::UniCase::ascii(remaining)
|
||||
} else {
|
||||
unicase::UniCase::unicode(remaining)
|
||||
};
|
||||
return t.find(&remaining);
|
||||
}
|
||||
}
|
||||
}
|
||||
child.value.as_ref()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DictTrieNode<V: 'static> {
|
||||
pub children: DictTrieChild<V>,
|
||||
pub value: Option<V>,
|
||||
}
|
||||
|
||||
pub enum DictTrieChild<V: 'static> {
|
||||
Nested(&'static [Option<&'static DictTrieNode<V>>; 26]),
|
||||
Flat(&'static crate::DictTable<V>),
|
||||
}
|
||||
|
||||
#[cfg(feature = "codegen")]
|
||||
mod codegen {
|
||||
pub(super) fn generate_trie<'d, W: std::io::Write, V: std::fmt::Display>(
|
||||
file: &mut W,
|
||||
prefix: &str,
|
||||
value_type: &str,
|
||||
data: impl Iterator<Item = (&'d str, V)>,
|
||||
limit: usize,
|
||||
) -> Result<(), std::io::Error> {
|
||||
let mut root = DynRoot::new(data);
|
||||
root.burst(limit);
|
||||
|
@ -121,68 +196,6 @@ fn gen_type_name<V>(leaf: &DynChild<V>) -> &'static str {
|
|||
}
|
||||
}
|
||||
|
||||
pub struct DictTrie<V: 'static> {
|
||||
pub root: &'static DictTrieNode<V>,
|
||||
pub unicode: &'static crate::DictTable<V>,
|
||||
pub range: std::ops::RangeInclusive<usize>,
|
||||
}
|
||||
|
||||
impl<V> DictTrie<V> {
|
||||
pub fn find(&self, word: &'_ unicase::UniCase<&str>) -> Option<&'static V> {
|
||||
if self.range.contains(&word.len()) {
|
||||
let bytes = word.as_bytes();
|
||||
|
||||
let mut child = &self.root;
|
||||
for i in 0..bytes.len() {
|
||||
match child.children {
|
||||
DictTrieChild::Nested(n) => {
|
||||
let byte = bytes[i];
|
||||
let index = if (b'a'..b'z').contains(&byte) {
|
||||
byte - b'a'
|
||||
} else if (b'A'..b'Z').contains(&byte) {
|
||||
byte - b'A'
|
||||
} else {
|
||||
return self.unicode.find(word);
|
||||
};
|
||||
debug_assert!(index < 26);
|
||||
if let Some(next) = n[index as usize].as_ref() {
|
||||
child = next;
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
DictTrieChild::Flat(t) => {
|
||||
let remaining = &bytes[i..bytes.len()];
|
||||
// Unsafe: Everything before has been proven to be ASCII, so this should be
|
||||
// safe.
|
||||
let remaining = unsafe { std::str::from_utf8_unchecked(remaining) };
|
||||
// Reuse the prior ascii check, rather than doing it again
|
||||
let remaining = if word.is_ascii() {
|
||||
unicase::UniCase::ascii(remaining)
|
||||
} else {
|
||||
unicase::UniCase::unicode(remaining)
|
||||
};
|
||||
return t.find(&remaining);
|
||||
}
|
||||
}
|
||||
}
|
||||
child.value.as_ref()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DictTrieNode<V: 'static> {
|
||||
pub children: DictTrieChild<V>,
|
||||
pub value: Option<V>,
|
||||
}
|
||||
|
||||
pub enum DictTrieChild<V: 'static> {
|
||||
Nested(&'static [Option<&'static DictTrieNode<V>>; 26]),
|
||||
Flat(&'static crate::DictTable<V>),
|
||||
}
|
||||
|
||||
struct DynRoot<'s, V> {
|
||||
root: DynNode<'s, V>,
|
||||
unicode: Vec<(&'s str, V)>,
|
||||
|
@ -288,3 +301,4 @@ impl<'s, V> DynChild<'s, V> {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,4 +23,4 @@ itertools = "0.10"
|
|||
codegenrs = "1.0"
|
||||
structopt = "0.3"
|
||||
regex = "1"
|
||||
dictgen = { version = "0.1", path = "../../dictgen" }
|
||||
dictgen = { version = "0.1", path = "../../dictgen", features = ["codegen"] }
|
||||
|
|
|
@ -23,4 +23,4 @@ itertools = "0.10"
|
|||
unicase = "2.5"
|
||||
codegenrs = "1.0"
|
||||
structopt = "0.3"
|
||||
dictgen = { version = "0.1", path = "../../dictgen" }
|
||||
dictgen = { version = "0.1", path = "../../dictgen", features = ["codegen"] }
|
||||
|
|
|
@ -29,4 +29,4 @@ log = "0.4"
|
|||
env_logger = "0.7"
|
||||
clap-verbosity-flag = "0.3"
|
||||
itertools = "0.10"
|
||||
dictgen = { version = "0.1", path = "../../dictgen" }
|
||||
dictgen = { version = "0.1", path = "../../dictgen", features = ["codegen"] }
|
||||
|
|
|
@ -22,4 +22,4 @@ unicase = "2.5"
|
|||
itertools = "0.10"
|
||||
codegenrs = "1.0"
|
||||
structopt = "0.3"
|
||||
dictgen = { version = "0.1", path = "../../dictgen" }
|
||||
dictgen = { version = "0.1", path = "../../dictgen", features = ["codegen"] }
|
||||
|
|
Loading…
Reference in a new issue