mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-25 02:20:58 -05:00
feat(dict): Perform case-insensitive comparisons
This commit is contained in:
parent
719cc7d43b
commit
af66072272
4 changed files with 55 additions and 14 deletions
17
Cargo.lock
generated
17
Cargo.lock
generated
|
@ -162,6 +162,7 @@ dependencies = [
|
||||||
"serde_derive 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)",
|
"serde_derive 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"serde_json 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)",
|
"serde_json 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"structopt 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
"structopt 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -434,6 +435,7 @@ version = "0.7.24"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
"siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -786,6 +788,14 @@ name = "ucd-util"
|
||||||
version = "0.1.3"
|
version = "0.1.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicase"
|
||||||
|
version = "1.4.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
dependencies = [
|
||||||
|
"version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-segmentation"
|
name = "unicode-segmentation"
|
||||||
version = "1.2.1"
|
version = "1.2.1"
|
||||||
|
@ -819,6 +829,11 @@ name = "vec_map"
|
||||||
version = "0.8.1"
|
version = "0.8.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "version_check"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "void"
|
name = "void"
|
||||||
version = "1.0.2"
|
version = "1.0.2"
|
||||||
|
@ -955,12 +970,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
|
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
|
||||||
"checksum treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
|
"checksum treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
|
||||||
"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
|
"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
|
||||||
|
"checksum unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7f4765f83163b74f957c797ad9253caf97f103fb064d3999aea9568d09fc8a33"
|
||||||
"checksum unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1"
|
"checksum unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1"
|
||||||
"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
|
"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
|
||||||
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
||||||
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
|
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
|
||||||
"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
|
"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
|
||||||
"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
|
"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
|
||||||
|
"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
|
||||||
"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
|
"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
|
||||||
"checksum walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9d9d7ed3431229a144296213105a390676cc49c9b6a72bd19f3176c98e129fa1"
|
"checksum walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9d9d7ed3431229a144296213105a390676cc49c9b6a72bd19f3176c98e129fa1"
|
||||||
"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0"
|
"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0"
|
||||||
|
|
|
@ -20,7 +20,7 @@ failure = "0.1"
|
||||||
structopt = "0.2"
|
structopt = "0.2"
|
||||||
clap = "2"
|
clap = "2"
|
||||||
ignore = "0.4"
|
ignore = "0.4"
|
||||||
phf = "0.7"
|
phf = { version = "0.7", features = ["unicase"] }
|
||||||
regex = "1.0"
|
regex = "1.0"
|
||||||
lazy_static = "1.2.0"
|
lazy_static = "1.2.0"
|
||||||
grep-searcher = "0.1"
|
grep-searcher = "0.1"
|
||||||
|
@ -28,6 +28,7 @@ serde = "1.0"
|
||||||
serde_derive = "1.0"
|
serde_derive = "1.0"
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
itertools = "0.8"
|
itertools = "0.8"
|
||||||
|
unicase = "1.1"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
assert_fs = "0.10"
|
assert_fs = "0.10"
|
||||||
|
@ -35,3 +36,4 @@ assert_fs = "0.10"
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
phf_codegen = "0.7"
|
phf_codegen = "0.7"
|
||||||
csv = "1.0"
|
csv = "1.0"
|
||||||
|
unicase = "1.1"
|
||||||
|
|
18
build.rs
18
build.rs
|
@ -10,15 +10,21 @@ fn main() {
|
||||||
let mut file = BufWriter::new(File::create(&path).unwrap());
|
let mut file = BufWriter::new(File::create(&path).unwrap());
|
||||||
|
|
||||||
println!("rerun-if-changed=./assets/words.csv");
|
println!("rerun-if-changed=./assets/words.csv");
|
||||||
write!(&mut file, "static DICTIONARY: phf::Map<&'static str, &'static str> = ").unwrap();
|
write!(&mut file, "use unicase::UniCase;").unwrap();
|
||||||
|
write!(
|
||||||
|
&mut file,
|
||||||
|
"static DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
let mut builder = phf_codegen::Map::new();
|
let mut builder = phf_codegen::Map::new();
|
||||||
let records: Vec<_> = csv::Reader::from_reader(CORPUS).records().map(|r| r.unwrap()).collect();
|
let records: Vec<_> = csv::Reader::from_reader(CORPUS)
|
||||||
|
.records()
|
||||||
|
.map(|r| r.unwrap())
|
||||||
|
.collect();
|
||||||
for record in &records {
|
for record in &records {
|
||||||
let value = format!(r#""{}""#, &record[1]);
|
let value = format!(r#""{}""#, &record[1]);
|
||||||
builder.entry(&record[0], &value);
|
builder.entry(unicase::UniCase(&record[0]), &value);
|
||||||
}
|
}
|
||||||
builder
|
builder.build(&mut file).unwrap();
|
||||||
.build(&mut file)
|
|
||||||
.unwrap();
|
|
||||||
write!(&mut file, ";\n").unwrap();
|
write!(&mut file, ";\n").unwrap();
|
||||||
}
|
}
|
||||||
|
|
30
src/dict.rs
30
src/dict.rs
|
@ -1,18 +1,34 @@
|
||||||
include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
|
include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
|
||||||
|
|
||||||
pub struct Dictionary {
|
pub struct Dictionary {}
|
||||||
}
|
|
||||||
|
|
||||||
impl Dictionary {
|
impl Dictionary {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Dictionary { }
|
Dictionary {}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn correct_str<'s>(&'s self, word: &str) -> Option<&'s str> {
|
pub fn correct_str<'s, 'w>(&'s self, word: &'w str) -> Option<&'s str> {
|
||||||
DICTIONARY.get(word).map(|s| *s)
|
map_lookup(&DICTIONARY, word)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s str> {
|
pub fn correct_bytes<'s, 'w>(&'s self, word: &'w [u8]) -> Option<&'s str> {
|
||||||
std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| *s)
|
std::str::from_utf8(word)
|
||||||
|
.ok()
|
||||||
|
.and_then(|word| self.correct_str(word))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn map_lookup(
|
||||||
|
map: &'static phf::Map<UniCase<&'static str>, &'static str>,
|
||||||
|
key: &str,
|
||||||
|
) -> Option<&'static str> {
|
||||||
|
// This transmute should be safe as `get` will not store the reference with
|
||||||
|
// the expanded lifetime. This is due to `Borrow` being overly strict and
|
||||||
|
// can't have an impl for `&'static str` to `Borrow<&'a str>`.
|
||||||
|
//
|
||||||
|
// See https://github.com/rust-lang/rust/issues/28853#issuecomment-158735548
|
||||||
|
unsafe {
|
||||||
|
let key = ::std::mem::transmute::<_, &'static str>(key);
|
||||||
|
map.get(&UniCase(key)).map(|s| *s)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue