mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-22 07:32:18 -05:00
feat(dict): Perform case-insensitive comparisons
This commit is contained in:
parent
719cc7d43b
commit
af66072272
4 changed files with 55 additions and 14 deletions
17
Cargo.lock
generated
17
Cargo.lock
generated
|
@ -162,6 +162,7 @@ dependencies = [
|
|||
"serde_derive 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"structopt 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -434,6 +435,7 @@ version = "0.7.24"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -786,6 +788,14 @@ name = "ucd-util"
|
|||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unicase"
|
||||
version = "1.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-segmentation"
|
||||
version = "1.2.1"
|
||||
|
@ -819,6 +829,11 @@ name = "vec_map"
|
|||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "void"
|
||||
version = "1.0.2"
|
||||
|
@ -955,12 +970,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
|
||||
"checksum treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
|
||||
"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
|
||||
"checksum unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7f4765f83163b74f957c797ad9253caf97f103fb064d3999aea9568d09fc8a33"
|
||||
"checksum unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1"
|
||||
"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
|
||||
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
||||
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
|
||||
"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
|
||||
"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
|
||||
"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
|
||||
"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
|
||||
"checksum walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9d9d7ed3431229a144296213105a390676cc49c9b6a72bd19f3176c98e129fa1"
|
||||
"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0"
|
||||
|
|
|
@ -20,7 +20,7 @@ failure = "0.1"
|
|||
structopt = "0.2"
|
||||
clap = "2"
|
||||
ignore = "0.4"
|
||||
phf = "0.7"
|
||||
phf = { version = "0.7", features = ["unicase"] }
|
||||
regex = "1.0"
|
||||
lazy_static = "1.2.0"
|
||||
grep-searcher = "0.1"
|
||||
|
@ -28,6 +28,7 @@ serde = "1.0"
|
|||
serde_derive = "1.0"
|
||||
serde_json = "1.0"
|
||||
itertools = "0.8"
|
||||
unicase = "1.1"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_fs = "0.10"
|
||||
|
@ -35,3 +36,4 @@ assert_fs = "0.10"
|
|||
[build-dependencies]
|
||||
phf_codegen = "0.7"
|
||||
csv = "1.0"
|
||||
unicase = "1.1"
|
||||
|
|
18
build.rs
18
build.rs
|
@ -10,15 +10,21 @@ fn main() {
|
|||
let mut file = BufWriter::new(File::create(&path).unwrap());
|
||||
|
||||
println!("rerun-if-changed=./assets/words.csv");
|
||||
write!(&mut file, "static DICTIONARY: phf::Map<&'static str, &'static str> = ").unwrap();
|
||||
write!(&mut file, "use unicase::UniCase;").unwrap();
|
||||
write!(
|
||||
&mut file,
|
||||
"static DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
|
||||
)
|
||||
.unwrap();
|
||||
let mut builder = phf_codegen::Map::new();
|
||||
let records: Vec<_> = csv::Reader::from_reader(CORPUS).records().map(|r| r.unwrap()).collect();
|
||||
let records: Vec<_> = csv::Reader::from_reader(CORPUS)
|
||||
.records()
|
||||
.map(|r| r.unwrap())
|
||||
.collect();
|
||||
for record in &records {
|
||||
let value = format!(r#""{}""#, &record[1]);
|
||||
builder.entry(&record[0], &value);
|
||||
builder.entry(unicase::UniCase(&record[0]), &value);
|
||||
}
|
||||
builder
|
||||
.build(&mut file)
|
||||
.unwrap();
|
||||
builder.build(&mut file).unwrap();
|
||||
write!(&mut file, ";\n").unwrap();
|
||||
}
|
||||
|
|
30
src/dict.rs
30
src/dict.rs
|
@ -1,18 +1,34 @@
|
|||
include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
|
||||
|
||||
pub struct Dictionary {
|
||||
}
|
||||
pub struct Dictionary {}
|
||||
|
||||
impl Dictionary {
|
||||
pub fn new() -> Self {
|
||||
Dictionary { }
|
||||
Dictionary {}
|
||||
}
|
||||
|
||||
pub fn correct_str<'s>(&'s self, word: &str) -> Option<&'s str> {
|
||||
DICTIONARY.get(word).map(|s| *s)
|
||||
pub fn correct_str<'s, 'w>(&'s self, word: &'w str) -> Option<&'s str> {
|
||||
map_lookup(&DICTIONARY, word)
|
||||
}
|
||||
|
||||
pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s str> {
|
||||
std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| *s)
|
||||
pub fn correct_bytes<'s, 'w>(&'s self, word: &'w [u8]) -> Option<&'s str> {
|
||||
std::str::from_utf8(word)
|
||||
.ok()
|
||||
.and_then(|word| self.correct_str(word))
|
||||
}
|
||||
}
|
||||
|
||||
fn map_lookup(
|
||||
map: &'static phf::Map<UniCase<&'static str>, &'static str>,
|
||||
key: &str,
|
||||
) -> Option<&'static str> {
|
||||
// This transmute should be safe as `get` will not store the reference with
|
||||
// the expanded lifetime. The transmute is needed because `Borrow` is overly
|
||||
// strict: there cannot be an impl of `Borrow<&'a str>` for `&'static str`.
|
||||
//
|
||||
// See https://github.com/rust-lang/rust/issues/28853#issuecomment-158735548
|
||||
unsafe {
|
||||
let key = ::std::mem::transmute::<_, &'static str>(key);
|
||||
map.get(&UniCase(key)).map(|s| *s)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue