mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-22 09:01:04 -05:00
perf(dict): Avoid hashing unknown words
Bypass hashing when we know (through `str::len`) that a word won't be in the dict.

Master:
```
real 0m26.675s
user 0m33.683s
sys 0m4.535s
```

With this change:
```
real 0m24.060s
user 0m31.559s
sys 0m4.258s
```
This commit is contained in:
parent
18e31fa578
commit
beaa0f4091
3 changed files with 18 additions and 1 deletions
|
@ -13,6 +13,9 @@ fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
writeln!(file).unwrap();
|
writeln!(file).unwrap();
|
||||||
writeln!(file, "use unicase::UniCase;").unwrap();
|
writeln!(file, "use unicase::UniCase;").unwrap();
|
||||||
|
|
||||||
|
let mut smallest = usize::MAX;
|
||||||
|
let mut largest = usize::MIN;
|
||||||
|
|
||||||
writeln!(
|
writeln!(
|
||||||
file,
|
file,
|
||||||
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
|
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
|
||||||
|
@ -26,12 +29,17 @@ fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
.map(|r| r.unwrap())
|
.map(|r| r.unwrap())
|
||||||
.collect();
|
.collect();
|
||||||
for record in &records {
|
for record in &records {
|
||||||
|
smallest = std::cmp::min(smallest, record[0].len());
|
||||||
|
largest = std::cmp::max(largest, record[0].len());
|
||||||
let value = format!(r#""{}""#, &record[1]);
|
let value = format!(r#""{}""#, &record[1]);
|
||||||
builder.entry(unicase::UniCase::new(&record[0]), &value);
|
builder.entry(unicase::UniCase::new(&record[0]), &value);
|
||||||
}
|
}
|
||||||
let codegenned = builder.build();
|
let codegenned = builder.build();
|
||||||
writeln!(file, "{}", codegenned).unwrap();
|
writeln!(file, "{}", codegenned).unwrap();
|
||||||
writeln!(file, ";").unwrap();
|
writeln!(file, ";").unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
writeln!(file, "pub const WORD_MIN: usize = {};", smallest).unwrap();
|
||||||
|
writeln!(file, "pub const WORD_MAX: usize = {};", largest).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
|
|
|
@ -33648,3 +33648,6 @@ pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static st
|
||||||
(UniCase::ascii("presumpton"), "presumption"),
|
(UniCase::ascii("presumpton"), "presumption"),
|
||||||
]),
|
]),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
pub const WORD_MIN: usize = 3;
|
||||||
|
pub const WORD_MAX: usize = 19;
|
||||||
|
|
|
@ -44,7 +44,13 @@ impl BuiltIn {
|
||||||
|
|
||||||
// Not using `Status` to avoid the allocations
|
// Not using `Status` to avoid the allocations
|
||||||
fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
|
fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
|
||||||
map_lookup(&typos_dict::WORD_DICTIONARY, word)
|
const WORD_RANGE: std::ops::RangeInclusive<usize> =
|
||||||
|
typos_dict::WORD_MIN..=typos_dict::WORD_MAX;
|
||||||
|
if WORD_RANGE.contains(&word.len()) {
|
||||||
|
map_lookup(&typos_dict::WORD_DICTIONARY, word)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
|
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
|
||||||
|
|
Loading…
Reference in a new issue