mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-22 00:51:11 -05:00
perf(dict): Avoid hashing unknown words
Bypass hashing when we know (through `str::len`) that a word won't be in the dict.

Master:
```
real 0m26.675s
user 0m33.683s
sys 0m4.535s
```

With this change:
```
real 0m24.060s
user 0m31.559s
sys 0m4.258s
```
This commit is contained in:
parent
18e31fa578
commit
beaa0f4091
3 changed files with 18 additions and 1 deletions
|
@ -13,6 +13,9 @@ fn generate<W: std::io::Write>(file: &mut W) {
|
|||
writeln!(file).unwrap();
|
||||
writeln!(file, "use unicase::UniCase;").unwrap();
|
||||
|
||||
let mut smallest = usize::MAX;
|
||||
let mut largest = usize::MIN;
|
||||
|
||||
writeln!(
|
||||
file,
|
||||
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
|
||||
|
@ -26,12 +29,17 @@ fn generate<W: std::io::Write>(file: &mut W) {
|
|||
.map(|r| r.unwrap())
|
||||
.collect();
|
||||
for record in &records {
|
||||
smallest = std::cmp::min(smallest, record[0].len());
|
||||
largest = std::cmp::max(largest, record[0].len());
|
||||
let value = format!(r#""{}""#, &record[1]);
|
||||
builder.entry(unicase::UniCase::new(&record[0]), &value);
|
||||
}
|
||||
let codegenned = builder.build();
|
||||
writeln!(file, "{}", codegenned).unwrap();
|
||||
writeln!(file, ";").unwrap();
|
||||
writeln!(file).unwrap();
|
||||
writeln!(file, "pub const WORD_MIN: usize = {};", smallest).unwrap();
|
||||
writeln!(file, "pub const WORD_MAX: usize = {};", largest).unwrap();
|
||||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
|
|
|
@ -33648,3 +33648,6 @@ pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static st
|
|||
(UniCase::ascii("presumpton"), "presumption"),
|
||||
]),
|
||||
};
|
||||
|
||||
pub const WORD_MIN: usize = 3;
|
||||
pub const WORD_MAX: usize = 19;
|
||||
|
|
|
@ -44,7 +44,13 @@ impl BuiltIn {
|
|||
|
||||
// Not using `Status` to avoid the allocations
|
||||
fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
|
||||
map_lookup(&typos_dict::WORD_DICTIONARY, word)
|
||||
const WORD_RANGE: std::ops::RangeInclusive<usize> =
|
||||
typos_dict::WORD_MIN..=typos_dict::WORD_MAX;
|
||||
if WORD_RANGE.contains(&word.len()) {
|
||||
map_lookup(&typos_dict::WORD_DICTIONARY, word)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
|
||||
|
|
Loading…
Reference in a new issue