perf(dict): Avoid hashing unknown words

Bypass hashing when we know (through str::len) that a word won't be in
the dict.

Master:
```
real    0m26.675s
user    0m33.683s
sys     0m4.535s
```

With this change:
```
real    0m24.432s
user    0m32.492s
sys     0m4.190s
```
This commit is contained in:
Ed Page 2020-11-10 20:50:10 -06:00
parent beaa0f4091
commit 6bdbd821e3
3 changed files with 20 additions and 1 deletions

View file

@ -76,6 +76,9 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
writeln!(file, "}}").unwrap();
writeln!(file).unwrap();
let mut smallest = usize::MAX;
let mut largest = usize::MIN;
writeln!(
file,
"pub static VARS_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)]> = "
@ -92,11 +95,17 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
referenced_symbols.extend(data.iter().map(|(s, _)| s));
let value = generate_link(&data);
builder.entry(unicase::UniCase::new(word), &value);
smallest = std::cmp::min(smallest, word.len());
largest = std::cmp::max(largest, word.len());
}
let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap();
writeln!(file, ";").unwrap();
writeln!(file).unwrap();
writeln!(file, "pub const WORD_MIN: usize = {};", smallest).unwrap();
writeln!(file, "pub const WORD_MAX: usize = {};", largest).unwrap();
for (symbol, entry) in entries.iter() {
if !referenced_symbols.contains(symbol.as_str()) {
continue;

View file

@ -113081,6 +113081,9 @@ pub static VARS_DICTIONARY: phf::Map<
),
]),
};
pub const WORD_MIN: usize = 2;
pub const WORD_MAX: usize = 24;
pub(crate) static ENTRY_ABETTORS_7043394254318611656: VariantsMap =
[&["abettors"], &["abetters"], &["abettors"], &["abetters"]];

View file

@ -54,7 +54,14 @@ impl BuiltIn {
}
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
map_lookup(&typos_vars::VARS_DICTIONARY, word).map(|variants| self.select_variant(variants))
const WORD_RANGE: std::ops::RangeInclusive<usize> =
typos_vars::WORD_MIN..=typos_vars::WORD_MAX;
if WORD_RANGE.contains(&word.len()) {
map_lookup(&typos_vars::VARS_DICTIONARY, word)
.map(|variants| self.select_variant(variants))
} else {
None
}
}
fn select_variant(