perf(dict): Avoid hashing unknown words

Bypass hashing when a word's byte length (`str::len`) falls outside the
range of key lengths in the dict (`WORD_MIN..=WORD_MAX`), since such a
word cannot be in the dict.

Master:
```
real    0m26.675s
user    0m33.683s
sys     0m4.535s
```

With this change:
```
real    0m24.060s
user    0m31.559s
sys     0m4.258s
```
This commit is contained in:
Ed Page 2020-11-10 20:45:57 -06:00
parent 18e31fa578
commit beaa0f4091
3 changed files with 18 additions and 1 deletions

View file

@ -13,6 +13,9 @@ fn generate<W: std::io::Write>(file: &mut W) {
writeln!(file).unwrap(); writeln!(file).unwrap();
writeln!(file, "use unicase::UniCase;").unwrap(); writeln!(file, "use unicase::UniCase;").unwrap();
let mut smallest = usize::MAX;
let mut largest = usize::MIN;
writeln!( writeln!(
file, file,
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = " "pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
@ -26,12 +29,17 @@ fn generate<W: std::io::Write>(file: &mut W) {
.map(|r| r.unwrap()) .map(|r| r.unwrap())
.collect(); .collect();
for record in &records { for record in &records {
smallest = std::cmp::min(smallest, record[0].len());
largest = std::cmp::max(largest, record[0].len());
let value = format!(r#""{}""#, &record[1]); let value = format!(r#""{}""#, &record[1]);
builder.entry(unicase::UniCase::new(&record[0]), &value); builder.entry(unicase::UniCase::new(&record[0]), &value);
} }
let codegenned = builder.build(); let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap(); writeln!(file, "{}", codegenned).unwrap();
writeln!(file, ";").unwrap(); writeln!(file, ";").unwrap();
writeln!(file).unwrap();
writeln!(file, "pub const WORD_MIN: usize = {};", smallest).unwrap();
writeln!(file, "pub const WORD_MAX: usize = {};", largest).unwrap();
} }
#[derive(Debug, StructOpt)] #[derive(Debug, StructOpt)]

View file

@ -33648,3 +33648,6 @@ pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static st
(UniCase::ascii("presumpton"), "presumption"), (UniCase::ascii("presumpton"), "presumption"),
]), ]),
}; };
pub const WORD_MIN: usize = 3;
pub const WORD_MAX: usize = 19;

View file

@ -44,7 +44,13 @@ impl BuiltIn {
// Not using `Status` to avoid the allocations // Not using `Status` to avoid the allocations
fn correct_with_dict(&self, word: &str) -> Option<&'static str> { fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
const WORD_RANGE: std::ops::RangeInclusive<usize> =
typos_dict::WORD_MIN..=typos_dict::WORD_MAX;
if WORD_RANGE.contains(&word.len()) {
map_lookup(&typos_dict::WORD_DICTIONARY, word) map_lookup(&typos_dict::WORD_DICTIONARY, word)
} else {
None
}
} }
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> { fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {