perf: Avoid hashing without custom dict

`HashMap::get` (at least in hashbrown) hashes the key before looking it
up and doesn't first check whether the map is empty.  For the custom
dict, a common use case is for the dict to be empty.

Master:
```
real    0m26.675s
user    0m33.683s
sys     0m4.535s
```

Bypassing `HashMap::get`:
```
real    0m16.415s
user    0m14.519s
sys     0m4.118s
```

On a moderately sized repo.
This commit is contained in:
Ed Page 2020-11-10 20:19:12 -06:00
parent 150c5bfdc1
commit 18e31fa578

View file

@ -168,7 +168,7 @@ impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> {
.collect(); .collect();
} }
pub fn interpret<'z, I: Iterator<Item = (&'z str, &'z str)>>( fn interpret<'z, I: Iterator<Item = (&'z str, &'z str)>>(
cases: I, cases: I,
) -> impl Iterator<Item = (&'z str, Status<'z>)> { ) -> impl Iterator<Item = (&'z str, Status<'z>)> {
cases.map(|(typo, correction)| { cases.map(|(typo, correction)| {
@ -186,19 +186,29 @@ impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> {
impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> { impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> {
fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Option<Status<'s>> { fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Option<Status<'s>> {
// Skip hashing if we can
if !self.identifiers.is_empty() {
self.identifiers self.identifiers
.get(ident.token()) .get(ident.token())
.map(|c| c.borrow()) .map(|c| c.borrow())
.or_else(|| self.inner.correct_ident(ident)) .or_else(|| self.inner.correct_ident(ident))
} else {
None
}
} }
fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Option<Status<'s>> { fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Option<Status<'s>> {
// Skip hashing if we can
if !self.words.is_empty() {
let w = UniCase::new(word.token()); let w = UniCase::new(word.token());
// HACK: couldn't figure out the lifetime issue with replacing `cloned` with `borrow` // HACK: couldn't figure out the lifetime issue with replacing `cloned` with `borrow`
self.words self.words
.get(&w) .get(&w)
.cloned() .cloned()
.or_else(|| self.inner.correct_word(word)) .or_else(|| self.inner.correct_word(word))
} else {
None
}
} }
} }