From 6bdbd821e38cbe279615b602d8c8acee973e1293 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Tue, 10 Nov 2020 20:50:10 -0600 Subject: [PATCH] perf(dict): Avoid hashing unknown words Bypass hashing when we know (through str::len) that a word won't be in the dict. Master: ``` real 0m26.675s user 0m33.683s sys 0m4.535s ``` With this change ``` real 0m24.432s user 0m32.492s sys 0m4.190s ``` --- crates/typos-vars/codegen/src/main.rs | 9 +++++++++ crates/typos-vars/src/vars_codegen.rs | 3 +++ src/dict.rs | 9 ++++++++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/crates/typos-vars/codegen/src/main.rs b/crates/typos-vars/codegen/src/main.rs index c718ee8..981a85c 100644 --- a/crates/typos-vars/codegen/src/main.rs +++ b/crates/typos-vars/codegen/src/main.rs @@ -76,6 +76,9 @@ fn generate_variations<W: std::io::Write>(file: &mut W) { writeln!(file, "}}").unwrap(); writeln!(file).unwrap(); + let mut smallest = usize::MAX; + let mut largest = usize::MIN; + writeln!( file, "pub static VARS_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)]> = " @@ -92,11 +95,17 @@ fn generate_variations<W: std::io::Write>(file: &mut W) { referenced_symbols.extend(data.iter().map(|(s, _)| s)); let value = generate_link(&data); builder.entry(unicase::UniCase::new(word), &value); + smallest = std::cmp::min(smallest, word.len()); + largest = std::cmp::max(largest, word.len()); } let codegenned = builder.build(); writeln!(file, "{}", codegenned).unwrap(); writeln!(file, ";").unwrap(); + writeln!(file).unwrap(); + writeln!(file, "pub const WORD_MIN: usize = {};", smallest).unwrap(); + writeln!(file, "pub const WORD_MAX: usize = {};", largest).unwrap(); + for (symbol, entry) in entries.iter() { if !referenced_symbols.contains(symbol.as_str()) { continue; diff --git a/crates/typos-vars/src/vars_codegen.rs b/crates/typos-vars/src/vars_codegen.rs index 03ac102..e511b01 100644 --- a/crates/typos-vars/src/vars_codegen.rs +++ b/crates/typos-vars/src/vars_codegen.rs @@ -113081,6 +113081,9 @@ pub static VARS_DICTIONARY: phf::Map< 
), ]), }; + +pub const WORD_MIN: usize = 2; +pub const WORD_MAX: usize = 24; pub(crate) static ENTRY_ABETTORS_7043394254318611656: VariantsMap = [&["abettors"], &["abetters"], &["abettors"], &["abetters"]]; diff --git a/src/dict.rs b/src/dict.rs index 504bec1..d0e0aa9 100644 --- a/src/dict.rs +++ b/src/dict.rs @@ -54,7 +54,14 @@ impl BuiltIn { } fn correct_with_vars(&self, word: &str) -> Option> { - map_lookup(&typos_vars::VARS_DICTIONARY, word).map(|variants| self.select_variant(variants)) + const WORD_RANGE: std::ops::RangeInclusive<usize> = + typos_vars::WORD_MIN..=typos_vars::WORD_MAX; + if WORD_RANGE.contains(&word.len()) { + map_lookup(&typos_vars::VARS_DICTIONARY, word) + .map(|variants| self.select_variant(variants)) + } else { + None + } } fn select_variant(