Merge pull request #254 from epage/perf

perf(dict): Bypass vars when possible
This commit is contained in:
Ed Page 2021-05-19 19:42:08 -05:00 committed by GitHub
commit 36935481eb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 64 additions and 30 deletions

View file

@ -78,6 +78,7 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
let mut smallest = usize::MAX; let mut smallest = usize::MAX;
let mut largest = usize::MIN; let mut largest = usize::MIN;
let mut no_invalid = true;
writeln!( writeln!(
file, file,
@ -97,6 +98,8 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
builder.entry(unicase::UniCase::new(word), &value); builder.entry(unicase::UniCase::new(word), &value);
smallest = std::cmp::min(smallest, word.len()); smallest = std::cmp::min(smallest, word.len());
largest = std::cmp::max(largest, word.len()); largest = std::cmp::max(largest, word.len());
no_invalid &= !is_always_invalid(data);
} }
let codegenned = builder.build(); let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap(); writeln!(file, "{}", codegenned).unwrap();
@ -110,6 +113,10 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
) )
.unwrap(); .unwrap();
writeln!(file).unwrap();
writeln!(file, "pub const NO_INVALID: bool = {:?};", no_invalid,).unwrap();
writeln!(file).unwrap();
for (symbol, entry) in entries.iter() { for (symbol, entry) in entries.iter() {
if !referenced_symbols.contains(symbol.as_str()) { if !referenced_symbols.contains(symbol.as_str()) {
continue; continue;
@ -156,6 +163,15 @@ fn is_always_valid(data: &[(&str, varcon::CategorySet)]) -> bool {
false false
} }
/// Reports whether any `(symbol, category set)` pair in `data` has an empty
/// category set.
///
/// Returns `false` for an empty slice.
fn is_always_invalid(data: &[(&str, varcon::CategorySet)]) -> bool {
    data.iter().any(|(_symbol, categories)| categories.is_empty())
}
fn entries() -> BTreeMap<String, varcon_core::Entry> { fn entries() -> BTreeMap<String, varcon_core::Entry> {
varcon::VARCON varcon::VARCON
.iter() .iter()

View file

@ -113083,6 +113083,9 @@ pub static VARS_DICTIONARY: phf::Map<
}; };
pub const WORD_RANGE: std::ops::RangeInclusive<usize> = 2..=24; pub const WORD_RANGE: std::ops::RangeInclusive<usize> = 2..=24;
pub const NO_INVALID: bool = true;
pub(crate) static ENTRY_ABETTORS_7043394254318611656: VariantsMap = pub(crate) static ENTRY_ABETTORS_7043394254318611656: VariantsMap =
[&["abettors"], &["abetters"], &["abettors"], &["abetters"]]; [&["abettors"], &["abetters"], &["abettors"], &["abetters"]];

View file

@ -48,8 +48,10 @@ impl BuiltIn {
.for_each(|mut s| case_correct(&mut s, word_token.case())); .for_each(|mut s| case_correct(&mut s, word_token.case()));
Some(corrections) Some(corrections)
} }
}
#[cfg(feature = "dict")] #[cfg(feature = "dict")]
impl BuiltIn {
// Not using `Status` to avoid the allocations // Not using `Status` to avoid the allocations
fn correct_with_dict(&self, word: &str) -> Option<&'static [&'static str]> { fn correct_with_dict(&self, word: &str) -> Option<&'static [&'static str]> {
if typos_dict::WORD_RANGE.contains(&word.len()) { if typos_dict::WORD_RANGE.contains(&word.len()) {
@ -58,40 +60,42 @@ impl BuiltIn {
None None
} }
} }
}
#[cfg(not(feature = "dict"))] #[cfg(not(feature = "dict"))]
impl BuiltIn {
fn correct_with_dict(&self, _word: &str) -> Option<&'static [&'static str]> { fn correct_with_dict(&self, _word: &str) -> Option<&'static [&'static str]> {
None None
} }
}
#[cfg(feature = "vars")] #[cfg(feature = "vars")]
impl BuiltIn {
fn chain_with_vars(&self, corrections: &'static [&'static str]) -> Status<'static> { fn chain_with_vars(&self, corrections: &'static [&'static str]) -> Status<'static> {
let mut chained: Vec<_> = corrections if self.is_vars_enabled() {
.iter() let mut chained: Vec<_> = corrections
.flat_map(|c| match self.correct_with_vars(c) { .iter()
Some(Status::Valid) | None => vec![Cow::Borrowed(*c)], .flat_map(|c| match self.correct_with_vars(c) {
Some(Status::Corrections(vars)) => vars, Some(Status::Valid) | None => vec![Cow::Borrowed(*c)],
Some(Status::Invalid) => { Some(Status::Corrections(vars)) => vars,
unreachable!("correct_with_vars should always have valid suggestions") Some(Status::Invalid) => {
} unreachable!("correct_with_vars should always have valid suggestions")
}) }
.collect(); })
if chained.len() != 1 { .collect();
chained.sort_unstable(); if chained.len() != 1 {
chained.dedup(); chained.sort_unstable();
chained.dedup();
}
debug_assert!(!chained.is_empty());
Status::Corrections(chained)
} else {
Status::Corrections(corrections.iter().map(|c| Cow::Borrowed(*c)).collect())
} }
debug_assert!(!chained.is_empty());
Status::Corrections(chained)
} }
#[cfg(not(feature = "vars"))]
fn chain_with_vars(&self, corrections: &'static [&'static str]) -> Status<'static> {
Status::Corrections(corrections.iter().map(|c| Cow::Borrowed(*c)).collect())
}
#[cfg(feature = "vars")]
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> { fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
if typos_vars::WORD_RANGE.contains(&word.len()) { if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) {
map_lookup(&typos_vars::VARS_DICTIONARY, word) map_lookup(&typos_vars::VARS_DICTIONARY, word)
.map(|variants| self.select_variant(variants)) .map(|variants| self.select_variant(variants))
} else { } else {
@ -99,12 +103,12 @@ impl BuiltIn {
} }
} }
#[cfg(not(feature = "vars"))] fn is_vars_enabled(&self) -> bool {
fn correct_with_vars(&self, _word: &str) -> Option<Status<'static>> { #![allow(clippy::assertions_on_constants)]
None debug_assert!(typos_vars::NO_INVALID);
self.locale.is_some()
} }
#[cfg(feature = "vars")]
fn select_variant( fn select_variant(
&self, &self,
vars: &'static [(u8, &'static typos_vars::VariantsMap)], vars: &'static [(u8, &'static typos_vars::VariantsMap)],
@ -148,6 +152,17 @@ impl BuiltIn {
} }
} }
#[cfg(not(feature = "vars"))]
impl BuiltIn {
    /// Without the `vars` feature, the dictionary corrections are passed
    /// through unchanged, each one wrapped as a borrowed `Cow`.
    fn chain_with_vars(&self, corrections: &'static [&'static str]) -> Status<'static> {
        let borrowed = corrections.iter().copied().map(Cow::Borrowed).collect();
        Status::Corrections(borrowed)
    }

    /// Without the `vars` feature, no variant lookup is performed, so the
    /// result is always `None`.
    fn correct_with_vars(&self, _word: &str) -> Option<Status<'static>> {
        None
    }
}
impl typos::Dictionary for BuiltIn { impl typos::Dictionary for BuiltIn {
fn correct_ident<'s, 'w>(&'s self, ident: typos::tokens::Identifier<'w>) -> Option<Status<'s>> { fn correct_ident<'s, 'w>(&'s self, ident: typos::tokens::Identifier<'w>) -> Option<Status<'s>> {
BuiltIn::correct_ident(self, ident) BuiltIn::correct_ident(self, ident)
@ -296,7 +311,7 @@ mod test {
typos::tokens::Case::Lower, typos::tokens::Case::Lower,
0, 0,
)); ));
assert_eq!(correction, Some(Status::Valid)); assert_eq!(correction, None);
} }
#[cfg(feature = "vars")] #[cfg(feature = "vars")]