mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-22 09:01:04 -05:00
perf(dict): Bypass vars when possible
Variant support slows us down by 10-50%. I assume most people will run with `en` and so most of this overhead goes to waste. So instead of merging vars with dict, let's instead get a quick win by just skipping vars when we don't need to. If the assumptions behind this change over time or if there is need for speeding up a specific locale, we can re-address this. Before: ``` check_file/Typos/code time: [35.860 us 36.021 us 36.187 us] thrpt: [8.0117 MiB/s 8.0486 MiB/s 8.0846 MiB/s] check_file/Typos/corpus time: [26.966 ms 27.215 ms 27.521 ms] thrpt: [21.127 MiB/s 21.365 MiB/s 21.562 MiB/s] ``` After: ``` check_file/Typos/code time: [33.837 us 33.928 us 34.031 us] thrpt: [8.5191 MiB/s 8.5452 MiB/s 8.5680 MiB/s] check_file/Typos/corpus time: [17.521 ms 17.620 ms 17.730 ms] thrpt: [32.794 MiB/s 32.999 MiB/s 33.184 MiB/s] ``` This puts us in line with `--no-default-features --features dict` Fixes #253
This commit is contained in:
parent
d65fa79d0e
commit
b99f32dea8
3 changed files with 46 additions and 17 deletions
|
@ -78,6 +78,7 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
|||
|
||||
let mut smallest = usize::MAX;
|
||||
let mut largest = usize::MIN;
|
||||
let mut no_invalid = true;
|
||||
|
||||
writeln!(
|
||||
file,
|
||||
|
@ -97,6 +98,8 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
|||
builder.entry(unicase::UniCase::new(word), &value);
|
||||
smallest = std::cmp::min(smallest, word.len());
|
||||
largest = std::cmp::max(largest, word.len());
|
||||
|
||||
no_invalid &= !is_always_invalid(data);
|
||||
}
|
||||
let codegenned = builder.build();
|
||||
writeln!(file, "{}", codegenned).unwrap();
|
||||
|
@ -110,6 +113,10 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
|||
)
|
||||
.unwrap();
|
||||
|
||||
writeln!(file).unwrap();
|
||||
writeln!(file, "pub const NO_INVALID: bool = {:?};", no_invalid,).unwrap();
|
||||
|
||||
writeln!(file).unwrap();
|
||||
for (symbol, entry) in entries.iter() {
|
||||
if !referenced_symbols.contains(symbol.as_str()) {
|
||||
continue;
|
||||
|
@ -156,6 +163,15 @@ fn is_always_valid(data: &[(&str, varcon::CategorySet)]) -> bool {
|
|||
false
|
||||
}
|
||||
|
||||
fn is_always_invalid(data: &[(&str, varcon::CategorySet)]) -> bool {
|
||||
for (_symbol, set) in data.iter() {
|
||||
if set.is_empty() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn entries() -> BTreeMap<String, varcon_core::Entry> {
|
||||
varcon::VARCON
|
||||
.iter()
|
||||
|
|
|
@ -113083,6 +113083,9 @@ pub static VARS_DICTIONARY: phf::Map<
|
|||
};
|
||||
|
||||
pub const WORD_RANGE: std::ops::RangeInclusive<usize> = 2..=24;
|
||||
|
||||
pub const NO_INVALID: bool = true;
|
||||
|
||||
pub(crate) static ENTRY_ABETTORS_7043394254318611656: VariantsMap =
|
||||
[&["abettors"], &["abetters"], &["abettors"], &["abetters"]];
|
||||
|
||||
|
|
44
src/dict.rs
44
src/dict.rs
|
@ -72,26 +72,30 @@ impl BuiltIn {
|
|||
#[cfg(feature = "vars")]
|
||||
impl BuiltIn {
|
||||
fn chain_with_vars(&self, corrections: &'static [&'static str]) -> Status<'static> {
|
||||
let mut chained: Vec<_> = corrections
|
||||
.iter()
|
||||
.flat_map(|c| match self.correct_with_vars(c) {
|
||||
Some(Status::Valid) | None => vec![Cow::Borrowed(*c)],
|
||||
Some(Status::Corrections(vars)) => vars,
|
||||
Some(Status::Invalid) => {
|
||||
unreachable!("correct_with_vars should always have valid suggestions")
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
if chained.len() != 1 {
|
||||
chained.sort_unstable();
|
||||
chained.dedup();
|
||||
if self.is_vars_enabled() {
|
||||
let mut chained: Vec<_> = corrections
|
||||
.iter()
|
||||
.flat_map(|c| match self.correct_with_vars(c) {
|
||||
Some(Status::Valid) | None => vec![Cow::Borrowed(*c)],
|
||||
Some(Status::Corrections(vars)) => vars,
|
||||
Some(Status::Invalid) => {
|
||||
unreachable!("correct_with_vars should always have valid suggestions")
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
if chained.len() != 1 {
|
||||
chained.sort_unstable();
|
||||
chained.dedup();
|
||||
}
|
||||
debug_assert!(!chained.is_empty());
|
||||
Status::Corrections(chained)
|
||||
} else {
|
||||
Status::Corrections(corrections.iter().map(|c| Cow::Borrowed(*c)).collect())
|
||||
}
|
||||
debug_assert!(!chained.is_empty());
|
||||
Status::Corrections(chained)
|
||||
}
|
||||
|
||||
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
|
||||
if typos_vars::WORD_RANGE.contains(&word.len()) {
|
||||
if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) {
|
||||
map_lookup(&typos_vars::VARS_DICTIONARY, word)
|
||||
.map(|variants| self.select_variant(variants))
|
||||
} else {
|
||||
|
@ -99,6 +103,12 @@ impl BuiltIn {
|
|||
}
|
||||
}
|
||||
|
||||
fn is_vars_enabled(&self) -> bool {
|
||||
#![allow(clippy::assertions_on_constants)]
|
||||
debug_assert!(typos_vars::NO_INVALID);
|
||||
self.locale.is_some()
|
||||
}
|
||||
|
||||
fn select_variant(
|
||||
&self,
|
||||
vars: &'static [(u8, &'static typos_vars::VariantsMap)],
|
||||
|
@ -301,7 +311,7 @@ mod test {
|
|||
typos::tokens::Case::Lower,
|
||||
0,
|
||||
));
|
||||
assert_eq!(correction, Some(Status::Valid));
|
||||
assert_eq!(correction, None);
|
||||
}
|
||||
|
||||
#[cfg(feature = "vars")]
|
||||
|
|
Loading…
Reference in a new issue