mirror of
https://github.com/crate-ci/typos.git
synced 2025-01-08 15:54:47 -05:00
refactor(varcon): Move away from PHF
This is mostly to give us implementation flexibility to change how we store the data, with the goal of reducing compilation memory usage. It does have a performance impact: a dict lookup goes from ~220ns to ~320ns, according to our micro benchmarks.
This commit is contained in:
parent
5a05a06a70
commit
b1cf03c7eb
7 changed files with 107691 additions and 113073 deletions
3
Cargo.lock
generated
3
Cargo.lock
generated
|
@ -1577,7 +1577,6 @@ name = "typos-vars"
|
|||
version = "0.5.0"
|
||||
dependencies = [
|
||||
"log",
|
||||
"phf",
|
||||
"unicase",
|
||||
"varcon-core",
|
||||
]
|
||||
|
@ -1592,8 +1591,6 @@ dependencies = [
|
|||
"env_logger 0.7.1",
|
||||
"itertools 0.10.0",
|
||||
"log",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
"structopt",
|
||||
"typos",
|
||||
"unicase",
|
||||
|
|
|
@ -15,7 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
|
|||
codecov = { repository = "crate-ci/typos" }
|
||||
|
||||
[dependencies]
|
||||
phf = { version = "0.8", features = ["unicase"] }
|
||||
unicase = "2.5"
|
||||
log = "0.4"
|
||||
varcon-core = { version = "^2.0", path = "../varcon-core", features = ["flags"] }
|
||||
|
|
|
@ -15,8 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
|
|||
codecov = { repository = "crate-ci/typos" }
|
||||
|
||||
[dependencies]
|
||||
phf = { version = "0.8", features = ["unicase"] }
|
||||
phf_codegen = "0.8"
|
||||
varcon = { version = "^0.5", path = "../../varcon", features = ["flags"] }
|
||||
varcon-core = { version = "^2.0", path = "../../varcon-core", features = ["flags"] }
|
||||
typos = { version = "^0.6", path = "../../typos" }
|
||||
|
|
|
@ -26,9 +26,6 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
|||
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
||||
writeln!(file).unwrap();
|
||||
|
||||
writeln!(file, "use unicase::UniCase;").unwrap();
|
||||
writeln!(file).unwrap();
|
||||
|
||||
writeln!(file, "pub type Variants = &'static [&'static str];",).unwrap();
|
||||
writeln!(
|
||||
file,
|
||||
|
@ -82,12 +79,11 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
|||
|
||||
writeln!(
|
||||
file,
|
||||
"pub static VARS_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)]> = "
|
||||
"pub(crate) static VARS_DICTIONARY: &[(unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)])] = &["
|
||||
)
|
||||
.unwrap();
|
||||
let entry_sets = entry_sets(entries.iter());
|
||||
let mut referenced_symbols: HashSet<&str> = HashSet::new();
|
||||
let mut builder = phf_codegen::Map::new();
|
||||
for (word, data) in entry_sets.iter() {
|
||||
if is_always_valid(data) {
|
||||
// No need to convert from current form to target form
|
||||
|
@ -95,15 +91,19 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
|||
}
|
||||
referenced_symbols.extend(data.iter().map(|(s, _)| s));
|
||||
let value = generate_link(&data);
|
||||
builder.entry(unicase::UniCase::new(word), &value);
|
||||
let word = unicase::UniCase::new(word);
|
||||
let key = if word.is_ascii() {
|
||||
format!("unicase::UniCase::ascii({:?})", word)
|
||||
} else {
|
||||
format!("unicase::UniCase::unicode({:?})", word)
|
||||
};
|
||||
writeln!(file, " ({}, {}),", key, &value).unwrap();
|
||||
smallest = std::cmp::min(smallest, word.len());
|
||||
largest = std::cmp::max(largest, word.len());
|
||||
|
||||
no_invalid &= !is_always_invalid(data);
|
||||
}
|
||||
let codegenned = builder.build();
|
||||
writeln!(file, "{}", codegenned).unwrap();
|
||||
writeln!(file, ";").unwrap();
|
||||
writeln!(file, "];").unwrap();
|
||||
|
||||
writeln!(file).unwrap();
|
||||
writeln!(
|
||||
|
|
|
@ -4,3 +4,10 @@ pub use crate::vars_codegen::*;
|
|||
|
||||
pub use varcon_core::Category;
|
||||
pub use varcon_core::CategorySet;
|
||||
|
||||
pub fn find(word: &'_ unicase::UniCase<&str>) -> Option<&'static [(u8, &'static VariantsMap)]> {
|
||||
VARS_DICTIONARY
|
||||
.binary_search_by_key(word, |(key, _)| *key)
|
||||
.map(|i| VARS_DICTIONARY[i].1)
|
||||
.ok()
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -96,8 +96,8 @@ impl BuiltIn {
|
|||
|
||||
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
|
||||
if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) {
|
||||
map_lookup(&typos_vars::VARS_DICTIONARY, word)
|
||||
.map(|variants| self.select_variant(variants))
|
||||
let word_case = unicase::UniCase::new(word);
|
||||
typos_vars::find(&word_case).map(|variants| self.select_variant(variants))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue