refactor(varcon): Move away from PHF

This is mostly to give implementation flexibility for changing out how
we store the data to reduce compilation memory usage.

This does have a performance impact: a dict lookup goes from ~220ns to
~320ns, according to our micro benchmarks.
This commit is contained in:
Ed Page 2021-05-31 21:29:39 -05:00
parent 5a05a06a70
commit b1cf03c7eb
7 changed files with 107691 additions and 113073 deletions

3
Cargo.lock generated
View file

@ -1577,7 +1577,6 @@ name = "typos-vars"
version = "0.5.0"
dependencies = [
"log",
"phf",
"unicase",
"varcon-core",
]
@ -1592,8 +1591,6 @@ dependencies = [
"env_logger 0.7.1",
"itertools 0.10.0",
"log",
"phf",
"phf_codegen",
"structopt",
"typos",
"unicase",

View file

@ -15,7 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5"
log = "0.4"
varcon-core = { version = "^2.0", path = "../varcon-core", features = ["flags"] }

View file

@ -15,8 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
phf = { version = "0.8", features = ["unicase"] }
phf_codegen = "0.8"
varcon = { version = "^0.5", path = "../../varcon", features = ["flags"] }
varcon-core = { version = "^2.0", path = "../../varcon-core", features = ["flags"] }
typos = { version = "^0.6", path = "../../typos" }

View file

@ -26,9 +26,6 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
writeln!(file).unwrap();
writeln!(file, "use unicase::UniCase;").unwrap();
writeln!(file).unwrap();
writeln!(file, "pub type Variants = &'static [&'static str];",).unwrap();
writeln!(
file,
@ -82,12 +79,11 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
writeln!(
file,
"pub static VARS_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)]> = "
"pub(crate) static VARS_DICTIONARY: &[(unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)])] = &["
)
.unwrap();
let entry_sets = entry_sets(entries.iter());
let mut referenced_symbols: HashSet<&str> = HashSet::new();
let mut builder = phf_codegen::Map::new();
for (word, data) in entry_sets.iter() {
if is_always_valid(data) {
// No need to convert from current form to target form
@ -95,15 +91,19 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
}
referenced_symbols.extend(data.iter().map(|(s, _)| s));
let value = generate_link(&data);
builder.entry(unicase::UniCase::new(word), &value);
let word = unicase::UniCase::new(word);
let key = if word.is_ascii() {
format!("unicase::UniCase::ascii({:?})", word)
} else {
format!("unicase::UniCase::unicode({:?})", word)
};
writeln!(file, " ({}, {}),", key, &value).unwrap();
smallest = std::cmp::min(smallest, word.len());
largest = std::cmp::max(largest, word.len());
no_invalid &= !is_always_invalid(data);
}
let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap();
writeln!(file, ";").unwrap();
writeln!(file, "];").unwrap();
writeln!(file).unwrap();
writeln!(

View file

@ -4,3 +4,10 @@ pub use crate::vars_codegen::*;
pub use varcon_core::Category;
pub use varcon_core::CategorySet;
/// Looks up `word` in `VARS_DICTIONARY`.
///
/// Returns the second element of the matching table entry (a slice of
/// `(u8, &VariantsMap)` pairs) when a key comparing equal to `word` is found,
/// or `None` when the search finds no such key.
pub fn find(word: &'_ unicase::UniCase<&str>) -> Option<&'static [(u8, &'static VariantsMap)]> {
    // Binary search only gives meaningful results on a table sorted by key.
    // NOTE(review): confirm the code generator emits entries in `UniCase` order.
    let index = VARS_DICTIONARY
        .binary_search_by_key(word, |(key, _)| *key)
        .ok()?;
    Some(VARS_DICTIONARY[index].1)
}

File diff suppressed because it is too large Load diff

View file

@ -96,8 +96,8 @@ impl BuiltIn {
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) {
map_lookup(&typos_vars::VARS_DICTIONARY, word)
.map(|variants| self.select_variant(variants))
let word_case = unicase::UniCase::new(word);
typos_vars::find(&word_case).map(|variants| self.select_variant(variants))
} else {
None
}