Merge pull request #272 from epage/phf1

refactor(varcon): Remove reliance on const-fn
This commit is contained in:
Ed Page 2021-06-05 11:50:35 -05:00 committed by GitHub
commit 0aaa2c0d60
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 107770 additions and 113095 deletions

3
Cargo.lock generated
View file

@ -1577,7 +1577,6 @@ name = "typos-vars"
version = "0.5.0" version = "0.5.0"
dependencies = [ dependencies = [
"log", "log",
"phf",
"unicase", "unicase",
"varcon-core", "varcon-core",
] ]
@ -1592,8 +1591,6 @@ dependencies = [
"env_logger 0.7.1", "env_logger 0.7.1",
"itertools 0.10.0", "itertools 0.10.0",
"log", "log",
"phf",
"phf_codegen",
"structopt", "structopt",
"typos", "typos",
"unicase", "unicase",

View file

@ -8,27 +8,72 @@ fn bench_dict_load(c: &mut Criterion) {
group.finish(); group.finish();
} }
fn bench_dict_lookup(c: &mut Criterion) { fn bench_dict_correct_word(c: &mut Criterion) {
let mut group = c.benchmark_group("lookup"); let mut group = c.benchmark_group("correct_word");
group.bench_function(BenchmarkId::new("lookup", "hit"), |b| {
let corrections = typos_cli::dict::BuiltIn::new(Default::default()); {
let input = typos::tokens::Word::new("successs", 0).unwrap(); let case = "dict_fine";
let input = "finalizes";
group.bench_function(BenchmarkId::new("en", case), |b| {
let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En);
let input = typos::tokens::Word::new(input, 0).unwrap();
#[cfg(feature = "vars")]
assert!(corrections.correct_word(input).is_none());
b.iter(|| corrections.correct_word(input));
});
}
{
let case = "dict_correct";
let input = "finallizes";
let output = "finalizes";
group.bench_function(BenchmarkId::new("en", case), |b| {
let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En);
let input = typos::tokens::Word::new(input, 0).unwrap();
assert_eq!( assert_eq!(
corrections.correct_word(input), corrections.correct_word(input),
Some(typos::Status::Corrections(vec![ Some(typos::Status::Corrections(vec![
std::borrow::Cow::Borrowed("successes") std::borrow::Cow::Borrowed(output)
])) ]))
); );
b.iter(|| corrections.correct_word(input)); b.iter(|| corrections.correct_word(input));
}); });
group.bench_function(BenchmarkId::new("lookup", "miss"), |b| { }
let corrections = typos_cli::dict::BuiltIn::new(Default::default()); {
let input = typos::tokens::Word::new("success", 0).unwrap(); let case = "dict_correct_case";
assert!(corrections.correct_word(input).is_none()); let input = "FINALLIZES";
let output = "FINALIZES";
group.bench_function(BenchmarkId::new("en", case), |b| {
let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En);
let input = typos::tokens::Word::new(input, 0).unwrap();
assert_eq!(
corrections.correct_word(input),
Some(typos::Status::Corrections(vec![
std::borrow::Cow::Borrowed(output)
]))
);
b.iter(|| corrections.correct_word(input)); b.iter(|| corrections.correct_word(input));
}); });
}
#[cfg(feature = "vars")]
{
let case = "dict_to_varcon";
let input = "finalizes";
let output = "finalises";
group.bench_function(BenchmarkId::new("en-gb", case), |b| {
let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::EnGb);
let input = typos::tokens::Word::new(input, 0).unwrap();
assert_eq!(
corrections.correct_word(input),
Some(typos::Status::Corrections(vec![
std::borrow::Cow::Borrowed(output)
]))
);
b.iter(|| corrections.correct_word(input));
});
}
group.finish(); group.finish();
} }
criterion_group!(benches, bench_dict_load, bench_dict_lookup); criterion_group!(benches, bench_dict_load, bench_dict_correct_word);
criterion_main!(benches); criterion_main!(benches);

View file

@ -15,7 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" } codecov = { repository = "crate-ci/typos" }
[dependencies] [dependencies]
phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5" unicase = "2.5"
log = "0.4" log = "0.4"
varcon-core = { version = "^2.0", path = "../varcon-core", features = ["flags"] } varcon-core = { version = "^2.0", path = "../varcon-core", features = ["flags"] }

View file

@ -15,8 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" } codecov = { repository = "crate-ci/typos" }
[dependencies] [dependencies]
phf = { version = "0.8", features = ["unicase"] }
phf_codegen = "0.8"
varcon = { version = "^0.5", path = "../../varcon", features = ["flags"] } varcon = { version = "^0.5", path = "../../varcon", features = ["flags"] }
varcon-core = { version = "^2.0", path = "../../varcon-core", features = ["flags"] } varcon-core = { version = "^2.0", path = "../../varcon-core", features = ["flags"] }
typos = { version = "^0.6", path = "../../typos" } typos = { version = "^0.6", path = "../../typos" }

View file

@ -26,9 +26,6 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap(); writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
writeln!(file).unwrap(); writeln!(file).unwrap();
writeln!(file, "use unicase::UniCase;").unwrap();
writeln!(file).unwrap();
writeln!(file, "pub type Variants = &'static [&'static str];",).unwrap(); writeln!(file, "pub type Variants = &'static [&'static str];",).unwrap();
writeln!( writeln!(
file, file,
@ -82,12 +79,11 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
writeln!( writeln!(
file, file,
"pub static VARS_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)]> = " "pub(crate) static VARS_DICTIONARY: &[(crate::EncodedStr, &[(u8, &VariantsMap)])] = &["
) )
.unwrap(); .unwrap();
let entry_sets = entry_sets(entries.iter()); let entry_sets = entry_sets(entries.iter());
let mut referenced_symbols: HashSet<&str> = HashSet::new(); let mut referenced_symbols: HashSet<&str> = HashSet::new();
let mut builder = phf_codegen::Map::new();
for (word, data) in entry_sets.iter() { for (word, data) in entry_sets.iter() {
if is_always_valid(data) { if is_always_valid(data) {
// No need to convert from current form to target form // No need to convert from current form to target form
@ -95,15 +91,19 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
} }
referenced_symbols.extend(data.iter().map(|(s, _)| s)); referenced_symbols.extend(data.iter().map(|(s, _)| s));
let value = generate_link(&data); let value = generate_link(&data);
builder.entry(unicase::UniCase::new(word), &value); let word = unicase::UniCase::new(word);
let key = if word.is_ascii() {
format!("crate::EncodedStr::Ascii({:?})", word)
} else {
format!("crate::EncodedStr::Unicode({:?})", word)
};
writeln!(file, " ({}, {}),", key, &value).unwrap();
smallest = std::cmp::min(smallest, word.len()); smallest = std::cmp::min(smallest, word.len());
largest = std::cmp::max(largest, word.len()); largest = std::cmp::max(largest, word.len());
no_invalid &= !is_always_invalid(data); no_invalid &= !is_always_invalid(data);
} }
let codegenned = builder.build(); writeln!(file, "];").unwrap();
writeln!(file, "{}", codegenned).unwrap();
writeln!(file, ";").unwrap();
writeln!(file).unwrap(); writeln!(file).unwrap();
writeln!( writeln!(

View file

@ -4,3 +4,25 @@ pub use crate::vars_codegen::*;
pub use varcon_core::Category; pub use varcon_core::Category;
pub use varcon_core::CategorySet; pub use varcon_core::CategorySet;
/// Look up the variant table recorded for `word` in `VARS_DICTIONARY`.
///
/// The lookup is a binary search that compares `word` against each entry's
/// key after converting that key to a `unicase::UniCase`.
///
/// Returns the second element of the matching entry, or `None` when no entry
/// compares equal to `word`.
///
/// NOTE(review): binary search only gives meaningful answers on a table
/// sorted in the same (`UniCase`) order used for the comparison; this relies
/// on the generated table being emitted that way — confirm in the generator.
pub fn find(word: &'_ unicase::UniCase<&str>) -> Option<&'static [(u8, &'static VariantsMap)]> {
    // `Err` only carries an insertion point, which is of no use here, so a
    // miss is turned straight into `None`.
    let index = VARS_DICTIONARY
        .binary_search_by_key(word, |(encoded, _)| encoded.convert())
        .ok()?;
    Some(VARS_DICTIONARY[index].1)
}
/// Key of a `VARS_DICTIONARY` entry, tagged with the encoding it was
/// classified as.
///
/// Only the ASCII form is enabled at the moment; the `Unicode` form is kept
/// below as a disabled placeholder.
#[derive(Clone, Copy, Debug)]
pub(crate) enum EncodedStr {
    // Disabled variant: Unicode(&'static str),
    Ascii(&'static str),
}

impl EncodedStr {
    /// Wrap the stored string in the `unicase::UniCase` form used for
    /// dictionary comparisons.
    fn convert(self) -> unicase::UniCase<&'static str> {
        // This pattern is irrefutable only while `Ascii` is the sole variant.
        // If `Unicode` is re-enabled, switch to a `match` with an extra arm
        // mapping it through `unicase::UniCase::unicode`.
        let Self::Ascii(text) = self;
        unicase::UniCase::ascii(text)
    }
}

File diff suppressed because it is too large Load diff

View file

@ -96,8 +96,8 @@ impl BuiltIn {
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> { fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) { if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) {
map_lookup(&typos_vars::VARS_DICTIONARY, word) let word_case = unicase::UniCase::new(word);
.map(|variants| self.select_variant(variants)) typos_vars::find(&word_case).map(|variants| self.select_variant(variants))
} else { } else {
None None
} }