mirror of
https://github.com/crate-ci/typos.git
synced 2025-01-10 00:34:45 -05:00
Merge pull request #272 from epage/phf1
refactor(varcon): Remove reliance on const-fn
This commit is contained in:
commit
0aaa2c0d60
8 changed files with 107770 additions and 113095 deletions
3
Cargo.lock
generated
3
Cargo.lock
generated
|
@ -1577,7 +1577,6 @@ name = "typos-vars"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"phf",
|
|
||||||
"unicase",
|
"unicase",
|
||||||
"varcon-core",
|
"varcon-core",
|
||||||
]
|
]
|
||||||
|
@ -1592,8 +1591,6 @@ dependencies = [
|
||||||
"env_logger 0.7.1",
|
"env_logger 0.7.1",
|
||||||
"itertools 0.10.0",
|
"itertools 0.10.0",
|
||||||
"log",
|
"log",
|
||||||
"phf",
|
|
||||||
"phf_codegen",
|
|
||||||
"structopt",
|
"structopt",
|
||||||
"typos",
|
"typos",
|
||||||
"unicase",
|
"unicase",
|
||||||
|
|
|
@ -8,27 +8,72 @@ fn bench_dict_load(c: &mut Criterion) {
|
||||||
group.finish();
|
group.finish();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn bench_dict_lookup(c: &mut Criterion) {
|
fn bench_dict_correct_word(c: &mut Criterion) {
|
||||||
let mut group = c.benchmark_group("lookup");
|
let mut group = c.benchmark_group("correct_word");
|
||||||
group.bench_function(BenchmarkId::new("lookup", "hit"), |b| {
|
|
||||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
{
|
||||||
let input = typos::tokens::Word::new("successs", 0).unwrap();
|
let case = "dict_fine";
|
||||||
assert_eq!(
|
let input = "finalizes";
|
||||||
corrections.correct_word(input),
|
group.bench_function(BenchmarkId::new("en", case), |b| {
|
||||||
Some(typos::Status::Corrections(vec![
|
let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En);
|
||||||
std::borrow::Cow::Borrowed("successes")
|
let input = typos::tokens::Word::new(input, 0).unwrap();
|
||||||
]))
|
#[cfg(feature = "vars")]
|
||||||
);
|
assert!(corrections.correct_word(input).is_none());
|
||||||
b.iter(|| corrections.correct_word(input));
|
b.iter(|| corrections.correct_word(input));
|
||||||
});
|
});
|
||||||
group.bench_function(BenchmarkId::new("lookup", "miss"), |b| {
|
}
|
||||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
{
|
||||||
let input = typos::tokens::Word::new("success", 0).unwrap();
|
let case = "dict_correct";
|
||||||
assert!(corrections.correct_word(input).is_none());
|
let input = "finallizes";
|
||||||
b.iter(|| corrections.correct_word(input));
|
let output = "finalizes";
|
||||||
});
|
group.bench_function(BenchmarkId::new("en", case), |b| {
|
||||||
|
let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En);
|
||||||
|
let input = typos::tokens::Word::new(input, 0).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
corrections.correct_word(input),
|
||||||
|
Some(typos::Status::Corrections(vec![
|
||||||
|
std::borrow::Cow::Borrowed(output)
|
||||||
|
]))
|
||||||
|
);
|
||||||
|
b.iter(|| corrections.correct_word(input));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let case = "dict_correct_case";
|
||||||
|
let input = "FINALLIZES";
|
||||||
|
let output = "FINALIZES";
|
||||||
|
group.bench_function(BenchmarkId::new("en", case), |b| {
|
||||||
|
let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En);
|
||||||
|
let input = typos::tokens::Word::new(input, 0).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
corrections.correct_word(input),
|
||||||
|
Some(typos::Status::Corrections(vec![
|
||||||
|
std::borrow::Cow::Borrowed(output)
|
||||||
|
]))
|
||||||
|
);
|
||||||
|
b.iter(|| corrections.correct_word(input));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
#[cfg(feature = "vars")]
|
||||||
|
{
|
||||||
|
let case = "dict_to_varcon";
|
||||||
|
let input = "finalizes";
|
||||||
|
let output = "finalises";
|
||||||
|
group.bench_function(BenchmarkId::new("en-gb", case), |b| {
|
||||||
|
let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::EnGb);
|
||||||
|
let input = typos::tokens::Word::new(input, 0).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
corrections.correct_word(input),
|
||||||
|
Some(typos::Status::Corrections(vec![
|
||||||
|
std::borrow::Cow::Borrowed(output)
|
||||||
|
]))
|
||||||
|
);
|
||||||
|
b.iter(|| corrections.correct_word(input));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
group.finish();
|
group.finish();
|
||||||
}
|
}
|
||||||
|
|
||||||
criterion_group!(benches, bench_dict_load, bench_dict_lookup);
|
criterion_group!(benches, bench_dict_load, bench_dict_correct_word);
|
||||||
criterion_main!(benches);
|
criterion_main!(benches);
|
||||||
|
|
|
@ -15,7 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
codecov = { repository = "crate-ci/typos" }
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
phf = { version = "0.8", features = ["unicase"] }
|
|
||||||
unicase = "2.5"
|
unicase = "2.5"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
varcon-core = { version = "^2.0", path = "../varcon-core", features = ["flags"] }
|
varcon-core = { version = "^2.0", path = "../varcon-core", features = ["flags"] }
|
||||||
|
|
|
@ -15,8 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
codecov = { repository = "crate-ci/typos" }
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
phf = { version = "0.8", features = ["unicase"] }
|
|
||||||
phf_codegen = "0.8"
|
|
||||||
varcon = { version = "^0.5", path = "../../varcon", features = ["flags"] }
|
varcon = { version = "^0.5", path = "../../varcon", features = ["flags"] }
|
||||||
varcon-core = { version = "^2.0", path = "../../varcon-core", features = ["flags"] }
|
varcon-core = { version = "^2.0", path = "../../varcon-core", features = ["flags"] }
|
||||||
typos = { version = "^0.6", path = "../../typos" }
|
typos = { version = "^0.6", path = "../../typos" }
|
||||||
|
|
|
@ -26,9 +26,6 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
||||||
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
||||||
writeln!(file).unwrap();
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
writeln!(file, "use unicase::UniCase;").unwrap();
|
|
||||||
writeln!(file).unwrap();
|
|
||||||
|
|
||||||
writeln!(file, "pub type Variants = &'static [&'static str];",).unwrap();
|
writeln!(file, "pub type Variants = &'static [&'static str];",).unwrap();
|
||||||
writeln!(
|
writeln!(
|
||||||
file,
|
file,
|
||||||
|
@ -82,12 +79,11 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
||||||
|
|
||||||
writeln!(
|
writeln!(
|
||||||
file,
|
file,
|
||||||
"pub static VARS_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)]> = "
|
"pub(crate) static VARS_DICTIONARY: &[(crate::EncodedStr, &[(u8, &VariantsMap)])] = &["
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let entry_sets = entry_sets(entries.iter());
|
let entry_sets = entry_sets(entries.iter());
|
||||||
let mut referenced_symbols: HashSet<&str> = HashSet::new();
|
let mut referenced_symbols: HashSet<&str> = HashSet::new();
|
||||||
let mut builder = phf_codegen::Map::new();
|
|
||||||
for (word, data) in entry_sets.iter() {
|
for (word, data) in entry_sets.iter() {
|
||||||
if is_always_valid(data) {
|
if is_always_valid(data) {
|
||||||
// No need to convert from current form to target form
|
// No need to convert from current form to target form
|
||||||
|
@ -95,15 +91,19 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
||||||
}
|
}
|
||||||
referenced_symbols.extend(data.iter().map(|(s, _)| s));
|
referenced_symbols.extend(data.iter().map(|(s, _)| s));
|
||||||
let value = generate_link(&data);
|
let value = generate_link(&data);
|
||||||
builder.entry(unicase::UniCase::new(word), &value);
|
let word = unicase::UniCase::new(word);
|
||||||
|
let key = if word.is_ascii() {
|
||||||
|
format!("crate::EncodedStr::Ascii({:?})", word)
|
||||||
|
} else {
|
||||||
|
format!("crate::EncodedStr::Unicode({:?})", word)
|
||||||
|
};
|
||||||
|
writeln!(file, " ({}, {}),", key, &value).unwrap();
|
||||||
smallest = std::cmp::min(smallest, word.len());
|
smallest = std::cmp::min(smallest, word.len());
|
||||||
largest = std::cmp::max(largest, word.len());
|
largest = std::cmp::max(largest, word.len());
|
||||||
|
|
||||||
no_invalid &= !is_always_invalid(data);
|
no_invalid &= !is_always_invalid(data);
|
||||||
}
|
}
|
||||||
let codegenned = builder.build();
|
writeln!(file, "];").unwrap();
|
||||||
writeln!(file, "{}", codegenned).unwrap();
|
|
||||||
writeln!(file, ";").unwrap();
|
|
||||||
|
|
||||||
writeln!(file).unwrap();
|
writeln!(file).unwrap();
|
||||||
writeln!(
|
writeln!(
|
||||||
|
|
|
@ -4,3 +4,25 @@ pub use crate::vars_codegen::*;
|
||||||
|
|
||||||
pub use varcon_core::Category;
|
pub use varcon_core::Category;
|
||||||
pub use varcon_core::CategorySet;
|
pub use varcon_core::CategorySet;
|
||||||
|
|
||||||
|
pub fn find(word: &'_ unicase::UniCase<&str>) -> Option<&'static [(u8, &'static VariantsMap)]> {
|
||||||
|
VARS_DICTIONARY
|
||||||
|
.binary_search_by_key(word, |(key, _)| key.convert())
|
||||||
|
.map(|i| VARS_DICTIONARY[i].1)
|
||||||
|
.ok()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
pub(crate) enum EncodedStr {
|
||||||
|
//Unicode(&'static str),
|
||||||
|
Ascii(&'static str),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EncodedStr {
|
||||||
|
fn convert(self) -> unicase::UniCase<&'static str> {
|
||||||
|
match self {
|
||||||
|
//EncodedStr::Unicode(s) => unicase::UniCase::unicode(s),
|
||||||
|
EncodedStr::Ascii(s) => unicase::UniCase::ascii(s),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -96,8 +96,8 @@ impl BuiltIn {
|
||||||
|
|
||||||
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
|
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
|
||||||
if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) {
|
if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) {
|
||||||
map_lookup(&typos_vars::VARS_DICTIONARY, word)
|
let word_case = unicase::UniCase::new(word);
|
||||||
.map(|variants| self.select_variant(variants))
|
typos_vars::find(&word_case).map(|variants| self.select_variant(variants))
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue