mirror of
https://github.com/crate-ci/typos.git
synced 2025-01-24 15:38:57 -05:00
Merge pull request #272 from epage/phf1
refactor(varcon): Remove reliance on const-fn
This commit is contained in:
commit
0aaa2c0d60
8 changed files with 107770 additions and 113095 deletions
3
Cargo.lock
generated
3
Cargo.lock
generated
|
@ -1577,7 +1577,6 @@ name = "typos-vars"
|
|||
version = "0.5.0"
|
||||
dependencies = [
|
||||
"log",
|
||||
"phf",
|
||||
"unicase",
|
||||
"varcon-core",
|
||||
]
|
||||
|
@ -1592,8 +1591,6 @@ dependencies = [
|
|||
"env_logger 0.7.1",
|
||||
"itertools 0.10.0",
|
||||
"log",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
"structopt",
|
||||
"typos",
|
||||
"unicase",
|
||||
|
|
|
@ -8,27 +8,72 @@ fn bench_dict_load(c: &mut Criterion) {
|
|||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_dict_lookup(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("lookup");
|
||||
group.bench_function(BenchmarkId::new("lookup", "hit"), |b| {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let input = typos::tokens::Word::new("successs", 0).unwrap();
|
||||
fn bench_dict_correct_word(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("correct_word");
|
||||
|
||||
{
|
||||
let case = "dict_fine";
|
||||
let input = "finalizes";
|
||||
group.bench_function(BenchmarkId::new("en", case), |b| {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En);
|
||||
let input = typos::tokens::Word::new(input, 0).unwrap();
|
||||
#[cfg(feature = "vars")]
|
||||
assert!(corrections.correct_word(input).is_none());
|
||||
b.iter(|| corrections.correct_word(input));
|
||||
});
|
||||
}
|
||||
{
|
||||
let case = "dict_correct";
|
||||
let input = "finallizes";
|
||||
let output = "finalizes";
|
||||
group.bench_function(BenchmarkId::new("en", case), |b| {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En);
|
||||
let input = typos::tokens::Word::new(input, 0).unwrap();
|
||||
assert_eq!(
|
||||
corrections.correct_word(input),
|
||||
Some(typos::Status::Corrections(vec![
|
||||
std::borrow::Cow::Borrowed("successes")
|
||||
std::borrow::Cow::Borrowed(output)
|
||||
]))
|
||||
);
|
||||
b.iter(|| corrections.correct_word(input));
|
||||
});
|
||||
group.bench_function(BenchmarkId::new("lookup", "miss"), |b| {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let input = typos::tokens::Word::new("success", 0).unwrap();
|
||||
assert!(corrections.correct_word(input).is_none());
|
||||
}
|
||||
{
|
||||
let case = "dict_correct_case";
|
||||
let input = "FINALLIZES";
|
||||
let output = "FINALIZES";
|
||||
group.bench_function(BenchmarkId::new("en", case), |b| {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En);
|
||||
let input = typos::tokens::Word::new(input, 0).unwrap();
|
||||
assert_eq!(
|
||||
corrections.correct_word(input),
|
||||
Some(typos::Status::Corrections(vec![
|
||||
std::borrow::Cow::Borrowed(output)
|
||||
]))
|
||||
);
|
||||
b.iter(|| corrections.correct_word(input));
|
||||
});
|
||||
}
|
||||
#[cfg(feature = "vars")]
|
||||
{
|
||||
let case = "dict_to_varcon";
|
||||
let input = "finalizes";
|
||||
let output = "finalises";
|
||||
group.bench_function(BenchmarkId::new("en-gb", case), |b| {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::EnGb);
|
||||
let input = typos::tokens::Word::new(input, 0).unwrap();
|
||||
assert_eq!(
|
||||
corrections.correct_word(input),
|
||||
Some(typos::Status::Corrections(vec![
|
||||
std::borrow::Cow::Borrowed(output)
|
||||
]))
|
||||
);
|
||||
b.iter(|| corrections.correct_word(input));
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_dict_load, bench_dict_lookup);
|
||||
criterion_group!(benches, bench_dict_load, bench_dict_correct_word);
|
||||
criterion_main!(benches);
|
||||
|
|
|
@ -15,7 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
|
|||
codecov = { repository = "crate-ci/typos" }
|
||||
|
||||
[dependencies]
|
||||
phf = { version = "0.8", features = ["unicase"] }
|
||||
unicase = "2.5"
|
||||
log = "0.4"
|
||||
varcon-core = { version = "^2.0", path = "../varcon-core", features = ["flags"] }
|
||||
|
|
|
@ -15,8 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
|
|||
codecov = { repository = "crate-ci/typos" }
|
||||
|
||||
[dependencies]
|
||||
phf = { version = "0.8", features = ["unicase"] }
|
||||
phf_codegen = "0.8"
|
||||
varcon = { version = "^0.5", path = "../../varcon", features = ["flags"] }
|
||||
varcon-core = { version = "^2.0", path = "../../varcon-core", features = ["flags"] }
|
||||
typos = { version = "^0.6", path = "../../typos" }
|
||||
|
|
|
@ -26,9 +26,6 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
|||
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
||||
writeln!(file).unwrap();
|
||||
|
||||
writeln!(file, "use unicase::UniCase;").unwrap();
|
||||
writeln!(file).unwrap();
|
||||
|
||||
writeln!(file, "pub type Variants = &'static [&'static str];",).unwrap();
|
||||
writeln!(
|
||||
file,
|
||||
|
@ -82,12 +79,11 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
|||
|
||||
writeln!(
|
||||
file,
|
||||
"pub static VARS_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)]> = "
|
||||
"pub(crate) static VARS_DICTIONARY: &[(crate::EncodedStr, &[(u8, &VariantsMap)])] = &["
|
||||
)
|
||||
.unwrap();
|
||||
let entry_sets = entry_sets(entries.iter());
|
||||
let mut referenced_symbols: HashSet<&str> = HashSet::new();
|
||||
let mut builder = phf_codegen::Map::new();
|
||||
for (word, data) in entry_sets.iter() {
|
||||
if is_always_valid(data) {
|
||||
// No need to convert from current form to target form
|
||||
|
@ -95,15 +91,19 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
|||
}
|
||||
referenced_symbols.extend(data.iter().map(|(s, _)| s));
|
||||
let value = generate_link(&data);
|
||||
builder.entry(unicase::UniCase::new(word), &value);
|
||||
let word = unicase::UniCase::new(word);
|
||||
let key = if word.is_ascii() {
|
||||
format!("crate::EncodedStr::Ascii({:?})", word)
|
||||
} else {
|
||||
format!("crate::EncodedStr::Unicode({:?})", word)
|
||||
};
|
||||
writeln!(file, " ({}, {}),", key, &value).unwrap();
|
||||
smallest = std::cmp::min(smallest, word.len());
|
||||
largest = std::cmp::max(largest, word.len());
|
||||
|
||||
no_invalid &= !is_always_invalid(data);
|
||||
}
|
||||
let codegenned = builder.build();
|
||||
writeln!(file, "{}", codegenned).unwrap();
|
||||
writeln!(file, ";").unwrap();
|
||||
writeln!(file, "];").unwrap();
|
||||
|
||||
writeln!(file).unwrap();
|
||||
writeln!(
|
||||
|
|
|
@ -4,3 +4,25 @@ pub use crate::vars_codegen::*;
|
|||
|
||||
pub use varcon_core::Category;
|
||||
pub use varcon_core::CategorySet;
|
||||
|
||||
pub fn find(word: &'_ unicase::UniCase<&str>) -> Option<&'static [(u8, &'static VariantsMap)]> {
|
||||
VARS_DICTIONARY
|
||||
.binary_search_by_key(word, |(key, _)| key.convert())
|
||||
.map(|i| VARS_DICTIONARY[i].1)
|
||||
.ok()
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub(crate) enum EncodedStr {
|
||||
//Unicode(&'static str),
|
||||
Ascii(&'static str),
|
||||
}
|
||||
|
||||
impl EncodedStr {
|
||||
fn convert(self) -> unicase::UniCase<&'static str> {
|
||||
match self {
|
||||
//EncodedStr::Unicode(s) => unicase::UniCase::unicode(s),
|
||||
EncodedStr::Ascii(s) => unicase::UniCase::ascii(s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -96,8 +96,8 @@ impl BuiltIn {
|
|||
|
||||
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
|
||||
if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) {
|
||||
map_lookup(&typos_vars::VARS_DICTIONARY, word)
|
||||
.map(|variants| self.select_variant(variants))
|
||||
let word_case = unicase::UniCase::new(word);
|
||||
typos_vars::find(&word_case).map(|variants| self.select_variant(variants))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue