mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-22 00:51:11 -05:00
refactor(dict): Pull out table-lookup logic
Before, we guaranteed that only some dicts were pre-sorted. Now, all of them are guaranteed to be pre-sorted. This also gives every dict the word-length range check that lets it skip the lookup entirely. But this is really about refactoring in preparation for experimenting with other lookup options, like tries.
This commit is contained in:
parent
bfa7888f82
commit
a1e95bc7c0
22 changed files with 273300 additions and 174464 deletions
17
Cargo.lock
generated
17
Cargo.lock
generated
|
@ -223,6 +223,7 @@ name = "codespell-codegen"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"codegenrs",
|
"codegenrs",
|
||||||
|
"dictgen",
|
||||||
"itertools 0.10.0",
|
"itertools 0.10.0",
|
||||||
"structopt",
|
"structopt",
|
||||||
"unicase",
|
"unicase",
|
||||||
|
@ -232,7 +233,7 @@ dependencies = [
|
||||||
name = "codespell-dict"
|
name = "codespell-dict"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log",
|
"dictgen",
|
||||||
"unicase",
|
"unicase",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -412,6 +413,13 @@ dependencies = [
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dictgen"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"unicase",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "difference"
|
name = "difference"
|
||||||
version = "2.0.0"
|
version = "2.0.0"
|
||||||
|
@ -806,6 +814,7 @@ name = "misspell-codegen"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"codegenrs",
|
"codegenrs",
|
||||||
|
"dictgen",
|
||||||
"itertools 0.10.0",
|
"itertools 0.10.0",
|
||||||
"regex",
|
"regex",
|
||||||
"structopt",
|
"structopt",
|
||||||
|
@ -816,6 +825,7 @@ dependencies = [
|
||||||
name = "misspell-dict"
|
name = "misspell-dict"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"dictgen",
|
||||||
"log",
|
"log",
|
||||||
"unicase",
|
"unicase",
|
||||||
]
|
]
|
||||||
|
@ -1573,6 +1583,7 @@ dependencies = [
|
||||||
name = "typos-vars"
|
name = "typos-vars"
|
||||||
version = "0.6.0"
|
version = "0.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"dictgen",
|
||||||
"log",
|
"log",
|
||||||
"unicase",
|
"unicase",
|
||||||
"varcon-core",
|
"varcon-core",
|
||||||
|
@ -1585,6 +1596,7 @@ dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"clap-verbosity-flag",
|
"clap-verbosity-flag",
|
||||||
"codegenrs",
|
"codegenrs",
|
||||||
|
"dictgen",
|
||||||
"env_logger 0.7.1",
|
"env_logger 0.7.1",
|
||||||
"itertools 0.10.0",
|
"itertools 0.10.0",
|
||||||
"log",
|
"log",
|
||||||
|
@ -1774,6 +1786,7 @@ name = "wikipedia-codegen"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"codegenrs",
|
"codegenrs",
|
||||||
|
"dictgen",
|
||||||
"itertools 0.10.0",
|
"itertools 0.10.0",
|
||||||
"structopt",
|
"structopt",
|
||||||
"unicase",
|
"unicase",
|
||||||
|
@ -1783,7 +1796,7 @@ dependencies = [
|
||||||
name = "wikipedia-dict"
|
name = "wikipedia-dict"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log",
|
"dictgen",
|
||||||
"unicase",
|
"unicase",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,7 @@ members = [
|
||||||
"crates/typos",
|
"crates/typos",
|
||||||
"crates/typos-dict", "crates/typos-dict/codegen", "crates/typos-dict/verify",
|
"crates/typos-dict", "crates/typos-dict/codegen", "crates/typos-dict/verify",
|
||||||
"crates/typos-vars", "crates/typos-vars/codegen",
|
"crates/typos-vars", "crates/typos-vars/codegen",
|
||||||
|
"crates/dictgen",
|
||||||
"crates/codespell-dict", "crates/codespell-dict/codegen",
|
"crates/codespell-dict", "crates/codespell-dict/codegen",
|
||||||
"crates/misspell-dict", "crates/misspell-dict/codegen",
|
"crates/misspell-dict", "crates/misspell-dict/codegen",
|
||||||
"crates/wikipedia-dict", "crates/wikipedia-dict/codegen",
|
"crates/wikipedia-dict", "crates/wikipedia-dict/codegen",
|
||||||
|
|
|
@ -20,4 +20,4 @@ disable-release = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
unicase = "2.5"
|
unicase = "2.5"
|
||||||
log = "0.4"
|
dictgen = { version = "0.1", path = "../dictgen" }
|
||||||
|
|
|
@ -22,3 +22,4 @@ unicase = "2.5"
|
||||||
itertools = "0.10"
|
itertools = "0.10"
|
||||||
codegenrs = "1.0"
|
codegenrs = "1.0"
|
||||||
structopt = "0.3"
|
structopt = "0.3"
|
||||||
|
dictgen = { version = "0.1", path = "../../dictgen" }
|
||||||
|
|
|
@ -30,20 +30,17 @@ fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
env!("CARGO_PKG_NAME")
|
env!("CARGO_PKG_NAME")
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
|
||||||
writeln!(file).unwrap();
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
let dict = parse_dict(DICT);
|
let dict = parse_dict(DICT);
|
||||||
|
|
||||||
writeln!(file, "pub static WORD_DICTIONARY: &[(&str, &[&str])] = &[").unwrap();
|
dictgen::generate_table(
|
||||||
for (typo, corrections) in dict {
|
file,
|
||||||
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
|
"WORD_DICTIONARY",
|
||||||
let value = format!("&[{}]", value);
|
"&[&str]",
|
||||||
|
dict.map(|kv| (kv.0, format!("&{:?}", kv.1))),
|
||||||
let key = format!("{:?}", typo);
|
)
|
||||||
writeln!(file, " ({}, {}),", key, &value).unwrap();
|
.unwrap();
|
||||||
}
|
|
||||||
writeln!(file, "];").unwrap();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
|
|
File diff suppressed because it is too large
Load diff
7
crates/dictgen/Cargo.toml
Normal file
7
crates/dictgen/Cargo.toml
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
[package]
|
||||||
|
name = "dictgen"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
unicase = "2.5"
|
74
crates/dictgen/src/lib.rs
Normal file
74
crates/dictgen/src/lib.rs
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
pub fn generate_table<'d, W: std::io::Write, V: std::fmt::Display>(
|
||||||
|
file: &mut W,
|
||||||
|
name: &str,
|
||||||
|
value_type: &str,
|
||||||
|
data: impl Iterator<Item = (&'d str, V)>,
|
||||||
|
) -> Result<(), std::io::Error> {
|
||||||
|
let mut data: Vec<_> = data.collect();
|
||||||
|
data.sort_unstable_by_key(|v| unicase::UniCase::new(v.0));
|
||||||
|
|
||||||
|
let mut smallest = usize::MAX;
|
||||||
|
let mut largest = usize::MIN;
|
||||||
|
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"pub static {}: dictgen::DictTable<{}> = dictgen::DictTable {{",
|
||||||
|
name, value_type
|
||||||
|
)?;
|
||||||
|
writeln!(file, " table: &[")?;
|
||||||
|
for (key, value) in data {
|
||||||
|
smallest = std::cmp::min(smallest, key.len());
|
||||||
|
largest = std::cmp::max(largest, key.len());
|
||||||
|
|
||||||
|
let key = if key.is_ascii() {
|
||||||
|
format!("dictgen::InsensitiveStr::Ascii({:?})", key)
|
||||||
|
} else {
|
||||||
|
format!("dictgen::InsensitiveStr::Unicode({:?})", key)
|
||||||
|
};
|
||||||
|
|
||||||
|
writeln!(file, " ({}, {}),", key, value)?;
|
||||||
|
}
|
||||||
|
writeln!(file, " ],")?;
|
||||||
|
writeln!(file, " range: {}..={},", smallest, largest)?;
|
||||||
|
writeln!(file, "}};")?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct DictTable<V: 'static> {
|
||||||
|
pub table: &'static [(InsensitiveStr, V)],
|
||||||
|
pub range: std::ops::RangeInclusive<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<V> DictTable<V> {
|
||||||
|
pub fn find(&self, word: &'_ unicase::UniCase<&str>) -> Option<&'static V> {
|
||||||
|
if self.range.contains(&word.len()) {
|
||||||
|
self.table
|
||||||
|
.binary_search_by_key(word, |(key, _)| key.convert())
|
||||||
|
.map(|i| &self.table[i].1)
|
||||||
|
.ok()
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn iter(&self) -> impl Iterator<Item = (unicase::UniCase<&'static str>, &'static V)> {
|
||||||
|
self.table.iter().map(|row| (row.0.convert(), &row.1))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Avoid unicase's use of const-fn so large tables don't OOM
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
pub enum InsensitiveStr {
|
||||||
|
Unicode(&'static str),
|
||||||
|
Ascii(&'static str),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl InsensitiveStr {
|
||||||
|
fn convert(self) -> unicase::UniCase<&'static str> {
|
||||||
|
match self {
|
||||||
|
InsensitiveStr::Unicode(s) => unicase::UniCase::unicode(s),
|
||||||
|
InsensitiveStr::Ascii(s) => unicase::UniCase::ascii(s),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -21,3 +21,4 @@ disable-release = true
|
||||||
[dependencies]
|
[dependencies]
|
||||||
unicase = "2.5"
|
unicase = "2.5"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
|
dictgen = { version = "0.1", path = "../dictgen" }
|
||||||
|
|
|
@ -23,3 +23,4 @@ itertools = "0.10"
|
||||||
codegenrs = "1.0"
|
codegenrs = "1.0"
|
||||||
structopt = "0.3"
|
structopt = "0.3"
|
||||||
regex = "1"
|
regex = "1"
|
||||||
|
dictgen = { version = "0.1", path = "../../dictgen" }
|
||||||
|
|
|
@ -62,7 +62,6 @@ fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
env!("CARGO_PKG_NAME")
|
env!("CARGO_PKG_NAME")
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
|
||||||
writeln!(file).unwrap();
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
let Words {
|
let Words {
|
||||||
|
@ -70,64 +69,32 @@ fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
american,
|
american,
|
||||||
british,
|
british,
|
||||||
} = parse_dict(DICT);
|
} = parse_dict(DICT);
|
||||||
let mut main: Vec<_> = main.into_iter().collect();
|
|
||||||
main.sort_unstable_by(|a, b| {
|
|
||||||
unicase::UniCase::new(a.0)
|
|
||||||
.partial_cmp(&unicase::UniCase::new(b.0))
|
|
||||||
.unwrap()
|
|
||||||
});
|
|
||||||
let mut american: Vec<_> = american.into_iter().collect();
|
|
||||||
american.sort_unstable_by(|a, b| {
|
|
||||||
unicase::UniCase::new(a.0)
|
|
||||||
.partial_cmp(&unicase::UniCase::new(b.0))
|
|
||||||
.unwrap()
|
|
||||||
});
|
|
||||||
let mut british: Vec<_> = british.into_iter().collect();
|
|
||||||
british.sort_unstable_by(|a, b| {
|
|
||||||
unicase::UniCase::new(a.0)
|
|
||||||
.partial_cmp(&unicase::UniCase::new(b.0))
|
|
||||||
.unwrap()
|
|
||||||
});
|
|
||||||
|
|
||||||
writeln!(file, "pub static MAIN_DICTIONARY: &[(&str, &[&str])] = &[").unwrap();
|
dictgen::generate_table(
|
||||||
for (typo, corrections) in main.into_iter() {
|
|
||||||
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
|
|
||||||
let value = format!("&[{}]", value);
|
|
||||||
|
|
||||||
let key = format!("{:?}", typo);
|
|
||||||
writeln!(file, " ({}, {}),", key, &value).unwrap();
|
|
||||||
}
|
|
||||||
writeln!(file, "];").unwrap();
|
|
||||||
writeln!(file).unwrap();
|
|
||||||
|
|
||||||
writeln!(
|
|
||||||
file,
|
file,
|
||||||
"pub static AMERICAN_DICTIONARY: &[(&str, &[&str])] = &["
|
"MAIN_DICTIONARY",
|
||||||
|
"&[&str]",
|
||||||
|
main.into_iter().map(|kv| (kv.0, format!("&{:?}", kv.1))),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
for (typo, corrections) in american.into_iter() {
|
|
||||||
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
|
|
||||||
let value = format!("&[{}]", value);
|
|
||||||
|
|
||||||
let key = format!("{:?}", typo);
|
dictgen::generate_table(
|
||||||
writeln!(file, " ({}, {}),", key, &value).unwrap();
|
|
||||||
}
|
|
||||||
writeln!(file, "];").unwrap();
|
|
||||||
writeln!(file).unwrap();
|
|
||||||
|
|
||||||
writeln!(
|
|
||||||
file,
|
file,
|
||||||
"pub static BRITISH_DICTIONARY: &[(&str, &[&str])] = &["
|
"AMERICAN_DICTIONARY",
|
||||||
|
"&[&str]",
|
||||||
|
american
|
||||||
|
.into_iter()
|
||||||
|
.map(|kv| (kv.0, format!("&{:?}", kv.1))),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
for (typo, corrections) in british.into_iter() {
|
|
||||||
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
|
|
||||||
let value = format!("&[{}]", value);
|
|
||||||
|
|
||||||
let key = format!("{:?}", typo);
|
dictgen::generate_table(
|
||||||
writeln!(file, " ({}, {}),", key, &value).unwrap();
|
file,
|
||||||
}
|
"BRITISH_DICTIONARY",
|
||||||
writeln!(file, "];").unwrap();
|
"&[&str]",
|
||||||
|
british.into_iter().map(|kv| (kv.0, format!("&{:?}", kv.1))),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -17,4 +17,5 @@ codecov = { repository = "crate-ci/typos" }
|
||||||
[dependencies]
|
[dependencies]
|
||||||
unicase = "2.5"
|
unicase = "2.5"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
|
dictgen = { version = "0.1", path = "../dictgen" }
|
||||||
varcon-core = { version = "^2.0", path = "../varcon-core", features = ["flags"] }
|
varcon-core = { version = "^2.0", path = "../varcon-core", features = ["flags"] }
|
||||||
|
|
|
@ -29,3 +29,4 @@ log = "0.4"
|
||||||
env_logger = "0.7"
|
env_logger = "0.7"
|
||||||
clap-verbosity-flag = "0.3"
|
clap-verbosity-flag = "0.3"
|
||||||
itertools = "0.10"
|
itertools = "0.10"
|
||||||
|
dictgen = { version = "0.1", path = "../../dictgen" }
|
||||||
|
|
|
@ -73,46 +73,27 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
||||||
writeln!(file, "}}").unwrap();
|
writeln!(file, "}}").unwrap();
|
||||||
writeln!(file).unwrap();
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
let mut smallest = usize::MAX;
|
|
||||||
let mut largest = usize::MIN;
|
|
||||||
let mut no_invalid = true;
|
|
||||||
|
|
||||||
writeln!(
|
|
||||||
file,
|
|
||||||
"pub(crate) static VARS_DICTIONARY: &[(crate::EncodedStr, &[(u8, &VariantsMap)])] = &["
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
let entry_sets = entry_sets(entries.iter());
|
let entry_sets = entry_sets(entries.iter());
|
||||||
let mut referenced_symbols: HashSet<&str> = HashSet::new();
|
let mut referenced_symbols: HashSet<&str> = HashSet::new();
|
||||||
for (word, data) in entry_sets.iter() {
|
dictgen::generate_table(
|
||||||
if is_always_valid(data) {
|
|
||||||
// No need to convert from current form to target form
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
referenced_symbols.extend(data.iter().map(|(s, _)| s));
|
|
||||||
let value = generate_link(&data);
|
|
||||||
let word = unicase::UniCase::new(word);
|
|
||||||
let key = if word.is_ascii() {
|
|
||||||
format!("crate::EncodedStr::Ascii({:?})", word)
|
|
||||||
} else {
|
|
||||||
format!("crate::EncodedStr::Unicode({:?})", word)
|
|
||||||
};
|
|
||||||
writeln!(file, " ({}, {}),", key, &value).unwrap();
|
|
||||||
smallest = std::cmp::min(smallest, word.len());
|
|
||||||
largest = std::cmp::max(largest, word.len());
|
|
||||||
|
|
||||||
no_invalid &= !is_always_invalid(data);
|
|
||||||
}
|
|
||||||
writeln!(file, "];").unwrap();
|
|
||||||
|
|
||||||
writeln!(file).unwrap();
|
|
||||||
writeln!(
|
|
||||||
file,
|
file,
|
||||||
"pub const WORD_RANGE: std::ops::RangeInclusive<usize> = {}..={};",
|
"VARS_DICTIONARY",
|
||||||
smallest, largest
|
"&[(u8, &VariantsMap)]",
|
||||||
|
entry_sets.iter().flat_map(|kv| {
|
||||||
|
let (word, data) = kv;
|
||||||
|
if is_always_valid(data) {
|
||||||
|
// No need to convert from current form to target form
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
referenced_symbols.extend(data.iter().map(|(s, _)| s));
|
||||||
|
let value = generate_link(&data);
|
||||||
|
Some((*word, value))
|
||||||
|
}
|
||||||
|
}),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
let no_invalid = entry_sets.values().all(|data| !is_always_invalid(data));
|
||||||
writeln!(file).unwrap();
|
writeln!(file).unwrap();
|
||||||
writeln!(file, "pub const NO_INVALID: bool = {:?};", no_invalid,).unwrap();
|
writeln!(file, "pub const NO_INVALID: bool = {:?};", no_invalid,).unwrap();
|
||||||
|
|
||||||
|
|
|
@ -4,25 +4,3 @@ pub use crate::vars_codegen::*;
|
||||||
|
|
||||||
pub use varcon_core::Category;
|
pub use varcon_core::Category;
|
||||||
pub use varcon_core::CategorySet;
|
pub use varcon_core::CategorySet;
|
||||||
|
|
||||||
pub fn find(word: &'_ unicase::UniCase<&str>) -> Option<&'static [(u8, &'static VariantsMap)]> {
|
|
||||||
VARS_DICTIONARY
|
|
||||||
.binary_search_by_key(word, |(key, _)| key.convert())
|
|
||||||
.map(|i| VARS_DICTIONARY[i].1)
|
|
||||||
.ok()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug)]
|
|
||||||
pub(crate) enum EncodedStr {
|
|
||||||
//Unicode(&'static str),
|
|
||||||
Ascii(&'static str),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl EncodedStr {
|
|
||||||
fn convert(self) -> unicase::UniCase<&'static str> {
|
|
||||||
match self {
|
|
||||||
//EncodedStr::Unicode(s) => unicase::UniCase::unicode(s),
|
|
||||||
EncodedStr::Ascii(s) => unicase::UniCase::ascii(s),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -20,4 +20,4 @@ disable-release = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
unicase = "2.5"
|
unicase = "2.5"
|
||||||
log = "0.4"
|
dictgen = { version = "0.1", path = "../dictgen" }
|
||||||
|
|
|
@ -22,3 +22,4 @@ unicase = "2.5"
|
||||||
itertools = "0.10"
|
itertools = "0.10"
|
||||||
codegenrs = "1.0"
|
codegenrs = "1.0"
|
||||||
structopt = "0.3"
|
structopt = "0.3"
|
||||||
|
dictgen = { version = "0.1", path = "../../dictgen" }
|
||||||
|
|
|
@ -30,20 +30,17 @@ fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
env!("CARGO_PKG_NAME")
|
env!("CARGO_PKG_NAME")
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
|
||||||
writeln!(file).unwrap();
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
let dict = parse_dict(DICT);
|
let dict = parse_dict(DICT);
|
||||||
|
|
||||||
writeln!(file, "pub static WORD_DICTIONARY: &[(&str, &[&str])] = &[").unwrap();
|
dictgen::generate_table(
|
||||||
for (typo, corrections) in dict {
|
file,
|
||||||
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
|
"WORD_DICTIONARY",
|
||||||
let value = format!("&[{}]", value);
|
"&[&str]",
|
||||||
|
dict.map(|kv| (kv.0, format!("&{:?}", kv.1))),
|
||||||
let key = format!("{:?}", typo);
|
)
|
||||||
writeln!(file, " ({}, {}),", key, &value).unwrap();
|
.unwrap();
|
||||||
}
|
|
||||||
writeln!(file, "];").unwrap();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
|
|
File diff suppressed because it is too large
Load diff
31
src/dict.rs
31
src/dict.rs
|
@ -34,14 +34,15 @@ impl BuiltIn {
|
||||||
}
|
}
|
||||||
|
|
||||||
let word = word_token.token();
|
let word = word_token.token();
|
||||||
let mut corrections = if let Some(corrections) = self.correct_with_dict(word) {
|
let word_case = unicase::UniCase::new(word);
|
||||||
|
let mut corrections = if let Some(corrections) = self.correct_with_dict(word_case) {
|
||||||
if corrections.is_empty() {
|
if corrections.is_empty() {
|
||||||
Status::Invalid
|
Status::Invalid
|
||||||
} else {
|
} else {
|
||||||
self.chain_with_vars(corrections)
|
self.chain_with_vars(corrections)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
self.correct_with_vars(word)?
|
self.correct_with_vars(word_case)?
|
||||||
};
|
};
|
||||||
corrections
|
corrections
|
||||||
.corrections_mut()
|
.corrections_mut()
|
||||||
|
@ -53,7 +54,7 @@ impl BuiltIn {
|
||||||
#[cfg(feature = "dict")]
|
#[cfg(feature = "dict")]
|
||||||
impl BuiltIn {
|
impl BuiltIn {
|
||||||
// Not using `Status` to avoid the allocations
|
// Not using `Status` to avoid the allocations
|
||||||
fn correct_with_dict(&self, word: &str) -> Option<&'static [&'static str]> {
|
fn correct_with_dict(&self, word: unicase::UniCase<&str>) -> Option<&'static [&'static str]> {
|
||||||
if typos_dict::WORD_RANGE.contains(&word.len()) {
|
if typos_dict::WORD_RANGE.contains(&word.len()) {
|
||||||
map_lookup(&typos_dict::WORD_DICTIONARY, word)
|
map_lookup(&typos_dict::WORD_DICTIONARY, word)
|
||||||
} else {
|
} else {
|
||||||
|
@ -64,7 +65,7 @@ impl BuiltIn {
|
||||||
|
|
||||||
#[cfg(not(feature = "dict"))]
|
#[cfg(not(feature = "dict"))]
|
||||||
impl BuiltIn {
|
impl BuiltIn {
|
||||||
fn correct_with_dict(&self, _word: &str) -> Option<&'static [&'static str]> {
|
fn correct_with_dict(&self, _word: unicase::UniCase<&str>) -> Option<&'static [&'static str]> {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -75,7 +76,7 @@ impl BuiltIn {
|
||||||
if self.is_vars_enabled() {
|
if self.is_vars_enabled() {
|
||||||
let mut chained: Vec<_> = corrections
|
let mut chained: Vec<_> = corrections
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(|c| match self.correct_with_vars(c) {
|
.flat_map(|c| match self.correct_with_vars(unicase::UniCase::new(c)) {
|
||||||
Some(Status::Valid) | None => vec![Cow::Borrowed(*c)],
|
Some(Status::Valid) | None => vec![Cow::Borrowed(*c)],
|
||||||
Some(Status::Corrections(vars)) => vars,
|
Some(Status::Corrections(vars)) => vars,
|
||||||
Some(Status::Invalid) => {
|
Some(Status::Invalid) => {
|
||||||
|
@ -94,10 +95,11 @@ impl BuiltIn {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
|
fn correct_with_vars(&self, word: unicase::UniCase<&str>) -> Option<Status<'static>> {
|
||||||
if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) {
|
if self.is_vars_enabled() {
|
||||||
let word_case = unicase::UniCase::new(word);
|
typos_vars::VARS_DICTIONARY
|
||||||
typos_vars::find(&word_case).map(|variants| self.select_variant(variants))
|
.find(&word)
|
||||||
|
.map(|variants| self.select_variant(variants))
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
@ -158,7 +160,7 @@ impl BuiltIn {
|
||||||
Status::Corrections(corrections.iter().map(|c| Cow::Borrowed(*c)).collect())
|
Status::Corrections(corrections.iter().map(|c| Cow::Borrowed(*c)).collect())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn correct_with_vars(&self, _word: &str) -> Option<Status<'static>> {
|
fn correct_with_vars(&self, _word: unicase::UniCase<&str>) -> Option<Status<'static>> {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -173,7 +175,10 @@ impl typos::Dictionary for BuiltIn {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn map_lookup<V: Clone>(map: &'static phf::Map<UniCase<&'static str>, V>, key: &str) -> Option<V> {
|
fn map_lookup<V: Clone>(
|
||||||
|
map: &'static phf::Map<UniCase<&'static str>, V>,
|
||||||
|
key: unicase::UniCase<&str>,
|
||||||
|
) -> Option<V> {
|
||||||
// This transmute should be safe as `get` will not store the reference with
|
// This transmute should be safe as `get` will not store the reference with
|
||||||
// the expanded lifetime. This is due to `Borrow` being overly strict and
|
// the expanded lifetime. This is due to `Borrow` being overly strict and
|
||||||
// can't have an impl for `&'static str` to `Borrow<&'a str>`.
|
// can't have an impl for `&'static str` to `Borrow<&'a str>`.
|
||||||
|
@ -181,8 +186,8 @@ fn map_lookup<V: Clone>(map: &'static phf::Map<UniCase<&'static str>, V>, key: &
|
||||||
//
|
//
|
||||||
// See https://github.com/rust-lang/rust/issues/28853#issuecomment-158735548
|
// See https://github.com/rust-lang/rust/issues/28853#issuecomment-158735548
|
||||||
unsafe {
|
unsafe {
|
||||||
let key = ::std::mem::transmute::<_, &'static str>(key);
|
let key = ::std::mem::transmute::<_, unicase::UniCase<&'static str>>(key);
|
||||||
map.get(&UniCase::new(key)).cloned()
|
map.get(&key).cloned()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue