mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-23 16:12:25 -05:00
commit
36709b6f37
7 changed files with 86 additions and 17 deletions
26
Cargo.lock
generated
26
Cargo.lock
generated
|
@ -1,5 +1,15 @@
|
||||||
# This file is automatically @generated by Cargo.
|
# This file is automatically @generated by Cargo.
|
||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
|
[[package]]
|
||||||
|
name = "ahash"
|
||||||
|
version = "0.5.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4eb6ec8807cd25b59e6b8100815afc73f54e294f1a425a2e555971969889a8f8"
|
||||||
|
dependencies = [
|
||||||
|
"getrandom 0.2.0",
|
||||||
|
"lazy_static",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aho-corasick"
|
name = "aho-corasick"
|
||||||
version = "0.7.15"
|
version = "0.7.15"
|
||||||
|
@ -377,6 +387,17 @@ dependencies = [
|
||||||
"wasi",
|
"wasi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "getrandom"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ee8025cf36f917e6a52cce185b7c7177689b838b7ec138364e50cc2277a56cf4"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"libc",
|
||||||
|
"wasi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "globset"
|
name = "globset"
|
||||||
version = "0.4.6"
|
version = "0.4.6"
|
||||||
|
@ -719,7 +740,7 @@ version = "0.7.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
|
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"getrandom",
|
"getrandom 0.1.15",
|
||||||
"libc",
|
"libc",
|
||||||
"rand_chacha",
|
"rand_chacha",
|
||||||
"rand_core",
|
"rand_core",
|
||||||
|
@ -743,7 +764,7 @@ version = "0.5.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
|
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"getrandom",
|
"getrandom 0.1.15",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1051,6 +1072,7 @@ dependencies = [
|
||||||
name = "typos-cli"
|
name = "typos-cli"
|
||||||
version = "0.1.4"
|
version = "0.1.4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"ahash",
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"assert_fs",
|
"assert_fs",
|
||||||
"bstr",
|
"bstr",
|
||||||
|
|
|
@ -46,6 +46,7 @@ toml = "0.5"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
env_logger = "0.8"
|
env_logger = "0.8"
|
||||||
bstr = "0.2"
|
bstr = "0.2"
|
||||||
|
ahash = "0.5.8"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
assert_fs = "1.0"
|
assert_fs = "1.0"
|
||||||
|
|
|
@ -13,6 +13,9 @@ fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
writeln!(file).unwrap();
|
writeln!(file).unwrap();
|
||||||
writeln!(file, "use unicase::UniCase;").unwrap();
|
writeln!(file, "use unicase::UniCase;").unwrap();
|
||||||
|
|
||||||
|
let mut smallest = usize::MAX;
|
||||||
|
let mut largest = usize::MIN;
|
||||||
|
|
||||||
writeln!(
|
writeln!(
|
||||||
file,
|
file,
|
||||||
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
|
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
|
||||||
|
@ -26,12 +29,17 @@ fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
.map(|r| r.unwrap())
|
.map(|r| r.unwrap())
|
||||||
.collect();
|
.collect();
|
||||||
for record in &records {
|
for record in &records {
|
||||||
|
smallest = std::cmp::min(smallest, record[0].len());
|
||||||
|
largest = std::cmp::max(largest, record[0].len());
|
||||||
let value = format!(r#""{}""#, &record[1]);
|
let value = format!(r#""{}""#, &record[1]);
|
||||||
builder.entry(unicase::UniCase::new(&record[0]), &value);
|
builder.entry(unicase::UniCase::new(&record[0]), &value);
|
||||||
}
|
}
|
||||||
let codegenned = builder.build();
|
let codegenned = builder.build();
|
||||||
writeln!(file, "{}", codegenned).unwrap();
|
writeln!(file, "{}", codegenned).unwrap();
|
||||||
writeln!(file, ";").unwrap();
|
writeln!(file, ";").unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
writeln!(file, "pub const WORD_MIN: usize = {};", smallest).unwrap();
|
||||||
|
writeln!(file, "pub const WORD_MAX: usize = {};", largest).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
|
|
|
@ -33648,3 +33648,6 @@ pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static st
|
||||||
(UniCase::ascii("presumpton"), "presumption"),
|
(UniCase::ascii("presumpton"), "presumption"),
|
||||||
]),
|
]),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
pub const WORD_MIN: usize = 3;
|
||||||
|
pub const WORD_MAX: usize = 19;
|
||||||
|
|
|
@ -76,6 +76,9 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
||||||
writeln!(file, "}}").unwrap();
|
writeln!(file, "}}").unwrap();
|
||||||
writeln!(file).unwrap();
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
|
let mut smallest = usize::MAX;
|
||||||
|
let mut largest = usize::MIN;
|
||||||
|
|
||||||
writeln!(
|
writeln!(
|
||||||
file,
|
file,
|
||||||
"pub static VARS_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)]> = "
|
"pub static VARS_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)]> = "
|
||||||
|
@ -92,11 +95,17 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
|
||||||
referenced_symbols.extend(data.iter().map(|(s, _)| s));
|
referenced_symbols.extend(data.iter().map(|(s, _)| s));
|
||||||
let value = generate_link(&data);
|
let value = generate_link(&data);
|
||||||
builder.entry(unicase::UniCase::new(word), &value);
|
builder.entry(unicase::UniCase::new(word), &value);
|
||||||
|
smallest = std::cmp::min(smallest, word.len());
|
||||||
|
largest = std::cmp::max(largest, word.len());
|
||||||
}
|
}
|
||||||
let codegenned = builder.build();
|
let codegenned = builder.build();
|
||||||
writeln!(file, "{}", codegenned).unwrap();
|
writeln!(file, "{}", codegenned).unwrap();
|
||||||
writeln!(file, ";").unwrap();
|
writeln!(file, ";").unwrap();
|
||||||
|
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
writeln!(file, "pub const WORD_MIN: usize = {};", smallest).unwrap();
|
||||||
|
writeln!(file, "pub const WORD_MAX: usize = {};", largest).unwrap();
|
||||||
|
|
||||||
for (symbol, entry) in entries.iter() {
|
for (symbol, entry) in entries.iter() {
|
||||||
if !referenced_symbols.contains(symbol.as_str()) {
|
if !referenced_symbols.contains(symbol.as_str()) {
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -113081,6 +113081,9 @@ pub static VARS_DICTIONARY: phf::Map<
|
||||||
),
|
),
|
||||||
]),
|
]),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
pub const WORD_MIN: usize = 2;
|
||||||
|
pub const WORD_MAX: usize = 24;
|
||||||
pub(crate) static ENTRY_ABETTORS_7043394254318611656: VariantsMap =
|
pub(crate) static ENTRY_ABETTORS_7043394254318611656: VariantsMap =
|
||||||
[&["abettors"], &["abetters"], &["abettors"], &["abetters"]];
|
[&["abettors"], &["abetters"], &["abettors"], &["abetters"]];
|
||||||
|
|
||||||
|
|
31
src/dict.rs
31
src/dict.rs
|
@ -44,11 +44,24 @@ impl BuiltIn {
|
||||||
|
|
||||||
// Not using `Status` to avoid the allocations
|
// Not using `Status` to avoid the allocations
|
||||||
fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
|
fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
|
||||||
|
const WORD_RANGE: std::ops::RangeInclusive<usize> =
|
||||||
|
typos_dict::WORD_MIN..=typos_dict::WORD_MAX;
|
||||||
|
if WORD_RANGE.contains(&word.len()) {
|
||||||
map_lookup(&typos_dict::WORD_DICTIONARY, word)
|
map_lookup(&typos_dict::WORD_DICTIONARY, word)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
|
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
|
||||||
map_lookup(&typos_vars::VARS_DICTIONARY, word).map(|variants| self.select_variant(variants))
|
const WORD_RANGE: std::ops::RangeInclusive<usize> =
|
||||||
|
typos_vars::WORD_MIN..=typos_vars::WORD_MAX;
|
||||||
|
if WORD_RANGE.contains(&word.len()) {
|
||||||
|
map_lookup(&typos_vars::VARS_DICTIONARY, word)
|
||||||
|
.map(|variants| self.select_variant(variants))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn select_variant(
|
fn select_variant(
|
||||||
|
@ -144,8 +157,8 @@ fn case_correct(correction: &mut Cow<'_, str>, case: Case) {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Override<'i, 'w, D> {
|
pub struct Override<'i, 'w, D> {
|
||||||
identifiers: HashMap<&'i str, Status<'i>>,
|
identifiers: HashMap<&'i str, Status<'i>, ahash::RandomState>,
|
||||||
words: HashMap<unicase::UniCase<&'w str>, Status<'w>>,
|
words: HashMap<unicase::UniCase<&'w str>, Status<'w>, ahash::RandomState>,
|
||||||
inner: D,
|
inner: D,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -168,7 +181,7 @@ impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> {
|
||||||
.collect();
|
.collect();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn interpret<'z, I: Iterator<Item = (&'z str, &'z str)>>(
|
fn interpret<'z, I: Iterator<Item = (&'z str, &'z str)>>(
|
||||||
cases: I,
|
cases: I,
|
||||||
) -> impl Iterator<Item = (&'z str, Status<'z>)> {
|
) -> impl Iterator<Item = (&'z str, Status<'z>)> {
|
||||||
cases.map(|(typo, correction)| {
|
cases.map(|(typo, correction)| {
|
||||||
|
@ -186,19 +199,29 @@ impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> {
|
||||||
|
|
||||||
impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> {
|
impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> {
|
||||||
fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Option<Status<'s>> {
|
fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Option<Status<'s>> {
|
||||||
|
// Skip hashing if we can
|
||||||
|
if !self.identifiers.is_empty() {
|
||||||
self.identifiers
|
self.identifiers
|
||||||
.get(ident.token())
|
.get(ident.token())
|
||||||
.map(|c| c.borrow())
|
.map(|c| c.borrow())
|
||||||
.or_else(|| self.inner.correct_ident(ident))
|
.or_else(|| self.inner.correct_ident(ident))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Option<Status<'s>> {
|
fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Option<Status<'s>> {
|
||||||
|
// Skip hashing if we can
|
||||||
|
if !self.words.is_empty() {
|
||||||
let w = UniCase::new(word.token());
|
let w = UniCase::new(word.token());
|
||||||
// HACK: couldn't figure out the lifetime issue with replacing `cloned` with `borrow`
|
// HACK: couldn't figure out the lifetime issue with replacing `cloned` with `borrow`
|
||||||
self.words
|
self.words
|
||||||
.get(&w)
|
.get(&w)
|
||||||
.cloned()
|
.cloned()
|
||||||
.or_else(|| self.inner.correct_word(word))
|
.or_else(|| self.inner.correct_word(word))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue