mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-22 09:01:04 -05:00
feat(dict): Add more corrections
This commit is contained in:
parent
ce3760d125
commit
49459cede7
3 changed files with 126053 additions and 17220 deletions
25426
crates/typos-dict/assets/words.csv
vendored
25426
crates/typos-dict/assets/words.csv
vendored
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -27,6 +27,19 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
|||
}));
|
||||
});
|
||||
|
||||
let rows: Dict = rows
|
||||
.into_iter()
|
||||
.filter(|(t, _)| is_word(t))
|
||||
.filter_map(|(t, c)| {
|
||||
let new_c: Vec<_> = c.into_iter().filter(|c| is_word(c)).collect();
|
||||
if new_c.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some((t, new_c))
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let disallowed_typos = varcon_words();
|
||||
let word_variants = proper_word_variants();
|
||||
let rows: Dict = rows
|
||||
|
@ -67,6 +80,10 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
|||
wtr.flush().unwrap();
|
||||
}
|
||||
|
||||
/// Reports whether `word` is made up solely of alphabetic characters.
///
/// Any digit, punctuation mark, or whitespace character makes the result
/// `false`. An empty string contains no offending character, so it yields
/// `true`.
fn is_word(word: &str) -> bool {
    // Search for the first character that is *not* alphabetic; the word is
    // acceptable exactly when no such character exists.
    !word.contains(|ch: char| !ch.is_alphabetic())
}
|
||||
|
||||
fn varcon_words() -> HashSet<unicase::UniCase<&'static str>> {
|
||||
// Even include improper ones because we should be letting varcon handle that rather than our
|
||||
// dictionary
|
||||
|
|
Loading…
Reference in a new issue