Merge pull request #315 from epage/more

feat(dict): Add more corrections
This commit is contained in:
Ed Page 2021-07-27 15:08:18 -05:00 committed by GitHub
commit ffb167964f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 126053 additions and 17220 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@@ -27,6 +27,19 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
})); }));
}); });
let rows: Dict = rows
.into_iter()
.filter(|(t, _)| is_word(t))
.filter_map(|(t, c)| {
let new_c: Vec<_> = c.into_iter().filter(|c| is_word(c)).collect();
if new_c.is_empty() {
None
} else {
Some((t, new_c))
}
})
.collect();
let disallowed_typos = varcon_words(); let disallowed_typos = varcon_words();
let word_variants = proper_word_variants(); let word_variants = proper_word_variants();
let rows: Dict = rows let rows: Dict = rows
@@ -67,6 +80,10 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
wtr.flush().unwrap(); wtr.flush().unwrap();
} }
/// Reports whether `word` is made up solely of alphabetic characters.
///
/// Each `char` is tested with [`char::is_alphabetic`], so a single digit, space, hyphen,
/// apostrophe, or other non-letter makes the result `false`. An empty string contains no
/// offending character and is therefore reported as a word.
fn is_word(word: &str) -> bool {
    // A word is anything in which no non-alphabetic character can be found.
    !word.contains(|ch: char| !ch.is_alphabetic())
}
fn varcon_words() -> HashSet<unicase::UniCase<&'static str>> { fn varcon_words() -> HashSet<unicase::UniCase<&'static str>> {
// Even include improper ones because we should be letting varcon handle that rather than our // Even include improper ones because we should be letting varcon handle that rather than our
// dictionary // dictionary