Merge pull request #635 from epage/nilable

fix(dict): Remove nilable
This commit is contained in:
Ed Page 2022-12-06 12:21:39 -06:00 committed by GitHub
commit 9114a2451e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 34 additions and 6 deletions

3
crates/typos-dict/assets/allowed.csv vendored Normal file
View file

@ -0,0 +1,3 @@
nilable,used in ruby community
thead,html tag
hardlinked,filesystem term
1 nilable used in ruby community
2 thead html tag
3 hardlinked filesystem term

View file

@ -33022,7 +33022,6 @@ nigthmares,nightmares
nihilim,nihilism nihilim,nihilism
nihilisim,nihilism nihilisim,nihilism
nihilsim,nihilism nihilsim,nihilism
nilable,nillable
nilihism,nihilism nilihism,nihilism
nimutes,minutes nimutes,minutes
nin,inn,min,bin,nine nin,inn,min,bin,nine

Can't render this file because it is too large.

View file

@ -68764,7 +68764,6 @@ pub static WORD_NI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictg
dictgen::InsensitiveStr::Ascii("hilim"), dictgen::InsensitiveStr::Ascii("hilim"),
dictgen::InsensitiveStr::Ascii("hilisim"), dictgen::InsensitiveStr::Ascii("hilisim"),
dictgen::InsensitiveStr::Ascii("hilsim"), dictgen::InsensitiveStr::Ascii("hilsim"),
dictgen::InsensitiveStr::Ascii("lable"),
dictgen::InsensitiveStr::Ascii("lihism"), dictgen::InsensitiveStr::Ascii("lihism"),
dictgen::InsensitiveStr::Ascii("mutes"), dictgen::InsensitiveStr::Ascii("mutes"),
dictgen::InsensitiveStr::Ascii("n"), dictgen::InsensitiveStr::Ascii("n"),
@ -68817,7 +68816,6 @@ pub static WORD_NI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictg
&["nihilism"], &["nihilism"],
&["nihilism"], &["nihilism"],
&["nihilism"], &["nihilism"],
&["nillable"],
&["nihilism"], &["nihilism"],
&["minutes"], &["minutes"],
&["inn", "min", "bin", "nine"], &["inn", "min", "bin", "nine"],

View file

@ -50,17 +50,26 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
}) })
.collect(); .collect();
let disallowed_typos = varcon_words(); let varcon_words = varcon_words();
let allowed_words = allowed_words();
let word_variants = proper_word_variants(); let word_variants = proper_word_variants();
let rows: Dict = rows let rows: Dict = rows
.into_iter() .into_iter()
.filter(|(typo, _)| { .filter(|(typo, _)| {
let is_disallowed = disallowed_typos.contains(&unicase::UniCase::new(typo)); let is_disallowed = varcon_words.contains(&unicase::UniCase::new(typo));
if is_disallowed { if is_disallowed {
eprintln!("{:?} is disallowed", typo); eprintln!("{:?} is disallowed; in varcon", typo);
} }
!is_disallowed !is_disallowed
}) })
.filter(|(typo, _)| {
if let Some(reason) = allowed_words.get(typo.as_ref()) {
eprintln!("{:?} is disallowed; {}", typo, reason);
false
} else {
true
}
})
.map(|(typo, corrections)| { .map(|(typo, corrections)| {
let mut new_corrections = vec![]; let mut new_corrections = vec![];
for correction in corrections { for correction in corrections {
@ -137,3 +146,22 @@ fn find_best_match<'c>(
matches.sort_unstable(); matches.sort_unstable();
matches.into_iter().next().map(|(_, r)| r) matches.into_iter().next().map(|(_, r)| r)
} }
/// Loads the allow-list of words that must not be treated as typos.
///
/// Reads `assets/allowed.csv` (resolved against the current working directory) as a
/// header-less CSV whose first field is the word and whose second field is the reason it
/// is allowed. Rows may carry extra fields; only the first two are read.
///
/// Returns a map from the ASCII-lowercased word to its reason.
///
/// # Panics
///
/// Panics if the file cannot be read, if a row fails to parse, or if a row has fewer than
/// two fields. Each panic message names the file and, for row-level problems, the 1-based
/// record number so the offending entry can be found quickly.
fn allowed_words() -> std::collections::HashMap<String, String> {
    let allowed_path = "assets/allowed.csv";
    let data = std::fs::read(allowed_path)
        .unwrap_or_else(|err| panic!("failed to read {}: {}", allowed_path, err));
    csv::ReaderBuilder::new()
        .has_headers(false)
        // Rows are allowed to differ in field count; only the first two fields matter.
        .flexible(true)
        .from_reader(data.as_slice())
        .records()
        .enumerate()
        .map(|(index, record)| {
            let number = index + 1;
            let record = record.unwrap_or_else(|err| {
                panic!("{}: record {} is invalid: {}", allowed_path, number, err)
            });
            let mut fields = record.iter();
            // Keys are stored lowercased (ASCII only).
            let typo = fields
                .next()
                .unwrap_or_else(|| panic!("{}: record {} is missing the typo", allowed_path, number))
                .to_ascii_lowercase();
            let reason = fields
                .next()
                .unwrap_or_else(|| {
                    panic!("{}: record {} is missing the reason", allowed_path, number)
                })
                .to_owned();
            (typo, reason)
        })
        .collect()
}