From c963f680830bcbd4dd2ca063809073fba9f533d9 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Tue, 6 Dec 2022 10:47:08 -0600 Subject: [PATCH] fix(dict): Remove nilable See conversation in #613 --- crates/typos-dict/assets/allowed.csv | 3 +++ crates/typos-dict/assets/words.csv | 1 - crates/typos-dict/src/dict_codegen.rs | 2 -- crates/typos-dict/tests/verify.rs | 34 ++++++++++++++++++++++++--- 4 files changed, 34 insertions(+), 6 deletions(-) create mode 100644 crates/typos-dict/assets/allowed.csv diff --git a/crates/typos-dict/assets/allowed.csv b/crates/typos-dict/assets/allowed.csv new file mode 100644 index 0000000..68663ba --- /dev/null +++ b/crates/typos-dict/assets/allowed.csv @@ -0,0 +1,3 @@ +nilable,used in ruby community +thead,html tag +hardlinked,filesystem term diff --git a/crates/typos-dict/assets/words.csv b/crates/typos-dict/assets/words.csv index 9dfbf37..5d8219f 100644 --- a/crates/typos-dict/assets/words.csv +++ b/crates/typos-dict/assets/words.csv @@ -33022,7 +33022,6 @@ nigthmares,nightmares nihilim,nihilism nihilisim,nihilism nihilsim,nihilism -nilable,nillable nilihism,nihilism nimutes,minutes nin,inn,min,bin,nine diff --git a/crates/typos-dict/src/dict_codegen.rs b/crates/typos-dict/src/dict_codegen.rs index b9cfa86..380e8d6 100644 --- a/crates/typos-dict/src/dict_codegen.rs +++ b/crates/typos-dict/src/dict_codegen.rs @@ -68764,7 +68764,6 @@ pub static WORD_NI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictg dictgen::InsensitiveStr::Ascii("hilim"), dictgen::InsensitiveStr::Ascii("hilisim"), dictgen::InsensitiveStr::Ascii("hilsim"), - dictgen::InsensitiveStr::Ascii("lable"), dictgen::InsensitiveStr::Ascii("lihism"), dictgen::InsensitiveStr::Ascii("mutes"), dictgen::InsensitiveStr::Ascii("n"), @@ -68817,7 +68816,6 @@ pub static WORD_NI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictg &["nihilism"], &["nihilism"], &["nihilism"], - &["nillable"], &["nihilism"], &["minutes"], &["inn", "min", "bin", "nine"], diff --git 
a/crates/typos-dict/tests/verify.rs b/crates/typos-dict/tests/verify.rs index dc31d7d..2855582 100644 --- a/crates/typos-dict/tests/verify.rs +++ b/crates/typos-dict/tests/verify.rs @@ -50,17 +50,26 @@ fn generate(file: &mut W, dict: &[u8]) { }) .collect(); - let disallowed_typos = varcon_words(); + let varcon_words = varcon_words(); + let allowed_words = allowed_words(); let word_variants = proper_word_variants(); let rows: Dict = rows .into_iter() .filter(|(typo, _)| { - let is_disallowed = disallowed_typos.contains(&unicase::UniCase::new(typo)); + let is_disallowed = varcon_words.contains(&unicase::UniCase::new(typo)); if is_disallowed { - eprintln!("{:?} is disallowed", typo); + eprintln!("{:?} is disallowed; in varcon", typo); } !is_disallowed }) + .filter(|(typo, _)| { + if let Some(reason) = allowed_words.get(typo.as_ref()) { + eprintln!("{:?} is disallowed; {}", typo, reason); + false + } else { + true + } + }) .map(|(typo, corrections)| { let mut new_corrections = vec![]; for correction in corrections { @@ -137,3 +146,22 @@ fn find_best_match<'c>( matches.sort_unstable(); matches.into_iter().next().map(|(_, r)| r) } + +fn allowed_words() -> std::collections::HashMap<String, String> { + let allowed_path = "assets/allowed.csv"; + let data = std::fs::read(allowed_path).unwrap(); + csv::ReaderBuilder::new() + .has_headers(false) + .flexible(true) + .from_reader(data.as_slice()) + .records() + .map(Result::unwrap) + .map(|r| { + let mut i = r.iter(); + let mut typo = i.next().expect("typo").to_owned(); + typo.make_ascii_lowercase(); + let reason = i.next().expect("reason").to_owned(); + (typo, reason) + }) + .collect() +}