use std::borrow::Cow; use std::collections::HashMap; use unicase::UniCase; use typos::tokens::Case; use typos::Status; #[derive(Default)] pub struct BuiltIn { locale: Option, } impl BuiltIn { pub fn new(locale: crate::config::Locale) -> Self { Self { locale: locale.category(), } } pub fn correct_ident<'s, 'w>( &'s self, _ident: typos::tokens::Identifier<'w>, ) -> Option> { None } pub fn correct_word<'s, 'w>( &'s self, word_token: typos::tokens::Word<'w>, ) -> Option> { if word_token.case() == typos::tokens::Case::None { return None; } let word = word_token.token(); let mut corrections = if let Some(correction) = self.correct_with_dict(word) { self.correct_with_vars(word) .unwrap_or_else(|| Status::Corrections(vec![Cow::Borrowed(correction)])) } else { self.correct_with_vars(word)? }; corrections .corrections_mut() .for_each(|mut s| case_correct(&mut s, word_token.case())); Some(corrections) } // Not using `Status` to avoid the allocations fn correct_with_dict(&self, word: &str) -> Option<&'static str> { const WORD_RANGE: std::ops::RangeInclusive = typos_dict::WORD_MIN..=typos_dict::WORD_MAX; if WORD_RANGE.contains(&word.len()) { map_lookup(&typos_dict::WORD_DICTIONARY, word) } else { None } } fn correct_with_vars(&self, word: &str) -> Option> { const WORD_RANGE: std::ops::RangeInclusive = typos_vars::WORD_MIN..=typos_vars::WORD_MAX; if WORD_RANGE.contains(&word.len()) { map_lookup(&typos_vars::VARS_DICTIONARY, word) .map(|variants| self.select_variant(variants)) } else { None } } fn select_variant( &self, vars: &'static [(u8, &'static typos_vars::VariantsMap)], ) -> Status<'static> { let var = vars[0]; let var_categories = unsafe { // Code-genned from a checked category-set, so known to be safe typos_vars::CategorySet::new(var.0) }; if let Some(locale) = self.locale { if var_categories.contains(locale) { // Already valid for the current locale. Status::Valid } else { Status::Corrections( typos_vars::corrections(locale, *var.1) .iter() .copied() .map(Cow::Borrowed) .collect(), ) } } else { // All locales are valid if var_categories.is_empty() { // But the word is never valid. let mut unique: Vec<_> = var .1 .iter() .flat_map(|v| v.iter()) .copied() .map(Cow::Borrowed) .collect(); unique.sort_unstable(); unique.dedup(); Status::Corrections(unique) } else { Status::Valid } } } } impl typos::Dictionary for BuiltIn { fn correct_ident<'s, 'w>(&'s self, ident: typos::tokens::Identifier<'w>) -> Option> { BuiltIn::correct_ident(self, ident) } fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option> { BuiltIn::correct_word(self, word) } } fn map_lookup(map: &'static phf::Map, V>, key: &str) -> Option { // This transmute should be safe as `get` will not store the reference with // the expanded lifetime. This is due to `Borrow` being overly strict and // can't have an impl for `&'static str` to `Borrow<&'a str>`. // // // See https://github.com/rust-lang/rust/issues/28853#issuecomment-158735548 unsafe { let key = ::std::mem::transmute::<_, &'static str>(key); map.get(&UniCase::new(key)).cloned() } } fn case_correct(correction: &mut Cow<'_, str>, case: Case) { match case { Case::Lower | Case::None => (), Case::Title => match correction { Cow::Borrowed(s) => { let mut s = String::from(*s); s[0..1].make_ascii_uppercase(); *correction = s.into(); } Cow::Owned(s) => { s[0..1].make_ascii_uppercase(); } }, Case::Scream => match correction { Cow::Borrowed(s) => { let mut s = String::from(*s); s.make_ascii_uppercase(); *correction = s.into(); } Cow::Owned(s) => { s.make_ascii_uppercase(); } }, } } pub struct Override<'i, 'w, D> { identifiers: HashMap<&'i str, Status<'i>, ahash::RandomState>, words: HashMap, Status<'w>, ahash::RandomState>, inner: D, } impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> { pub fn new(inner: D) -> Self { Self { identifiers: Default::default(), words: Default::default(), inner, } } pub fn identifiers>(&mut self, identifiers: I) { self.identifiers = Self::interpret(identifiers).collect(); } pub fn words>(&mut self, words: I) { self.words = Self::interpret(words) .map(|(k, v)| (UniCase::new(k), v)) .collect(); } fn interpret<'z, I: Iterator>( cases: I, ) -> impl Iterator)> { cases.map(|(typo, correction)| { let correction = if typo == correction { Status::Valid } else if correction.is_empty() { Status::Invalid } else { Status::Corrections(vec![Cow::Borrowed(correction)]) }; (typo, correction) }) } } impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> { fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Option> { // Skip hashing if we can if !self.identifiers.is_empty() { self.identifiers .get(ident.token()) .map(|c| c.borrow()) .or_else(|| self.inner.correct_ident(ident)) } else { None } } fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Option> { if word.case() == typos::tokens::Case::None { return None; } // Skip hashing if we can let custom = if !self.words.is_empty() { let w = UniCase::new(word.token()); // HACK: couldn't figure out the lifetime issue with replacing `cloned` with `borrow` self.words.get(&w).cloned() } else { None }; custom.or_else(|| self.inner.correct_word(word)) } } #[cfg(test)] mod test { use super::*; #[test] fn test_case_correct() { let cases = [ ("foo", Case::Lower, "foo"), ("foo", Case::None, "foo"), ("foo", Case::Title, "Foo"), ("foo", Case::Scream, "FOO"), ("fOo", Case::None, "fOo"), ]; for (correction, case, expected) in cases.iter() { let mut actual = Cow::Borrowed(*correction); case_correct(&mut actual, *case); assert_eq!(*expected, actual); let mut actual = Cow::Owned(String::from(*correction)); case_correct(&mut actual, *case); assert_eq!(*expected, actual); } } }