use std::borrow::Cow; use std::collections::HashMap; use unicase::UniCase; use typos::tokens::Case; use typos::Status; #[derive(Default)] pub struct BuiltIn { locale: Option, } impl BuiltIn { pub const fn new(locale: crate::config::Locale) -> Self { Self { locale: locale.category(), } } pub fn correct_ident<'s, 'w>( &'s self, _ident: typos::tokens::Identifier<'w>, ) -> Option> { None } pub fn correct_word<'s, 'w>( &'s self, word_token: typos::tokens::Word<'w>, ) -> Option> { if word_token.case() == typos::tokens::Case::None { return None; } let word = word_token.token(); let mut corrections = if let Some(corrections) = self.correct_with_dict(word) { if corrections.is_empty() { Status::Invalid } else { self.chain_with_vars(corrections) } } else { self.correct_with_vars(word)? }; corrections .corrections_mut() .for_each(|mut s| case_correct(&mut s, word_token.case())); Some(corrections) } } #[cfg(feature = "dict")] impl BuiltIn { // Not using `Status` to avoid the allocations fn correct_with_dict(&self, word: &str) -> Option<&'static [&'static str]> { if typos_dict::WORD_RANGE.contains(&word.len()) { map_lookup(&typos_dict::WORD_DICTIONARY, word) } else { None } } } #[cfg(not(feature = "dict"))] impl BuiltIn { fn correct_with_dict(&self, _word: &str) -> Option<&'static [&'static str]> { None } } #[cfg(feature = "vars")] impl BuiltIn { fn chain_with_vars(&self, corrections: &'static [&'static str]) -> Status<'static> { if self.is_vars_enabled() { let mut chained: Vec<_> = corrections .iter() .flat_map(|c| match self.correct_with_vars(c) { Some(Status::Valid) | None => vec![Cow::Borrowed(*c)], Some(Status::Corrections(vars)) => vars, Some(Status::Invalid) => { unreachable!("correct_with_vars should always have valid suggestions") } }) .collect(); if chained.len() != 1 { chained.sort_unstable(); chained.dedup(); } debug_assert!(!chained.is_empty()); Status::Corrections(chained) } else { Status::Corrections(corrections.iter().map(|c| Cow::Borrowed(*c)).collect()) } } fn correct_with_vars(&self, word: &str) -> Option> { if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) { let word_case = unicase::UniCase::new(word); typos_vars::find(&word_case).map(|variants| self.select_variant(variants)) } else { None } } fn is_vars_enabled(&self) -> bool { #![allow(clippy::assertions_on_constants)] debug_assert!(typos_vars::NO_INVALID); self.locale.is_some() } fn select_variant( &self, vars: &'static [(u8, &'static typos_vars::VariantsMap)], ) -> Status<'static> { let var = vars[0]; let var_categories = unsafe { // Code-genned from a checked category-set, so known to be safe typos_vars::CategorySet::from_bits_unchecked(var.0) }; if let Some(locale) = self.locale { if var_categories.contains(locale) { // Already valid for the current locale. Status::Valid } else { Status::Corrections( typos_vars::corrections(locale, *var.1) .iter() .copied() .map(Cow::Borrowed) .collect(), ) } } else { // All locales are valid if var_categories.is_empty() { // But the word is never valid. let mut unique: Vec<_> = var .1 .iter() .flat_map(|v| v.iter()) .copied() .map(Cow::Borrowed) .collect(); unique.sort_unstable(); unique.dedup(); Status::Corrections(unique) } else { Status::Valid } } } } #[cfg(not(feature = "vars"))] impl BuiltIn { fn chain_with_vars(&self, corrections: &'static [&'static str]) -> Status<'static> { Status::Corrections(corrections.iter().map(|c| Cow::Borrowed(*c)).collect()) } fn correct_with_vars(&self, _word: &str) -> Option> { None } } impl typos::Dictionary for BuiltIn { fn correct_ident<'s, 'w>(&'s self, ident: typos::tokens::Identifier<'w>) -> Option> { BuiltIn::correct_ident(self, ident) } fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option> { BuiltIn::correct_word(self, word) } } fn map_lookup(map: &'static phf::Map, V>, key: &str) -> Option { // This transmute should be safe as `get` will not store the reference with // the expanded lifetime. This is due to `Borrow` being overly strict and // can't have an impl for `&'static str` to `Borrow<&'a str>`. // // // See https://github.com/rust-lang/rust/issues/28853#issuecomment-158735548 unsafe { let key = ::std::mem::transmute::<_, &'static str>(key); map.get(&UniCase::new(key)).cloned() } } fn case_correct(correction: &mut Cow<'_, str>, case: Case) { match case { Case::Lower | Case::None => (), Case::Title => match correction { Cow::Borrowed(s) => { let mut s = String::from(*s); s[0..1].make_ascii_uppercase(); *correction = s.into(); } Cow::Owned(s) => { s[0..1].make_ascii_uppercase(); } }, Case::Upper => match correction { Cow::Borrowed(s) => { let mut s = String::from(*s); s.make_ascii_uppercase(); *correction = s.into(); } Cow::Owned(s) => { s.make_ascii_uppercase(); } }, } } pub struct Override<'i, 'w, D> { identifiers: HashMap<&'i str, Status<'i>, ahash::RandomState>, words: HashMap, Status<'w>, ahash::RandomState>, inner: D, } impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> { pub fn new(inner: D) -> Self { Self { identifiers: Default::default(), words: Default::default(), inner, } } pub fn identifiers>(&mut self, identifiers: I) { self.identifiers = Self::interpret(identifiers).collect(); } pub fn words>(&mut self, words: I) { self.words = Self::interpret(words) .map(|(k, v)| (UniCase::new(k), v)) .collect(); } fn interpret<'z, I: Iterator>( cases: I, ) -> impl Iterator)> { cases.map(|(typo, correction)| { let correction = if typo == correction { Status::Valid } else if correction.is_empty() { Status::Invalid } else { Status::Corrections(vec![Cow::Borrowed(correction)]) }; (typo, correction) }) } } impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> { fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Option> { // Skip hashing if we can if !self.identifiers.is_empty() { self.identifiers .get(ident.token()) .map(|c| c.borrow()) .or_else(|| self.inner.correct_ident(ident)) } else { None } } fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Option> { if word.case() == typos::tokens::Case::None { return None; } // Skip hashing if we can let custom = if !self.words.is_empty() { let w = UniCase::new(word.token()); // HACK: couldn't figure out the lifetime issue with replacing `cloned` with `borrow` self.words.get(&w).cloned() } else { None }; custom.or_else(|| self.inner.correct_word(word)) } } #[cfg(test)] mod test { use super::*; #[cfg(feature = "dict")] #[test] fn test_dict_correct() { let dict = BuiltIn::new(crate::config::Locale::default()); let correction = dict.correct_word(typos::tokens::Word::new_unchecked( "finallizes", typos::tokens::Case::Lower, 0, )); assert_eq!( correction, Some(Status::Corrections(vec!["finalizes".into()])) ); } #[cfg(feature = "vars")] #[test] fn test_varcon_no_locale() { let dict = BuiltIn::new(crate::config::Locale::En); let correction = dict.correct_word(typos::tokens::Word::new_unchecked( "finalizes", typos::tokens::Case::Lower, 0, )); assert_eq!(correction, None); } #[cfg(feature = "vars")] #[test] fn test_varcon_same_locale() { let dict = BuiltIn::new(crate::config::Locale::EnUs); let correction = dict.correct_word(typos::tokens::Word::new_unchecked( "finalizes", typos::tokens::Case::Lower, 0, )); assert_eq!(correction, Some(Status::Valid)); } #[cfg(feature = "vars")] #[test] fn test_varcon_different_locale() { let dict = BuiltIn::new(crate::config::Locale::EnGb); let correction = dict.correct_word(typos::tokens::Word::new_unchecked( "finalizes", typos::tokens::Case::Lower, 0, )); assert_eq!( correction, Some(Status::Corrections(vec!["finalises".into()])) ); } #[cfg(all(feature = "dict", feature = "vars"))] #[test] fn test_dict_to_varcon() { let dict = BuiltIn::new(crate::config::Locale::EnGb); let correction = dict.correct_word(typos::tokens::Word::new_unchecked( "finallizes", typos::tokens::Case::Lower, 0, )); assert_eq!( correction, Some(Status::Corrections(vec!["finalises".into()])) ); } #[test] fn test_case_correct() { let cases = [ ("foo", Case::Lower, "foo"), ("foo", Case::None, "foo"), ("foo", Case::Title, "Foo"), ("foo", Case::Upper, "FOO"), ("fOo", Case::None, "fOo"), ]; for (correction, case, expected) in cases.iter() { let mut actual = Cow::Borrowed(*correction); case_correct(&mut actual, *case); assert_eq!(*expected, actual); let mut actual = Cow::Owned(String::from(*correction)); case_correct(&mut actual, *case); assert_eq!(*expected, actual); } } }