mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-26 01:22:08 -05:00
Merge pull request #251 from epage/vars
fix(dict): Correctly connect dict with varcon
This commit is contained in:
commit
444d2cca91
2 changed files with 87 additions and 10 deletions
|
@ -6,8 +6,8 @@ use structopt::StructOpt;
|
|||
fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
||||
let mut wtr = csv::Writer::from_writer(file);
|
||||
|
||||
let disallowed_typos = disallowed_typos();
|
||||
let related_words = related_words();
|
||||
let disallowed_typos = varcon_words();
|
||||
let word_variants = proper_word_variants();
|
||||
|
||||
let mut reader = csv::ReaderBuilder::new()
|
||||
.has_headers(false)
|
||||
|
@ -19,7 +19,7 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
|||
if disallowed_typos.contains(&unicase::UniCase::new(typo)) {
|
||||
continue;
|
||||
}
|
||||
let correction = related_words
|
||||
let correction = word_variants
|
||||
.get(correction)
|
||||
.and_then(|words| find_best_match(typo, correction, words))
|
||||
.unwrap_or(correction);
|
||||
|
@ -28,7 +28,9 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
|||
wtr.flush().unwrap();
|
||||
}
|
||||
|
||||
fn disallowed_typos() -> HashSet<unicase::UniCase<&'static str>> {
|
||||
fn varcon_words() -> HashSet<unicase::UniCase<&'static str>> {
|
||||
// Even include improper ones because we should be letting varcon handle that rather than our
|
||||
// dictionary
|
||||
varcon::VARCON
|
||||
.iter()
|
||||
.flat_map(|c| c.entries.iter())
|
||||
|
@ -37,7 +39,7 @@ fn disallowed_typos() -> HashSet<unicase::UniCase<&'static str>> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
fn related_words() -> HashMap<&'static str, HashSet<&'static str>> {
|
||||
fn proper_word_variants() -> HashMap<&'static str, HashSet<&'static str>> {
|
||||
let mut words: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();
|
||||
for entry in varcon::VARCON.iter().flat_map(|c| c.entries.iter()) {
|
||||
let variants: HashSet<_> = entry
|
||||
|
@ -57,11 +59,11 @@ fn related_words() -> HashMap<&'static str, HashSet<&'static str>> {
|
|||
fn find_best_match<'c>(
|
||||
typo: &'c str,
|
||||
correction: &'c str,
|
||||
related_words: &HashSet<&'static str>,
|
||||
word_variants: &HashSet<&'static str>,
|
||||
) -> Option<&'c str> {
|
||||
assert!(!related_words.contains(correction));
|
||||
assert!(!word_variants.contains(correction));
|
||||
let current = edit_distance::edit_distance(typo, correction);
|
||||
let mut matches: Vec<_> = related_words
|
||||
let mut matches: Vec<_> = word_variants
|
||||
.iter()
|
||||
.map(|r| (edit_distance::edit_distance(typo, r), *r))
|
||||
.filter(|(d, _)| *d < current)
|
||||
|
|
79
src/dict.rs
79
src/dict.rs
|
@ -35,8 +35,14 @@ impl BuiltIn {
|
|||
|
||||
let word = word_token.token();
|
||||
let mut corrections = if let Some(correction) = self.correct_with_dict(word) {
|
||||
self.correct_with_vars(word)
|
||||
.unwrap_or_else(|| Status::Corrections(vec![Cow::Borrowed(correction)]))
|
||||
match self.correct_with_vars(correction) {
|
||||
Some(Status::Valid) => Status::Corrections(vec![Cow::Borrowed(correction)]),
|
||||
Some(correction @ Status::Corrections(_)) => correction,
|
||||
Some(Status::Invalid) => {
|
||||
unreachable!("correct_with_vars should always have valid suggestions")
|
||||
}
|
||||
None => Status::Corrections(vec![Cow::Borrowed(correction)]),
|
||||
}
|
||||
} else {
|
||||
self.correct_with_vars(word)?
|
||||
};
|
||||
|
@ -244,6 +250,75 @@ impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> {
|
|||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[cfg(feature = "dict")]
|
||||
#[test]
|
||||
fn test_dict_correct() {
|
||||
let dict = BuiltIn::new(crate::config::Locale::default());
|
||||
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
|
||||
"finallizes",
|
||||
typos::tokens::Case::Lower,
|
||||
0,
|
||||
));
|
||||
assert_eq!(
|
||||
correction,
|
||||
Some(Status::Corrections(vec!["finalizes".into()]))
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(feature = "vars")]
|
||||
#[test]
|
||||
fn test_varcon_no_locale() {
|
||||
let dict = BuiltIn::new(crate::config::Locale::En);
|
||||
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
|
||||
"finalizes",
|
||||
typos::tokens::Case::Lower,
|
||||
0,
|
||||
));
|
||||
assert_eq!(correction, Some(Status::Valid));
|
||||
}
|
||||
|
||||
#[cfg(feature = "vars")]
|
||||
#[test]
|
||||
fn test_varcon_same_locale() {
|
||||
let dict = BuiltIn::new(crate::config::Locale::EnUs);
|
||||
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
|
||||
"finalizes",
|
||||
typos::tokens::Case::Lower,
|
||||
0,
|
||||
));
|
||||
assert_eq!(correction, Some(Status::Valid));
|
||||
}
|
||||
|
||||
#[cfg(feature = "vars")]
|
||||
#[test]
|
||||
fn test_varcon_different_locale() {
|
||||
let dict = BuiltIn::new(crate::config::Locale::EnGb);
|
||||
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
|
||||
"finalizes",
|
||||
typos::tokens::Case::Lower,
|
||||
0,
|
||||
));
|
||||
assert_eq!(
|
||||
correction,
|
||||
Some(Status::Corrections(vec!["finalises".into()]))
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(all(feature = "dict", feature = "vars"))]
|
||||
#[test]
|
||||
fn test_dict_to_varcon() {
|
||||
let dict = BuiltIn::new(crate::config::Locale::EnGb);
|
||||
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
|
||||
"finallizes",
|
||||
typos::tokens::Case::Lower,
|
||||
0,
|
||||
));
|
||||
assert_eq!(
|
||||
correction,
|
||||
Some(Status::Corrections(vec!["finalises".into()]))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_case_correct() {
|
||||
let cases = [
|
||||
|
|
Loading…
Reference in a new issue