mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-29 04:21:06 -05:00
Merge pull request #251 from epage/vars
fix(dict): Correctly connect dict with varcon
This commit is contained in:
commit
444d2cca91
2 changed files with 87 additions and 10 deletions
|
@ -6,8 +6,8 @@ use structopt::StructOpt;
|
||||||
fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
||||||
let mut wtr = csv::Writer::from_writer(file);
|
let mut wtr = csv::Writer::from_writer(file);
|
||||||
|
|
||||||
let disallowed_typos = disallowed_typos();
|
let disallowed_typos = varcon_words();
|
||||||
let related_words = related_words();
|
let word_variants = proper_word_variants();
|
||||||
|
|
||||||
let mut reader = csv::ReaderBuilder::new()
|
let mut reader = csv::ReaderBuilder::new()
|
||||||
.has_headers(false)
|
.has_headers(false)
|
||||||
|
@ -19,7 +19,7 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
||||||
if disallowed_typos.contains(&unicase::UniCase::new(typo)) {
|
if disallowed_typos.contains(&unicase::UniCase::new(typo)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let correction = related_words
|
let correction = word_variants
|
||||||
.get(correction)
|
.get(correction)
|
||||||
.and_then(|words| find_best_match(typo, correction, words))
|
.and_then(|words| find_best_match(typo, correction, words))
|
||||||
.unwrap_or(correction);
|
.unwrap_or(correction);
|
||||||
|
@ -28,7 +28,9 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
||||||
wtr.flush().unwrap();
|
wtr.flush().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn disallowed_typos() -> HashSet<unicase::UniCase<&'static str>> {
|
fn varcon_words() -> HashSet<unicase::UniCase<&'static str>> {
|
||||||
|
// Even include improper ones because we should be letting varcon handle that rather than our
|
||||||
|
// dictionary
|
||||||
varcon::VARCON
|
varcon::VARCON
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(|c| c.entries.iter())
|
.flat_map(|c| c.entries.iter())
|
||||||
|
@ -37,7 +39,7 @@ fn disallowed_typos() -> HashSet<unicase::UniCase<&'static str>> {
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn related_words() -> HashMap<&'static str, HashSet<&'static str>> {
|
fn proper_word_variants() -> HashMap<&'static str, HashSet<&'static str>> {
|
||||||
let mut words: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();
|
let mut words: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();
|
||||||
for entry in varcon::VARCON.iter().flat_map(|c| c.entries.iter()) {
|
for entry in varcon::VARCON.iter().flat_map(|c| c.entries.iter()) {
|
||||||
let variants: HashSet<_> = entry
|
let variants: HashSet<_> = entry
|
||||||
|
@ -57,11 +59,11 @@ fn related_words() -> HashMap<&'static str, HashSet<&'static str>> {
|
||||||
fn find_best_match<'c>(
|
fn find_best_match<'c>(
|
||||||
typo: &'c str,
|
typo: &'c str,
|
||||||
correction: &'c str,
|
correction: &'c str,
|
||||||
related_words: &HashSet<&'static str>,
|
word_variants: &HashSet<&'static str>,
|
||||||
) -> Option<&'c str> {
|
) -> Option<&'c str> {
|
||||||
assert!(!related_words.contains(correction));
|
assert!(!word_variants.contains(correction));
|
||||||
let current = edit_distance::edit_distance(typo, correction);
|
let current = edit_distance::edit_distance(typo, correction);
|
||||||
let mut matches: Vec<_> = related_words
|
let mut matches: Vec<_> = word_variants
|
||||||
.iter()
|
.iter()
|
||||||
.map(|r| (edit_distance::edit_distance(typo, r), *r))
|
.map(|r| (edit_distance::edit_distance(typo, r), *r))
|
||||||
.filter(|(d, _)| *d < current)
|
.filter(|(d, _)| *d < current)
|
||||||
|
|
79
src/dict.rs
79
src/dict.rs
|
@ -35,8 +35,14 @@ impl BuiltIn {
|
||||||
|
|
||||||
let word = word_token.token();
|
let word = word_token.token();
|
||||||
let mut corrections = if let Some(correction) = self.correct_with_dict(word) {
|
let mut corrections = if let Some(correction) = self.correct_with_dict(word) {
|
||||||
self.correct_with_vars(word)
|
match self.correct_with_vars(correction) {
|
||||||
.unwrap_or_else(|| Status::Corrections(vec![Cow::Borrowed(correction)]))
|
Some(Status::Valid) => Status::Corrections(vec![Cow::Borrowed(correction)]),
|
||||||
|
Some(correction @ Status::Corrections(_)) => correction,
|
||||||
|
Some(Status::Invalid) => {
|
||||||
|
unreachable!("correct_with_vars should always have valid suggestions")
|
||||||
|
}
|
||||||
|
None => Status::Corrections(vec![Cow::Borrowed(correction)]),
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
self.correct_with_vars(word)?
|
self.correct_with_vars(word)?
|
||||||
};
|
};
|
||||||
|
@ -244,6 +250,75 @@ impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> {
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
#[cfg(feature = "dict")]
|
||||||
|
#[test]
|
||||||
|
fn test_dict_correct() {
|
||||||
|
let dict = BuiltIn::new(crate::config::Locale::default());
|
||||||
|
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
|
||||||
|
"finallizes",
|
||||||
|
typos::tokens::Case::Lower,
|
||||||
|
0,
|
||||||
|
));
|
||||||
|
assert_eq!(
|
||||||
|
correction,
|
||||||
|
Some(Status::Corrections(vec!["finalizes".into()]))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "vars")]
|
||||||
|
#[test]
|
||||||
|
fn test_varcon_no_locale() {
|
||||||
|
let dict = BuiltIn::new(crate::config::Locale::En);
|
||||||
|
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
|
||||||
|
"finalizes",
|
||||||
|
typos::tokens::Case::Lower,
|
||||||
|
0,
|
||||||
|
));
|
||||||
|
assert_eq!(correction, Some(Status::Valid));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "vars")]
|
||||||
|
#[test]
|
||||||
|
fn test_varcon_same_locale() {
|
||||||
|
let dict = BuiltIn::new(crate::config::Locale::EnUs);
|
||||||
|
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
|
||||||
|
"finalizes",
|
||||||
|
typos::tokens::Case::Lower,
|
||||||
|
0,
|
||||||
|
));
|
||||||
|
assert_eq!(correction, Some(Status::Valid));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "vars")]
|
||||||
|
#[test]
|
||||||
|
fn test_varcon_different_locale() {
|
||||||
|
let dict = BuiltIn::new(crate::config::Locale::EnGb);
|
||||||
|
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
|
||||||
|
"finalizes",
|
||||||
|
typos::tokens::Case::Lower,
|
||||||
|
0,
|
||||||
|
));
|
||||||
|
assert_eq!(
|
||||||
|
correction,
|
||||||
|
Some(Status::Corrections(vec!["finalises".into()]))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(all(feature = "dict", feature = "vars"))]
|
||||||
|
#[test]
|
||||||
|
fn test_dict_to_varcon() {
|
||||||
|
let dict = BuiltIn::new(crate::config::Locale::EnGb);
|
||||||
|
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
|
||||||
|
"finallizes",
|
||||||
|
typos::tokens::Case::Lower,
|
||||||
|
0,
|
||||||
|
));
|
||||||
|
assert_eq!(
|
||||||
|
correction,
|
||||||
|
Some(Status::Corrections(vec!["finalises".into()]))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_case_correct() {
|
fn test_case_correct() {
|
||||||
let cases = [
|
let cases = [
|
||||||
|
|
Loading…
Reference in a new issue