mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-26 02:51:08 -05:00
refactor(varcon): Clarify check's meanings
This commit is contained in:
parent
fa7ce95fd1
commit
77cfccb392
1 changed file with 10 additions and 8 deletions
|
@@ -6,8 +6,8 @@ use structopt::StructOpt;
|
||||||
fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
||||||
let mut wtr = csv::Writer::from_writer(file);
|
let mut wtr = csv::Writer::from_writer(file);
|
||||||
|
|
||||||
let disallowed_typos = disallowed_typos();
|
let disallowed_typos = varcon_words();
|
||||||
let related_words = related_words();
|
let word_variants = proper_word_variants();
|
||||||
|
|
||||||
let mut reader = csv::ReaderBuilder::new()
|
let mut reader = csv::ReaderBuilder::new()
|
||||||
.has_headers(false)
|
.has_headers(false)
|
||||||
|
@@ -19,7 +19,7 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
||||||
if disallowed_typos.contains(&unicase::UniCase::new(typo)) {
|
if disallowed_typos.contains(&unicase::UniCase::new(typo)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let correction = related_words
|
let correction = word_variants
|
||||||
.get(correction)
|
.get(correction)
|
||||||
.and_then(|words| find_best_match(typo, correction, words))
|
.and_then(|words| find_best_match(typo, correction, words))
|
||||||
.unwrap_or(correction);
|
.unwrap_or(correction);
|
||||||
|
@@ -28,7 +28,9 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
||||||
wtr.flush().unwrap();
|
wtr.flush().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn disallowed_typos() -> HashSet<unicase::UniCase<&'static str>> {
|
fn varcon_words() -> HashSet<unicase::UniCase<&'static str>> {
|
||||||
|
// Even include improper ones because we should be letting varcon handle that rather than our
|
||||||
|
// dictionary
|
||||||
varcon::VARCON
|
varcon::VARCON
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(|c| c.entries.iter())
|
.flat_map(|c| c.entries.iter())
|
||||||
|
@@ -37,7 +39,7 @@ fn disallowed_typos() -> HashSet<unicase::UniCase<&'static str>> {
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn related_words() -> HashMap<&'static str, HashSet<&'static str>> {
|
fn proper_word_variants() -> HashMap<&'static str, HashSet<&'static str>> {
|
||||||
let mut words: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();
|
let mut words: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();
|
||||||
for entry in varcon::VARCON.iter().flat_map(|c| c.entries.iter()) {
|
for entry in varcon::VARCON.iter().flat_map(|c| c.entries.iter()) {
|
||||||
let variants: HashSet<_> = entry
|
let variants: HashSet<_> = entry
|
||||||
|
@@ -57,11 +59,11 @@ fn related_words() -> HashMap<&'static str, HashSet<&'static str>> {
|
||||||
fn find_best_match<'c>(
|
fn find_best_match<'c>(
|
||||||
typo: &'c str,
|
typo: &'c str,
|
||||||
correction: &'c str,
|
correction: &'c str,
|
||||||
related_words: &HashSet<&'static str>,
|
word_variants: &HashSet<&'static str>,
|
||||||
) -> Option<&'c str> {
|
) -> Option<&'c str> {
|
||||||
assert!(!related_words.contains(correction));
|
assert!(!word_variants.contains(correction));
|
||||||
let current = edit_distance::edit_distance(typo, correction);
|
let current = edit_distance::edit_distance(typo, correction);
|
||||||
let mut matches: Vec<_> = related_words
|
let mut matches: Vec<_> = word_variants
|
||||||
.iter()
|
.iter()
|
||||||
.map(|r| (edit_distance::edit_distance(typo, r), *r))
|
.map(|r| (edit_distance::edit_distance(typo, r), *r))
|
||||||
.filter(|(d, _)| *d < current)
|
.filter(|(d, _)| *d < current)
|
||||||
|
|
Loading…
Reference in a new issue