typos/crates/typos-dict/tests/verify.rs

use indexmap::IndexSet;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::collections::HashSet;
use unicase::UniCase;

type Dict = BTreeMap<UniCase<String>, IndexSet<String>>;

/// Round-trips `assets/words.csv` through [`process`] and asserts that the
/// re-serialized CSV matches the `../assets/words.csv` snapshot.
#[test]
fn verify() {
    let cleaned = process(parse_dict("assets/words.csv"));

    let mut buffer: Vec<u8> = Vec::new();
    {
        // Scope the writer so its mutable borrow of `buffer` ends before the
        // bytes are decoded below.
        let mut writer = csv::WriterBuilder::new()
            .flexible(true)
            .from_writer(&mut buffer);
        for (typo, corrections) in cleaned {
            // One record per typo: the typo itself followed by its corrections.
            let record: Vec<String> = std::iter::once(typo.as_str().to_owned())
                .chain(corrections)
                .collect();
            writer.write_record(&record).unwrap();
        }
        writer.flush().unwrap();
    }

    let rendered = String::from_utf8(buffer).unwrap();
    snapbox::assert_data_eq!(rendered, snapbox::file!["../assets/words.csv"].raw());
}

/// Reads the header-less CSV file at `path` into `(typo, corrections)` rows.
///
/// The first field of every record is the typo; all remaining fields are its
/// corrections. Records may have differing field counts.
///
/// # Panics
///
/// Panics if the file cannot be read, a record fails to parse, or a record
/// has no first field.
fn parse_dict(path: &str) -> Vec<(String, Vec<String>)> {
    let bytes = std::fs::read(path).unwrap();

    let mut csv_reader = csv::ReaderBuilder::new()
        .has_headers(false)
        .flexible(true)
        .from_reader(bytes.as_slice());

    let mut rows = Vec::new();
    for record in csv_reader.records() {
        let record = record.unwrap();
        let mut fields = record.iter();
        let typo = fields.next().expect("typo").to_owned();
        let corrections: Vec<String> = fields.map(str::to_owned).collect();
        rows.push((typo, corrections));
    }
    rows
}

/// Builds a [`Dict`] from `(typo, corrections)` pairs.
///
/// Typos and corrections are ASCII-lowercased. When the same typo occurs more
/// than once, its corrections are added to the set that already exists for it.
fn dict_from_iter<S: Into<String>>(
    iter: impl IntoIterator<Item = (S, impl IntoIterator<Item = S>)>,
) -> Dict {
    iter.into_iter()
        .fold(Dict::new(), |mut merged, (typo, corrections)| {
            let mut key: String = typo.into();
            key.make_ascii_lowercase();

            // `entry` reuses the existing set for a repeated typo, which is what
            // merges duplicate rows.
            let bucket = merged.entry(UniCase::new(key)).or_default();
            for correction in corrections {
                let correction: String = correction.into();
                bucket.insert(correction.to_ascii_lowercase());
            }
            merged
        })
}

fn process<S: Into<String>>(
    iter: impl IntoIterator<Item = (S, impl IntoIterator<Item = S>)>,
) -> Dict {
    let dict = dict_from_iter(iter);

    let rows: Dict = dict
        .into_iter()
        .filter(|(t, _)| is_word(t))
        .map(|(t, c)| {
            let new_c: IndexSet<_> = c.into_iter().filter(|c| is_word(c)).collect();
            (t, new_c)
        })
        .collect();

    let varcon_words = varcon_words();
    let allowed_words = allowed_words();
    let word_variants = proper_word_variants();
    let rows: Vec<_> = rows
        .into_iter()
        .filter(|(typo, _)| {
            let is_disallowed = varcon_words.contains(&UniCase::new(typo));
            if is_disallowed {
                eprintln!("{typo:?} is disallowed; in varcon");
            }
            !is_disallowed
        })
        .filter(|(typo, _)| {
            if let Some(reason) = allowed_words.get(typo.as_ref()) {
                eprintln!("{typo:?} is disallowed; {reason}");
                false
            } else {
                true
            }
        })
        .map(|(typo, corrections)| {
            let mut new_corrections = IndexSet::new();
            for correction in corrections {
                let correction = word_variants
                    .get(correction.as_str())
                    .and_then(|words| find_best_match(&typo, correction.as_str(), words))
                    .unwrap_or(&correction);
                new_corrections.insert(correction.to_owned());
            }
            (typo, new_corrections)
        })
        .collect();
    let mut dict = Dict::new();
    for (bad, good) in rows {
        let current = dict.entry(bad).or_default();
        current.extend(good);
    }

    let corrections: HashMap<_, _> = dict
        .iter()
        .flat_map(|(bad, good)| good.iter().map(|good| (good.to_owned(), bad.to_owned())))
        .collect();
    dict.into_iter()
        .filter(|(typo, _)| {
            if let Some(correction) = corrections.get(typo.as_str()) {
                eprintln!("{typo} <-> {correction} cycle detected");
                false
            } else {
                true
            }
        })
        .collect()
}

/// The first two corrections must come back in the order they were supplied.
#[test]
fn test_preserve_correction_order() {
    let dict = process([("foo", ["xyz", "abc"])]);
    let key = UniCase::new(String::from("foo"));
    let first_two: Vec<&str> = dict
        .get(&key)
        .unwrap()
        .iter()
        .map(String::as_str)
        .take(2)
        .collect();
    assert_eq!(first_two, ["xyz", "abc"]);
}

/// Two rows for the same typo must collapse into one entry holding both corrections.
#[test]
fn test_merge_duplicates() {
    let merged = process([("foo", ["bar"]), ("foo", ["baz"])]);
    let expected = dict_from_iter([("foo", ["bar", "baz"])]);
    assert_eq!(merged, expected);
}

/// A correction repeated within a single row must appear only once in the result.
#[test]
fn test_duplicate_correction_removal() {
    let deduplicated = process([("foo", ["bar", "bar"])]);
    let expected = dict_from_iter([("foo", ["bar"])]);
    assert_eq!(deduplicated, expected);
}

/// Two typos that correct to each other must both be removed.
#[test]
fn test_cycle_removal() {
    let dict = process([("foo", ["foobar"]), ("foobar", ["foo"])]);
    assert!(dict.is_empty());
}

/// A row whose typo is `colour` must be dropped, leaving the dictionary empty.
#[test]
fn test_varcon_removal() {
    let dict = process([("colour", ["color"])]);
    assert!(dict.is_empty());
}

/// The correction must be swapped for the spelling variant closest to the typo.
#[test]
fn test_varcon_best_match() {
    // Note the missing 'b' in the typo.
    let actual = process([("neighourhood", ["neighborhood"])]);
    // Note that 'bor' has become 'bour' to match the typo.
    let expected = dict_from_iter([("neighourhood", ["neighbourhood"])]);
    assert_eq!(actual, expected);
}

/// Returns `true` when `word` is non-empty and made up solely of alphabetic
/// characters.
///
/// The emptiness check matters because `Iterator::all` is vacuously true for
/// an empty string, which would otherwise let a blank CSV field through as a
/// "word".
fn is_word(word: &str) -> bool {
    !word.is_empty() && word.chars().all(char::is_alphabetic)
}

/// Collects every variant word from the verified varcon clusters, wrapped in
/// [`UniCase`].
///
/// Even improper variants are included because we should be letting varcon
/// handle those rather than our dictionary.
fn varcon_words() -> HashSet<UniCase<&'static str>> {
    let mut known = HashSet::new();
    for cluster in varcon::VARCON.iter().filter(|c| c.verified) {
        for entry in cluster.entries.iter() {
            for variant in entry.variants.iter() {
                known.insert(UniCase::new(variant.word));
            }
        }
    }
    known
}

/// Maps each proper varcon variant to the other proper variants listed in the
/// same entries.
///
/// A variant counts as proper when at least one of its types carries a tag
/// other than `Improper`. A variant with no proper siblings still gets an
/// (empty) set.
fn proper_word_variants() -> HashMap<&'static str, HashSet<&'static str>> {
    let mut siblings: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();
    let entries = varcon::VARCON
        .iter()
        .flat_map(|cluster| cluster.entries.iter());
    for entry in entries {
        let mut proper: HashSet<&'static str> = HashSet::new();
        for variant in entry.variants.iter() {
            let has_proper_type = variant
                .types
                .iter()
                .any(|ty| ty.tag != Some(varcon::Tag::Improper));
            if has_proper_type {
                proper.insert(variant.word);
            }
        }

        // Link every proper variant to all the others from this entry.
        for &word in &proper {
            siblings
                .entry(word)
                .or_default()
                .extend(proper.iter().copied().filter(|other| *other != word));
        }
    }
    siblings
}

/// Picks the variant in `word_variants` that is strictly closer to `typo`, by
/// edit distance, than `correction` is.
///
/// Returns `None` when no variant beats `correction`. Ties on distance are
/// broken by the lexicographically smallest variant.
///
/// # Panics
///
/// Panics if `word_variants` contains `correction` itself.
fn find_best_match<'c>(
    typo: &'c str,
    correction: &'c str,
    word_variants: &HashSet<&'static str>,
) -> Option<&'c str> {
    assert!(!word_variants.contains(correction));

    // Picking the worst option due to a letter swap being an edit distance of two
    if matches!((typo, correction), ("alinging", "aligning")) {
        return None;
    }

    let current = edit_distance::edit_distance(typo, correction);
    word_variants
        .iter()
        .map(|variant| (edit_distance::edit_distance(typo, variant), *variant))
        .filter(|(distance, _)| *distance < current)
        // The smallest `(distance, word)` tuple is what sorting would put first.
        .min()
        .map(|(_, variant)| variant)
}

/// Loads the words that must never be treated as typos, mapped to the reason
/// they are allowed.
///
/// Words come from `assets/english.csv` (reason fixed to `"english word"`) and
/// `assets/allowed.csv` (reason read from the second field). Words are
/// ASCII-lowercased, and an `allowed.csv` entry replaces an `english.csv`
/// entry for the same word.
///
/// # Panics
///
/// Panics if either file cannot be read, a record fails to parse, or a
/// required field is missing.
fn allowed_words() -> HashMap<String, String> {
    // Read both files up front so a missing file fails before any parsing.
    let english_data = std::fs::read("assets/english.csv").unwrap();
    let local_data = std::fs::read("assets/allowed.csv").unwrap();

    let mut allowed = HashMap::new();

    let mut english = csv::ReaderBuilder::new()
        .has_headers(false)
        .flexible(true)
        .from_reader(english_data.as_slice());
    for record in english.records() {
        let record = record.unwrap();
        let word = record.iter().next().expect("typo").to_ascii_lowercase();
        allowed.insert(word, String::from("english word"));
    }

    let mut local = csv::ReaderBuilder::new()
        .has_headers(false)
        .flexible(true)
        .from_reader(local_data.as_slice());
    for record in local.records() {
        let record = record.unwrap();
        let mut fields = record.iter();
        let word = fields.next().expect("typo").to_ascii_lowercase();
        let reason = fields.next().expect("reason").to_owned();
        // Inserted second so local entries replace the generic English reason.
        allowed.insert(word, reason);
    }

    allowed
}
feat(dict): Preserve correction order We want to be able to recommend more likely corrections first, e.g. for "poped" we want to recommend "popped" before "pooped". 2023-06-26 15:33:59 -04:00			`use indexmap::IndexSet;`
test: Ensure words.csv stays sorted 2021-07-27 15:09:51 -04:00			`use std::collections::BTreeMap;`
feat: Support english dialects The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest english dialect. If someone wants to opt-in, they can have typos correct to a specific english dialect. Fixes #52 Fixes #22 2020-05-27 21:46:41 -04:00			`use std::collections::HashMap;`
			`use std::collections::HashSet;`
test: Ensure words.csv stays sorted 2021-07-27 15:09:51 -04:00			`use unicase::UniCase;`
feat: Support english dialects The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest english dialect. If someone wants to opt-in, they can have typos correct to a specific english dialect. Fixes #52 Fixes #22 2020-05-27 21:46:41 -04:00
feat(dict): Preserve correction order We want to be able to recommend more likely corrections first, e.g. for "poped" we want to recommend "popped" before "pooped". 2023-06-26 15:33:59 -04:00			`type Dict = BTreeMap<UniCase<String>, IndexSet<String>>;`
test: Ensure words.csv stays sorted 2021-07-27 15:09:51 -04:00
test: Move codegen to tests 2022-08-01 15:45:58 -04:00			`#[test]`
			`fn verify() {`
chore: Update snapbox 2024-02-14 21:28:51 -05:00			`let typos_dict = parse_dict("assets/words.csv");`
refactor: Make dict processing logic testable Previously all the dictionary cleanup logic was in the function: fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) which parsed the provided buffer as CSV and also took care of writing the processed dictionary back as CSV. This commit factors out the CSV handling, leaving a `process` function behind so that it can be easily tested in the following commit. 2023-06-24 05:14:40 -04:00			`let new_dict = process(typos_dict);`
test: Move codegen to tests 2022-08-01 15:45:58 -04:00
			`let mut content = vec![];`
refactor: Make dict processing logic testable Previously all the dictionary cleanup logic was in the function: fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) which parsed the provided buffer as CSV and also took care of writing the processed dictionary back as CSV. This commit factors out the CSV handling, leaving a `process` function behind so that it can be easily tested in the following commit. 2023-06-24 05:14:40 -04:00
			`let mut wtr = csv::WriterBuilder::new()`
			`.flexible(true)`
			`.from_writer(&mut content);`
			`for (typo, corrections) in new_dict {`
			`let mut row = vec![typo.as_str().to_owned()];`
			`row.extend(corrections);`
			`wtr.write_record(&row).unwrap();`
			`}`
			`wtr.flush().unwrap();`
			`drop(wtr);`
test: Move codegen to tests 2022-08-01 15:45:58 -04:00
			`let content = String::from_utf8(content).unwrap();`
refactor: Resolve deprecations 2024-05-27 23:09:20 -04:00			`snapbox::assert_data_eq!(content, snapbox::file!["../assets/words.csv"].raw());`
test: Move codegen to tests 2022-08-01 15:45:58 -04:00			`}`

refactor: Make dict processing logic testable Previously all the dictionary cleanup logic was in the function: fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) which parsed the provided buffer as CSV and also took care of writing the processed dictionary back as CSV. This commit factors out the CSV handling, leaving a `process` function behind so that it can be easily tested in the following commit. 2023-06-24 05:14:40 -04:00			`fn parse_dict(path: &str) -> Vec<(String, Vec<String>)> {`
			`let data = std::fs::read(path).unwrap();`

			`let mut reader = csv::ReaderBuilder::new()`
test: Prevent correcting corrections 2021-07-27 14:15:12 -04:00			`.has_headers(false)`
			`.flexible(true)`
refactor: Make dict processing logic testable Previously all the dictionary cleanup logic was in the function: fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) which parsed the provided buffer as CSV and also took care of writing the processed dictionary back as CSV. This commit factors out the CSV handling, leaving a `process` function behind so that it can be easily tested in the following commit. 2023-06-24 05:14:40 -04:00			`.from_reader(&*data);`

			`reader`
test: Prevent correcting corrections 2021-07-27 14:15:12 -04:00			`.records()`
			`.map(Result::unwrap)`
refactor: Make dict processing logic testable Previously all the dictionary cleanup logic was in the function: fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) which parsed the provided buffer as CSV and also took care of writing the processed dictionary back as CSV. This commit factors out the CSV handling, leaving a `process` function behind so that it can be easily tested in the following commit. 2023-06-24 05:14:40 -04:00			`.map(\|record\| {`
			`let mut iter = record.into_iter();`
			`let typo = iter.next().expect("typo");`
			`(`
			`typo.to_owned(),`
			`iter.map(ToOwned::to_owned).collect::<Vec<_>>(),`
			`)`
			`})`
			`.collect()`
			`}`

			`fn dict_from_iter<S: Into<String>>(`
			`iter: impl IntoIterator<Item = (S, impl IntoIterator<Item = S>)>,`
			`) -> Dict {`
			`let mut dict = Dict::new();`

			`for (typo, corrections) in iter {`
			`let typo = UniCase::new(typo.into().to_ascii_lowercase());`

			`// duplicate entries are merged`
			`dict.entry(typo)`
feat(dict): Preserve correction order We want to be able to recommend more likely corrections first, e.g. for "poped" we want to recommend "popped" before "pooped". 2023-06-26 15:33:59 -04:00			`.or_default()`
refactor: Make dict processing logic testable Previously all the dictionary cleanup logic was in the function: fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) which parsed the provided buffer as CSV and also took care of writing the processed dictionary back as CSV. This commit factors out the CSV handling, leaving a `process` function behind so that it can be easily tested in the following commit. 2023-06-24 05:14:40 -04:00			`.extend(corrections.into_iter().map(\|c\| {`
			`let mut c = c.into();`
			`c.make_ascii_lowercase();`
			`c`
			`}));`
			`}`

			`dict`
			`}`

			`fn process<S: Into<String>>(`
			`iter: impl IntoIterator<Item = (S, impl IntoIterator<Item = S>)>,`
			`) -> Dict {`
			`let dict = dict_from_iter(iter);`

			`let rows: Dict = dict`
feat(dict): Add more corrections 2021-07-27 15:40:34 -04:00			`.into_iter()`
			`.filter(\|(t, _)\| is_word(t))`
chore(dict): Don't clear disallowed words 2023-08-07 17:23:36 -04:00			`.map(\|(t, c)\| {`
feat(dict): Preserve correction order We want to be able to recommend more likely corrections first, e.g. for "poped" we want to recommend "popped" before "pooped". 2023-06-26 15:33:59 -04:00			`let new_c: IndexSet<_> = c.into_iter().filter(\|c\| is_word(c)).collect();`
chore(dict): Don't clear disallowed words 2023-08-07 17:23:36 -04:00			`(t, new_c)`
feat(dict): Add more corrections 2021-07-27 15:40:34 -04:00			`})`
			`.collect();`

fix(dict): Remove nilable See conversation in #613 2022-12-06 11:47:08 -05:00			`let varcon_words = varcon_words();`
			`let allowed_words = allowed_words();`
refactor(varcon): Clarify check's meanings 2021-05-15 20:29:27 -04:00			`let word_variants = proper_word_variants();`
chore(dict): Automate more cleanup 2023-06-08 09:54:36 -04:00			`let rows: Vec<_> = rows`
test: Prevent correcting corrections 2021-07-27 14:15:12 -04:00			`.into_iter()`
test: Ensure words.csv stays sorted 2021-07-27 15:09:51 -04:00			`.filter(\|(typo, _)\| {`
style: Address warnings 2024-05-02 12:59:32 -04:00			`let is_disallowed = varcon_words.contains(&UniCase::new(typo));`
test: Prevent correcting corrections 2021-07-27 14:15:12 -04:00			`if is_disallowed {`
style: Make clippy happy 2024-07-26 17:08:02 -04:00			`eprintln!("{typo:?} is disallowed; in varcon");`
test: Prevent correcting corrections 2021-07-27 14:15:12 -04:00			`}`
			`!is_disallowed`
			`})`
fix(dict): Remove nilable See conversation in #613 2022-12-06 11:47:08 -05:00			`.filter(\|(typo, _)\| {`
			`if let Some(reason) = allowed_words.get(typo.as_ref()) {`
style: Make clippy happy 2024-07-26 17:08:02 -04:00			`eprintln!("{typo:?} is disallowed; {reason}");`
fix(dict): Remove nilable See conversation in #613 2022-12-06 11:47:08 -05:00			`false`
			`} else {`
			`true`
			`}`
			`})`
test: Ensure words.csv stays sorted 2021-07-27 15:09:51 -04:00			`.map(\|(typo, corrections)\| {`
feat(dict): Preserve correction order We want to be able to recommend more likely corrections first, e.g. for "poped" we want to recommend "popped" before "pooped". 2023-06-26 15:33:59 -04:00			`let mut new_corrections = IndexSet::new();`
test: Ensure words.csv stays sorted 2021-07-27 15:09:51 -04:00			`for correction in corrections {`
test: Prevent correcting corrections 2021-07-27 14:15:12 -04:00			`let correction = word_variants`
			`.get(correction.as_str())`
			`.and_then(\|words\| find_best_match(&typo, correction.as_str(), words))`
			`.unwrap_or(&correction);`
chore(dict): Automate more cleanup 2023-06-08 09:54:36 -04:00			`new_corrections.insert(correction.to_owned());`
test: Prevent correcting corrections 2021-07-27 14:15:12 -04:00			`}`
test: Ensure words.csv stays sorted 2021-07-27 15:09:51 -04:00			`(typo, new_corrections)`
test: Prevent correcting corrections 2021-07-27 14:15:12 -04:00			`})`
			`.collect();`
chore(dict): Automate more cleanup 2023-06-08 09:54:36 -04:00			`let mut dict = Dict::new();`
			`for (bad, good) in rows {`
			`let current = dict.entry(bad).or_default();`
			`current.extend(good);`
			`}`
feat: Support english dialects The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest english dialect. If someone wants to opt-in, they can have typos correct to a specific english dialect. Fixes #52 Fixes #22 2020-05-27 21:46:41 -04:00
test(dict): Report more cases to user 2023-06-08 10:23:10 -04:00			`let corrections: HashMap<_, _> = dict`
			`.iter()`
			`.flat_map(\|(bad, good)\| good.iter().map(\|good\| (good.to_owned(), bad.to_owned())))`
			`.collect();`
refactor: Make dict processing logic testable Previously all the dictionary cleanup logic was in the function: fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) which parsed the provided buffer as CSV and also took care of writing the processed dictionary back as CSV. This commit factors out the CSV handling, leaving a `process` function behind so that it can be easily tested in the following commit. 2023-06-24 05:14:40 -04:00			`dict.into_iter()`
test(dict): Report more cases to user 2023-06-08 10:23:10 -04:00			`.filter(\|(typo, _)\| {`
			`if let Some(correction) = corrections.get(typo.as_str()) {`
			`eprintln!("{typo} <-> {correction} cycle detected");`
			`false`
			`} else {`
			`true`
			`}`
			`})`
refactor: Make dict processing logic testable Previously all the dictionary cleanup logic was in the function: fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) which parsed the provided buffer as CSV and also took care of writing the processed dictionary back as CSV. This commit factors out the CSV handling, leaving a `process` function behind so that it can be easily tested in the following commit. 2023-06-24 05:14:40 -04:00			`.collect()`
feat: Support english dialects The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest english dialect. If someone wants to opt-in, they can have typos correct to a specific english dialect. Fixes #52 Fixes #22 2020-05-27 21:46:41 -04:00			`}`

feat(dict): Preserve correction order We want to be able to recommend more likely corrections first, e.g. for "poped" we want to recommend "popped" before "pooped". 2023-06-26 15:33:59 -04:00			`#[test]`
			`fn test_preserve_correction_order() {`
			`let dict = process([("foo", ["xyz", "abc"])]);`
			`let mut corrections = dict.get(&UniCase::new("foo".into())).unwrap().iter();`
			`assert_eq!(corrections.next().unwrap(), "xyz");`
			`assert_eq!(corrections.next().unwrap(), "abc");`
			`}`

test: Add some tests for dict processing logic 2023-06-24 06:21:29 -04:00			`#[test]`
			`fn test_merge_duplicates() {`
			`assert_eq!(`
			`process([("foo", ["bar"]), ("foo", ["baz"])]),`
			`dict_from_iter([("foo", ["bar", "baz"])])`
			`);`
			`}`

			`#[test]`
			`fn test_duplicate_correction_removal() {`
			`let dict = process([("foo", ["bar", "bar"])]);`
			`assert_eq!(dict, dict_from_iter([("foo", ["bar"])]));`
			`}`

			`#[test]`
			`fn test_cycle_removal() {`
test(dict): Help prevent correctin valid words This dictionary was taken from OpenOfice 2023-11-01 12:35:01 -04:00			`assert!(process([("foo", ["foobar"]), ("foobar", ["foo"])]).is_empty());`
test: Add some tests for dict processing logic 2023-06-24 06:21:29 -04:00			`}`

			`#[test]`
			`fn test_varcon_removal() {`
			`assert!(process([("colour", ["color"])]).is_empty());`
			`}`

			`#[test]`
			`fn test_varcon_best_match() {`
			`assert_eq!(`
			`process([(`
			`"neighourhood", // note the missing 'b'`
			`["neighborhood"],`
			`)]),`
			`dict_from_iter([(`
			`"neighourhood",`
			`["neighbourhood"] // note that 'bor' has become 'bour' to match the typo`
			`)])`
			`);`
			`}`

feat(dict): Add more corrections 2021-07-27 15:40:34 -04:00			`fn is_word(word: &str) -> bool {`
			`word.chars().all(\|c\| c.is_alphabetic())`
			`}`

chore: Update from _rust/main template 2024-04-26 22:14:01 -04:00			`fn varcon_words() -> HashSet<UniCase<&'static str>> {`
refactor(varcon): Clarify check's meanings 2021-05-15 20:29:27 -04:00			`// Even include improper ones because we should be letting varcon handle that rather than our`
			`// dictionary`
feat: Support english dialects The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest english dialect. If someone wants to opt-in, they can have typos correct to a specific english dialect. Fixes #52 Fixes #22 2020-05-27 21:46:41 -04:00			`varcon::VARCON`
			`.iter()`
test(dict): Don't use unverified varcon This mirrors 17b4d0267eab0b23ff9c704ca0fcf1f2de7ea036 2024-11-05 16:24:27 -05:00			`.filter(\|c\| c.verified)`
feat: Support english dialects The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest english dialect. If someone wants to opt-in, they can have typos correct to a specific english dialect. Fixes #52 Fixes #22 2020-05-27 21:46:41 -04:00			`.flat_map(\|c\| c.entries.iter())`
			`.flat_map(\|e\| e.variants.iter())`
style: Address warnings 2024-05-02 12:59:32 -04:00			`.map(\|v\| UniCase::new(v.word))`
feat: Support english dialects The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest english dialect. If someone wants to opt-in, they can have typos correct to a specific english dialect. Fixes #52 Fixes #22 2020-05-27 21:46:41 -04:00			`.collect()`
			`}`

refactor(varcon): Clarify check's meanings 2021-05-15 20:29:27 -04:00			`fn proper_word_variants() -> HashMap<&'static str, HashSet<&'static str>> {`
feat: Support english dialects The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest english dialect. If someone wants to opt-in, they can have typos correct to a specific english dialect. Fixes #52 Fixes #22 2020-05-27 21:46:41 -04:00			`let mut words: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();`
			`for entry in varcon::VARCON.iter().flat_map(\|c\| c.entries.iter()) {`
			`let variants: HashSet<_> = entry`
			`.variants`
			`.iter()`
			`.filter(\|v\| v.types.iter().any(\|t\| t.tag != Some(varcon::Tag::Improper)))`
			`.map(\|v\| v.word)`
			`.collect();`
			`for variant in variants.iter() {`
chore(deps): update msrv to v1.75 2023-12-28 11:47:51 -05:00			`let set = words.entry(variant).or_default();`
feat: Support english dialects The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest english dialect. If someone wants to opt-in, they can have typos correct to a specific english dialect. Fixes #52 Fixes #22 2020-05-27 21:46:41 -04:00			`set.extend(variants.iter().filter(\|v\| *v != variant));`
			`}`
			`}`
			`words`
			`}`

			`fn find_best_match<'c>(`
			`typo: &'c str,`
			`correction: &'c str,`
refactor(varcon): Clarify check's meanings 2021-05-15 20:29:27 -04:00			`word_variants: &HashSet<&'static str>,`
feat: Support english dialects The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest english dialect. If someone wants to opt-in, they can have typos correct to a specific english dialect. Fixes #52 Fixes #22 2020-05-27 21:46:41 -04:00			`) -> Option<&'c str> {`
refactor(varcon): Clarify check's meanings 2021-05-15 20:29:27 -04:00			`assert!(!word_variants.contains(correction));`
feat(dict): September updates - The correction to `usefull` and `becuase` has existed from the initial implementation, as far as I could tell. - `busses` is a valid variation of `buses` - Had to put in a hack so that `aligning` would be preferred over `alining` because of how the preference algorithm works Fixes #823 2023-10-02 09:18:50 -04:00			`#[allow(clippy::single_match)]`
			`match (typo, correction) {`
			`// Picking the worst option due to a letter swap being an edit distance of two`
			`("alinging", "aligning") => {`
			`return None;`
			`}`
			`_ => {}`
			`}`
feat: Support english dialects The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest english dialect. If someone wants to opt-in, they can have typos correct to a specific english dialect. Fixes #52 Fixes #22 2020-05-27 21:46:41 -04:00			`let current = edit_distance::edit_distance(typo, correction);`
refactor(varcon): Clarify check's meanings 2021-05-15 20:29:27 -04:00			`let mut matches: Vec<_> = word_variants`
feat: Support english dialects The goal is to be as accepting and unobtrusive to new code bases as possible. To this end, we correct typos into the closest english dialect. If someone wants to opt-in, they can have typos correct to a specific english dialect. Fixes #52 Fixes #22 2020-05-27 21:46:41 -04:00			`.iter()`
			`.map(\|r\| (edit_distance::edit_distance(typo, r), *r))`
			`.filter(\|(d, _)\| *d < current)`
			`.collect();`
			`matches.sort_unstable();`
			`matches.into_iter().next().map(\|(_, r)\| r)`
			`}`
fix(dict): Remove nilable See conversation in #613 2022-12-06 11:47:08 -05:00
chore: Update from _rust/main template 2024-04-26 22:14:01 -04:00			`fn allowed_words() -> HashMap<String, String> {`
test(dict): Help prevent correctin valid words This dictionary was taken from OpenOfice 2023-11-01 12:35:01 -04:00			`let allowed_path = "assets/english.csv";`
			`let english_data = std::fs::read(allowed_path).unwrap();`
			`let mut allowed_english = csv::ReaderBuilder::new()`
			`.has_headers(false)`
			`.flexible(true)`
			`.from_reader(english_data.as_slice());`
			`let allowed_english = allowed_english.records().map(Result::unwrap).map(\|r\| {`
			`let mut i = r.iter();`
			`let mut typo = i.next().expect("typo").to_owned();`
			`typo.make_ascii_lowercase();`
			`(typo, String::from("english word"))`
			`});`

fix(dict): Remove nilable See conversation in #613 2022-12-06 11:47:08 -05:00			`let allowed_path = "assets/allowed.csv";`
test(dict): Help prevent correctin valid words This dictionary was taken from OpenOfice 2023-11-01 12:35:01 -04:00			`let local_data = std::fs::read(allowed_path).unwrap();`
			`let mut allowed_local = csv::ReaderBuilder::new()`
fix(dict): Remove nilable See conversation in #613 2022-12-06 11:47:08 -05:00			`.has_headers(false)`
			`.flexible(true)`
test(dict): Help prevent correctin valid words This dictionary was taken from OpenOfice 2023-11-01 12:35:01 -04:00			`.from_reader(local_data.as_slice());`
			`let allowed_local = allowed_local.records().map(Result::unwrap).map(\|r\| {`
			`let mut i = r.iter();`
			`let mut typo = i.next().expect("typo").to_owned();`
			`typo.make_ascii_lowercase();`
			`let reason = i.next().expect("reason").to_owned();`
			`(typo, reason)`
			`});`

			`allowed_english.chain(allowed_local).collect()`
fix(dict): Remove nilable See conversation in #613 2022-12-06 11:47:08 -05:00			`}`