mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-25 10:31:02 -05:00
test: Ensure words.csv stays sorted
This commit is contained in:
parent
41048d15b3
commit
0008713395
2 changed files with 21650 additions and 21655 deletions
43253
crates/typos-dict/assets/words.csv
vendored
43253
crates/typos-dict/assets/words.csv
vendored
File diff suppressed because it is too large
Load diff
|
@ -1,63 +1,63 @@
|
||||||
|
use std::collections::BTreeMap;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
use unicase::UniCase;
|
||||||
|
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
|
type Dict = BTreeMap<UniCase<String>, Vec<String>>;
|
||||||
|
|
||||||
fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
|
||||||
let rows: Vec<Vec<_>> = csv::ReaderBuilder::new()
|
let mut rows = Dict::new();
|
||||||
|
csv::ReaderBuilder::new()
|
||||||
.has_headers(false)
|
.has_headers(false)
|
||||||
.flexible(true)
|
.flexible(true)
|
||||||
.from_reader(dict)
|
.from_reader(dict)
|
||||||
.records()
|
.records()
|
||||||
.map(Result::unwrap)
|
.map(Result::unwrap)
|
||||||
.map(|r| {
|
.for_each(|r| {
|
||||||
let row: Vec<String> = r.iter().map(ToOwned::to_owned).collect();
|
let mut i = r.iter();
|
||||||
row
|
let typo = UniCase::new(i.next().expect("typo").to_owned());
|
||||||
})
|
rows.entry(typo)
|
||||||
.collect();
|
.or_insert_with(|| Vec::new())
|
||||||
|
.extend(i.map(ToOwned::to_owned));
|
||||||
|
});
|
||||||
|
|
||||||
let disallowed_typos = varcon_words();
|
let disallowed_typos = varcon_words();
|
||||||
let word_variants = proper_word_variants();
|
let word_variants = proper_word_variants();
|
||||||
let rows: Vec<_> = rows
|
let rows: Dict = rows
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter(|r| {
|
.filter(|(typo, _)| {
|
||||||
let typo = &r[0];
|
|
||||||
let is_disallowed = disallowed_typos.contains(&unicase::UniCase::new(typo));
|
let is_disallowed = disallowed_typos.contains(&unicase::UniCase::new(typo));
|
||||||
if is_disallowed {
|
if is_disallowed {
|
||||||
eprintln!("{:?} is disallowed", typo);
|
eprintln!("{:?} is disallowed", typo);
|
||||||
}
|
}
|
||||||
!is_disallowed
|
!is_disallowed
|
||||||
})
|
})
|
||||||
.map(|r| {
|
.map(|(typo, corrections)| {
|
||||||
let mut fields = r.into_iter();
|
let mut new_corrections = vec![];
|
||||||
let typo = fields.next().expect("at least a typo");
|
for correction in corrections {
|
||||||
let mut row = vec![typo.clone()];
|
|
||||||
for correction in fields {
|
|
||||||
let correction = word_variants
|
let correction = word_variants
|
||||||
.get(correction.as_str())
|
.get(correction.as_str())
|
||||||
.and_then(|words| find_best_match(&typo, correction.as_str(), words))
|
.and_then(|words| find_best_match(&typo, correction.as_str(), words))
|
||||||
.unwrap_or(&correction);
|
.unwrap_or(&correction);
|
||||||
row.push(correction.to_owned());
|
new_corrections.push(correction.to_owned());
|
||||||
}
|
}
|
||||||
row
|
(typo, new_corrections)
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let corrections: std::collections::HashSet<_> = rows
|
let corrections: std::collections::HashSet<_> =
|
||||||
.iter()
|
rows.values().flatten().map(ToOwned::to_owned).collect();
|
||||||
.flat_map(|r| {
|
|
||||||
let mut i = r.iter();
|
|
||||||
i.next();
|
|
||||||
i.map(ToOwned::to_owned)
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
let rows: Vec<_> = rows
|
let rows: Vec<_> = rows
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter(|r| !corrections.contains(&r[0]))
|
.filter(|(typo, _)| !corrections.contains(typo.as_str()))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let mut wtr = csv::WriterBuilder::new().flexible(true).from_writer(file);
|
let mut wtr = csv::WriterBuilder::new().flexible(true).from_writer(file);
|
||||||
for row in rows {
|
for (typo, corrections) in rows {
|
||||||
|
let mut row = corrections;
|
||||||
|
row.insert(0, typo.as_str().to_owned());
|
||||||
wtr.write_record(&row).unwrap();
|
wtr.write_record(&row).unwrap();
|
||||||
}
|
}
|
||||||
wtr.flush().unwrap();
|
wtr.flush().unwrap();
|
||||||
|
|
Loading…
Reference in a new issue