typos/crates/codespell-dict/tests/codegen.rs

83 lines
2 KiB
Rust
Raw Normal View History

2022-08-01 15:45:58 -04:00
/// Raw text of `../assets/dictionary.txt` (path relative to this source file),
/// embedded into the test binary at compile time by `include_str!`.
pub const DICT: &str = include_str!("../assets/dictionary.txt");
2019-10-28 12:01:22 -04:00
2022-08-01 15:45:58 -04:00
/// Regenerates the dictionary source and compares it with the snapshot at
/// `./src/dict_codegen.rs`.
///
/// The output of `generate` is decoded as UTF-8, passed through
/// `codegenrs::rustfmt`, and then handed to `snapbox::assert_eq_path`.
///
/// # Panics
///
/// Panics if the generated bytes are not valid UTF-8 or if the `rustfmt`
/// step returns an error.
#[test]
fn codegen() {
    let mut buffer = Vec::new();
    generate(&mut buffer);

    let unformatted = String::from_utf8(buffer).unwrap();
    let formatted = codegenrs::rustfmt(&unformatted, None).unwrap();

    snapbox::assert_eq_path("./src/dict_codegen.rs", formatted);
}
2023-06-08 09:10:49 -04:00
/// Builds a lowercase CSV view of the dictionary and compares it with the
/// snapshot at `./assets/compatible.csv`.
///
/// An entry is kept only when the typo and every one of its corrections pass
/// `is_word`. Each kept entry becomes one line: the lowercased typo followed
/// by its lowercased corrections, separated by commas.
#[test]
fn compat() {
    use std::fmt::Write as _;

    let mut csv = String::new();
    for (typo, corrections) in parse_dict(DICT) {
        // Entries with any non-word member are left out of the CSV entirely.
        let all_words = is_word(typo) && corrections.iter().all(|word| is_word(word));
        if !all_words {
            continue;
        }

        let row = std::iter::once(typo)
            .chain(corrections)
            .map(str::to_lowercase)
            .collect::<Vec<_>>()
            .join(",");
        writeln!(csv, "{row}").unwrap();
    }

    snapbox::assert_eq_path("./assets/compatible.csv", &csv);
}
/// Returns `true` when `word` tokenizes to exactly one piece and contains no
/// underscore.
///
/// The count is taken over everything produced by running a default
/// `typos::tokens::Tokenizer` on `word` and calling `split()` on each token.
fn is_word(word: &str) -> bool {
    let tokenizer = typos::tokens::Tokenizer::new();
    let piece_count = tokenizer
        .parse_str(word)
        .flat_map(|token| token.split())
        .count();

    piece_count == 1 && !word.contains('_')
}
2022-08-01 15:45:58 -04:00
fn generate<W: std::io::Write>(file: &mut W) {
2022-09-01 08:15:42 -04:00
writeln!(
file,
"// This file is @generated {}",
file!().replace('\\', "/")
)
.unwrap();
2022-08-01 15:45:58 -04:00
writeln!(file).unwrap();
let dict = parse_dict(DICT);
dictgen::generate_table(
file,
"WORD_DICTIONARY",
"&[&str]",
dict.map(|kv| (kv.0, format!("&{:?}", kv.1))),
)
.unwrap();
}
2019-10-28 12:01:22 -04:00
/// Lazily parses dictionary text into `(typo, corrections)` pairs.
///
/// Every line of `raw` is split at its first `->`. The text before it,
/// trimmed, is the typo. The text after it is split on `,`; each piece is
/// trimmed and empty pieces (for example after a trailing comma) are dropped.
/// A later `->` on the same line stays part of the corrections text.
///
/// # Panics
///
/// The returned iterator panics when it reaches a line with no `->`
/// separator; the panic message includes the offending line.
fn parse_dict(raw: &str) -> impl Iterator<Item = (&str, Vec<&str>)> {
    raw.lines().map(|line| {
        let (typo, corrections) = line
            .split_once("->")
            .unwrap_or_else(|| panic!("dictionary line is missing `->`: {line:?}"));

        let corrections = corrections
            .split(',')
            .map(str::trim)
            .filter(|correction| !correction.is_empty())
            .collect();

        (typo.trim(), corrections)
    })
}