typos/crates/misspell-dict/tests/codegen.rs

107 lines
2.7 KiB
Rust
Raw Normal View History

use std::collections::HashMap;
2022-08-01 15:45:58 -04:00
pub const DICT: &str = include_str!("../assets/words.go");
2022-08-01 15:45:58 -04:00
/// Regenerates the dictionary tables in memory, runs the result through rustfmt, and asserts
/// that it matches the checked-in `../src/dict_codegen.rs`.
#[test]
fn codegen() {
    let mut buffer = Vec::new();
    generate(&mut buffer);

    // The generator only emits text, so the bytes are expected to be valid UTF-8.
    let unformatted = String::from_utf8(buffer).unwrap();
    let formatted = codegenrs::rustfmt(&unformatted, None).unwrap();

    snapbox::assert_eq(snapbox::file!["../src/dict_codegen.rs"], formatted);
}
fn generate<W: std::io::Write>(file: &mut W) {
2022-09-01 08:15:42 -04:00
writeln!(
file,
"// This file is @generated by {}",
file!().replace('\\', "/")
)
.unwrap();
2022-08-01 15:45:58 -04:00
writeln!(file).unwrap();
let Words {
main,
american,
british,
} = parse_dict(DICT);
dictgen::generate_table(
file,
"MAIN_DICTIONARY",
"&[&str]",
main.into_iter().map(|kv| (kv.0, format!("&{:?}", kv.1))),
)
.unwrap();
dictgen::generate_table(
file,
"AMERICAN_DICTIONARY",
"&[&str]",
american
.into_iter()
.map(|kv| (kv.0, format!("&{:?}", kv.1))),
)
.unwrap();
dictgen::generate_table(
file,
"BRITISH_DICTIONARY",
"&[&str]",
british.into_iter().map(|kv| (kv.0, format!("&{:?}", kv.1))),
)
.unwrap();
}
/// The word tables extracted from the dictionary source by `parse_dict`, one map per section.
///
/// Keys and values borrow from the parsed input (`'s`). Each key is the first quoted string of
/// an entry line and each value holds the second quoted string of that line.
// NOTE(review): presumably key = misspelling and value = its correction(s) — confirm against
// the upstream `words.go` format.
struct Words<'s> {
/// Entries found in the section introduced by a line containing `DictMain`.
main: HashMap<&'s str, Vec<&'s str>>,
/// Entries found in the section introduced by a line containing `DictAmerican`.
american: HashMap<&'s str, Vec<&'s str>>,
/// Entries found in the section introduced by a line containing `DictBritish`.
british: HashMap<&'s str, Vec<&'s str>>,
}
2024-04-26 22:14:01 -04:00
/// Parses the Go dictionary source in `raw` into its three word tables.
///
/// Each line is handled on its own after dropping any trailing `//` comment and trimming
/// surrounding whitespace:
///
/// - blank lines and lines starting with `package` are skipped;
/// - a line mentioning `DictMain`, `DictAmerican`, or `DictBritish` selects the table that
///   subsequent entries are stored in;
/// - a line containing `}` closes the current table;
/// - any other line is expected to look like `"key", "value",` and is stored as
///   `key -> [value]`, replacing an earlier entry with the same key; lines that do not match
///   are reported on stderr and otherwise ignored.
///
/// # Panics
///
/// Panics if an entry line appears outside of any table.
fn parse_dict(raw: &str) -> Words<'_> {
    /// Which table entry lines are currently being collected into.
    enum Section {
        Outside,
        Main,
        American,
        British,
    }

    let entry = regex::Regex::new(r#"^"(.*)", "(.*)",$"#).unwrap();

    // Entries seen outside every known section end up here and fail the parse at the end.
    let mut stray = HashMap::new();
    let mut main = HashMap::new();
    let mut american = HashMap::new();
    let mut british = HashMap::new();
    let mut section = Section::Outside;

    for raw_line in raw.lines() {
        // Keep only the text before the first `//`, then trim.
        let line = match raw_line.split_once("//") {
            Some((code, _comment)) => code,
            None => raw_line,
        }
        .trim();

        if line.is_empty() || line.starts_with("package") {
            continue;
        }

        // The order of these checks matters: section headers are tested before `}`.
        if line.contains("DictMain") {
            section = Section::Main;
        } else if line.contains("DictAmerican") {
            section = Section::American;
        } else if line.contains("DictBritish") {
            section = Section::British;
        } else if line.contains('}') {
            section = Section::Outside;
        } else {
            match entry.captures(line) {
                Some(found) => {
                    let table = match section {
                        Section::Outside => &mut stray,
                        Section::Main => &mut main,
                        Section::American => &mut american,
                        Section::British => &mut british,
                    };
                    let key = found.get(1).unwrap().as_str();
                    let value = found.get(2).unwrap().as_str();
                    table.insert(key, vec![value]);
                }
                None => eprintln!("Unknown line: {}", line),
            }
        }
    }

    if !stray.is_empty() {
        panic!("Failed parsing; found extra words: {:#?}", stray);
    }

    Words {
        main,
        american,
        british,
    }
}