typos/crates/typos-dict/tests/codegen.rs

306 lines
10 KiB
Rust
Raw Normal View History

2022-08-01 15:45:58 -04:00
/// Regenerates every dictionary representation from `../assets/words.csv` and compares each
/// rustfmt-formatted result with the file named in the matching `snapbox::file!` path.
///
/// The map output is compared twice: against the bench copy and against
/// `../src/word_codegen.rs`.
#[test]
fn codegen() {
    const DICT: &[u8] = include_bytes!("../assets/words.csv");

    // Runs one generator over `DICT`, decodes its bytes as UTF-8 and formats the text.
    let render = |generate: fn(&mut Vec<u8>, &str, &[u8]), name: &str| {
        let mut raw = vec![];
        generate(&mut raw, name, DICT);
        let unformatted = String::from_utf8(raw).unwrap();
        codegenrs::rustfmt(&unformatted, None).unwrap()
    };

    let trie_content = render(generate_trie::<Vec<u8>>, "WORD");
    snapbox::assert_data_eq!(
        &trie_content,
        snapbox::file!["../benches/benches/trie_codegen.rs"].raw()
    );

    // Kept alive until the end: it is checked a second time against the crate source below.
    let map_content = render(generate_map::<Vec<u8>>, "WORD");
    snapbox::assert_data_eq!(
        &map_content,
        snapbox::file!["../benches/benches/map_codegen.rs"].raw()
    );

    let cased_map_content = render(generate_cased_map::<Vec<u8>>, "WORD");
    snapbox::assert_data_eq!(
        &cased_map_content,
        snapbox::file!["../benches/benches/cased_map_codegen.rs"].raw()
    );

    let ordered_map_content = render(generate_ordered_map::<Vec<u8>>, "WORD");
    snapbox::assert_data_eq!(
        &ordered_map_content,
        snapbox::file!["../benches/benches/ordered_map_codegen.rs"].raw()
    );

    // Unlike the other generators, this one is given the name `Word` rather than `WORD`.
    let aho_corasick_content = render(generate_aho_corasick::<Vec<u8>>, "Word");
    snapbox::assert_data_eq!(
        &aho_corasick_content,
        snapbox::file!["../benches/benches/aho_corasick_codegen.rs"].raw()
    );

    snapbox::assert_data_eq!(&map_content, snapbox::file!["../src/word_codegen.rs"].raw());
}
2019-10-28 12:09:29 -04:00
fn generate_trie<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
2022-09-01 08:15:42 -04:00
writeln!(
file,
"// This file is @generated by {}",
file!().replace('\\', "/")
)
.unwrap();
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
2024-04-26 22:14:01 -04:00
writeln!(file, "#![allow(unreachable_pub)]",).unwrap();
writeln!(file).unwrap();
2020-08-15 22:03:00 -04:00
let records: Vec<_> = csv::ReaderBuilder::new()
.has_headers(false)
.flexible(true)
.from_reader(dict)
.records()
.map(|r| r.unwrap())
.collect();
dictgen::DictGen::new()
.name(name)
.value_type("&[&str]")
.trie()
.write(
file,
records.iter().map(|record| {
let mut record_fields = record.iter();
let key = record_fields.next().unwrap();
let value = format!(
"&[{}]",
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
);
(key, value)
}),
)
.unwrap();
}
2024-12-24 22:34:34 -05:00
2024-12-30 17:48:19 -05:00
/// Writes a generated-file header to `file`, followed by four `dictgen` tables built from
/// `dict`, in this order:
///
/// 1. `{name}_ASCII_LOWER`: records whose first column is ASCII, keyed by that column as-is.
/// 2. `{name}_ASCII_UPPER`: the same records, keyed by `heck`'s `to_shouty_snake_case`.
/// 3. `{name}_ASCII_TITLE`: the same records, keyed by `heck`'s `to_title_case`.
/// 4. `{name}_UNICODE`: records whose first column is not ASCII, keyed as-is.
///
/// The three ASCII tables use the builder's `map()` mode with `unicase(false)`; the last one
/// uses `ordered_map()`. `dict` is read as header-less CSV whose records may differ in length,
/// and every entry value is the record's remaining columns rendered as a `&[...]` literal of
/// double-quoted strings.
///
/// # Panics
///
/// Panics if writing to `file` fails, if a CSV record cannot be read, or if a record has no
/// first column.
fn generate_cased_map<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
    use heck::{ToShoutySnakeCase, ToTitleCase};

    /// Renders the given columns as a `&["a", "b"]` slice literal.
    fn slice_literal<'r>(columns: impl Iterator<Item = &'r str>) -> String {
        let quoted: Vec<String> = columns.map(|column| format!("\"{column}\"")).collect();
        format!("&[{}]", quoted.join(", "))
    }

    // `file!()` may contain backslashes on Windows; they are normalised to forward slashes.
    let preamble = [
        format!("// This file is @generated by {}", file!().replace('\\', "/")),
        String::from("#![allow(clippy::unreadable_literal)]"),
        String::from("#![allow(unreachable_pub)]"),
        String::new(),
    ];
    for line in &preamble {
        writeln!(file, "{line}").unwrap();
    }

    let rows = csv::ReaderBuilder::new()
        .has_headers(false)
        .flexible(true)
        .from_reader(dict)
        .records()
        .collect::<Result<Vec<_>, _>>()
        .unwrap();

    // Split once on whether the key column is ASCII; relative record order is preserved.
    let (ascii_rows, unicode_rows): (Vec<_>, Vec<_>) = rows
        .iter()
        .partition(|row| row.iter().next().unwrap().is_ascii());

    let lower_name = format!("{name}_ASCII_LOWER");
    dictgen::DictGen::new()
        .name(&lower_name)
        .value_type("&[&str]")
        .map()
        .unicase(false)
        .write(
            file,
            ascii_rows.iter().copied().map(|row| {
                let mut columns = row.iter();
                let key = columns.next().unwrap();
                (key, slice_literal(columns))
            }),
        )
        .unwrap();

    let upper_name = format!("{name}_ASCII_UPPER");
    dictgen::DictGen::new()
        .name(&upper_name)
        .value_type("&[&str]")
        .map()
        .unicase(false)
        .write(
            file,
            ascii_rows.iter().copied().map(|row| {
                let mut columns = row.iter();
                let key = columns.next().unwrap().to_shouty_snake_case();
                (key, slice_literal(columns))
            }),
        )
        .unwrap();

    let title_name = format!("{name}_ASCII_TITLE");
    dictgen::DictGen::new()
        .name(&title_name)
        .value_type("&[&str]")
        .map()
        .unicase(false)
        .write(
            file,
            ascii_rows.iter().copied().map(|row| {
                let mut columns = row.iter();
                let key = columns.next().unwrap().to_title_case();
                (key, slice_literal(columns))
            }),
        )
        .unwrap();

    let unicode_name = format!("{name}_UNICODE");
    dictgen::DictGen::new()
        .name(&unicode_name)
        .value_type("&[&str]")
        .ordered_map()
        .write(
            file,
            unicode_rows.iter().copied().map(|row| {
                let mut columns = row.iter();
                let key = columns.next().unwrap();
                (key, slice_literal(columns))
            }),
        )
        .unwrap();
}
fn generate_map<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
2024-12-24 22:34:34 -05:00
writeln!(
file,
"// This file is @generated by {}",
file!().replace('\\', "/")
)
.unwrap();
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
writeln!(file, "#![allow(unreachable_pub)]",).unwrap();
writeln!(file).unwrap();
let records: Vec<_> = csv::ReaderBuilder::new()
.has_headers(false)
.flexible(true)
.from_reader(dict)
.records()
.map(|r| r.unwrap())
.collect();
dictgen::DictGen::new()
.name(name)
.value_type("&[&str]")
.map()
.write(
file,
records.iter().map(|record| {
let mut record_fields = record.iter();
let key = record_fields.next().unwrap();
let value = format!(
"&[{}]",
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
);
(key, value)
}),
)
.unwrap();
2024-12-24 22:34:34 -05:00
}
2024-12-24 22:41:48 -05:00
2024-12-30 15:57:39 -05:00
fn generate_ordered_map<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
2024-12-24 22:41:48 -05:00
writeln!(
file,
"// This file is @generated by {}",
file!().replace('\\', "/")
)
.unwrap();
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
writeln!(file, "#![allow(unreachable_pub)]",).unwrap();
writeln!(file).unwrap();
let records: Vec<_> = csv::ReaderBuilder::new()
.has_headers(false)
.flexible(true)
.from_reader(dict)
.records()
.map(|r| r.unwrap())
.collect();
dictgen::DictGen::new()
.name(name)
.value_type("&[&str]")
2024-12-30 15:57:39 -05:00
.ordered_map()
.write(
file,
records.iter().map(|record| {
let mut record_fields = record.iter();
let key = record_fields.next().unwrap();
let value = format!(
"&[{}]",
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
);
(key, value)
}),
)
.unwrap();
2024-12-24 22:41:48 -05:00
}
/// Writes a generated-file header to `file`, followed by the output of the `dictgen` builder's
/// `aho_corasick()` mode for the entries in `dict`.
///
/// Compared with the other generators in this file, the header carries an extra
/// `clippy::redundant_static_lifetimes` allowance and the value type is spelled
/// `&'static [&'static str]`.
///
/// `dict` is read as header-less CSV whose records may differ in length. The first column of
/// each record is the entry key; the remaining columns are rendered as a `&[...]` literal of
/// double-quoted strings and used as the entry value.
///
/// # Panics
///
/// Panics if writing to `file` fails, if a CSV record cannot be read, or if a record has no
/// first column.
fn generate_aho_corasick<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
    // `file!()` may contain backslashes on Windows; they are normalised to forward slashes.
    let banner = format!("// This file is @generated by {}", file!().replace('\\', "/"));
    for line in [
        banner.as_str(),
        "#![allow(clippy::unreadable_literal)]",
        "#![allow(clippy::redundant_static_lifetimes)]",
        "#![allow(unreachable_pub)]",
        "",
    ] {
        writeln!(file, "{line}").unwrap();
    }

    let rows = csv::ReaderBuilder::new()
        .has_headers(false)
        .flexible(true)
        .from_reader(dict)
        .records()
        .collect::<Result<Vec<_>, _>>()
        .unwrap();

    let entries = rows.iter().map(|row| {
        let mut columns = row.iter();
        let key = columns.next().unwrap();

        // Build `&["a", "b"]` by hand: a comma-space separator goes before every column but
        // the first.
        let mut value = String::from("&[");
        for (position, column) in columns.enumerate() {
            if position > 0 {
                value.push_str(", ");
            }
            value.push('"');
            value.push_str(column);
            value.push('"');
        }
        value.push(']');

        (key, value)
    });

    dictgen::DictGen::new()
        .name(name)
        .value_type("&'static [&'static str]")
        .aho_corasick()
        .write(file, entries)
        .unwrap();
}