2022-08-01 14:45:58 -05:00
|
|
|
/// Regenerates every dictionary representation from `../assets/words.csv`
/// and compares each one against its checked-in snapshot file.
#[test]
fn codegen() {
    const DICT: &[u8] = include_bytes!("../assets/words.csv");

    // Runs one generator into an in-memory buffer, then returns the
    // rustfmt-formatted text of what it wrote.
    let render = |generate: fn(&mut Vec<u8>, &str, &[u8]), name: &str| {
        let mut raw = vec![];
        generate(&mut raw, name, DICT);
        let source = String::from_utf8(raw).unwrap();
        codegenrs::rustfmt(&source, None).unwrap()
    };

    let trie_content = render(generate_trie, "WORD");
    snapbox::assert_data_eq!(
        &trie_content,
        snapbox::file!["../benches/benches/trie_codegen.rs"].raw()
    );

    let map_content = render(generate_map, "WORD");
    snapbox::assert_data_eq!(
        &map_content,
        snapbox::file!["../benches/benches/map_codegen.rs"].raw()
    );

    let cased_map_content = render(generate_cased_map, "WORD");
    snapbox::assert_data_eq!(
        &cased_map_content,
        snapbox::file!["../benches/benches/cased_map_codegen.rs"].raw()
    );

    let ordered_map_content = render(generate_ordered_map, "WORD");
    snapbox::assert_data_eq!(
        &ordered_map_content,
        snapbox::file!["../benches/benches/ordered_map_codegen.rs"].raw()
    );

    // Unlike the other generators, this one is given the name "Word".
    let aho_corasick_content = render(generate_aho_corasick, "Word");
    snapbox::assert_data_eq!(
        &aho_corasick_content,
        snapbox::file!["../benches/benches/aho_corasick_codegen.rs"].raw()
    );

    // The map output is additionally checked against `../src/word_codegen.rs`.
    snapbox::assert_data_eq!(&map_content, snapbox::file!["../src/word_codegen.rs"].raw());
}
|
2019-10-28 10:09:29 -06:00
|
|
|
|
2024-12-28 20:58:19 -06:00
|
|
|
fn generate_trie<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
|
2022-09-01 07:15:42 -05:00
|
|
|
writeln!(
|
|
|
|
file,
|
|
|
|
"// This file is @generated by {}",
|
|
|
|
file!().replace('\\', "/")
|
|
|
|
)
|
|
|
|
.unwrap();
|
2019-10-29 07:53:32 -06:00
|
|
|
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
2024-04-26 21:14:01 -05:00
|
|
|
writeln!(file, "#![allow(unreachable_pub)]",).unwrap();
|
2019-10-05 07:30:30 -06:00
|
|
|
writeln!(file).unwrap();
|
|
|
|
|
2020-08-15 21:03:00 -05:00
|
|
|
let records: Vec<_> = csv::ReaderBuilder::new()
|
|
|
|
.has_headers(false)
|
2021-05-15 19:06:04 -05:00
|
|
|
.flexible(true)
|
2023-09-25 12:13:53 -05:00
|
|
|
.from_reader(dict)
|
2019-10-05 07:30:30 -06:00
|
|
|
.records()
|
|
|
|
.map(|r| r.unwrap())
|
|
|
|
.collect();
|
2024-12-28 20:58:19 -06:00
|
|
|
dictgen::DictGen::new()
|
|
|
|
.name(name)
|
|
|
|
.value_type("&[&str]")
|
|
|
|
.trie()
|
|
|
|
.write(
|
|
|
|
file,
|
|
|
|
records.iter().map(|record| {
|
|
|
|
let mut record_fields = record.iter();
|
|
|
|
let key = record_fields.next().unwrap();
|
|
|
|
let value = format!(
|
|
|
|
"&[{}]",
|
|
|
|
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
|
|
|
|
);
|
|
|
|
(key, value)
|
|
|
|
}),
|
|
|
|
)
|
|
|
|
.unwrap();
|
2019-10-05 07:30:30 -06:00
|
|
|
}
|
2024-12-24 21:34:34 -06:00
|
|
|
|
2024-12-30 16:48:19 -06:00
|
|
|
fn generate_cased_map<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
|
|
|
|
writeln!(
|
|
|
|
file,
|
|
|
|
"// This file is @generated by {}",
|
|
|
|
file!().replace('\\', "/")
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
|
|
|
writeln!(file, "#![allow(unreachable_pub)]",).unwrap();
|
|
|
|
writeln!(file).unwrap();
|
|
|
|
|
|
|
|
let records: Vec<_> = csv::ReaderBuilder::new()
|
|
|
|
.has_headers(false)
|
|
|
|
.flexible(true)
|
|
|
|
.from_reader(dict)
|
|
|
|
.records()
|
|
|
|
.map(|r| r.unwrap())
|
|
|
|
.collect();
|
|
|
|
dictgen::DictGen::new()
|
|
|
|
.name(&format!("{name}_ASCII_LOWER"))
|
|
|
|
.value_type("&[&str]")
|
|
|
|
.map()
|
|
|
|
.unicase(false)
|
|
|
|
.write(
|
|
|
|
file,
|
|
|
|
records
|
|
|
|
.iter()
|
|
|
|
.filter(|r| r.iter().next().unwrap().is_ascii())
|
|
|
|
.map(|record| {
|
|
|
|
let mut record_fields = record.iter();
|
|
|
|
let key = record_fields.next().unwrap();
|
|
|
|
let value = format!(
|
|
|
|
"&[{}]",
|
|
|
|
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
|
|
|
|
);
|
|
|
|
(key, value)
|
|
|
|
}),
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
dictgen::DictGen::new()
|
|
|
|
.name(&format!("{name}_ASCII_UPPER"))
|
|
|
|
.value_type("&[&str]")
|
|
|
|
.map()
|
|
|
|
.unicase(false)
|
|
|
|
.write(
|
|
|
|
file,
|
|
|
|
records
|
|
|
|
.iter()
|
|
|
|
.filter(|r| r.iter().next().unwrap().is_ascii())
|
|
|
|
.map(|record| {
|
|
|
|
use heck::ToShoutySnakeCase;
|
|
|
|
let mut record_fields = record.iter();
|
|
|
|
let key = record_fields.next().unwrap().to_shouty_snake_case();
|
|
|
|
let value = format!(
|
|
|
|
"&[{}]",
|
|
|
|
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
|
|
|
|
);
|
|
|
|
(key, value)
|
|
|
|
}),
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
dictgen::DictGen::new()
|
|
|
|
.name(&format!("{name}_ASCII_TITLE"))
|
|
|
|
.value_type("&[&str]")
|
|
|
|
.map()
|
|
|
|
.unicase(false)
|
|
|
|
.write(
|
|
|
|
file,
|
|
|
|
records
|
|
|
|
.iter()
|
|
|
|
.filter(|r| r.iter().next().unwrap().is_ascii())
|
|
|
|
.map(|record| {
|
|
|
|
use heck::ToTitleCase;
|
|
|
|
let mut record_fields = record.iter();
|
|
|
|
let key = record_fields.next().unwrap().to_title_case();
|
|
|
|
let value = format!(
|
|
|
|
"&[{}]",
|
|
|
|
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
|
|
|
|
);
|
|
|
|
(key, value)
|
|
|
|
}),
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
dictgen::DictGen::new()
|
|
|
|
.name(&format!("{name}_UNICODE"))
|
|
|
|
.value_type("&[&str]")
|
|
|
|
.ordered_map()
|
|
|
|
.write(
|
|
|
|
file,
|
|
|
|
records
|
|
|
|
.iter()
|
|
|
|
.filter(|r| !r.iter().next().unwrap().is_ascii())
|
|
|
|
.map(|record| {
|
|
|
|
let mut record_fields = record.iter();
|
|
|
|
let key = record_fields.next().unwrap();
|
|
|
|
let value = format!(
|
|
|
|
"&[{}]",
|
|
|
|
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
|
|
|
|
);
|
|
|
|
(key, value)
|
|
|
|
}),
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
}
|
|
|
|
|
2024-12-28 20:58:19 -06:00
|
|
|
fn generate_map<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
|
2024-12-24 21:34:34 -06:00
|
|
|
writeln!(
|
|
|
|
file,
|
|
|
|
"// This file is @generated by {}",
|
|
|
|
file!().replace('\\', "/")
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
|
|
|
writeln!(file, "#![allow(unreachable_pub)]",).unwrap();
|
|
|
|
writeln!(file).unwrap();
|
|
|
|
|
|
|
|
let records: Vec<_> = csv::ReaderBuilder::new()
|
|
|
|
.has_headers(false)
|
|
|
|
.flexible(true)
|
|
|
|
.from_reader(dict)
|
|
|
|
.records()
|
|
|
|
.map(|r| r.unwrap())
|
|
|
|
.collect();
|
2024-12-28 20:58:19 -06:00
|
|
|
dictgen::DictGen::new()
|
|
|
|
.name(name)
|
|
|
|
.value_type("&[&str]")
|
|
|
|
.map()
|
|
|
|
.write(
|
|
|
|
file,
|
|
|
|
records.iter().map(|record| {
|
|
|
|
let mut record_fields = record.iter();
|
|
|
|
let key = record_fields.next().unwrap();
|
|
|
|
let value = format!(
|
|
|
|
"&[{}]",
|
|
|
|
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
|
|
|
|
);
|
|
|
|
(key, value)
|
|
|
|
}),
|
|
|
|
)
|
|
|
|
.unwrap();
|
2024-12-24 21:34:34 -06:00
|
|
|
}
|
2024-12-24 21:41:48 -06:00
|
|
|
|
2024-12-30 14:57:39 -06:00
|
|
|
fn generate_ordered_map<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
|
2024-12-24 21:41:48 -06:00
|
|
|
writeln!(
|
|
|
|
file,
|
|
|
|
"// This file is @generated by {}",
|
|
|
|
file!().replace('\\', "/")
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
|
|
|
writeln!(file, "#![allow(unreachable_pub)]",).unwrap();
|
|
|
|
writeln!(file).unwrap();
|
|
|
|
|
|
|
|
let records: Vec<_> = csv::ReaderBuilder::new()
|
|
|
|
.has_headers(false)
|
|
|
|
.flexible(true)
|
|
|
|
.from_reader(dict)
|
|
|
|
.records()
|
|
|
|
.map(|r| r.unwrap())
|
|
|
|
.collect();
|
2024-12-28 20:58:19 -06:00
|
|
|
dictgen::DictGen::new()
|
|
|
|
.name(name)
|
|
|
|
.value_type("&[&str]")
|
2024-12-30 14:57:39 -06:00
|
|
|
.ordered_map()
|
2024-12-28 20:58:19 -06:00
|
|
|
.write(
|
|
|
|
file,
|
|
|
|
records.iter().map(|record| {
|
|
|
|
let mut record_fields = record.iter();
|
|
|
|
let key = record_fields.next().unwrap();
|
|
|
|
let value = format!(
|
|
|
|
"&[{}]",
|
|
|
|
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
|
|
|
|
);
|
|
|
|
(key, value)
|
|
|
|
}),
|
|
|
|
)
|
|
|
|
.unwrap();
|
2024-12-24 21:41:48 -06:00
|
|
|
}
|
2024-12-31 07:48:55 -06:00
|
|
|
|
|
|
|
fn generate_aho_corasick<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
|
|
|
|
writeln!(
|
|
|
|
file,
|
|
|
|
"// This file is @generated by {}",
|
|
|
|
file!().replace('\\', "/")
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
|
|
|
writeln!(file, "#![allow(clippy::redundant_static_lifetimes)]",).unwrap();
|
|
|
|
writeln!(file, "#![allow(unreachable_pub)]",).unwrap();
|
|
|
|
writeln!(file).unwrap();
|
|
|
|
|
|
|
|
let records: Vec<_> = csv::ReaderBuilder::new()
|
|
|
|
.has_headers(false)
|
|
|
|
.flexible(true)
|
|
|
|
.from_reader(dict)
|
|
|
|
.records()
|
|
|
|
.map(|r| r.unwrap())
|
|
|
|
.collect();
|
|
|
|
dictgen::DictGen::new()
|
|
|
|
.name(name)
|
|
|
|
.value_type("&'static [&'static str]")
|
|
|
|
.aho_corasick()
|
|
|
|
.write(
|
|
|
|
file,
|
|
|
|
records.iter().map(|record| {
|
|
|
|
let mut record_fields = record.iter();
|
|
|
|
let key = record_fields.next().unwrap();
|
|
|
|
let value = format!(
|
|
|
|
"&[{}]",
|
|
|
|
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
|
|
|
|
);
|
|
|
|
(key, value)
|
|
|
|
}),
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
}
|