Merge pull request #1189 from epage/bench
Some checks failed
Security audit / security_audit (push) Has been cancelled
Security audit / cargo_deny (bans licenses sources) (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Check MSRV (push) Has been cancelled
CI / lockfile (push) Has been cancelled
CI / Docs (push) Has been cancelled
CI / rustfmt (push) Has been cancelled
CI / clippy (push) Has been cancelled
CI / Coverage (push) Has been cancelled
/ linux (aarch64) (push) Has been cancelled
/ linux (x86) (push) Has been cancelled
/ linux (x86_64) (push) Has been cancelled
/ musllinux (aarch64) (push) Has been cancelled
/ musllinux (x86_64) (push) Has been cancelled
/ windows (x64) (push) Has been cancelled
/ windows (x86) (push) Has been cancelled
/ macos (aarch64) (push) Has been cancelled
/ macos (x86_64) (push) Has been cancelled
/ sdist (push) Has been cancelled
pre-commit / pre-commit (push) Has been cancelled
CI / CI (push) Has been cancelled
/ Release (push) Has been cancelled

perf(dict): Add benches
This commit is contained in:
Ed Page 2024-12-28 21:10:56 -06:00 committed by GitHub
commit 93d147382d
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: B5690EEEBB952194
7 changed files with 509853 additions and 7 deletions

1
Cargo.lock generated
View file

@ -1496,6 +1496,7 @@ dependencies = [
"codegenrs", "codegenrs",
"csv", "csv",
"dictgen", "dictgen",
"divan",
"edit-distance", "edit-distance",
"indexmap", "indexmap",
"itertools 0.13.0", "itertools 0.13.0",

View file

@ -24,10 +24,15 @@ itertools = "0.13"
edit-distance = "2.1" edit-distance = "2.1"
unicase = "2.7" unicase = "2.7"
codegenrs = "3.0" codegenrs = "3.0"
dictgen = { version = "^0.2", path = "../dictgen", features = ["codegen"] } dictgen = { version = "^0.2", path = "../dictgen", features = ["codegen", "map"] }
varcon = { version = "^1.0", path = "../varcon" } varcon = { version = "^1.0", path = "../varcon" }
snapbox = "0.6.5" snapbox = "0.6.5"
indexmap = "2.2.6" indexmap = "2.2.6"
divan = "0.1.16"
[lints] [lints]
workspace = true workspace = true
[[bench]]
name = "benches"
harness = false

View file

@ -0,0 +1,51 @@
#![allow(clippy::wildcard_imports)]
mod map_codegen;
mod table_codegen;
mod trie_codegen;
mod miss {
use super::*;
const MISS: &str = "finalizes";
#[divan::bench(args = [unicase::UniCase::new(MISS)])]
fn map(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
map_codegen::WORD.find(&word)
}
#[divan::bench(args = [unicase::UniCase::new(MISS)])]
fn trie(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
trie_codegen::WORD_TRIE.find(&word)
}
#[divan::bench(args = [unicase::UniCase::new(MISS)])]
fn table(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
table_codegen::WORD.find(&word)
}
}
mod hit {
use super::*;
const HIT: &str = "finallizes";
#[divan::bench(args = [unicase::UniCase::new(HIT)])]
fn map(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
map_codegen::WORD.find(&word)
}
#[divan::bench(args = [unicase::UniCase::new(HIT)])]
fn trie(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
trie_codegen::WORD_TRIE.find(&word)
}
#[divan::bench(args = [unicase::UniCase::new(HIT)])]
fn table(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
table_codegen::WORD.find(&word)
}
}
/// Benchmark entry point: hands control to divan's runner.
///
/// The bench target is declared with `harness = false`, so this binary
/// supplies its own `main`.
fn main() {
    divan::main();
}

File diff suppressed because it is too large. (Load diff)

File diff suppressed because it is too large. (Load diff)

File diff suppressed because it is too large. (Load diff)

View file

@ -1,15 +1,41 @@
#[test] #[test]
fn codegen() { fn codegen() {
let mut content = vec![];
const DICT: &[u8] = include_bytes!("../assets/words.csv"); const DICT: &[u8] = include_bytes!("../assets/words.csv");
generate(&mut content, "WORD", DICT);
let content = String::from_utf8(content).unwrap(); let mut trie_content = vec![];
let content = codegenrs::rustfmt(&content, None).unwrap(); generate_trie(&mut trie_content, "WORD", DICT);
snapbox::assert_data_eq!(content, snapbox::file!["../src/word_codegen.rs"].raw()); let trie_content = String::from_utf8(trie_content).unwrap();
let trie_content = codegenrs::rustfmt(&trie_content, None).unwrap();
snapbox::assert_data_eq!(
&trie_content,
snapbox::file!["../benches/benches/trie_codegen.rs"].raw()
);
let mut map_content = vec![];
generate_map(&mut map_content, "WORD", DICT);
let map_content = String::from_utf8(map_content).unwrap();
let map_content = codegenrs::rustfmt(&map_content, None).unwrap();
snapbox::assert_data_eq!(
&map_content,
snapbox::file!["../benches/benches/map_codegen.rs"].raw()
);
let mut table_content = vec![];
generate_table(&mut table_content, "WORD", DICT);
let table_content = String::from_utf8(table_content).unwrap();
let table_content = codegenrs::rustfmt(&table_content, None).unwrap();
snapbox::assert_data_eq!(
&table_content,
snapbox::file!["../benches/benches/table_codegen.rs"].raw()
);
snapbox::assert_data_eq!(
&trie_content,
snapbox::file!["../src/word_codegen.rs"].raw()
);
} }
fn generate<W: std::io::Write>(file: &mut W, prefix: &str, dict: &[u8]) { fn generate_trie<W: std::io::Write>(file: &mut W, prefix: &str, dict: &[u8]) {
writeln!( writeln!(
file, file,
"// This file is @generated by {}", "// This file is @generated by {}",
@ -44,3 +70,73 @@ fn generate<W: std::io::Write>(file: &mut W, prefix: &str, dict: &[u8]) {
) )
.unwrap(); .unwrap();
} }
/// Writes Rust source for a `dictgen` map built from the CSV bytes in `dict`.
///
/// The output sent to `file` is, in order: a `@generated` header naming this
/// source file (with backslashes normalised to `/`), two inner
/// `#![allow(...)]` attributes, a blank line, and then whatever
/// `dictgen::generate_map` emits for `prefix`.
///
/// The CSV is read without a header row and with a variable number of fields
/// per record. The first field of each record is the key; the remaining fields
/// are rendered as a `&[...]` slice literal of double-quoted strings.
///
/// # Panics
///
/// Panics if writing to `file` fails, if a CSV record cannot be read, if a
/// record has no first field, or if `dictgen::generate_map` returns an error.
fn generate_map<W: std::io::Write>(file: &mut W, prefix: &str, dict: &[u8]) {
    let generator = file!().replace('\\', "/");
    writeln!(file, "// This file is @generated by {generator}").unwrap();
    writeln!(file, "#![allow(clippy::unreadable_literal)]").unwrap();
    writeln!(file, "#![allow(unreachable_pub)]").unwrap();
    writeln!(file).unwrap();

    // Rows are collected up front so the keys handed to `dictgen` can borrow
    // from them.
    let mut reader = csv::ReaderBuilder::new()
        .has_headers(false)
        .flexible(true)
        .from_reader(dict);
    let rows: Vec<_> = reader.records().map(Result::unwrap).collect();

    let entries = rows.iter().map(|row| {
        let mut fields = row.iter();
        let key = fields.next().unwrap();
        let quoted = fields.map(|field| format!("\"{field}\""));
        let value = format!("&[{}]", itertools::join(quoted, ", "));
        (key, value)
    });

    dictgen::generate_map(file, prefix, "&'static [&'static str]", entries).unwrap();
}
/// Writes Rust source for a `dictgen` table built from the CSV bytes in `dict`.
///
/// The output sent to `file` is, in order: a `@generated` header naming this
/// source file (with backslashes normalised to `/`), two inner
/// `#![allow(...)]` attributes, a blank line, and then whatever
/// `dictgen::generate_table` emits for `prefix`.
///
/// The CSV is read without a header row and with a variable number of fields
/// per record. The first field of each record is the key; the remaining fields
/// are rendered as a `&[...]` slice literal of double-quoted strings.
///
/// # Panics
///
/// Panics if writing to `file` fails, if a CSV record cannot be read, if a
/// record has no first field, or if `dictgen::generate_table` returns an error.
fn generate_table<W: std::io::Write>(file: &mut W, prefix: &str, dict: &[u8]) {
    let generator = file!().replace('\\', "/");
    writeln!(file, "// This file is @generated by {generator}").unwrap();
    writeln!(file, "#![allow(clippy::unreadable_literal)]").unwrap();
    writeln!(file, "#![allow(unreachable_pub)]").unwrap();
    writeln!(file).unwrap();

    // Rows are collected up front so the keys handed to `dictgen` can borrow
    // from them.
    let mut reader = csv::ReaderBuilder::new()
        .has_headers(false)
        .flexible(true)
        .from_reader(dict);
    let rows: Vec<_> = reader.records().map(Result::unwrap).collect();

    let entries = rows.iter().map(|row| {
        let mut fields = row.iter();
        let key = fields.next().unwrap();
        let quoted = fields.map(|field| format!("\"{field}\""));
        let value = format!("&[{}]", itertools::join(quoted, ", "));
        (key, value)
    });

    dictgen::generate_table(file, prefix, "&'static [&'static str]", entries).unwrap();
}