Merge pull request #928 from epage/divan

test(bench): Switch to Divan
This commit is contained in:
Ed Page 2024-02-07 14:50:04 -06:00 committed by GitHub
commit ba0d73e480
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: B5690EEEBB952194
8 changed files with 411 additions and 515 deletions

289
Cargo.lock generated
View file

@ -38,12 +38,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anstream"
version = "0.6.11"
@ -157,18 +151,6 @@ dependencies = [
"serde",
]
[[package]]
name = "bumpalo"
version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
[[package]]
name = "cast"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
version = "1.0.83"
@ -184,33 +166,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "ciborium"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926"
dependencies = [
"ciborium-io",
"ciborium-ll",
"serde",
]
[[package]]
name = "ciborium-io"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656"
[[package]]
name = "ciborium-ll"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b"
dependencies = [
"ciborium-io",
"half",
]
[[package]]
name = "clap"
version = "4.4.18"
@ -241,6 +196,7 @@ dependencies = [
"anstyle",
"clap_lex",
"strsim",
"terminal_size",
]
[[package]]
@ -301,6 +257,12 @@ dependencies = [
"colorchoice",
]
[[package]]
name = "condtype"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af"
[[package]]
name = "content_inspector"
version = "0.2.4"
@ -316,42 +278,6 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
[[package]]
name = "criterion"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
dependencies = [
"anes",
"cast",
"ciborium",
"clap",
"criterion-plot",
"is-terminal",
"itertools 0.10.5",
"num-traits",
"once_cell",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_derive",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [
"cast",
"itertools 0.10.5",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.3"
@ -488,6 +414,31 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
[[package]]
name = "divan"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5398159ee27f2b123d89b856bad61725442f37df5fb98c30cd570c318d594aee"
dependencies = [
"cfg-if",
"clap",
"condtype",
"divan-macros",
"libc",
"regex-lite",
]
[[package]]
name = "divan-macros"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5092f66eb3563a01e85552731ae82c04c934ff4efd7ad1a0deae7b948f4b3ec4"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.38",
]
[[package]]
name = "doc-comment"
version = "0.3.3"
@ -649,12 +600,6 @@ dependencies = [
"walkdir",
]
[[package]]
name = "half"
version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
[[package]]
name = "hashbrown"
version = "0.14.1"
@ -667,12 +612,6 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "hermit-abi"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
[[package]]
name = "human-panic"
version = "1.2.3"
@ -738,26 +677,6 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "is-terminal"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
dependencies = [
"hermit-abi",
"rustix",
"windows-sys 0.48.0",
]
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.11.0"
@ -782,15 +701,6 @@ version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
[[package]]
name = "js-sys"
version = "0.3.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "kstring"
version = "2.0.0"
@ -873,15 +783,6 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
[[package]]
name = "num-traits"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
dependencies = [
"autocfg",
]
[[package]]
name = "object"
version = "0.32.1"
@ -897,12 +798,6 @@ version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "oorandom"
version = "11.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "os_info"
version = "3.7.0"
@ -978,34 +873,6 @@ dependencies = [
"unicase",
]
[[package]]
name = "plotters"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45"
dependencies = [
"num-traits",
"plotters-backend",
"plotters-svg",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "plotters-backend"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609"
[[package]]
name = "plotters-svg"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab"
dependencies = [
"plotters-backend",
]
[[package]]
name = "predicates"
version = "3.0.4"
@ -1125,6 +992,12 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-lite"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e"
[[package]]
name = "regex-syntax"
version = "0.8.2"
@ -1338,6 +1211,16 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "terminal_size"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7"
dependencies = [
"rustix",
"windows-sys 0.48.0",
]
[[package]]
name = "termtree"
version = "0.4.1"
@ -1374,16 +1257,6 @@ dependencies = [
"once_cell",
]
[[package]]
name = "tinytemplate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
dependencies = [
"serde",
"serde_json",
]
[[package]]
name = "toml"
version = "0.8.9"
@ -1470,10 +1343,10 @@ dependencies = [
"clap-verbosity-flag",
"colorchoice-clap",
"content_inspector",
"criterion",
"derive_more",
"derive_setters",
"difflib",
"divan",
"encoding_rs",
"env_logger",
"globset",
@ -1669,70 +1542,6 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.38",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.38",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
[[package]]
name = "web-sys"
version = "0.3.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "wikipedia-dict"
version = "0.4.0"

View file

@ -79,16 +79,16 @@ encoding_rs = "0.8.33"
[dev-dependencies]
assert_fs = "1.1"
trycmd = "0.14.20"
criterion = "0.5"
divan = "0.1.11"
snapbox = "0.4.16"
trycmd = "0.14.20"
[[bench]]
name = "checks"
name = "check_file"
harness = false
[[bench]]
name = "corrections"
name = "correct_word"
harness = false
[[bench]]

View file

@ -0,0 +1,93 @@
mod data;
use assert_fs::prelude::*;
use typos_cli::file::FileChecker;
/// Benchmarks `FoundFiles::check_file` once per sample in `data::DATA`.
///
/// The sample is written to a file inside a fresh temporary directory before
/// measurement starts; only the `check_file` call sits inside the benchmarked
/// closure. A byte counter sized to the sample content is attached.
#[divan::bench(args = data::DATA)]
fn found_files(bencher: divan::Bencher, sample: &data::Data) {
    let content = sample.content();

    // Materialize the sample on disk so the checker is handed a real path.
    let scratch = assert_fs::TempDir::new().unwrap();
    let file = scratch.child(sample.name());
    file.write_str(content).unwrap();

    let builtin = typos_cli::dict::BuiltIn::new(Default::default());
    let tokens = typos::tokens::Tokenizer::new();
    let policy = typos_cli::policy::Policy::new()
        .dict(&builtin)
        .tokenizer(&tokens);

    let checker = typos_cli::file::FoundFiles;
    let bytes = divan::counter::BytesCount::of_str(content);
    bencher
        .counter(bytes)
        .bench_local(|| checker.check_file(file.path(), true, &policy, &PrintSilent))
}
/// Benchmarks `Identifiers::check_file` once per sample in `data::DATA`.
///
/// Setup (dictionary, tokenizer, policy, and the on-disk copy of the sample)
/// happens outside the measured closure; a byte counter sized to the sample
/// content is attached.
#[divan::bench(args = data::DATA)]
fn identifiers(bencher: divan::Bencher, sample: &data::Data) {
    let content = sample.content();

    // Materialize the sample on disk so the checker is handed a real path.
    let scratch = assert_fs::TempDir::new().unwrap();
    let file = scratch.child(sample.name());
    file.write_str(content).unwrap();

    let builtin = typos_cli::dict::BuiltIn::new(Default::default());
    let tokens = typos::tokens::Tokenizer::new();
    let policy = typos_cli::policy::Policy::new()
        .dict(&builtin)
        .tokenizer(&tokens);

    let checker = typos_cli::file::Identifiers;
    let bytes = divan::counter::BytesCount::of_str(content);
    bencher
        .counter(bytes)
        .bench_local(|| checker.check_file(file.path(), true, &policy, &PrintSilent))
}
/// Benchmarks `Words::check_file` once per sample in `data::DATA`.
///
/// Setup (dictionary, tokenizer, policy, and the on-disk copy of the sample)
/// happens outside the measured closure; a byte counter sized to the sample
/// content is attached.
#[divan::bench(args = data::DATA)]
fn words(bencher: divan::Bencher, sample: &data::Data) {
    let content = sample.content();

    // Materialize the sample on disk so the checker is handed a real path.
    let scratch = assert_fs::TempDir::new().unwrap();
    let file = scratch.child(sample.name());
    file.write_str(content).unwrap();

    let builtin = typos_cli::dict::BuiltIn::new(Default::default());
    let tokens = typos::tokens::Tokenizer::new();
    let policy = typos_cli::policy::Policy::new()
        .dict(&builtin)
        .tokenizer(&tokens);

    let checker = typos_cli::file::Words;
    let bytes = divan::counter::BytesCount::of_str(content);
    bencher
        .counter(bytes)
        .bench_local(|| checker.check_file(file.path(), true, &policy, &PrintSilent))
}
/// Benchmarks `Typos::check_file` once per sample in `data::DATA`.
///
/// Setup (dictionary, tokenizer, policy, and the on-disk copy of the sample)
/// happens outside the measured closure; a byte counter sized to the sample
/// content is attached.
#[divan::bench(args = data::DATA)]
fn typos(bencher: divan::Bencher, sample: &data::Data) {
    let content = sample.content();

    // Materialize the sample on disk so the checker is handed a real path.
    let scratch = assert_fs::TempDir::new().unwrap();
    let file = scratch.child(sample.name());
    file.write_str(content).unwrap();

    let builtin = typos_cli::dict::BuiltIn::new(Default::default());
    let tokens = typos::tokens::Tokenizer::new();
    let policy = typos_cli::policy::Policy::new()
        .dict(&builtin)
        .tokenizer(&tokens);

    let checker = typos_cli::file::Typos;
    let bytes = divan::counter::BytesCount::of_str(content);
    bencher
        .counter(bytes)
        .bench_local(|| checker.check_file(file.path(), true, &policy, &PrintSilent))
}
/// A reporter that discards every message, so the benchmarks do not produce
/// report output.
#[derive(Debug, Default)]
pub struct PrintSilent;

impl typos_cli::report::Report for PrintSilent {
    /// Ignores the message and always succeeds.
    fn report(&self, _message: typos_cli::report::Message) -> std::io::Result<()> {
        Ok(())
    }
}
/// Benchmark entry point: hands control to Divan, which runs the functions
/// annotated with `#[divan::bench]` in this file.
fn main() {
divan::main();
}

View file

@ -1,69 +0,0 @@
mod data;
use assert_fs::prelude::*;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use typos_cli::file::FileChecker;
/// Registers the `check_file` Criterion group.
///
/// For every `(name, sample)` pair in `data::DATA` the sample is written to a
/// file in a shared temporary directory, throughput is set to the sample's
/// byte length, and one benchmark per checker (`FoundFiles`, `Identifiers`,
/// `Words`, `Typos`) is registered. The temporary directory is closed
/// explicitly once the group has finished.
fn bench_checks(c: &mut Criterion) {
    let builtin = typos_cli::dict::BuiltIn::new(Default::default());
    let tokens = typos::tokens::Tokenizer::new();
    let policy = typos_cli::policy::Policy::new()
        .dict(&builtin)
        .tokenizer(&tokens);

    let scratch = assert_fs::TempDir::new().unwrap();

    let mut group = c.benchmark_group("check_file");
    for (name, sample) in data::DATA {
        let file = scratch.child(name);
        file.write_str(sample).unwrap();
        let path = file.path();

        let len = sample.len();
        group.throughput(Throughput::Bytes(len as u64));

        group.bench_with_input(BenchmarkId::new("FoundFiles", name), &len, |b, _| {
            let checker = typos_cli::file::FoundFiles;
            b.iter(|| checker.check_file(path, true, &policy, &PrintSilent));
        });
        group.bench_with_input(BenchmarkId::new("Identifiers", name), &len, |b, _| {
            let checker = typos_cli::file::Identifiers;
            b.iter(|| checker.check_file(path, true, &policy, &PrintSilent));
        });
        group.bench_with_input(BenchmarkId::new("Words", name), &len, |b, _| {
            let checker = typos_cli::file::Words;
            b.iter(|| checker.check_file(path, true, &policy, &PrintSilent));
        });
        group.bench_with_input(BenchmarkId::new("Typos", name), &len, |b, _| {
            let checker = typos_cli::file::Typos;
            b.iter(|| checker.check_file(path, true, &policy, &PrintSilent));
        });
    }
    group.finish();

    scratch.close().unwrap();
}
/// A reporter that discards every message, so the benchmarks do not produce
/// report output.
#[derive(Debug, Default)]
pub struct PrintSilent;

impl typos_cli::report::Report for PrintSilent {
    /// Ignores the message and always succeeds.
    fn report(&self, _message: typos_cli::report::Message) -> std::io::Result<()> {
        Ok(())
    }
}
// Collect `bench_checks` into the `benches` group and generate the `main`
// function that runs it (the bench target is declared with `harness = false`).
criterion_group!(benches, bench_checks,);
criterion_main!(benches);

View file

@ -0,0 +1,155 @@
/// Benchmarks of `BuiltIn::correct_word` for the `En` and `EnUs` locales.
///
/// Each benchmark builds the dictionary and the `Word` outside the measured
/// closure, sanity-checks the expected result once, and then measures only
/// the `correct_word` call. The `en_us` variants are compiled only when the
/// `vars` feature is enabled.
mod regular {
    /// Input: `"finalizes"`.
    mod ok {
        /// `En` locale; with `vars` enabled the lookup is asserted to return
        /// `None`.
        #[divan::bench]
        fn en(bencher: divan::Bencher) {
            let input = "finalizes";
            let locale = typos_cli::config::Locale::En;
            let corrections = typos_cli::dict::BuiltIn::new(locale);
            let input = typos::tokens::Word::new(input, 0).unwrap();
            // The sanity check is compiled only with the `vars` feature.
            #[cfg(feature = "vars")]
            assert_eq!(corrections.correct_word(input), None);
            bencher
                .with_inputs(|| input)
                .bench_local_values(|input| corrections.correct_word(input));
        }

        /// `EnUs` locale; the lookup is asserted to return `Status::Valid`.
        #[divan::bench]
        #[cfg(feature = "vars")]
        fn en_us(bencher: divan::Bencher) {
            let input = "finalizes";
            let locale = typos_cli::config::Locale::EnUs;
            let corrections = typos_cli::dict::BuiltIn::new(locale);
            let input = typos::tokens::Word::new(input, 0).unwrap();
            // No per-statement `cfg` needed: the whole function is already
            // gated on the `vars` feature.
            assert_eq!(corrections.correct_word(input), Some(typos::Status::Valid));
            bencher
                .with_inputs(|| input)
                .bench_local_values(|input| corrections.correct_word(input));
        }
    }

    /// Input: `"finallizes"`, expected correction `"finalizes"`.
    mod misspell {
        /// `En` locale.
        #[divan::bench]
        fn en(bencher: divan::Bencher) {
            let input = "finallizes";
            let output = "finalizes";
            let locale = typos_cli::config::Locale::En;
            let corrections = typos_cli::dict::BuiltIn::new(locale);
            let input = typos::tokens::Word::new(input, 0).unwrap();
            assert_eq!(
                corrections.correct_word(input),
                Some(typos::Status::Corrections(vec![
                    std::borrow::Cow::Borrowed(output)
                ]))
            );
            bencher
                .with_inputs(|| input)
                .bench_local_values(|input| corrections.correct_word(input));
        }

        /// `EnUs` locale.
        #[divan::bench]
        #[cfg(feature = "vars")]
        fn en_us(bencher: divan::Bencher) {
            let input = "finallizes";
            let output = "finalizes";
            let locale = typos_cli::config::Locale::EnUs;
            let corrections = typos_cli::dict::BuiltIn::new(locale);
            let input = typos::tokens::Word::new(input, 0).unwrap();
            assert_eq!(
                corrections.correct_word(input),
                Some(typos::Status::Corrections(vec![
                    std::borrow::Cow::Borrowed(output)
                ]))
            );
            bencher
                .with_inputs(|| input)
                .bench_local_values(|input| corrections.correct_word(input));
        }
    }

    /// Input: `"FINALLIZES"`, expected correction `"FINALIZES"`.
    mod misspell_case {
        /// `En` locale.
        #[divan::bench]
        fn en(bencher: divan::Bencher) {
            let input = "FINALLIZES";
            let output = "FINALIZES";
            let locale = typos_cli::config::Locale::En;
            let corrections = typos_cli::dict::BuiltIn::new(locale);
            let input = typos::tokens::Word::new(input, 0).unwrap();
            assert_eq!(
                corrections.correct_word(input),
                Some(typos::Status::Corrections(vec![
                    std::borrow::Cow::Borrowed(output)
                ]))
            );
            bencher
                .with_inputs(|| input)
                .bench_local_values(|input| corrections.correct_word(input));
        }

        /// `EnUs` locale.
        #[divan::bench]
        #[cfg(feature = "vars")]
        fn en_us(bencher: divan::Bencher) {
            let input = "FINALLIZES";
            let output = "FINALIZES";
            let locale = typos_cli::config::Locale::EnUs;
            let corrections = typos_cli::dict::BuiltIn::new(locale);
            let input = typos::tokens::Word::new(input, 0).unwrap();
            assert_eq!(
                corrections.correct_word(input),
                Some(typos::Status::Corrections(vec![
                    std::borrow::Cow::Borrowed(output)
                ]))
            );
            bencher
                .with_inputs(|| input)
                .bench_local_values(|input| corrections.correct_word(input));
        }
    }
}
/// Benchmarks of `BuiltIn::correct_word` for the `EnGb` locale.
///
/// The whole module is compiled only when the `vars` feature is enabled. Each
/// benchmark asserts the expected correction once and then measures only the
/// `correct_word` call.
#[cfg(feature = "vars")]
mod varcon {
    /// Input: `"finalizes"`, expected correction `"finalises"`.
    mod ok {
        #[divan::bench]
        fn en_gb(bencher: divan::Bencher) {
            let raw = "finalizes";
            let expected = "finalises";
            let dict = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::EnGb);
            let word = typos::tokens::Word::new(raw, 0).unwrap();

            let want = Some(typos::Status::Corrections(vec![
                std::borrow::Cow::Borrowed(expected),
            ]));
            assert_eq!(dict.correct_word(word), want);

            bencher
                .with_inputs(|| word)
                .bench_local_values(|w| dict.correct_word(w));
        }
    }

    /// Input: `"finallizes"`, expected correction `"finalises"`.
    mod misspell {
        #[divan::bench]
        fn en_gb(bencher: divan::Bencher) {
            let raw = "finallizes";
            let expected = "finalises";
            let dict = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::EnGb);
            let word = typos::tokens::Word::new(raw, 0).unwrap();

            let want = Some(typos::Status::Corrections(vec![
                std::borrow::Cow::Borrowed(expected),
            ]));
            assert_eq!(dict.correct_word(word), want);

            bencher
                .with_inputs(|| word)
                .bench_local_values(|w| dict.correct_word(w));
        }
    }
}
/// Benchmark entry point: hands control to Divan, which runs the functions
/// annotated with `#[divan::bench]` in this file.
fn main() {
divan::main();
}

View file

@ -1,116 +0,0 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
/// Registers the `load` Criterion group, which measures constructing the
/// built-in dictionary with its default locale.
fn bench_dict_load(c: &mut Criterion) {
    let mut group = c.benchmark_group("load");
    let id = BenchmarkId::new("load", "builtin");
    group.bench_function(id, |b| {
        b.iter(|| typos_cli::dict::BuiltIn::new(Default::default()))
    });
    group.finish();
}
/// Registers the `correct_word` Criterion group.
///
/// Each case builds the dictionary and `Word` inside the registration
/// closure (outside `b.iter`), asserts the expected result once, and then
/// measures only the `correct_word` call. The `en-us` and `en-gb` benchmarks
/// are compiled only when the `vars` feature is enabled.
fn bench_dict_correct_word(c: &mut Criterion) {
    let mut group = c.benchmark_group("correct_word");

    {
        let case = "ok";
        let input = "finalizes";
        group.bench_function(BenchmarkId::new("en", case), |b| {
            let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En);
            let input = typos::tokens::Word::new(input, 0).unwrap();
            // The sanity check is compiled only with the `vars` feature.
            #[cfg(feature = "vars")]
            assert_eq!(corrections.correct_word(input), None);
            b.iter(|| corrections.correct_word(input));
        });
        #[cfg(feature = "vars")]
        group.bench_function(BenchmarkId::new("en-us", case), |b| {
            let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::EnUs);
            let input = typos::tokens::Word::new(input, 0).unwrap();
            // No per-statement `cfg` needed: the enclosing registration is
            // already gated on the `vars` feature.
            assert_eq!(corrections.correct_word(input), Some(typos::Status::Valid));
            b.iter(|| corrections.correct_word(input));
        });
    }

    {
        let case = "misspell";
        let input = "finallizes";
        let output = "finalizes";
        group.bench_function(BenchmarkId::new("en", case), |b| {
            let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En);
            let input = typos::tokens::Word::new(input, 0).unwrap();
            assert_eq!(
                corrections.correct_word(input),
                Some(typos::Status::Corrections(vec![
                    std::borrow::Cow::Borrowed(output)
                ]))
            );
            b.iter(|| corrections.correct_word(input));
        });
        #[cfg(feature = "vars")]
        group.bench_function(BenchmarkId::new("en-us", case), |b| {
            let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::EnUs);
            let input = typos::tokens::Word::new(input, 0).unwrap();
            assert_eq!(
                corrections.correct_word(input),
                Some(typos::Status::Corrections(vec![
                    std::borrow::Cow::Borrowed(output)
                ]))
            );
            b.iter(|| corrections.correct_word(input));
        });
    }

    {
        let case = "misspell_case";
        let input = "FINALLIZES";
        let output = "FINALIZES";
        group.bench_function(BenchmarkId::new("en", case), |b| {
            let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En);
            let input = typos::tokens::Word::new(input, 0).unwrap();
            assert_eq!(
                corrections.correct_word(input),
                Some(typos::Status::Corrections(vec![
                    std::borrow::Cow::Borrowed(output)
                ]))
            );
            b.iter(|| corrections.correct_word(input));
        });
    }

    #[cfg(feature = "vars")]
    {
        let case = "varcon";
        let input = "finalizes";
        let output = "finalises";
        group.bench_function(BenchmarkId::new("en-gb", case), |b| {
            let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::EnGb);
            let input = typos::tokens::Word::new(input, 0).unwrap();
            assert_eq!(
                corrections.correct_word(input),
                Some(typos::Status::Corrections(vec![
                    std::borrow::Cow::Borrowed(output)
                ]))
            );
            b.iter(|| corrections.correct_word(input));
        });
    }

    #[cfg(feature = "vars")]
    {
        let case = "misspell_varcon";
        let input = "finallizes";
        let output = "finalises";
        group.bench_function(BenchmarkId::new("en-gb", case), |b| {
            let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::EnGb);
            let input = typos::tokens::Word::new(input, 0).unwrap();
            assert_eq!(
                corrections.correct_word(input),
                Some(typos::Status::Corrections(vec![
                    std::borrow::Cow::Borrowed(output)
                ]))
            );
            b.iter(|| corrections.correct_word(input));
        });
    }

    group.finish();
}
// Collect both dictionary benchmarks into the `benches` group and generate
// the `main` function that runs them.
criterion_group!(benches, bench_dict_load, bench_dict_correct_word);
criterion_main!(benches);

View file

@ -30,11 +30,30 @@ fn main() {
pub static CORPUS: &str = include_str!("../../typos-dict/assets/words.csv");
pub static DATA: &[(&str, &str)] = &[
("empty", EMPTY),
("no_tokens", NO_TOKENS),
("single_token", SINGLE_TOKEN),
("sherlock", SHERLOCK),
("code", CODE),
("corpus", CORPUS),
/// A named benchmark sample: a short label paired with the text to run
/// against.
#[derive(Debug)]
pub struct Data(&'static str, &'static str);

impl Data {
    /// Returns the sample's label (first tuple field).
    pub const fn name(&self) -> &'static str {
        let Self(label, _) = *self;
        label
    }

    /// Returns the sample's text (second tuple field).
    pub const fn content(&self) -> &'static str {
        let Self(_, text) = *self;
        text
    }
}

/// Displays only the label, delegating to `str`'s `Display` so formatter
/// options such as width and alignment are honored.
impl std::fmt::Display for Data {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(self.name(), f)
    }
}
/// All benchmark samples, each pairing a label with its content.
///
/// The benchmarks pass this slice as `args`, so every sample becomes one
/// benchmark case.
pub static DATA: &[Data] = &[
Data("empty", EMPTY),
Data("no_tokens", NO_TOKENS),
Data("single_token", SINGLE_TOKEN),
Data("sherlock", SHERLOCK),
Data("code", CODE),
Data("corpus", CORPUS),
];

View file

@ -1,93 +1,98 @@
mod data;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
mod parse_str {
use super::*;
fn bench_parse_str(c: &mut Criterion) {
let mut group = c.benchmark_group("parse_str");
for (name, sample) in data::DATA {
let len = sample.len();
group.throughput(Throughput::Bytes(len as u64));
group.bench_with_input(BenchmarkId::new("unicode", name), &len, |b, _| {
let parser = typos::tokens::TokenizerBuilder::new().unicode(true).build();
b.iter(|| parser.parse_str(sample).last());
});
group.bench_with_input(BenchmarkId::new("ascii", name), &len, |b, _| {
let parser = typos::tokens::TokenizerBuilder::new()
.unicode(false)
.build();
b.iter(|| parser.parse_str(sample).last());
});
/// Benchmarks `parse_str` with a tokenizer built with `unicode(false)`.
///
/// The iterator is driven to completion with `.last()`; the input counter
/// reports the byte length of each sample string.
#[divan::bench(args = data::DATA)]
fn ascii(bencher: divan::Bencher, sample: &data::Data) {
    let tokenizer = typos::tokens::TokenizerBuilder::new()
        .unicode(false)
        .build();
    let with_text = bencher
        .with_inputs(|| sample.content())
        .input_counter(divan::counter::BytesCount::of_str);
    with_text.bench_local_values(|text| tokenizer.parse_str(text).last())
}
/// Benchmarks `parse_str` with a tokenizer built with `unicode(true)`.
///
/// The iterator is driven to completion with `.last()`; the input counter
/// reports the byte length of each sample string.
#[divan::bench(args = data::DATA)]
fn unicode(bencher: divan::Bencher, sample: &data::Data) {
    let tokenizer = typos::tokens::TokenizerBuilder::new()
        .unicode(true)
        .build();
    let with_text = bencher
        .with_inputs(|| sample.content())
        .input_counter(divan::counter::BytesCount::of_str);
    with_text.bench_local_values(|text| tokenizer.parse_str(text).last())
}
group.finish();
}
fn bench_parse_bytes(c: &mut Criterion) {
let mut group = c.benchmark_group("parse_bytes");
for (name, sample) in data::DATA {
let len = sample.len();
group.throughput(Throughput::Bytes(len as u64));
group.bench_with_input(BenchmarkId::new("unicode", name), &len, |b, _| {
let parser = typos::tokens::TokenizerBuilder::new().unicode(true).build();
b.iter(|| parser.parse_bytes(sample.as_bytes()).last());
});
group.bench_with_input(BenchmarkId::new("ascii", name), &len, |b, _| {
let parser = typos::tokens::TokenizerBuilder::new()
.unicode(false)
.build();
b.iter(|| parser.parse_bytes(sample.as_bytes()).last());
});
mod parse_bytes {
use super::*;
/// Benchmarks `parse_bytes` with a tokenizer built with `unicode(false)`.
///
/// The sample content is passed as a byte slice and the iterator is driven
/// to completion with `.last()`; the input counter reports the slice length.
#[divan::bench(args = data::DATA)]
fn ascii(bencher: divan::Bencher, sample: &data::Data) {
    let tokenizer = typos::tokens::TokenizerBuilder::new()
        .unicode(false)
        .build();
    let with_bytes = bencher
        .with_inputs(|| sample.content().as_bytes())
        .input_counter(divan::counter::BytesCount::of_slice);
    with_bytes.bench_local_values(|bytes| tokenizer.parse_bytes(bytes).last())
}
/// Benchmarks `parse_bytes` with a tokenizer built with `unicode(true)`.
///
/// The sample content is passed as a byte slice and the iterator is driven
/// to completion with `.last()`; the input counter reports the slice length.
#[divan::bench(args = data::DATA)]
fn unicode(bencher: divan::Bencher, sample: &data::Data) {
    let tokenizer = typos::tokens::TokenizerBuilder::new()
        .unicode(true)
        .build();
    let with_bytes = bencher
        .with_inputs(|| sample.content().as_bytes())
        .input_counter(divan::counter::BytesCount::of_slice);
    with_bytes.bench_local_values(|bytes| tokenizer.parse_bytes(bytes).last())
}
group.finish();
}
fn bench_split(c: &mut Criterion) {
let mut group = c.benchmark_group("split");
for (name, sample) in data::DATA {
let len = sample.len();
group.throughput(Throughput::Bytes(len as u64));
group.bench_with_input(BenchmarkId::new("words", name), &len, |b, _| {
let symbol =
typos::tokens::Identifier::new_unchecked(sample, typos::tokens::Case::None, 0);
b.iter(|| symbol.split().last());
});
}
group.finish();
/// Benchmarks `Identifier::split` on each sample.
///
/// The whole sample content is wrapped in a single identifier via
/// `new_unchecked` (with `Case::None` and offset 0) outside the measured
/// closure; the split iterator is driven to completion with `.last()`.
#[divan::bench(args = data::DATA)]
fn split(bencher: divan::Bencher, sample: &data::Data) {
    let content = sample.content();
    let ident = typos::tokens::Identifier::new_unchecked(content, typos::tokens::Case::None, 0);
    let bytes = divan::counter::BytesCount::of_str(content);
    bencher.counter(bytes).bench_local(|| ident.split().last())
}
fn bench_parse_split(c: &mut Criterion) {
let mut group = c.benchmark_group("parse_bytes+split");
for (name, sample) in data::DATA {
let len = sample.len();
group.throughput(Throughput::Bytes(len as u64));
group.bench_with_input(BenchmarkId::new("unicode", name), &len, |b, _| {
let parser = typos::tokens::TokenizerBuilder::new().unicode(true).build();
b.iter(|| {
parser
.parse_bytes(sample.as_bytes())
.flat_map(|i| i.split())
.last()
});
});
group.bench_with_input(BenchmarkId::new("ascii", name), &len, |b, _| {
let parser = typos::tokens::TokenizerBuilder::new()
.unicode(false)
.build();
b.iter(|| {
parser
.parse_bytes(sample.as_bytes())
.flat_map(|i| i.split())
.last()
});
});
mod parse_split_bytes {
use super::*;
/// Benchmarks `parse_bytes` followed by `split` on every parsed identifier,
/// with a tokenizer built with `unicode(false)`.
///
/// The combined iterator is driven to completion with `.last()`; the input
/// counter reports the byte-slice length.
#[divan::bench(args = data::DATA)]
fn ascii(bencher: divan::Bencher, sample: &data::Data) {
    let tokenizer = typos::tokens::TokenizerBuilder::new()
        .unicode(false)
        .build();
    let with_bytes = bencher
        .with_inputs(|| sample.content().as_bytes())
        .input_counter(divan::counter::BytesCount::of_slice);
    with_bytes.bench_local_values(|bytes| {
        tokenizer
            .parse_bytes(bytes)
            .flat_map(|ident| ident.split())
            .last()
    })
}
/// Benchmarks `parse_bytes` followed by `split` on every parsed identifier,
/// with a tokenizer built with `unicode(true)`.
///
/// The combined iterator is driven to completion with `.last()`; the input
/// counter reports the byte-slice length.
#[divan::bench(args = data::DATA)]
fn unicode(bencher: divan::Bencher, sample: &data::Data) {
    let tokenizer = typos::tokens::TokenizerBuilder::new()
        .unicode(true)
        .build();
    let with_bytes = bencher
        .with_inputs(|| sample.content().as_bytes())
        .input_counter(divan::counter::BytesCount::of_slice);
    with_bytes.bench_local_values(|bytes| {
        tokenizer
            .parse_bytes(bytes)
            .flat_map(|ident| ident.split())
            .last()
    })
}
group.finish();
}
criterion_group!(
benches,
bench_parse_str,
bench_parse_bytes,
bench_split,
bench_parse_split
);
criterion_main!(benches);
/// Benchmark entry point: hands control to Divan, which runs the functions
/// annotated with `#[divan::bench]` in this file.
fn main() {
divan::main();
}