refactor(bench): Switch to criterion

This commit is contained in:
Ed Page 2021-02-05 21:38:44 -06:00
parent f64f9b7fde
commit 364e9f1dc8
6 changed files with 454 additions and 307 deletions

299
Cargo.lock generated
View file

@ -131,12 +131,27 @@ dependencies = [
"serde",
]
[[package]]
name = "bumpalo"
version = "3.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "099e596ef14349721d9016f6b80dd3419ea1bf289ab9b44df8e4dfd3a005d5d9"
[[package]]
name = "byteorder"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae44d1a3d5a19df61dd0c8beb138458ac2a53a7ac09eba97d55592540004306b"
[[package]]
name = "cast"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0"
dependencies = [
"rustc_version",
]
[[package]]
name = "cfg-if"
version = "0.1.10"
@ -191,7 +206,7 @@ name = "codespell-codegen"
version = "0.1.2"
dependencies = [
"codegenrs",
"itertools",
"itertools 0.10.0",
"phf",
"phf_codegen",
"structopt",
@ -207,6 +222,12 @@ dependencies = [
"unicase",
]
[[package]]
name = "const_fn"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28b9d6de7f49e22cf97ad17fc4036ece69300032f45f78f30b4a4482cdc3f4a6"
[[package]]
name = "content_inspector"
version = "0.2.4"
@ -216,6 +237,77 @@ dependencies = [
"memchr",
]
[[package]]
name = "criterion"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab327ed7354547cc2ef43cbe20ef68b988e70b4b593cbd66a2a61733123a3d23"
dependencies = [
"atty",
"cast",
"clap",
"criterion-plot",
"csv",
"itertools 0.10.0",
"lazy_static",
"num-traits",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_cbor",
"serde_derive",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e022feadec601fba1649cfa83586381a4ad31c6bf3a9ab7d408118b05dd9889d"
dependencies = [
"cast",
"itertools 0.9.0",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1aaa739f95311c2c7887a76863f500026092fb1dce0161dab577e559ef3569d"
dependencies = [
"cfg-if 1.0.0",
"const_fn",
"crossbeam-utils",
"lazy_static",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.1"
@ -534,6 +626,12 @@ dependencies = [
"walkdir",
]
[[package]]
name = "half"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3"
[[package]]
name = "heck"
version = "0.3.2"
@ -606,6 +704,15 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "itertools"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.10.0"
@ -621,6 +728,15 @@ version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
[[package]]
name = "js-sys"
version = "0.3.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cfb73131c35423a367daf8cbd24100af0d077668c8c2943f0e7dd775fef0f65"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -661,6 +777,15 @@ version = "2.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
[[package]]
name = "memoffset"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "157b4208e3059a8f9e78d559edc658e13df41410cb3ae03979c83130067fdd87"
dependencies = [
"autocfg",
]
[[package]]
name = "miniz_oxide"
version = "0.4.3"
@ -676,7 +801,7 @@ name = "misspell-codegen"
version = "0.1.2"
dependencies = [
"codegenrs",
"itertools",
"itertools 0.10.0",
"phf",
"phf_codegen",
"regex",
@ -720,6 +845,16 @@ dependencies = [
"autocfg",
]
[[package]]
name = "num_cpus"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "object"
version = "0.23.0"
@ -732,6 +867,12 @@ version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0"
[[package]]
name = "oorandom"
version = "11.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "os_type"
version = "2.2.0"
@ -780,6 +921,34 @@ dependencies = [
"unicase",
]
[[package]]
name = "plotters"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45ca0ae5f169d0917a7c7f5a9c1a3d3d9598f18f529dd2b8373ed988efea307a"
dependencies = [
"num-traits",
"plotters-backend",
"plotters-svg",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "plotters-backend"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b07fffcddc1cb3a1de753caa4e4df03b79922ba43cf882acc1bdd7e8df9f4590"
[[package]]
name = "plotters-svg"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b38a02e23bd9604b842a812063aec4ef702b57989c37b655254bb61c471ad211"
dependencies = [
"plotters-backend",
]
[[package]]
name = "ppv-lite86"
version = "0.2.10"
@ -984,6 +1153,31 @@ dependencies = [
"rand_core 0.5.1",
]
[[package]]
name = "rayon"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674"
dependencies = [
"autocfg",
"crossbeam-deque",
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"lazy_static",
"num_cpus",
]
[[package]]
name = "redox_syscall"
version = "0.2.4"
@ -1059,6 +1253,12 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "semver"
version = "0.9.0"
@ -1083,6 +1283,16 @@ dependencies = [
"serde_derive",
]
[[package]]
name = "serde_cbor"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622"
dependencies = [
"half",
"serde",
]
[[package]]
name = "serde_derive"
version = "1.0.123"
@ -1242,6 +1452,16 @@ dependencies = [
"once_cell",
]
[[package]]
name = "tinytemplate"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2ada8616fad06a2d0c455adc530de4ef57605a8120cc65da9653e0e9623ca74"
dependencies = [
"serde",
"serde_json",
]
[[package]]
name = "toml"
version = "0.5.8"
@ -1262,7 +1482,7 @@ name = "typos"
version = "0.3.0"
dependencies = [
"anyhow",
"itertools",
"itertools 0.10.0",
"log",
"once_cell",
"regex",
@ -1282,6 +1502,7 @@ dependencies = [
"clap",
"clap-verbosity-flag",
"content_inspector",
"criterion",
"derive_more 0.99.11",
"derive_setters",
"difflib",
@ -1289,7 +1510,7 @@ dependencies = [
"env_logger 0.8.2",
"human-panic",
"ignore",
"itertools",
"itertools 0.10.0",
"log",
"phf",
"predicates",
@ -1333,7 +1554,7 @@ dependencies = [
"codegenrs",
"csv",
"edit-distance",
"itertools",
"itertools 0.10.0",
"structopt",
"unicase",
"varcon",
@ -1357,7 +1578,7 @@ dependencies = [
"clap-verbosity-flag",
"codegenrs",
"env_logger 0.7.1",
"itertools",
"itertools 0.10.0",
"log",
"phf",
"phf_codegen",
@ -1469,12 +1690,76 @@ version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]]
name = "wasm-bindgen"
version = "0.2.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55c0f7123de74f0dab9b7d00fd614e7b19349cd1e2f5252bbe9b1754b59433be"
dependencies = [
"cfg-if 1.0.0",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7bc45447f0d4573f3d65720f636bbcc3dd6ce920ed704670118650bcd47764c7"
dependencies = [
"bumpalo",
"lazy_static",
"log",
"proc-macro2 1.0.24",
"quote 1.0.8",
"syn 1.0.60",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b8853882eef39593ad4174dd26fc9865a64e84026d223f63bb2c42affcbba2c"
dependencies = [
"quote 1.0.8",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4133b5e7f2a531fa413b3a1695e925038a05a71cf67e87dafa295cb645a01385"
dependencies = [
"proc-macro2 1.0.24",
"quote 1.0.8",
"syn 1.0.60",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd4945e4943ae02d15c13962b38a5b1e81eadd4b71214eee75af64a4d6a4fd64"
[[package]]
name = "web-sys"
version = "0.3.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c40dc691fc48003eba817c38da7113c15698142da971298003cac3ef175680b3"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "wikipedia-codegen"
version = "0.1.1"
dependencies = [
"codegenrs",
"itertools",
"itertools 0.10.0",
"phf",
"phf_codegen",
"structopt",

View file

@ -61,9 +61,22 @@ encoding = "0.2"
[dev-dependencies]
assert_fs = "1.0"
predicates = "1.0"
criterion = "0.3"
[profile.dev]
panic = "abort"
[profile.release]
panic = "abort"
[[bench]]
name = "checks"
harness = false
[[bench]]
name = "corrections"
harness = false
[[bench]]
name = "tokenize"
harness = false

View file

@ -1,212 +1,96 @@
#![feature(test)]
extern crate test;
mod data;
use assert_fs::prelude::*;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use typos_cli::checks::FileChecker;
fn bench_files(data: &str, b: &mut test::Bencher) {
let temp = assert_fs::TempDir::new().unwrap();
let sample_path = temp.child("sample");
sample_path.write_str(data).unwrap();
// Criterion entry point for the file-checking benchmarks.
//
// For every `(name, sample)` pair in `data::DATA` this registers four
// benchmarks in the "checks" group: "files", "identifiers", "words" and
// "typos". They differ only in which `TyposSettings` builder produces
// `checks` (`build_files`, `build_identifier_parser`, `build_word_parser`,
// `build_typos`).
fn bench_checks(c: &mut Criterion) {
let mut group = c.benchmark_group("checks");
for (name, sample) in data::DATA {
// `len` is handed to criterion as the benchmark input, but every closure
// below ignores that argument (`_`) and captures `sample` directly.
let len = sample.len();
group.bench_with_input(BenchmarkId::new("files", name), &len, |b, _| {
// Setup (outside `b.iter`): write the sample into a temp file named "sample".
let temp = assert_fs::TempDir::new().unwrap();
let sample_path = temp.child("sample");
sample_path.write_str(sample).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Tokenizer::new();
let checks = typos_cli::checks::TyposSettings::new().build_files();
// Only the `check_file` call sits inside the `b.iter` closure.
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&corrections,
&typos_cli::report::PrintSilent,
)
});
// NOTE(review): the lines from here through the first `temp.close()` repeat
// the setup/measure sequence just above — looks like removed-side diff
// residue in this view; confirm against the real bench file.
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Tokenizer::new();
let checks = typos_cli::checks::TyposSettings::new().build_files();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&corrections,
&typos_cli::report::PrintSilent,
)
});
temp.close().unwrap();
// Close the temp dir explicitly; `unwrap` panics if closing fails.
temp.close().unwrap();
});
// Same shape as "files", but `checks` comes from `build_identifier_parser`.
group.bench_with_input(BenchmarkId::new("identifiers", name), &len, |b, _| {
let temp = assert_fs::TempDir::new().unwrap();
let sample_path = temp.child("sample");
sample_path.write_str(sample).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Tokenizer::new();
let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&corrections,
&typos_cli::report::PrintSilent,
)
});
temp.close().unwrap();
});
// Same shape as "files", but `checks` comes from `build_word_parser`.
group.bench_with_input(BenchmarkId::new("words", name), &len, |b, _| {
let temp = assert_fs::TempDir::new().unwrap();
let sample_path = temp.child("sample");
sample_path.write_str(sample).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Tokenizer::new();
let checks = typos_cli::checks::TyposSettings::new().build_word_parser();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&corrections,
&typos_cli::report::PrintSilent,
)
});
temp.close().unwrap();
});
// Same shape as "files", but `checks` comes from `build_typos`.
group.bench_with_input(BenchmarkId::new("typos", name), &len, |b, _| {
let temp = assert_fs::TempDir::new().unwrap();
let sample_path = temp.child("sample");
sample_path.write_str(sample).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Tokenizer::new();
let checks = typos_cli::checks::TyposSettings::new().build_typos();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&corrections,
&typos_cli::report::PrintSilent,
)
});
temp.close().unwrap();
});
}
group.finish();
}
#[bench]
fn files_empty(b: &mut test::Bencher) {
bench_files(data::EMPTY, b);
}
#[bench]
fn files_no_tokens(b: &mut test::Bencher) {
bench_files(data::NO_TOKENS, b);
}
#[bench]
fn files_single_token(b: &mut test::Bencher) {
bench_files(data::SINGLE_TOKEN, b);
}
#[bench]
fn files_sherlock(b: &mut test::Bencher) {
bench_files(data::SHERLOCK, b);
}
#[bench]
fn files_code(b: &mut test::Bencher) {
bench_files(data::CODE, b);
}
#[bench]
fn files_corpus(b: &mut test::Bencher) {
bench_files(data::CORPUS, b);
}
fn bench_identifiers(data: &str, b: &mut test::Bencher) {
let temp = assert_fs::TempDir::new().unwrap();
let sample_path = temp.child("sample");
sample_path.write_str(data).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Tokenizer::new();
let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&corrections,
&typos_cli::report::PrintSilent,
)
});
temp.close().unwrap();
}
#[bench]
fn identifiers_empty(b: &mut test::Bencher) {
bench_identifiers(data::EMPTY, b);
}
#[bench]
fn identifiers_no_tokens(b: &mut test::Bencher) {
bench_identifiers(data::NO_TOKENS, b);
}
#[bench]
fn identifiers_single_token(b: &mut test::Bencher) {
bench_identifiers(data::SINGLE_TOKEN, b);
}
#[bench]
fn identifiers_sherlock(b: &mut test::Bencher) {
bench_identifiers(data::SHERLOCK, b);
}
#[bench]
fn identifiers_code(b: &mut test::Bencher) {
bench_identifiers(data::CODE, b);
}
#[bench]
fn identifiers_corpus(b: &mut test::Bencher) {
bench_identifiers(data::CORPUS, b);
}
fn bench_words(data: &str, b: &mut test::Bencher) {
let temp = assert_fs::TempDir::new().unwrap();
let sample_path = temp.child("sample");
sample_path.write_str(data).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Tokenizer::new();
let checks = typos_cli::checks::TyposSettings::new().build_word_parser();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&corrections,
&typos_cli::report::PrintSilent,
)
});
temp.close().unwrap();
}
#[bench]
fn words_empty(b: &mut test::Bencher) {
bench_words(data::EMPTY, b);
}
#[bench]
fn words_no_tokens(b: &mut test::Bencher) {
bench_words(data::NO_TOKENS, b);
}
#[bench]
fn words_single_token(b: &mut test::Bencher) {
bench_words(data::SINGLE_TOKEN, b);
}
#[bench]
fn words_sherlock(b: &mut test::Bencher) {
bench_words(data::SHERLOCK, b);
}
#[bench]
fn words_code(b: &mut test::Bencher) {
bench_words(data::CODE, b);
}
#[bench]
fn words_corpus(b: &mut test::Bencher) {
bench_words(data::CORPUS, b);
}
fn bench_typos(data: &str, b: &mut test::Bencher) {
let temp = assert_fs::TempDir::new().unwrap();
let sample_path = temp.child("sample");
sample_path.write_str(data).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Tokenizer::new();
let checks = typos_cli::checks::TyposSettings::new().build_typos();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&corrections,
&typos_cli::report::PrintSilent,
)
});
temp.close().unwrap();
}
#[bench]
fn typos_empty(b: &mut test::Bencher) {
bench_typos(data::EMPTY, b);
}
#[bench]
fn typos_no_tokens(b: &mut test::Bencher) {
bench_typos(data::NO_TOKENS, b);
}
#[bench]
fn typos_single_token(b: &mut test::Bencher) {
bench_typos(data::SINGLE_TOKEN, b);
}
#[bench]
fn typos_sherlock(b: &mut test::Bencher) {
bench_typos(data::SHERLOCK, b);
}
#[bench]
fn typos_code(b: &mut test::Bencher) {
bench_typos(data::CODE, b);
}
#[bench]
fn typos_corpus(b: &mut test::Bencher) {
bench_typos(data::CORPUS, b);
}
criterion_group!(benches, bench_checks,);
criterion_main!(benches);

View file

@ -1,29 +1,34 @@
#![feature(test)]
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
extern crate test;
#[bench]
fn load_corrections(b: &mut test::Bencher) {
b.iter(|| typos_cli::dict::BuiltIn::new(Default::default()));
// Benchmarks construction of the built-in dictionary.
//
// Registers one benchmark in the "load" group, identified by
// `BenchmarkId::new("load", "builtin")`.
fn bench_dict_load(c: &mut Criterion) {
let mut group = c.benchmark_group("load");
group.bench_function(BenchmarkId::new("load", "builtin"), |b| {
// `BuiltIn::new` is called inside `b.iter`, so construction itself is measured.
b.iter(|| typos_cli::dict::BuiltIn::new(Default::default()));
});
group.finish();
}
#[bench]
fn correct_word_hit(b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let input = typos::tokens::Word::new("successs", 0).unwrap();
assert_eq!(
corrections.correct_word(input),
Some(typos::Status::Corrections(vec![
std::borrow::Cow::Borrowed("successes")
]))
);
b.iter(|| corrections.correct_word(input));
// Benchmarks `correct_word` on the built-in dictionary.
//
// Registers two benchmarks in the "lookup" group: "hit" looks up the word
// "successs" and "miss" looks up "success". In both, the dictionary and the
// input word are built outside `b.iter`, so only the `correct_word` call is
// inside the measured closure.
fn bench_dict_lookup(c: &mut Criterion) {
let mut group = c.benchmark_group("lookup");
group.bench_function(BenchmarkId::new("lookup", "hit"), |b| {
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let input = typos::tokens::Word::new("successs", 0).unwrap();
// Sanity check before measuring: the lookup must yield the single
// correction "successes", otherwise this is not exercising the hit path.
assert_eq!(
corrections.correct_word(input),
Some(typos::Status::Corrections(vec![
std::borrow::Cow::Borrowed("successes")
]))
);
b.iter(|| corrections.correct_word(input));
});
group.bench_function(BenchmarkId::new("lookup", "miss"), |b| {
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let input = typos::tokens::Word::new("success", 0).unwrap();
// Sanity check before measuring: the lookup must return `None`.
assert!(corrections.correct_word(input).is_none());
b.iter(|| corrections.correct_word(input));
});
group.finish();
}
#[bench]
fn correct_word_miss(b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let input = typos::tokens::Word::new("success", 0).unwrap();
assert!(corrections.correct_word(input).is_none());
b.iter(|| corrections.correct_word(input));
}
criterion_group!(benches, bench_dict_load, bench_dict_lookup);
criterion_main!(benches);

View file

@ -1,11 +1,11 @@
pub const EMPTY: &str = "";
pub static EMPTY: &str = "";
pub const NO_TOKENS: &str = " ";
pub static NO_TOKENS: &str = " ";
pub const SINGLE_TOKEN: &str = "success";
pub static SINGLE_TOKEN: &str = "success";
// Stolen from https://github.com/BurntSushi/ripgrep/blob/master/grep-searcher/src/searcher/glue.rs
pub const SHERLOCK: &'static str = "\
pub static SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
@ -15,7 +15,7 @@ and exhibited clearly, with a label attached.\
";
// Stolen from https://github.com/BurntSushi/ripgrep/blob/master/grep-searcher/src/searcher/glue.rs
pub const CODE: &'static str = "\
pub static CODE: &'static str = "\
extern crate snap;
use std::io;
fn main() {
@ -28,4 +28,13 @@ fn main() {
}
";
pub const CORPUS: &str = include_str!("../crates/typos-dict/assets/words.csv");
pub static CORPUS: &str = include_str!("../crates/typos-dict/assets/words.csv");
// Table of `(label, sample text)` pairs covering every sample defined in this
// file. Benchmarks iterate over it and use the label as the `BenchmarkId`
// parameter.
pub static DATA: &[(&str, &str)] = &[
("empty", EMPTY),
("no_tokens", NO_TOKENS),
("single_token", SINGLE_TOKEN),
("sherlock", SHERLOCK),
("code", CODE),
("corpus", CORPUS),
];

View file

@ -1,71 +1,22 @@
#![feature(test)]
extern crate test;
mod data;
#[bench]
fn ident_parse_empty(b: &mut test::Bencher) {
let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::EMPTY.as_bytes()).last());
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
// Criterion entry point for the tokenizer benchmarks.
//
// For every `(name, sample)` pair in `data::DATA` this registers two
// benchmarks in the "tokenize" group:
// - "ident": `Tokenizer::parse_bytes` over the sample's bytes;
// - "words": `Identifier::split` with the whole sample as one identifier.
fn bench_tokenize(c: &mut Criterion) {
let mut group = c.benchmark_group("tokenize");
for (name, sample) in data::DATA {
// `len` is handed to criterion as the benchmark input, but both closures
// ignore that argument (`_`) and capture `sample` directly.
let len = sample.len();
group.bench_with_input(BenchmarkId::new("ident", name), &len, |b, _| {
let parser = typos::tokens::Tokenizer::new();
// `.last()` consumes the returned iterator to its end on every iteration.
b.iter(|| parser.parse_bytes(sample.as_bytes()).last());
});
group.bench_with_input(BenchmarkId::new("words", name), &len, |b, _| {
// The entire sample is wrapped as a single identifier without validation
// (`new_unchecked`); the second argument is presumably its offset — TODO confirm.
let symbol = typos::tokens::Identifier::new_unchecked(sample, 0);
b.iter(|| symbol.split().last());
});
}
group.finish();
}
#[bench]
fn ident_parse_no_tokens(b: &mut test::Bencher) {
let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::NO_TOKENS.as_bytes()).last());
}
#[bench]
fn ident_parse_single_token(b: &mut test::Bencher) {
let parser = typos::tokens::Tokenizer::new();
b.iter(|| {
parser.parse_bytes(data::SINGLE_TOKEN.as_bytes()).last();
});
}
#[bench]
fn ident_parse_sherlock(b: &mut test::Bencher) {
let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::SHERLOCK.as_bytes()).last());
}
#[bench]
fn ident_parse_code(b: &mut test::Bencher) {
let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::CODE.as_bytes()).last());
}
#[bench]
fn ident_parse_corpus(b: &mut test::Bencher) {
let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::CORPUS.as_bytes()).last());
}
#[bench]
fn ident_split_lowercase_short(b: &mut test::Bencher) {
let input = "abcabcabcabc";
let symbol = typos::tokens::Identifier::new_unchecked(input, 0);
b.iter(|| symbol.split().last());
}
#[bench]
fn ident_split_lowercase_long(b: &mut test::Bencher) {
let input = "abcabcabcabc".repeat(90);
let symbol = typos::tokens::Identifier::new_unchecked(&input, 0);
b.iter(|| symbol.split().last());
}
#[bench]
fn ident_split_mixed_short(b: &mut test::Bencher) {
let input = "abcABCAbc123";
let symbol = typos::tokens::Identifier::new_unchecked(input, 0);
b.iter(|| symbol.split().last());
}
#[bench]
fn ident_split_mixed_long(b: &mut test::Bencher) {
let input = "abcABCAbc123".repeat(90);
let symbol = typos::tokens::Identifier::new_unchecked(&input, 0);
b.iter(|| symbol.split().last());
}
criterion_group!(benches, bench_tokenize);
criterion_main!(benches);