diff --git a/Cargo.lock b/Cargo.lock index 2d17837..1a68dba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -131,12 +131,27 @@ dependencies = [ "serde", ] +[[package]] +name = "bumpalo" +version = "3.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "099e596ef14349721d9016f6b80dd3419ea1bf289ab9b44df8e4dfd3a005d5d9" + [[package]] name = "byteorder" version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae44d1a3d5a19df61dd0c8beb138458ac2a53a7ac09eba97d55592540004306b" +[[package]] +name = "cast" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0" +dependencies = [ + "rustc_version", +] + [[package]] name = "cfg-if" version = "0.1.10" @@ -191,7 +206,7 @@ name = "codespell-codegen" version = "0.1.2" dependencies = [ "codegenrs", - "itertools", + "itertools 0.10.0", "phf", "phf_codegen", "structopt", @@ -207,6 +222,12 @@ dependencies = [ "unicase", ] +[[package]] +name = "const_fn" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28b9d6de7f49e22cf97ad17fc4036ece69300032f45f78f30b4a4482cdc3f4a6" + [[package]] name = "content_inspector" version = "0.2.4" @@ -216,6 +237,77 @@ dependencies = [ "memchr", ] +[[package]] +name = "criterion" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab327ed7354547cc2ef43cbe20ef68b988e70b4b593cbd66a2a61733123a3d23" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools 0.10.0", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e022feadec601fba1649cfa83586381a4ad31c6bf3a9ab7d408118b05dd9889d" +dependencies = [ + "cast", + "itertools 0.9.0", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1aaa739f95311c2c7887a76863f500026092fb1dce0161dab577e559ef3569d" +dependencies = [ + "cfg-if 1.0.0", + "const_fn", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + [[package]] name = "crossbeam-utils" version = "0.8.1" @@ -534,6 +626,12 @@ dependencies = [ "walkdir", ] +[[package]] +name = "half" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3" + [[package]] name = "heck" version = "0.3.2" @@ -606,6 +704,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "itertools" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.10.0" @@ -621,6 +728,15 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" +[[package]] +name = "js-sys" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cfb73131c35423a367daf8cbd24100af0d077668c8c2943f0e7dd775fef0f65" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -661,6 +777,15 @@ version = "2.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" +[[package]] +name = "memoffset" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "157b4208e3059a8f9e78d559edc658e13df41410cb3ae03979c83130067fdd87" +dependencies = [ + "autocfg", +] + [[package]] name = "miniz_oxide" version = "0.4.3" @@ -676,7 +801,7 @@ name = "misspell-codegen" version = "0.1.2" dependencies = [ "codegenrs", - "itertools", + "itertools 0.10.0", "phf", "phf_codegen", "regex", @@ -720,6 +845,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "object" version = "0.23.0" @@ -732,6 +867,12 @@ version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0" +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + [[package]] name = "os_type" version = "2.2.0" @@ -780,6 +921,34 @@ dependencies = [ "unicase", ] +[[package]] +name = "plotters" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45ca0ae5f169d0917a7c7f5a9c1a3d3d9598f18f529dd2b8373ed988efea307a" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b07fffcddc1cb3a1de753caa4e4df03b79922ba43cf882acc1bdd7e8df9f4590" + +[[package]] +name = "plotters-svg" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b38a02e23bd9604b842a812063aec4ef702b57989c37b655254bb61c471ad211" +dependencies = [ + "plotters-backend", +] + [[package]] name = "ppv-lite86" version = "0.2.10" @@ -984,6 +1153,31 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rayon" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + [[package]] name = "redox_syscall" version = "0.2.4" @@ -1059,6 +1253,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + [[package]] name = "semver" version = "0.9.0" @@ -1083,6 +1283,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_cbor" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" +dependencies = [ + "half", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.123" @@ -1242,6 +1452,16 @@ dependencies = [ "once_cell", ] +[[package]] +name = "tinytemplate" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2ada8616fad06a2d0c455adc530de4ef57605a8120cc65da9653e0e9623ca74" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "toml" version = "0.5.8" @@ -1262,7 +1482,7 @@ name = "typos" version = "0.3.0" dependencies = [ "anyhow", - "itertools", + "itertools 0.10.0", "log", "once_cell", "regex", @@ -1282,6 +1502,7 @@ dependencies = [ "clap", "clap-verbosity-flag", "content_inspector", + "criterion", "derive_more 0.99.11", "derive_setters", "difflib", @@ -1289,7 +1510,7 @@ dependencies = [ "env_logger 0.8.2", "human-panic", "ignore", - "itertools", + "itertools 0.10.0", "log", "phf", "predicates", @@ -1333,7 +1554,7 @@ dependencies = [ "codegenrs", "csv", "edit-distance", - "itertools", + "itertools 0.10.0", "structopt", "unicase", "varcon", @@ -1357,7 +1578,7 @@ dependencies = [ "clap-verbosity-flag", "codegenrs", "env_logger 0.7.1", - "itertools", + "itertools 0.10.0", "log", "phf", "phf_codegen", @@ -1469,12 +1690,76 @@ version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +[[package]] +name = "wasm-bindgen" +version = "0.2.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55c0f7123de74f0dab9b7d00fd614e7b19349cd1e2f5252bbe9b1754b59433be" +dependencies = [ + "cfg-if 1.0.0", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bc45447f0d4573f3d65720f636bbcc3dd6ce920ed704670118650bcd47764c7" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2 1.0.24", + "quote 1.0.8", + "syn 1.0.60", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b8853882eef39593ad4174dd26fc9865a64e84026d223f63bb2c42affcbba2c" +dependencies = [ + "quote 1.0.8", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4133b5e7f2a531fa413b3a1695e925038a05a71cf67e87dafa295cb645a01385" +dependencies = [ + "proc-macro2 1.0.24", + "quote 1.0.8", + "syn 1.0.60", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd4945e4943ae02d15c13962b38a5b1e81eadd4b71214eee75af64a4d6a4fd64" + +[[package]] +name = "web-sys" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c40dc691fc48003eba817c38da7113c15698142da971298003cac3ef175680b3" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "wikipedia-codegen" version = "0.1.1" dependencies = [ "codegenrs", - "itertools", + "itertools 0.10.0", "phf", "phf_codegen", "structopt", diff --git a/Cargo.toml b/Cargo.toml index 1537dc1..07d574d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,9 +61,22 @@ encoding = "0.2" [dev-dependencies] assert_fs = "1.0" predicates = "1.0" +criterion = "0.3" [profile.dev] panic = "abort" [profile.release] panic = "abort" + +[[bench]] +name = "checks" +harness = false + +[[bench]] +name = "corrections" +harness = false + +[[bench]] +name = "tokenize" +harness = false diff --git a/benches/checks.rs b/benches/checks.rs index fbf1f42..2b311e8 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -1,212 +1,96 @@ -#![feature(test)] - -extern crate test; - mod data; use assert_fs::prelude::*; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use typos_cli::checks::FileChecker; -fn bench_files(data: &str, b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data).unwrap(); +fn bench_checks(c: &mut Criterion) { + let mut group = c.benchmark_group("checks"); + for (name, sample) in data::DATA { + let len = sample.len(); + group.bench_with_input(BenchmarkId::new("files", name), &len, |b, _| { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(sample).unwrap(); - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Tokenizer::new(); - let checks = typos_cli::checks::TyposSettings::new().build_files(); - b.iter(|| { - checks.check_file( - sample_path.path(), - true, - &parser, - &corrections, - &typos_cli::report::PrintSilent, - ) - }); + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); + let parser = typos::tokens::Tokenizer::new(); + let checks = typos_cli::checks::TyposSettings::new().build_files(); + b.iter(|| { + checks.check_file( + sample_path.path(), + true, + &parser, + &corrections, + &typos_cli::report::PrintSilent, + ) + }); - temp.close().unwrap(); + temp.close().unwrap(); + }); + group.bench_with_input(BenchmarkId::new("identifiers", name), &len, |b, _| { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(sample).unwrap(); + + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); + let parser = typos::tokens::Tokenizer::new(); + let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser(); + b.iter(|| { + checks.check_file( + sample_path.path(), + true, + &parser, + &corrections, + &typos_cli::report::PrintSilent, + ) + }); + + temp.close().unwrap(); + }); + group.bench_with_input(BenchmarkId::new("words", name), &len, |b, _| { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(sample).unwrap(); + + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); + let parser = typos::tokens::Tokenizer::new(); + let checks = typos_cli::checks::TyposSettings::new().build_word_parser(); + b.iter(|| { + checks.check_file( + sample_path.path(), + true, + &parser, + &corrections, + &typos_cli::report::PrintSilent, + ) + }); + + temp.close().unwrap(); + }); + group.bench_with_input(BenchmarkId::new("typos", name), &len, |b, _| { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(sample).unwrap(); + + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); + let parser = typos::tokens::Tokenizer::new(); + let checks = typos_cli::checks::TyposSettings::new().build_typos(); + b.iter(|| { + checks.check_file( + sample_path.path(), + true, + &parser, + &corrections, + &typos_cli::report::PrintSilent, + ) + }); + + temp.close().unwrap(); + }); + } + group.finish(); } -#[bench] -fn files_empty(b: &mut test::Bencher) { - bench_files(data::EMPTY, b); -} - -#[bench] -fn files_no_tokens(b: &mut test::Bencher) { - bench_files(data::NO_TOKENS, b); -} - -#[bench] -fn files_single_token(b: &mut test::Bencher) { - bench_files(data::SINGLE_TOKEN, b); -} - -#[bench] -fn files_sherlock(b: &mut test::Bencher) { - bench_files(data::SHERLOCK, b); -} - -#[bench] -fn files_code(b: &mut test::Bencher) { - bench_files(data::CODE, b); -} - -#[bench] -fn files_corpus(b: &mut test::Bencher) { - bench_files(data::CORPUS, b); -} - -fn bench_identifiers(data: &str, b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data).unwrap(); - - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Tokenizer::new(); - let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser(); - b.iter(|| { - checks.check_file( - sample_path.path(), - true, - &parser, - &corrections, - &typos_cli::report::PrintSilent, - ) - }); - - temp.close().unwrap(); -} - -#[bench] -fn identifiers_empty(b: &mut test::Bencher) { - bench_identifiers(data::EMPTY, b); -} - -#[bench] -fn identifiers_no_tokens(b: &mut test::Bencher) { - bench_identifiers(data::NO_TOKENS, b); -} - -#[bench] -fn identifiers_single_token(b: &mut test::Bencher) { - bench_identifiers(data::SINGLE_TOKEN, b); -} - -#[bench] -fn identifiers_sherlock(b: &mut test::Bencher) { - bench_identifiers(data::SHERLOCK, b); -} - -#[bench] -fn identifiers_code(b: &mut test::Bencher) { - bench_identifiers(data::CODE, b); -} - -#[bench] -fn identifiers_corpus(b: &mut test::Bencher) { - bench_identifiers(data::CORPUS, b); -} - -fn bench_words(data: &str, b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data).unwrap(); - - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Tokenizer::new(); - let checks = typos_cli::checks::TyposSettings::new().build_word_parser(); - b.iter(|| { - checks.check_file( - sample_path.path(), - true, - &parser, - &corrections, - &typos_cli::report::PrintSilent, - ) - }); - - temp.close().unwrap(); -} - -#[bench] -fn words_empty(b: &mut test::Bencher) { - bench_words(data::EMPTY, b); -} - -#[bench] -fn words_no_tokens(b: &mut test::Bencher) { - bench_words(data::NO_TOKENS, b); -} - -#[bench] -fn words_single_token(b: &mut test::Bencher) { - bench_words(data::SINGLE_TOKEN, b); -} - -#[bench] -fn words_sherlock(b: &mut test::Bencher) { - bench_words(data::SHERLOCK, b); -} - -#[bench] -fn words_code(b: &mut test::Bencher) { - bench_words(data::CODE, b); -} - -#[bench] -fn words_corpus(b: &mut test::Bencher) { - bench_words(data::CORPUS, b); -} - -fn bench_typos(data: &str, b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data).unwrap(); - - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Tokenizer::new(); - let checks = typos_cli::checks::TyposSettings::new().build_typos(); - b.iter(|| { - checks.check_file( - sample_path.path(), - true, - &parser, - &corrections, - &typos_cli::report::PrintSilent, - ) - }); - - temp.close().unwrap(); -} - -#[bench] -fn typos_empty(b: &mut test::Bencher) { - bench_typos(data::EMPTY, b); -} - -#[bench] -fn typos_no_tokens(b: &mut test::Bencher) { - bench_typos(data::NO_TOKENS, b); -} - -#[bench] -fn typos_single_token(b: &mut test::Bencher) { - bench_typos(data::SINGLE_TOKEN, b); -} - -#[bench] -fn typos_sherlock(b: &mut test::Bencher) { - bench_typos(data::SHERLOCK, b); -} - -#[bench] -fn typos_code(b: &mut test::Bencher) { - bench_typos(data::CODE, b); -} - -#[bench] -fn typos_corpus(b: &mut test::Bencher) { - bench_typos(data::CORPUS, b); -} +criterion_group!(benches, bench_checks,); +criterion_main!(benches); diff --git a/benches/corrections.rs b/benches/corrections.rs index 0e3a7f4..303c1bf 100644 --- a/benches/corrections.rs +++ b/benches/corrections.rs @@ -1,29 +1,34 @@ -#![feature(test)] +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; -extern crate test; - -#[bench] -fn load_corrections(b: &mut test::Bencher) { - b.iter(|| typos_cli::dict::BuiltIn::new(Default::default())); +fn bench_dict_load(c: &mut Criterion) { + let mut group = c.benchmark_group("load"); + group.bench_function(BenchmarkId::new("load", "builtin"), |b| { + b.iter(|| typos_cli::dict::BuiltIn::new(Default::default())); + }); + group.finish(); } -#[bench] -fn correct_word_hit(b: &mut test::Bencher) { - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let input = typos::tokens::Word::new("successs", 0).unwrap(); - assert_eq!( - corrections.correct_word(input), - Some(typos::Status::Corrections(vec![ - std::borrow::Cow::Borrowed("successes") - ])) - ); - b.iter(|| corrections.correct_word(input)); +fn bench_dict_lookup(c: &mut Criterion) { + let mut group = c.benchmark_group("lookup"); + group.bench_function(BenchmarkId::new("lookup", "hit"), |b| { + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); + let input = typos::tokens::Word::new("successs", 0).unwrap(); + assert_eq!( + corrections.correct_word(input), + Some(typos::Status::Corrections(vec![ + std::borrow::Cow::Borrowed("successes") + ])) + ); + b.iter(|| corrections.correct_word(input)); + }); + group.bench_function(BenchmarkId::new("lookup", "miss"), |b| { + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); + let input = typos::tokens::Word::new("success", 0).unwrap(); + assert!(corrections.correct_word(input).is_none()); + b.iter(|| corrections.correct_word(input)); + }); + group.finish(); } -#[bench] -fn correct_word_miss(b: &mut test::Bencher) { - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let input = typos::tokens::Word::new("success", 0).unwrap(); - assert!(corrections.correct_word(input).is_none()); - b.iter(|| corrections.correct_word(input)); -} +criterion_group!(benches, bench_dict_load, bench_dict_lookup); +criterion_main!(benches); diff --git a/benches/data.rs b/benches/data.rs index be89d07..5f25ba6 100644 --- a/benches/data.rs +++ b/benches/data.rs @@ -1,11 +1,11 @@ -pub const EMPTY: &str = ""; +pub static EMPTY: &str = ""; -pub const NO_TOKENS: &str = " "; +pub static NO_TOKENS: &str = " "; -pub const SINGLE_TOKEN: &str = "success"; +pub static SINGLE_TOKEN: &str = "success"; // Stolen from https://github.com/BurntSushi/ripgrep/blob/master/grep-searcher/src/searcher/glue.rs -pub const SHERLOCK: &'static str = "\ +pub static SHERLOCK: &'static str = "\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always be, to a very large extent, the result of luck. Sherlock Holmes @@ -15,7 +15,7 @@ and exhibited clearly, with a label attached.\ "; // Stolen from https://github.com/BurntSushi/ripgrep/blob/master/grep-searcher/src/searcher/glue.rs -pub const CODE: &'static str = "\ +pub static CODE: &'static str = "\ extern crate snap; use std::io; fn main() { @@ -28,4 +28,13 @@ fn main() { } "; -pub const CORPUS: &str = include_str!("../crates/typos-dict/assets/words.csv"); +pub static CORPUS: &str = include_str!("../crates/typos-dict/assets/words.csv"); + +pub static DATA: &[(&str, &str)] = &[ + ("empty", EMPTY), + ("no_tokens", NO_TOKENS), + ("single_token", SINGLE_TOKEN), + ("sherlock", SHERLOCK), + ("code", CODE), + ("corpus", CORPUS), +]; diff --git a/benches/tokenize.rs b/benches/tokenize.rs index efcce0b..f86e61c 100644 --- a/benches/tokenize.rs +++ b/benches/tokenize.rs @@ -1,71 +1,22 @@ -#![feature(test)] - -extern crate test; - mod data; -#[bench] -fn ident_parse_empty(b: &mut test::Bencher) { - let parser = typos::tokens::Tokenizer::new(); - b.iter(|| parser.parse_bytes(data::EMPTY.as_bytes()).last()); +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; + +fn bench_tokenize(c: &mut Criterion) { + let mut group = c.benchmark_group("tokenize"); + for (name, sample) in data::DATA { + let len = sample.len(); + group.bench_with_input(BenchmarkId::new("ident", name), &len, |b, _| { + let parser = typos::tokens::Tokenizer::new(); + b.iter(|| parser.parse_bytes(sample.as_bytes()).last()); + }); + group.bench_with_input(BenchmarkId::new("words", name), &len, |b, _| { + let symbol = typos::tokens::Identifier::new_unchecked(sample, 0); + b.iter(|| symbol.split().last()); + }); + } + group.finish(); } -#[bench] -fn ident_parse_no_tokens(b: &mut test::Bencher) { - let parser = typos::tokens::Tokenizer::new(); - b.iter(|| parser.parse_bytes(data::NO_TOKENS.as_bytes()).last()); -} - -#[bench] -fn ident_parse_single_token(b: &mut test::Bencher) { - let parser = typos::tokens::Tokenizer::new(); - b.iter(|| { - parser.parse_bytes(data::SINGLE_TOKEN.as_bytes()).last(); - }); -} - -#[bench] -fn ident_parse_sherlock(b: &mut test::Bencher) { - let parser = typos::tokens::Tokenizer::new(); - b.iter(|| parser.parse_bytes(data::SHERLOCK.as_bytes()).last()); -} - -#[bench] -fn ident_parse_code(b: &mut test::Bencher) { - let parser = typos::tokens::Tokenizer::new(); - b.iter(|| parser.parse_bytes(data::CODE.as_bytes()).last()); -} - -#[bench] -fn ident_parse_corpus(b: &mut test::Bencher) { - let parser = typos::tokens::Tokenizer::new(); - b.iter(|| parser.parse_bytes(data::CORPUS.as_bytes()).last()); -} - -#[bench] -fn ident_split_lowercase_short(b: &mut test::Bencher) { - let input = "abcabcabcabc"; - let symbol = typos::tokens::Identifier::new_unchecked(input, 0); - b.iter(|| symbol.split().last()); -} - -#[bench] -fn ident_split_lowercase_long(b: &mut test::Bencher) { - let input = "abcabcabcabc".repeat(90); - let symbol = typos::tokens::Identifier::new_unchecked(&input, 0); - b.iter(|| symbol.split().last()); -} - -#[bench] -fn ident_split_mixed_short(b: &mut test::Bencher) { - let input = "abcABCAbc123"; - let symbol = typos::tokens::Identifier::new_unchecked(input, 0); - b.iter(|| symbol.split().last()); -} - -#[bench] -fn ident_split_mixed_long(b: &mut test::Bencher) { - let input = "abcABCAbc123".repeat(90); - let symbol = typos::tokens::Identifier::new_unchecked(&input, 0); - b.iter(|| symbol.split().last()); -} +criterion_group!(benches, bench_tokenize); +criterion_main!(benches);