From a60ab52c5684f4ef5738e0da44c4dfc9efb589e1 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 25 Oct 2019 15:48:39 -0600 Subject: [PATCH] test: Add benchmarks for real-world processing --- Cargo.lock | 1 + Cargo.toml | 1 + benches/file.rs | 291 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 292 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index a62f41a..afe752a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -758,6 +758,7 @@ name = "typos-cli" version = "0.1.1" dependencies = [ "assert_fs 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)", + "bstr 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap-verbosity-flag 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index b78815a..8733acd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,3 +37,4 @@ env_logger = "0.6" [dev-dependencies] assert_fs = "0.11" +bstr = "0.2" diff --git a/benches/file.rs b/benches/file.rs index ccf3ff2..fc941e2 100644 --- a/benches/file.rs +++ b/benches/file.rs @@ -4,7 +4,8 @@ extern crate test; mod data; -pub use assert_fs::prelude::*; +use assert_fs::prelude::*; +use bstr::ByteSlice; #[bench] fn check_file_empty(b: &mut test::Bencher) { @@ -89,3 +90,291 @@ fn check_file_corpus(b: &mut test::Bencher) { temp.close().unwrap(); } + +#[bench] +fn read_empty(b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data::EMPTY).unwrap(); + + b.iter(|| std::fs::read(sample_path.path())); + + temp.close().unwrap(); +} + +#[bench] +fn read_no_tokens(b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data::NO_TOKENS).unwrap(); + + b.iter(|| 
std::fs::read(sample_path.path())); + + temp.close().unwrap(); +} + +#[bench] +fn read_single_token(b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data::SINGLE_TOKEN).unwrap(); + + b.iter(|| std::fs::read(sample_path.path())); + + temp.close().unwrap(); +} + +#[bench] +fn read_sherlock(b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data::SHERLOCK).unwrap(); + + b.iter(|| std::fs::read(sample_path.path())); + + temp.close().unwrap(); +} + +#[bench] +fn read_code(b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data::CODE).unwrap(); + + b.iter(|| std::fs::read(sample_path.path())); + + temp.close().unwrap(); +} + +#[bench] +fn read_corpus(b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data::CORPUS).unwrap(); + + b.iter(|| std::fs::read(sample_path.path())); + + temp.close().unwrap(); +} + +#[bench] +fn split_lines_empty(b: &mut test::Bencher) { + b.iter(|| data::EMPTY.as_bytes().lines().enumerate().last()); +} + +#[bench] +fn split_lines_no_tokens(b: &mut test::Bencher) { + b.iter(|| data::NO_TOKENS.as_bytes().lines().enumerate().last()); +} + +#[bench] +fn split_lines_single_token(b: &mut test::Bencher) { + b.iter(|| data::SINGLE_TOKEN.as_bytes().lines().enumerate().last()); +} + +#[bench] +fn split_lines_sherlock(b: &mut test::Bencher) { + b.iter(|| data::SHERLOCK.as_bytes().lines().enumerate().last()); +} + +#[bench] +fn split_lines_code(b: &mut test::Bencher) { + b.iter(|| data::CODE.as_bytes().lines().enumerate().last()); +} + +#[bench] +fn split_lines_corpus(b: &mut test::Bencher) { + b.iter(|| data::CORPUS.as_bytes().lines().enumerate().last()); +} + +#[bench] +fn parse_empty(b: &mut 
test::Bencher) { + let parser = typos::tokens::Parser::new(); + b.iter(|| { + data::EMPTY + .as_bytes() + .lines() + .enumerate() + .for_each(|(_idx, l)| { + parser.parse_bytes(l).last(); + () + }) + }); +} + +#[bench] +fn parse_no_tokens(b: &mut test::Bencher) { + let parser = typos::tokens::Parser::new(); + b.iter(|| { + data::NO_TOKENS + .as_bytes() + .lines() + .enumerate() + .for_each(|(_idx, l)| { + parser.parse_bytes(l).last(); + () + }) + }); +} + +#[bench] +fn parse_single_token(b: &mut test::Bencher) { + let parser = typos::tokens::Parser::new(); + b.iter(|| { + data::SINGLE_TOKEN + .as_bytes() + .lines() + .enumerate() + .for_each(|(_idx, l)| { + parser.parse_bytes(l).last(); + () + }) + }); +} + +#[bench] +fn parse_sherlock(b: &mut test::Bencher) { + let parser = typos::tokens::Parser::new(); + b.iter(|| { + data::SHERLOCK + .as_bytes() + .lines() + .enumerate() + .for_each(|(_idx, l)| { + parser.parse_bytes(l).last(); + () + }) + }); +} + +#[bench] +fn parse_code(b: &mut test::Bencher) { + let parser = typos::tokens::Parser::new(); + b.iter(|| { + data::CODE + .as_bytes() + .lines() + .enumerate() + .for_each(|(_idx, l)| { + parser.parse_bytes(l).last(); + () + }) + }); +} + +#[bench] +fn parse_corpus(b: &mut test::Bencher) { + let parser = typos::tokens::Parser::new(); + b.iter(|| { + data::CORPUS + .as_bytes() + .lines() + .enumerate() + .for_each(|(_idx, l)| { + parser.parse_bytes(l).last(); + () + }) + }); +} + +#[bench] +fn split_empty(b: &mut test::Bencher) { + let parser = typos::tokens::Parser::new(); + b.iter(|| { + data::EMPTY + .as_bytes() + .lines() + .enumerate() + .for_each(|(_idx, l)| { + parser.parse_bytes(l).for_each(|l| { + l.split().last(); + () + }) + }) + }); +} + +#[bench] +fn split_no_tokens(b: &mut test::Bencher) { + let parser = typos::tokens::Parser::new(); + b.iter(|| { + data::NO_TOKENS + .as_bytes() + .lines() + .enumerate() + .for_each(|(_idx, l)| { + parser.parse_bytes(l).for_each(|l| { + l.split().last(); + () + }) + }) 
+ }); +} + +#[bench] +fn split_single_token(b: &mut test::Bencher) { + let parser = typos::tokens::Parser::new(); + b.iter(|| { + data::SINGLE_TOKEN + .as_bytes() + .lines() + .enumerate() + .for_each(|(_idx, l)| { + parser.parse_bytes(l).for_each(|l| { + l.split().last(); + () + }) + }) + }); +} + +#[bench] +fn split_sherlock(b: &mut test::Bencher) { + let parser = typos::tokens::Parser::new(); + b.iter(|| { + data::SHERLOCK + .as_bytes() + .lines() + .enumerate() + .for_each(|(_idx, l)| { + parser.parse_bytes(l).for_each(|l| { + l.split().last(); + () + }) + }) + }); +} + +#[bench] +fn split_code(b: &mut test::Bencher) { + let parser = typos::tokens::Parser::new(); + b.iter(|| { + data::CODE + .as_bytes() + .lines() + .enumerate() + .for_each(|(_idx, l)| { + parser.parse_bytes(l).for_each(|l| { + l.split().last(); + () + }) + }) + }); +} + +#[bench] +fn split_corpus(b: &mut test::Bencher) { + let parser = typos::tokens::Parser::new(); + b.iter(|| { + data::CORPUS + .as_bytes() + .lines() + .enumerate() + .for_each(|(_idx, l)| { + parser.parse_bytes(l).for_each(|l| { + l.split().last(); + () + }) + }) + }); +}