test: Add benchmarks for real-world processing

This commit is contained in:
Ed Page 2019-10-25 15:48:39 -06:00
parent 09513fdc13
commit a60ab52c56
3 changed files with 292 additions and 1 deletions

1
Cargo.lock generated
View file

@ -758,6 +758,7 @@ name = "typos-cli"
version = "0.1.1"
dependencies = [
"assert_fs 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)",
"bstr 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"clap-verbosity-flag 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",

View file

@ -37,3 +37,4 @@ env_logger = "0.6"
[dev-dependencies]
assert_fs = "0.11"
bstr = "0.2"

View file

@ -4,7 +4,8 @@ extern crate test;
mod data;
pub use assert_fs::prelude::*;
use assert_fs::prelude::*;
use bstr::ByteSlice;
#[bench]
fn check_file_empty(b: &mut test::Bencher) {
@ -89,3 +90,291 @@ fn check_file_corpus(b: &mut test::Bencher) {
temp.close().unwrap();
}
/// Benchmarks reading the `data::EMPTY` sample back from disk.
///
/// The sample is written once to a file inside a temporary directory; only
/// the `std::fs::read` call runs inside the timed closure.
#[bench]
fn read_empty(b: &mut test::Bencher) {
    let workdir = assert_fs::TempDir::new().unwrap();
    let fixture = workdir.child("sample");
    fixture.write_str(data::EMPTY).unwrap();
    let fixture_path = fixture.path();

    // The `Result` is the closure's return value rather than being unwrapped.
    b.iter(|| std::fs::read(fixture_path));

    workdir.close().unwrap();
}
/// Benchmarks reading the `data::NO_TOKENS` sample back from disk.
///
/// The sample is written once to a file inside a temporary directory; only
/// the `std::fs::read` call runs inside the timed closure.
#[bench]
fn read_no_tokens(b: &mut test::Bencher) {
    let workdir = assert_fs::TempDir::new().unwrap();
    let fixture = workdir.child("sample");
    fixture.write_str(data::NO_TOKENS).unwrap();
    let fixture_path = fixture.path();

    // The `Result` is the closure's return value rather than being unwrapped.
    b.iter(|| std::fs::read(fixture_path));

    workdir.close().unwrap();
}
/// Benchmarks reading the `data::SINGLE_TOKEN` sample back from disk.
///
/// The sample is written once to a file inside a temporary directory; only
/// the `std::fs::read` call runs inside the timed closure.
#[bench]
fn read_single_token(b: &mut test::Bencher) {
    let workdir = assert_fs::TempDir::new().unwrap();
    let fixture = workdir.child("sample");
    fixture.write_str(data::SINGLE_TOKEN).unwrap();
    let fixture_path = fixture.path();

    // The `Result` is the closure's return value rather than being unwrapped.
    b.iter(|| std::fs::read(fixture_path));

    workdir.close().unwrap();
}
/// Benchmarks reading the `data::SHERLOCK` sample back from disk.
///
/// The sample is written once to a file inside a temporary directory; only
/// the `std::fs::read` call runs inside the timed closure.
#[bench]
fn read_sherlock(b: &mut test::Bencher) {
    let workdir = assert_fs::TempDir::new().unwrap();
    let fixture = workdir.child("sample");
    fixture.write_str(data::SHERLOCK).unwrap();
    let fixture_path = fixture.path();

    // The `Result` is the closure's return value rather than being unwrapped.
    b.iter(|| std::fs::read(fixture_path));

    workdir.close().unwrap();
}
/// Benchmarks reading the `data::CODE` sample back from disk.
///
/// The sample is written once to a file inside a temporary directory; only
/// the `std::fs::read` call runs inside the timed closure.
#[bench]
fn read_code(b: &mut test::Bencher) {
    let workdir = assert_fs::TempDir::new().unwrap();
    let fixture = workdir.child("sample");
    fixture.write_str(data::CODE).unwrap();
    let fixture_path = fixture.path();

    // The `Result` is the closure's return value rather than being unwrapped.
    b.iter(|| std::fs::read(fixture_path));

    workdir.close().unwrap();
}
/// Benchmarks reading the `data::CORPUS` sample back from disk.
///
/// The sample is written once to a file inside a temporary directory; only
/// the `std::fs::read` call runs inside the timed closure.
#[bench]
fn read_corpus(b: &mut test::Bencher) {
    let workdir = assert_fs::TempDir::new().unwrap();
    let fixture = workdir.child("sample");
    fixture.write_str(data::CORPUS).unwrap();
    let fixture_path = fixture.path();

    // The `Result` is the closure's return value rather than being unwrapped.
    b.iter(|| std::fs::read(fixture_path));

    workdir.close().unwrap();
}
/// Benchmarks splitting the `data::EMPTY` sample into numbered lines.
///
/// The line iterator is drained with `last()`, and that final
/// `(index, line)` pair is the closure's return value.
#[bench]
fn split_lines_empty(b: &mut test::Bencher) {
    b.iter(|| {
        let numbered_lines = data::EMPTY.as_bytes().lines().enumerate();
        numbered_lines.last()
    });
}
/// Benchmarks splitting the `data::NO_TOKENS` sample into numbered lines.
///
/// The line iterator is drained with `last()`, and that final
/// `(index, line)` pair is the closure's return value.
#[bench]
fn split_lines_no_tokens(b: &mut test::Bencher) {
    b.iter(|| {
        let numbered_lines = data::NO_TOKENS.as_bytes().lines().enumerate();
        numbered_lines.last()
    });
}
/// Benchmarks splitting the `data::SINGLE_TOKEN` sample into numbered lines.
///
/// The line iterator is drained with `last()`, and that final
/// `(index, line)` pair is the closure's return value.
#[bench]
fn split_lines_single_token(b: &mut test::Bencher) {
    b.iter(|| {
        let numbered_lines = data::SINGLE_TOKEN.as_bytes().lines().enumerate();
        numbered_lines.last()
    });
}
/// Benchmarks splitting the `data::SHERLOCK` sample into numbered lines.
///
/// The line iterator is drained with `last()`, and that final
/// `(index, line)` pair is the closure's return value.
#[bench]
fn split_lines_sherlock(b: &mut test::Bencher) {
    b.iter(|| {
        let numbered_lines = data::SHERLOCK.as_bytes().lines().enumerate();
        numbered_lines.last()
    });
}
/// Benchmarks splitting the `data::CODE` sample into numbered lines.
///
/// The line iterator is drained with `last()`, and that final
/// `(index, line)` pair is the closure's return value.
#[bench]
fn split_lines_code(b: &mut test::Bencher) {
    b.iter(|| {
        let numbered_lines = data::CODE.as_bytes().lines().enumerate();
        numbered_lines.last()
    });
}
/// Benchmarks splitting the `data::CORPUS` sample into numbered lines.
///
/// The line iterator is drained with `last()`, and that final
/// `(index, line)` pair is the closure's return value.
#[bench]
fn split_lines_corpus(b: &mut test::Bencher) {
    b.iter(|| {
        let numbered_lines = data::CORPUS.as_bytes().lines().enumerate();
        numbered_lines.last()
    });
}
/// Benchmarks running the parser over every line of the `data::EMPTY` sample.
///
/// The parser is built once, outside the timed closure. Each line is passed
/// to `Parser::parse_bytes` and the returned iterator is drained with
/// `last()`. The final item is handed to `test::black_box` so the parsing
/// work has an observable consumer instead of being a discarded expression.
#[bench]
fn parse_empty(b: &mut test::Bencher) {
    let parser = typos::tokens::Parser::new();
    b.iter(|| {
        data::EMPTY
            .as_bytes()
            .lines()
            .enumerate()
            // The line index is unused here.
            // NOTE(review): `enumerate()` presumably mirrors the line
            // numbering done by the real checker — confirm.
            .for_each(|(_line_idx, line)| {
                test::black_box(parser.parse_bytes(line).last());
            })
    });
}
/// Benchmarks running the parser over every line of the `data::NO_TOKENS`
/// sample.
///
/// The parser is built once, outside the timed closure. Each line is passed
/// to `Parser::parse_bytes` and the returned iterator is drained with
/// `last()`. The final item is handed to `test::black_box` so the parsing
/// work has an observable consumer instead of being a discarded expression.
#[bench]
fn parse_no_tokens(b: &mut test::Bencher) {
    let parser = typos::tokens::Parser::new();
    b.iter(|| {
        data::NO_TOKENS
            .as_bytes()
            .lines()
            .enumerate()
            // The line index is unused here.
            // NOTE(review): `enumerate()` presumably mirrors the line
            // numbering done by the real checker — confirm.
            .for_each(|(_line_idx, line)| {
                test::black_box(parser.parse_bytes(line).last());
            })
    });
}
/// Benchmarks running the parser over every line of the `data::SINGLE_TOKEN`
/// sample.
///
/// The parser is built once, outside the timed closure. Each line is passed
/// to `Parser::parse_bytes` and the returned iterator is drained with
/// `last()`. The final item is handed to `test::black_box` so the parsing
/// work has an observable consumer instead of being a discarded expression.
#[bench]
fn parse_single_token(b: &mut test::Bencher) {
    let parser = typos::tokens::Parser::new();
    b.iter(|| {
        data::SINGLE_TOKEN
            .as_bytes()
            .lines()
            .enumerate()
            // The line index is unused here.
            // NOTE(review): `enumerate()` presumably mirrors the line
            // numbering done by the real checker — confirm.
            .for_each(|(_line_idx, line)| {
                test::black_box(parser.parse_bytes(line).last());
            })
    });
}
/// Benchmarks running the parser over every line of the `data::SHERLOCK`
/// sample.
///
/// The parser is built once, outside the timed closure. Each line is passed
/// to `Parser::parse_bytes` and the returned iterator is drained with
/// `last()`. The final item is handed to `test::black_box` so the parsing
/// work has an observable consumer instead of being a discarded expression.
#[bench]
fn parse_sherlock(b: &mut test::Bencher) {
    let parser = typos::tokens::Parser::new();
    b.iter(|| {
        data::SHERLOCK
            .as_bytes()
            .lines()
            .enumerate()
            // The line index is unused here.
            // NOTE(review): `enumerate()` presumably mirrors the line
            // numbering done by the real checker — confirm.
            .for_each(|(_line_idx, line)| {
                test::black_box(parser.parse_bytes(line).last());
            })
    });
}
/// Benchmarks running the parser over every line of the `data::CODE` sample.
///
/// The parser is built once, outside the timed closure. Each line is passed
/// to `Parser::parse_bytes` and the returned iterator is drained with
/// `last()`. The final item is handed to `test::black_box` so the parsing
/// work has an observable consumer instead of being a discarded expression.
#[bench]
fn parse_code(b: &mut test::Bencher) {
    let parser = typos::tokens::Parser::new();
    b.iter(|| {
        data::CODE
            .as_bytes()
            .lines()
            .enumerate()
            // The line index is unused here.
            // NOTE(review): `enumerate()` presumably mirrors the line
            // numbering done by the real checker — confirm.
            .for_each(|(_line_idx, line)| {
                test::black_box(parser.parse_bytes(line).last());
            })
    });
}
/// Benchmarks running the parser over every line of the `data::CORPUS`
/// sample.
///
/// The parser is built once, outside the timed closure. Each line is passed
/// to `Parser::parse_bytes` and the returned iterator is drained with
/// `last()`. The final item is handed to `test::black_box` so the parsing
/// work has an observable consumer instead of being a discarded expression.
#[bench]
fn parse_corpus(b: &mut test::Bencher) {
    let parser = typos::tokens::Parser::new();
    b.iter(|| {
        data::CORPUS
            .as_bytes()
            .lines()
            .enumerate()
            // The line index is unused here.
            // NOTE(review): `enumerate()` presumably mirrors the line
            // numbering done by the real checker — confirm.
            .for_each(|(_line_idx, line)| {
                test::black_box(parser.parse_bytes(line).last());
            })
    });
}
/// Benchmarks parsing and then splitting every parsed item of the
/// `data::EMPTY` sample.
///
/// The parser is built once, outside the timed closure. Each line goes
/// through `Parser::parse_bytes`; every item it yields has `split()` called
/// on it and the resulting iterator is drained with `last()`. The final piece
/// is handed to `test::black_box` so the splitting work has an observable
/// consumer instead of being a discarded expression.
#[bench]
fn split_empty(b: &mut test::Bencher) {
    let parser = typos::tokens::Parser::new();
    b.iter(|| {
        data::EMPTY
            .as_bytes()
            .lines()
            .enumerate()
            // The line index is unused here.
            // NOTE(review): `enumerate()` presumably mirrors the line
            // numbering done by the real checker — confirm.
            .for_each(|(_line_idx, line)| {
                // `ident` is one item yielded by the parser for this line;
                // it gets its own name so it is not confused with `line`.
                parser.parse_bytes(line).for_each(|ident| {
                    test::black_box(ident.split().last());
                })
            })
    });
}
/// Benchmarks parsing and then splitting every parsed item of the
/// `data::NO_TOKENS` sample.
///
/// The parser is built once, outside the timed closure. Each line goes
/// through `Parser::parse_bytes`; every item it yields has `split()` called
/// on it and the resulting iterator is drained with `last()`. The final piece
/// is handed to `test::black_box` so the splitting work has an observable
/// consumer instead of being a discarded expression.
#[bench]
fn split_no_tokens(b: &mut test::Bencher) {
    let parser = typos::tokens::Parser::new();
    b.iter(|| {
        data::NO_TOKENS
            .as_bytes()
            .lines()
            .enumerate()
            // The line index is unused here.
            // NOTE(review): `enumerate()` presumably mirrors the line
            // numbering done by the real checker — confirm.
            .for_each(|(_line_idx, line)| {
                // `ident` is one item yielded by the parser for this line;
                // it gets its own name so it is not confused with `line`.
                parser.parse_bytes(line).for_each(|ident| {
                    test::black_box(ident.split().last());
                })
            })
    });
}
/// Benchmarks parsing and then splitting every parsed item of the
/// `data::SINGLE_TOKEN` sample.
///
/// The parser is built once, outside the timed closure. Each line goes
/// through `Parser::parse_bytes`; every item it yields has `split()` called
/// on it and the resulting iterator is drained with `last()`. The final piece
/// is handed to `test::black_box` so the splitting work has an observable
/// consumer instead of being a discarded expression.
#[bench]
fn split_single_token(b: &mut test::Bencher) {
    let parser = typos::tokens::Parser::new();
    b.iter(|| {
        data::SINGLE_TOKEN
            .as_bytes()
            .lines()
            .enumerate()
            // The line index is unused here.
            // NOTE(review): `enumerate()` presumably mirrors the line
            // numbering done by the real checker — confirm.
            .for_each(|(_line_idx, line)| {
                // `ident` is one item yielded by the parser for this line;
                // it gets its own name so it is not confused with `line`.
                parser.parse_bytes(line).for_each(|ident| {
                    test::black_box(ident.split().last());
                })
            })
    });
}
/// Benchmarks parsing and then splitting every parsed item of the
/// `data::SHERLOCK` sample.
///
/// The parser is built once, outside the timed closure. Each line goes
/// through `Parser::parse_bytes`; every item it yields has `split()` called
/// on it and the resulting iterator is drained with `last()`. The final piece
/// is handed to `test::black_box` so the splitting work has an observable
/// consumer instead of being a discarded expression.
#[bench]
fn split_sherlock(b: &mut test::Bencher) {
    let parser = typos::tokens::Parser::new();
    b.iter(|| {
        data::SHERLOCK
            .as_bytes()
            .lines()
            .enumerate()
            // The line index is unused here.
            // NOTE(review): `enumerate()` presumably mirrors the line
            // numbering done by the real checker — confirm.
            .for_each(|(_line_idx, line)| {
                // `ident` is one item yielded by the parser for this line;
                // it gets its own name so it is not confused with `line`.
                parser.parse_bytes(line).for_each(|ident| {
                    test::black_box(ident.split().last());
                })
            })
    });
}
/// Benchmarks parsing and then splitting every parsed item of the
/// `data::CODE` sample.
///
/// The parser is built once, outside the timed closure. Each line goes
/// through `Parser::parse_bytes`; every item it yields has `split()` called
/// on it and the resulting iterator is drained with `last()`. The final piece
/// is handed to `test::black_box` so the splitting work has an observable
/// consumer instead of being a discarded expression.
#[bench]
fn split_code(b: &mut test::Bencher) {
    let parser = typos::tokens::Parser::new();
    b.iter(|| {
        data::CODE
            .as_bytes()
            .lines()
            .enumerate()
            // The line index is unused here.
            // NOTE(review): `enumerate()` presumably mirrors the line
            // numbering done by the real checker — confirm.
            .for_each(|(_line_idx, line)| {
                // `ident` is one item yielded by the parser for this line;
                // it gets its own name so it is not confused with `line`.
                parser.parse_bytes(line).for_each(|ident| {
                    test::black_box(ident.split().last());
                })
            })
    });
}
/// Benchmarks parsing and then splitting every parsed item of the
/// `data::CORPUS` sample.
///
/// The parser is built once, outside the timed closure. Each line goes
/// through `Parser::parse_bytes`; every item it yields has `split()` called
/// on it and the resulting iterator is drained with `last()`. The final piece
/// is handed to `test::black_box` so the splitting work has an observable
/// consumer instead of being a discarded expression.
#[bench]
fn split_corpus(b: &mut test::Bencher) {
    let parser = typos::tokens::Parser::new();
    b.iter(|| {
        data::CORPUS
            .as_bytes()
            .lines()
            .enumerate()
            // The line index is unused here.
            // NOTE(review): `enumerate()` presumably mirrors the line
            // numbering done by the real checker — confirm.
            .for_each(|(_line_idx, line)| {
                // `ident` is one item yielded by the parser for this line;
                // it gets its own name so it is not confused with `line`.
                parser.parse_bytes(line).for_each(|ident| {
                    test::black_box(ident.split().last());
                })
            })
    });
}