mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-22 00:51:11 -05:00
Merge pull request #61 from epage/overhead
Look into processing overhead
This commit is contained in:
commit
a1a8ba2268
5 changed files with 339 additions and 49 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -758,6 +758,7 @@ name = "typos-cli"
|
|||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"assert_fs 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bstr 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"clap-verbosity-flag 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
|
|
@ -37,3 +37,4 @@ env_logger = "0.6"
|
|||
|
||||
[dev-dependencies]
|
||||
assert_fs = "0.11"
|
||||
bstr = "0.2"
|
||||
|
|
303
benches/file.rs
303
benches/file.rs
|
@ -4,10 +4,11 @@ extern crate test;
|
|||
|
||||
mod data;
|
||||
|
||||
pub use assert_fs::prelude::*;
|
||||
use assert_fs::prelude::*;
|
||||
use bstr::ByteSlice;
|
||||
|
||||
#[bench]
|
||||
fn process_empty(b: &mut test::Bencher) {
|
||||
fn check_file_empty(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::EMPTY).unwrap();
|
||||
|
@ -21,7 +22,7 @@ fn process_empty(b: &mut test::Bencher) {
|
|||
}
|
||||
|
||||
#[bench]
|
||||
fn process_no_tokens(b: &mut test::Bencher) {
|
||||
fn check_file_no_tokens(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::NO_TOKENS).unwrap();
|
||||
|
@ -35,7 +36,7 @@ fn process_no_tokens(b: &mut test::Bencher) {
|
|||
}
|
||||
|
||||
#[bench]
|
||||
fn process_single_token(b: &mut test::Bencher) {
|
||||
fn check_file_single_token(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::SINGLE_TOKEN).unwrap();
|
||||
|
@ -49,7 +50,7 @@ fn process_single_token(b: &mut test::Bencher) {
|
|||
}
|
||||
|
||||
#[bench]
|
||||
fn process_sherlock(b: &mut test::Bencher) {
|
||||
fn check_file_sherlock(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::SHERLOCK).unwrap();
|
||||
|
@ -63,7 +64,7 @@ fn process_sherlock(b: &mut test::Bencher) {
|
|||
}
|
||||
|
||||
#[bench]
|
||||
fn process_code(b: &mut test::Bencher) {
|
||||
fn check_file_code(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::CODE).unwrap();
|
||||
|
@ -77,7 +78,7 @@ fn process_code(b: &mut test::Bencher) {
|
|||
}
|
||||
|
||||
#[bench]
|
||||
fn process_corpus(b: &mut test::Bencher) {
|
||||
fn check_file_corpus(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::CORPUS).unwrap();
|
||||
|
@ -89,3 +90,291 @@ fn process_corpus(b: &mut test::Bencher) {
|
|||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_empty(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::EMPTY).unwrap();
|
||||
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_no_tokens(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::NO_TOKENS).unwrap();
|
||||
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_single_token(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::SINGLE_TOKEN).unwrap();
|
||||
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_sherlock(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::SHERLOCK).unwrap();
|
||||
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_code(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::CODE).unwrap();
|
||||
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_corpus(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::CORPUS).unwrap();
|
||||
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_lines_empty(b: &mut test::Bencher) {
|
||||
b.iter(|| data::EMPTY.as_bytes().lines().enumerate().last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_lines_no_tokens(b: &mut test::Bencher) {
|
||||
b.iter(|| data::NO_TOKENS.as_bytes().lines().enumerate().last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_lines_single_token(b: &mut test::Bencher) {
|
||||
b.iter(|| data::SINGLE_TOKEN.as_bytes().lines().enumerate().last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_lines_sherlock(b: &mut test::Bencher) {
|
||||
b.iter(|| data::SHERLOCK.as_bytes().lines().enumerate().last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_lines_code(b: &mut test::Bencher) {
|
||||
b.iter(|| data::CODE.as_bytes().lines().enumerate().last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_lines_corpus(b: &mut test::Bencher) {
|
||||
b.iter(|| data::CORPUS.as_bytes().lines().enumerate().last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_empty(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::EMPTY
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).last();
|
||||
()
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_no_tokens(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::NO_TOKENS
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).last();
|
||||
()
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_single_token(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::SINGLE_TOKEN
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).last();
|
||||
()
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_sherlock(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::SHERLOCK
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).last();
|
||||
()
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_code(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::CODE
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).last();
|
||||
()
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_corpus(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::CORPUS
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).last();
|
||||
()
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_empty(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::EMPTY
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).for_each(|l| {
|
||||
l.split().last();
|
||||
()
|
||||
})
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_no_tokens(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::NO_TOKENS
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).for_each(|l| {
|
||||
l.split().last();
|
||||
()
|
||||
})
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_single_token(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::SINGLE_TOKEN
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).for_each(|l| {
|
||||
l.split().last();
|
||||
()
|
||||
})
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_sherlock(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::SHERLOCK
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).for_each(|l| {
|
||||
l.split().last();
|
||||
()
|
||||
})
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_code(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::CODE
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).for_each(|l| {
|
||||
l.split().last();
|
||||
()
|
||||
})
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_corpus(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::CORPUS
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).for_each(|l| {
|
||||
l.split().last();
|
||||
()
|
||||
})
|
||||
})
|
||||
});
|
||||
}
|
||||
|
|
|
@ -5,19 +5,19 @@ extern crate test;
|
|||
mod data;
|
||||
|
||||
#[bench]
|
||||
fn symbol_parse_empty(b: &mut test::Bencher) {
|
||||
fn ident_parse_empty(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| parser.parse_bytes(data::EMPTY.as_bytes()).last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn symbol_parse_no_tokens(b: &mut test::Bencher) {
|
||||
fn ident_parse_no_tokens(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| parser.parse_bytes(data::NO_TOKENS.as_bytes()).last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn symbol_parse_single_token(b: &mut test::Bencher) {
|
||||
fn ident_parse_single_token(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
parser.parse_bytes(data::SINGLE_TOKEN.as_bytes()).last();
|
||||
|
@ -25,46 +25,46 @@ fn symbol_parse_single_token(b: &mut test::Bencher) {
|
|||
}
|
||||
|
||||
#[bench]
|
||||
fn symbol_parse_sherlock(b: &mut test::Bencher) {
|
||||
fn ident_parse_sherlock(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| parser.parse_bytes(data::SHERLOCK.as_bytes()).last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn symbol_parse_code(b: &mut test::Bencher) {
|
||||
fn ident_parse_code(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| parser.parse_bytes(data::CODE.as_bytes()).last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn symbol_parse_corpus(b: &mut test::Bencher) {
|
||||
fn ident_parse_corpus(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| parser.parse_bytes(data::CORPUS.as_bytes()).last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn symbol_split_lowercase_short(b: &mut test::Bencher) {
|
||||
fn ident_split_lowercase_short(b: &mut test::Bencher) {
|
||||
let input = "abcabcabcabc";
|
||||
let symbol = typos::tokens::Identifier::new_unchecked(input, 0);
|
||||
b.iter(|| symbol.split().last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn symbol_split_lowercase_long(b: &mut test::Bencher) {
|
||||
fn ident_split_lowercase_long(b: &mut test::Bencher) {
|
||||
let input = "abcabcabcabc".repeat(90);
|
||||
let symbol = typos::tokens::Identifier::new_unchecked(&input, 0);
|
||||
b.iter(|| symbol.split().last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn symbol_split_mixed_short(b: &mut test::Bencher) {
|
||||
fn ident_split_mixed_short(b: &mut test::Bencher) {
|
||||
let input = "abcABCAbc123";
|
||||
let symbol = typos::tokens::Identifier::new_unchecked(input, 0);
|
||||
b.iter(|| symbol.split().last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn symbol_split_mixed_long(b: &mut test::Bencher) {
|
||||
fn ident_split_mixed_long(b: &mut test::Bencher) {
|
||||
let input = "abcABCAbc123".repeat(90);
|
||||
let symbol = typos::tokens::Identifier::new_unchecked(&input, 0);
|
||||
b.iter(|| symbol.split().last());
|
||||
|
|
|
@ -1,6 +1,3 @@
|
|||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
|
||||
use bstr::ByteSlice;
|
||||
|
||||
use crate::report;
|
||||
|
@ -91,17 +88,18 @@ impl<'d, 'p> Checks<'d, 'p> {
|
|||
};
|
||||
report(msg.into());
|
||||
typos_found = true;
|
||||
}
|
||||
for word in ident.split() {
|
||||
if let Some(correction) = self.dictionary.correct_word(word) {
|
||||
let msg = report::FilenameCorrection {
|
||||
path,
|
||||
typo: word.token(),
|
||||
correction,
|
||||
non_exhaustive: (),
|
||||
};
|
||||
report(msg.into());
|
||||
typos_found = true;
|
||||
} else {
|
||||
for word in ident.split() {
|
||||
if let Some(correction) = self.dictionary.correct_word(word) {
|
||||
let msg = report::FilenameCorrection {
|
||||
path,
|
||||
typo: word.token(),
|
||||
correction,
|
||||
non_exhaustive: (),
|
||||
};
|
||||
report(msg.into());
|
||||
typos_found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -122,9 +120,9 @@ impl<'d, 'p> Checks<'d, 'p> {
|
|||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
File::open(path)?.read_to_end(&mut buffer)?;
|
||||
if !explicit && !self.binary && buffer.find_byte(b'\0').is_some() {
|
||||
let buffer = std::fs::read(path)?;
|
||||
let null_max = std::cmp::min(buffer.len(), 1024);
|
||||
if !explicit && !self.binary && buffer[0..null_max].find_byte(b'\0').is_some() {
|
||||
let msg = report::BinaryFile {
|
||||
path,
|
||||
non_exhaustive: (),
|
||||
|
@ -149,21 +147,22 @@ impl<'d, 'p> Checks<'d, 'p> {
|
|||
};
|
||||
typos_found = true;
|
||||
report(msg.into());
|
||||
}
|
||||
for word in ident.split() {
|
||||
if let Some(correction) = self.dictionary.correct_word(word) {
|
||||
let col_num = word.offset();
|
||||
let msg = report::Correction {
|
||||
path,
|
||||
line,
|
||||
line_num,
|
||||
col_num,
|
||||
typo: word.token(),
|
||||
correction,
|
||||
non_exhaustive: (),
|
||||
};
|
||||
typos_found = true;
|
||||
report(msg.into());
|
||||
} else {
|
||||
for word in ident.split() {
|
||||
if let Some(correction) = self.dictionary.correct_word(word) {
|
||||
let col_num = word.offset();
|
||||
let msg = report::Correction {
|
||||
path,
|
||||
line,
|
||||
line_num,
|
||||
col_num,
|
||||
typo: word.token(),
|
||||
correction,
|
||||
non_exhaustive: (),
|
||||
};
|
||||
typos_found = true;
|
||||
report(msg.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue