mirror of
https://github.com/crate-ci/typos.git
synced 2025-01-09 00:04:49 -05:00
Merge pull request #64 from epage/debug
feat: Dump files, identifiers, and words
This commit is contained in:
commit
cc4b53a1b4
5 changed files with 509 additions and 306 deletions
391
benches/file.rs
391
benches/file.rs
|
@ -7,374 +7,203 @@ mod data;
|
|||
use assert_fs::prelude::*;
|
||||
use bstr::ByteSlice;
|
||||
|
||||
#[bench]
|
||||
fn check_file_empty(b: &mut test::Bencher) {
|
||||
fn bench_read(data: &str, b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::EMPTY).unwrap();
|
||||
sample_path.write_str(data).unwrap();
|
||||
|
||||
let corrections = typos_cli::dict::BuiltIn::new();
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
|
||||
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_no_tokens(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::NO_TOKENS).unwrap();
|
||||
|
||||
let corrections = typos_cli::dict::BuiltIn::new();
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
|
||||
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_single_token(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::SINGLE_TOKEN).unwrap();
|
||||
|
||||
let corrections = typos_cli::dict::BuiltIn::new();
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
|
||||
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_sherlock(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::SHERLOCK).unwrap();
|
||||
|
||||
let corrections = typos_cli::dict::BuiltIn::new();
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
|
||||
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_code(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::CODE).unwrap();
|
||||
|
||||
let corrections = typos_cli::dict::BuiltIn::new();
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
|
||||
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_corpus(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::CORPUS).unwrap();
|
||||
|
||||
let corrections = typos_cli::dict::BuiltIn::new();
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
|
||||
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_empty(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::EMPTY).unwrap();
|
||||
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
|
||||
temp.close().unwrap();
|
||||
bench_read(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_no_tokens(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::NO_TOKENS).unwrap();
|
||||
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
|
||||
temp.close().unwrap();
|
||||
bench_read(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_single_token(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::SINGLE_TOKEN).unwrap();
|
||||
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
|
||||
temp.close().unwrap();
|
||||
bench_read(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_sherlock(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::SHERLOCK).unwrap();
|
||||
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
|
||||
temp.close().unwrap();
|
||||
bench_read(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_code(b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::CODE).unwrap();
|
||||
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
|
||||
temp.close().unwrap();
|
||||
bench_read(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_corpus(b: &mut test::Bencher) {
|
||||
bench_read(data::CORPUS, b);
|
||||
}
|
||||
|
||||
fn bench_split_lines(data: &str, b: &mut test::Bencher) {
|
||||
b.iter(|| data.as_bytes().lines().enumerate().last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_lines_empty(b: &mut test::Bencher) {
|
||||
bench_split_lines(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_lines_no_tokens(b: &mut test::Bencher) {
|
||||
bench_split_lines(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_lines_single_token(b: &mut test::Bencher) {
|
||||
bench_split_lines(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_lines_sherlock(b: &mut test::Bencher) {
|
||||
bench_split_lines(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_lines_code(b: &mut test::Bencher) {
|
||||
bench_split_lines(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_lines_corpus(b: &mut test::Bencher) {
|
||||
bench_split_lines(data::CORPUS, b);
|
||||
}
|
||||
|
||||
fn bench_parse_ident(data: &str, b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::CORPUS).unwrap();
|
||||
sample_path.write_str(data).unwrap();
|
||||
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_identifier_parser(&parser);
|
||||
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_lines_empty(b: &mut test::Bencher) {
|
||||
b.iter(|| data::EMPTY.as_bytes().lines().enumerate().last());
|
||||
fn parse_idents_empty(b: &mut test::Bencher) {
|
||||
bench_parse_ident(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_lines_no_tokens(b: &mut test::Bencher) {
|
||||
b.iter(|| data::NO_TOKENS.as_bytes().lines().enumerate().last());
|
||||
fn parse_idents_no_tokens(b: &mut test::Bencher) {
|
||||
bench_parse_ident(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_lines_single_token(b: &mut test::Bencher) {
|
||||
b.iter(|| data::SINGLE_TOKEN.as_bytes().lines().enumerate().last());
|
||||
fn parse_idents_single_token(b: &mut test::Bencher) {
|
||||
bench_parse_ident(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_lines_sherlock(b: &mut test::Bencher) {
|
||||
b.iter(|| data::SHERLOCK.as_bytes().lines().enumerate().last());
|
||||
fn parse_idents_sherlock(b: &mut test::Bencher) {
|
||||
bench_parse_ident(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_lines_code(b: &mut test::Bencher) {
|
||||
b.iter(|| data::CODE.as_bytes().lines().enumerate().last());
|
||||
fn parse_idents_code(b: &mut test::Bencher) {
|
||||
bench_parse_ident(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_lines_corpus(b: &mut test::Bencher) {
|
||||
b.iter(|| data::CORPUS.as_bytes().lines().enumerate().last());
|
||||
fn parse_idents_corpus(b: &mut test::Bencher) {
|
||||
bench_parse_ident(data::CORPUS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_empty(b: &mut test::Bencher) {
|
||||
fn bench_parse_word(data: &str, b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data).unwrap();
|
||||
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::EMPTY
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).last();
|
||||
()
|
||||
})
|
||||
});
|
||||
let checks = typos::checks::TyposSettings::new().build_word_parser(&parser);
|
||||
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_no_tokens(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::NO_TOKENS
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).last();
|
||||
()
|
||||
})
|
||||
});
|
||||
fn parse_words_empty(b: &mut test::Bencher) {
|
||||
bench_parse_word(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_single_token(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::SINGLE_TOKEN
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).last();
|
||||
()
|
||||
})
|
||||
});
|
||||
fn parse_words_no_tokens(b: &mut test::Bencher) {
|
||||
bench_parse_word(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_sherlock(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::SHERLOCK
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).last();
|
||||
()
|
||||
})
|
||||
});
|
||||
fn parse_words_single_token(b: &mut test::Bencher) {
|
||||
bench_parse_word(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_code(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::CODE
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).last();
|
||||
()
|
||||
})
|
||||
});
|
||||
fn parse_words_sherlock(b: &mut test::Bencher) {
|
||||
bench_parse_word(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_corpus(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::CORPUS
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).last();
|
||||
()
|
||||
})
|
||||
});
|
||||
fn parse_words_code(b: &mut test::Bencher) {
|
||||
bench_parse_word(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_empty(b: &mut test::Bencher) {
|
||||
fn parse_words_corpus(b: &mut test::Bencher) {
|
||||
bench_parse_word(data::CORPUS, b);
|
||||
}
|
||||
|
||||
fn bench_check_file(data: &str, b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data).unwrap();
|
||||
|
||||
let corrections = typos_cli::dict::BuiltIn::new();
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::EMPTY
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).for_each(|l| {
|
||||
l.split().last();
|
||||
()
|
||||
})
|
||||
})
|
||||
});
|
||||
let checks = typos::checks::TyposSettings::new().build_checks(&corrections, &parser);
|
||||
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_no_tokens(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::NO_TOKENS
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).for_each(|l| {
|
||||
l.split().last();
|
||||
()
|
||||
})
|
||||
})
|
||||
});
|
||||
fn check_file_empty(b: &mut test::Bencher) {
|
||||
bench_check_file(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_single_token(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::SINGLE_TOKEN
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).for_each(|l| {
|
||||
l.split().last();
|
||||
()
|
||||
})
|
||||
})
|
||||
});
|
||||
fn check_file_no_tokens(b: &mut test::Bencher) {
|
||||
bench_check_file(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_sherlock(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::SHERLOCK
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).for_each(|l| {
|
||||
l.split().last();
|
||||
()
|
||||
})
|
||||
})
|
||||
});
|
||||
fn check_file_single_token(b: &mut test::Bencher) {
|
||||
bench_check_file(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_code(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::CODE
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).for_each(|l| {
|
||||
l.split().last();
|
||||
()
|
||||
})
|
||||
})
|
||||
});
|
||||
fn check_file_sherlock(b: &mut test::Bencher) {
|
||||
bench_check_file(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn split_corpus(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
b.iter(|| {
|
||||
data::CORPUS
|
||||
.as_bytes()
|
||||
.lines()
|
||||
.enumerate()
|
||||
.for_each(|(_idx, l)| {
|
||||
parser.parse_bytes(l).for_each(|l| {
|
||||
l.split().last();
|
||||
()
|
||||
})
|
||||
})
|
||||
});
|
||||
fn check_file_code(b: &mut test::Bencher) {
|
||||
bench_check_file(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_corpus(b: &mut test::Bencher) {
|
||||
bench_check_file(data::CORPUS, b);
|
||||
}
|
||||
|
|
159
src/main.rs
159
src/main.rs
|
@ -56,6 +56,18 @@ struct Args {
|
|||
/// Ignore implicit configuration files.
|
||||
isolated: bool,
|
||||
|
||||
#[structopt(long)]
|
||||
/// Print each file that would be spellchecked.
|
||||
files: bool,
|
||||
|
||||
#[structopt(long)]
|
||||
/// Print each identifier that would be spellchecked.
|
||||
identifiers: bool,
|
||||
|
||||
#[structopt(long)]
|
||||
/// Print each word that would be spellchecked.
|
||||
words: bool,
|
||||
|
||||
#[structopt(flatten)]
|
||||
overrides: FileArgs,
|
||||
|
||||
|
@ -249,7 +261,79 @@ impl config::WalkSource for WalkArgs {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn init_logging(level: Option<log::Level>) {
|
||||
trait Checks {
|
||||
fn check_filename(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
report: typos::report::Report,
|
||||
) -> Result<bool, typos::Error>;
|
||||
|
||||
fn check_file(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
report: typos::report::Report,
|
||||
) -> Result<bool, typos::Error>;
|
||||
}
|
||||
|
||||
impl<'p> Checks for typos::checks::ParseIdentifiers<'p> {
|
||||
fn check_filename(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
report: typos::report::Report,
|
||||
) -> Result<bool, typos::Error> {
|
||||
self.check_filename(path, report)
|
||||
}
|
||||
|
||||
fn check_file(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
report: typos::report::Report,
|
||||
) -> Result<bool, typos::Error> {
|
||||
self.check_file(path, explicit, report)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'p> Checks for typos::checks::ParseWords<'p> {
|
||||
fn check_filename(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
report: typos::report::Report,
|
||||
) -> Result<bool, typos::Error> {
|
||||
self.check_filename(path, report)
|
||||
}
|
||||
|
||||
fn check_file(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
report: typos::report::Report,
|
||||
) -> Result<bool, typos::Error> {
|
||||
self.check_file(path, explicit, report)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'d, 'p> Checks for typos::checks::Checks<'d, 'p> {
|
||||
fn check_filename(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
report: typos::report::Report,
|
||||
) -> Result<bool, typos::Error> {
|
||||
self.check_filename(path, report)
|
||||
}
|
||||
|
||||
fn check_file(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
report: typos::report::Report,
|
||||
) -> Result<bool, typos::Error> {
|
||||
self.check_file(path, explicit, report)
|
||||
}
|
||||
}
|
||||
|
||||
fn init_logging(level: Option<log::Level>) {
|
||||
if let Some(level) = level {
|
||||
let mut builder = env_logger::Builder::new();
|
||||
|
||||
|
@ -274,18 +358,18 @@ pub fn init_logging(level: Option<log::Level>) {
|
|||
|
||||
fn check_entry(
|
||||
entry: Result<ignore::DirEntry, ignore::Error>,
|
||||
args: &Args,
|
||||
checks: &typos::checks::Checks,
|
||||
format: Format,
|
||||
checks: &dyn Checks,
|
||||
) -> Result<bool, anyhow::Error> {
|
||||
let mut typos_found = false;
|
||||
|
||||
let entry = entry?;
|
||||
if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
|
||||
let explicit = entry.depth() == 0;
|
||||
if checks.check_filename(entry.path(), args.format.report())? {
|
||||
if checks.check_filename(entry.path(), format.report())? {
|
||||
typos_found = true;
|
||||
}
|
||||
if checks.check_file(entry.path(), explicit, args.format.report())? {
|
||||
if checks.check_file(entry.path(), explicit, format.report())? {
|
||||
typos_found = true;
|
||||
}
|
||||
}
|
||||
|
@ -332,11 +416,11 @@ fn run() -> Result<i32, anyhow::Error> {
|
|||
.include_chars(config.default.identifier_include_chars().to_owned())
|
||||
.build();
|
||||
|
||||
let checks = typos::checks::CheckSettings::new()
|
||||
let mut settings = typos::checks::TyposSettings::new();
|
||||
settings
|
||||
.check_filenames(config.default.check_filename())
|
||||
.check_files(config.default.check_file())
|
||||
.binary(config.files.binary())
|
||||
.build(&dictionary, &parser);
|
||||
.binary(config.files.binary());
|
||||
|
||||
let mut walk = ignore::WalkBuilder::new(path);
|
||||
walk.hidden(config.files.ignore_hidden())
|
||||
|
@ -345,15 +429,58 @@ fn run() -> Result<i32, anyhow::Error> {
|
|||
.git_ignore(config.files.ignore_vcs())
|
||||
.git_exclude(config.files.ignore_vcs())
|
||||
.parents(config.files.ignore_parent());
|
||||
for entry in walk.build() {
|
||||
match check_entry(entry, &args, &checks) {
|
||||
Ok(true) => typos_found = true,
|
||||
Err(err) => {
|
||||
let msg = typos::report::Error::new(err.to_string());
|
||||
args.format.report()(msg.into());
|
||||
errors_found = true
|
||||
if args.files {
|
||||
for entry in walk.build() {
|
||||
match entry {
|
||||
Ok(entry) => {
|
||||
let msg = typos::report::File::new(entry.path());
|
||||
args.format.report()(msg.into());
|
||||
}
|
||||
Err(err) => {
|
||||
let msg = typos::report::Error::new(err.to_string());
|
||||
args.format.report()(msg.into());
|
||||
errors_found = true
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if args.identifiers {
|
||||
let checks = settings.build_identifier_parser(&parser);
|
||||
for entry in walk.build() {
|
||||
match check_entry(entry, args.format, &checks) {
|
||||
Ok(true) => typos_found = true,
|
||||
Err(err) => {
|
||||
let msg = typos::report::Error::new(err.to_string());
|
||||
args.format.report()(msg.into());
|
||||
errors_found = true
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
} else if args.words {
|
||||
let checks = settings.build_word_parser(&parser);
|
||||
for entry in walk.build() {
|
||||
match check_entry(entry, args.format, &checks) {
|
||||
Ok(true) => typos_found = true,
|
||||
Err(err) => {
|
||||
let msg = typos::report::Error::new(err.to_string());
|
||||
args.format.report()(msg.into());
|
||||
errors_found = true
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let checks = settings.build_checks(&dictionary, &parser);
|
||||
for entry in walk.build() {
|
||||
match check_entry(entry, args.format, &checks) {
|
||||
Ok(true) => typos_found = true,
|
||||
Err(err) => {
|
||||
let msg = typos::report::Error::new(err.to_string());
|
||||
args.format.report()(msg.into());
|
||||
errors_found = true
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,13 +5,13 @@ use crate::tokens;
|
|||
use crate::Dictionary;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct CheckSettings {
|
||||
pub struct TyposSettings {
|
||||
check_filenames: bool,
|
||||
check_files: bool,
|
||||
binary: bool,
|
||||
}
|
||||
|
||||
impl CheckSettings {
|
||||
impl TyposSettings {
|
||||
pub fn new() -> Self {
|
||||
Default::default()
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ impl CheckSettings {
|
|||
self
|
||||
}
|
||||
|
||||
pub fn build<'d, 'p>(
|
||||
pub fn build_checks<'d, 'p>(
|
||||
&self,
|
||||
dictionary: &'d dyn Dictionary,
|
||||
parser: &'p tokens::Parser,
|
||||
|
@ -44,9 +44,27 @@ impl CheckSettings {
|
|||
binary: self.binary,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_identifier_parser<'p>(&self, parser: &'p tokens::Parser) -> ParseIdentifiers<'p> {
|
||||
ParseIdentifiers {
|
||||
parser,
|
||||
check_filenames: self.check_filenames,
|
||||
check_files: self.check_files,
|
||||
binary: self.binary,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_word_parser<'p>(&self, parser: &'p tokens::Parser) -> ParseWords<'p> {
|
||||
ParseWords {
|
||||
parser,
|
||||
check_filenames: self.check_filenames,
|
||||
check_files: self.check_files,
|
||||
binary: self.binary,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for CheckSettings {
|
||||
impl Default for TyposSettings {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
check_filenames: true,
|
||||
|
@ -56,6 +74,176 @@ impl Default for CheckSettings {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ParseIdentifiers<'p> {
|
||||
parser: &'p tokens::Parser,
|
||||
check_filenames: bool,
|
||||
check_files: bool,
|
||||
binary: bool,
|
||||
}
|
||||
|
||||
impl<'p> ParseIdentifiers<'p> {
|
||||
pub fn check_filename(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
report: report::Report,
|
||||
) -> Result<bool, crate::Error> {
|
||||
let typos_found = false;
|
||||
|
||||
if !self.check_filenames {
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
|
||||
let msg = report::Parse {
|
||||
path,
|
||||
kind: report::ParseKind::Identifier,
|
||||
data: self.parser.parse(part).map(|i| i.token()).collect(),
|
||||
non_exhaustive: (),
|
||||
};
|
||||
report(msg.into());
|
||||
}
|
||||
|
||||
Ok(typos_found)
|
||||
}
|
||||
|
||||
pub fn check_file(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
report: report::Report,
|
||||
) -> Result<bool, crate::Error> {
|
||||
let typos_found = false;
|
||||
|
||||
if !self.check_files {
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
let buffer = std::fs::read(path)
|
||||
.map_err(|e| crate::ErrorKind::IoError.into_error().with_source(e))?;
|
||||
if !explicit && !self.binary && is_binary(&buffer) {
|
||||
let msg = report::BinaryFile {
|
||||
path,
|
||||
non_exhaustive: (),
|
||||
};
|
||||
report(msg.into());
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
for line in buffer.lines() {
|
||||
let msg = report::Parse {
|
||||
path,
|
||||
kind: report::ParseKind::Identifier,
|
||||
data: self.parser.parse_bytes(line).map(|i| i.token()).collect(),
|
||||
non_exhaustive: (),
|
||||
};
|
||||
report(msg.into());
|
||||
}
|
||||
|
||||
Ok(typos_found)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for ParseIdentifiers<'_> {
|
||||
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
fmt.debug_struct("Checks")
|
||||
.field("parser", self.parser)
|
||||
.field("check_filenames", &self.check_filenames)
|
||||
.field("check_files", &self.check_files)
|
||||
.field("binary", &self.binary)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ParseWords<'p> {
|
||||
parser: &'p tokens::Parser,
|
||||
check_filenames: bool,
|
||||
check_files: bool,
|
||||
binary: bool,
|
||||
}
|
||||
|
||||
impl<'p> ParseWords<'p> {
|
||||
pub fn check_filename(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
report: report::Report,
|
||||
) -> Result<bool, crate::Error> {
|
||||
let typos_found = false;
|
||||
|
||||
if !self.check_filenames {
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
|
||||
let msg = report::Parse {
|
||||
path,
|
||||
kind: report::ParseKind::Word,
|
||||
data: self
|
||||
.parser
|
||||
.parse(part)
|
||||
.flat_map(|ident| ident.split().map(|i| i.token()))
|
||||
.collect(),
|
||||
non_exhaustive: (),
|
||||
};
|
||||
report(msg.into());
|
||||
}
|
||||
|
||||
Ok(typos_found)
|
||||
}
|
||||
|
||||
pub fn check_file(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
report: report::Report,
|
||||
) -> Result<bool, crate::Error> {
|
||||
let typos_found = false;
|
||||
|
||||
if !self.check_files {
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
let buffer = std::fs::read(path)
|
||||
.map_err(|e| crate::ErrorKind::IoError.into_error().with_source(e))?;
|
||||
if !explicit && !self.binary && is_binary(&buffer) {
|
||||
let msg = report::BinaryFile {
|
||||
path,
|
||||
non_exhaustive: (),
|
||||
};
|
||||
report(msg.into());
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
for line in buffer.lines() {
|
||||
let msg = report::Parse {
|
||||
path,
|
||||
kind: report::ParseKind::Word,
|
||||
data: self
|
||||
.parser
|
||||
.parse_bytes(line)
|
||||
.flat_map(|ident| ident.split().map(|i| i.token()))
|
||||
.collect(),
|
||||
non_exhaustive: (),
|
||||
};
|
||||
report(msg.into());
|
||||
}
|
||||
|
||||
Ok(typos_found)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for ParseWords<'_> {
|
||||
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
fmt.debug_struct("Checks")
|
||||
.field("parser", self.parser)
|
||||
.field("check_filenames", &self.check_filenames)
|
||||
.field("check_files", &self.check_files)
|
||||
.field("binary", &self.binary)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Checks<'d, 'p> {
|
||||
dictionary: &'d dyn Dictionary,
|
||||
|
@ -122,8 +310,7 @@ impl<'d, 'p> Checks<'d, 'p> {
|
|||
|
||||
let buffer = std::fs::read(path)
|
||||
.map_err(|e| crate::ErrorKind::IoError.into_error().with_source(e))?;
|
||||
let null_max = std::cmp::min(buffer.len(), 1024);
|
||||
if !explicit && !self.binary && buffer[0..null_max].find_byte(b'\0').is_some() {
|
||||
if !explicit && !self.binary && is_binary(&buffer) {
|
||||
let msg = report::BinaryFile {
|
||||
path,
|
||||
non_exhaustive: (),
|
||||
|
@ -183,3 +370,8 @@ impl std::fmt::Debug for Checks<'_, '_> {
|
|||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when a NUL byte occurs within the first 1024 bytes of `buffer`.
///
/// Bytes past that window are never inspected, and an empty buffer yields `false`.
fn is_binary(buffer: &[u8]) -> bool {
    /// Number of leading bytes examined for a NUL.
    const SNIFF_LEN: usize = 1024;

    let window = &buffer[..buffer.len().min(SNIFF_LEN)];
    window.contains(&b'\0')
}
|
||||
|
|
|
@ -8,8 +8,12 @@ pub enum Message<'m> {
|
|||
BinaryFile(BinaryFile<'m>),
|
||||
Correction(Correction<'m>),
|
||||
FilenameCorrection(FilenameCorrection<'m>),
|
||||
File(File<'m>),
|
||||
Parse(Parse<'m>),
|
||||
PathError(PathError<'m>),
|
||||
Error(Error),
|
||||
#[serde(skip)]
|
||||
__NonExhaustive,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, serde::Serialize, derive_more::Display)]
|
||||
|
@ -42,6 +46,39 @@ pub struct FilenameCorrection<'m> {
|
|||
pub(crate) non_exhaustive: (),
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, serde::Serialize)]
|
||||
pub enum ParseKind {
|
||||
Identifier,
|
||||
Word,
|
||||
#[doc(hidden)]
|
||||
__NonExhaustive,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, serde::Serialize)]
|
||||
pub struct File<'m> {
|
||||
pub path: &'m std::path::Path,
|
||||
#[serde(skip)]
|
||||
pub(crate) non_exhaustive: (),
|
||||
}
|
||||
|
||||
impl<'m> File<'m> {
|
||||
pub fn new(path: &'m std::path::Path) -> Self {
|
||||
Self {
|
||||
path,
|
||||
non_exhaustive: (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, serde::Serialize)]
|
||||
pub struct Parse<'m> {
|
||||
pub path: &'m std::path::Path,
|
||||
pub kind: ParseKind,
|
||||
pub data: Vec<&'m str>,
|
||||
#[serde(skip)]
|
||||
pub(crate) non_exhaustive: (),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, serde::Serialize)]
|
||||
pub struct PathError<'m> {
|
||||
pub path: &'m std::path::Path,
|
||||
|
@ -88,12 +125,21 @@ pub fn print_brief(msg: Message) {
|
|||
Message::FilenameCorrection(msg) => {
|
||||
println!("{}: {} -> {}", msg.path.display(), msg.typo, msg.correction);
|
||||
}
|
||||
Message::File(msg) => {
|
||||
println!("{}", msg.path.display());
|
||||
}
|
||||
Message::Parse(msg) => {
|
||||
println!("{}", itertools::join(msg.data.iter(), " "));
|
||||
}
|
||||
Message::PathError(msg) => {
|
||||
println!("{}: {}", msg.path.display(), msg.msg);
|
||||
}
|
||||
Message::Error(msg) => {
|
||||
println!("{}", msg.msg);
|
||||
}
|
||||
Message::__NonExhaustive => {
|
||||
unreachable!("Non-creatable case");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -111,12 +157,21 @@ pub fn print_long(msg: Message) {
|
|||
msg.correction
|
||||
);
|
||||
}
|
||||
Message::File(msg) => {
|
||||
println!("{}", msg.path.display());
|
||||
}
|
||||
Message::Parse(msg) => {
|
||||
println!("{}", itertools::join(msg.data.iter(), " "));
|
||||
}
|
||||
Message::PathError(msg) => {
|
||||
println!("{}: {}", msg.path.display(), msg.msg);
|
||||
}
|
||||
Message::Error(msg) => {
|
||||
println!("{}", msg.msg);
|
||||
}
|
||||
Message::__NonExhaustive => {
|
||||
unreachable!("Non-creatable case");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -123,7 +123,7 @@ impl<'t> Identifier<'t> {
|
|||
Self { token, offset }
|
||||
}
|
||||
|
||||
pub fn token(&self) -> &str {
|
||||
pub fn token(&self) -> &'t str {
|
||||
self.token
|
||||
}
|
||||
|
||||
|
@ -135,7 +135,7 @@ impl<'t> Identifier<'t> {
|
|||
self.offset
|
||||
}
|
||||
|
||||
pub fn split(&self) -> impl Iterator<Item = Word<'_>> {
|
||||
pub fn split(&self) -> impl Iterator<Item = Word<'t>> {
|
||||
split_ident(self.token, self.offset)
|
||||
}
|
||||
}
|
||||
|
@ -177,7 +177,7 @@ impl<'t> Word<'t> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn token(&self) -> &str {
|
||||
pub fn token(&self) -> &'t str {
|
||||
self.token
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue