mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-25 09:02:12 -05:00
refactor: Layer files/filenames on buffer processing
This commit is contained in:
parent
eb20ba9f11
commit
e12cd8ed55
7 changed files with 483 additions and 505 deletions
214
benches/checks.rs
Normal file
214
benches/checks.rs
Normal file
|
@ -0,0 +1,214 @@
|
|||
#![feature(test)]
|
||||
|
||||
extern crate test;
|
||||
|
||||
mod data;
|
||||
|
||||
use assert_fs::prelude::*;
|
||||
use typos::checks::Check;
|
||||
|
||||
fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_identifier_parser();
|
||||
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_empty_str(b: &mut test::Bencher) {
|
||||
bench_parse_ident_str(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_no_tokens_str(b: &mut test::Bencher) {
|
||||
bench_parse_ident_str(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_single_token_str(b: &mut test::Bencher) {
|
||||
bench_parse_ident_str(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_sherlock_str(b: &mut test::Bencher) {
|
||||
bench_parse_ident_str(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_code_str(b: &mut test::Bencher) {
|
||||
bench_parse_ident_str(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_corpus_str(b: &mut test::Bencher) {
|
||||
bench_parse_ident_str(data::CORPUS, b);
|
||||
}
|
||||
|
||||
fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_identifier_parser();
|
||||
b.iter(|| {
|
||||
checks.check_bytes(
|
||||
data.as_bytes(),
|
||||
&parser,
|
||||
&corrections,
|
||||
&typos::report::PrintSilent,
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_empty_bytes(b: &mut test::Bencher) {
|
||||
bench_parse_ident_bytes(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_no_tokens_bytes(b: &mut test::Bencher) {
|
||||
bench_parse_ident_bytes(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_single_token_bytes(b: &mut test::Bencher) {
|
||||
bench_parse_ident_bytes(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_sherlock_bytes(b: &mut test::Bencher) {
|
||||
bench_parse_ident_bytes(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_code_bytes(b: &mut test::Bencher) {
|
||||
bench_parse_ident_bytes(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_corpus_bytes(b: &mut test::Bencher) {
|
||||
bench_parse_ident_bytes(data::CORPUS, b);
|
||||
}
|
||||
|
||||
fn bench_parse_word_str(data: &str, b: &mut test::Bencher) {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_word_parser();
|
||||
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_empty(b: &mut test::Bencher) {
|
||||
bench_parse_word_str(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_no_tokens(b: &mut test::Bencher) {
|
||||
bench_parse_word_str(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_single_token(b: &mut test::Bencher) {
|
||||
bench_parse_word_str(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_sherlock(b: &mut test::Bencher) {
|
||||
bench_parse_word_str(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_code(b: &mut test::Bencher) {
|
||||
bench_parse_word_str(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_corpus(b: &mut test::Bencher) {
|
||||
bench_parse_word_str(data::CORPUS, b);
|
||||
}
|
||||
|
||||
fn bench_typos(data: &str, b: &mut test::Bencher) {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_typos();
|
||||
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn typos_empty(b: &mut test::Bencher) {
|
||||
bench_typos(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn typos_no_tokens(b: &mut test::Bencher) {
|
||||
bench_typos(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn typos_single_token(b: &mut test::Bencher) {
|
||||
bench_typos(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn typos_sherlock(b: &mut test::Bencher) {
|
||||
bench_typos(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn typos_code(b: &mut test::Bencher) {
|
||||
bench_typos(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn typos_corpus(b: &mut test::Bencher) {
|
||||
bench_typos(data::CORPUS, b);
|
||||
}
|
||||
|
||||
fn bench_check_file(data: &str, b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data).unwrap();
|
||||
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_typos();
|
||||
b.iter(|| {
|
||||
checks.check_file(
|
||||
sample_path.path(),
|
||||
true,
|
||||
&parser,
|
||||
&corrections,
|
||||
&typos::report::PrintSilent,
|
||||
)
|
||||
});
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_empty(b: &mut test::Bencher) {
|
||||
bench_check_file(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_no_tokens(b: &mut test::Bencher) {
|
||||
bench_check_file(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_single_token(b: &mut test::Bencher) {
|
||||
bench_check_file(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_sherlock(b: &mut test::Bencher) {
|
||||
bench_check_file(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_code(b: &mut test::Bencher) {
|
||||
bench_check_file(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_corpus(b: &mut test::Bencher) {
|
||||
bench_check_file(data::CORPUS, b);
|
||||
}
|
231
benches/file.rs
231
benches/file.rs
|
@ -1,231 +0,0 @@
|
|||
#![feature(test)]
|
||||
|
||||
extern crate test;
|
||||
|
||||
mod data;
|
||||
|
||||
use assert_fs::prelude::*;
|
||||
use bstr::ByteSlice;
|
||||
|
||||
fn bench_read(data: &str, b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data).unwrap();
|
||||
|
||||
b.iter(|| std::fs::read(sample_path.path()));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_empty(b: &mut test::Bencher) {
|
||||
bench_read(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_no_tokens(b: &mut test::Bencher) {
|
||||
bench_read(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_single_token(b: &mut test::Bencher) {
|
||||
bench_read(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_sherlock(b: &mut test::Bencher) {
|
||||
bench_read(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_code(b: &mut test::Bencher) {
|
||||
bench_read(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn read_corpus(b: &mut test::Bencher) {
|
||||
bench_read(data::CORPUS, b);
|
||||
}
|
||||
|
||||
fn bench_split_lines(data: &str, b: &mut test::Bencher) {
|
||||
b.iter(|| data.as_bytes().lines().enumerate().last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_lines_empty(b: &mut test::Bencher) {
|
||||
bench_split_lines(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_lines_no_tokens(b: &mut test::Bencher) {
|
||||
bench_split_lines(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_lines_single_token(b: &mut test::Bencher) {
|
||||
bench_split_lines(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_lines_sherlock(b: &mut test::Bencher) {
|
||||
bench_split_lines(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_lines_code(b: &mut test::Bencher) {
|
||||
bench_split_lines(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_lines_corpus(b: &mut test::Bencher) {
|
||||
bench_split_lines(data::CORPUS, b);
|
||||
}
|
||||
|
||||
fn bench_parse_ident(data: &str, b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data).unwrap();
|
||||
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_identifier_parser();
|
||||
b.iter(|| {
|
||||
checks.check_file(
|
||||
sample_path.path(),
|
||||
true,
|
||||
&parser,
|
||||
&typos::report::PrintSilent,
|
||||
)
|
||||
});
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_empty(b: &mut test::Bencher) {
|
||||
bench_parse_ident(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_no_tokens(b: &mut test::Bencher) {
|
||||
bench_parse_ident(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_single_token(b: &mut test::Bencher) {
|
||||
bench_parse_ident(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_sherlock(b: &mut test::Bencher) {
|
||||
bench_parse_ident(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_code(b: &mut test::Bencher) {
|
||||
bench_parse_ident(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_idents_corpus(b: &mut test::Bencher) {
|
||||
bench_parse_ident(data::CORPUS, b);
|
||||
}
|
||||
|
||||
fn bench_parse_word(data: &str, b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data).unwrap();
|
||||
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_word_parser();
|
||||
b.iter(|| {
|
||||
checks.check_file(
|
||||
sample_path.path(),
|
||||
true,
|
||||
&parser,
|
||||
&typos::report::PrintSilent,
|
||||
)
|
||||
});
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_empty(b: &mut test::Bencher) {
|
||||
bench_parse_word(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_no_tokens(b: &mut test::Bencher) {
|
||||
bench_parse_word(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_single_token(b: &mut test::Bencher) {
|
||||
bench_parse_word(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_sherlock(b: &mut test::Bencher) {
|
||||
bench_parse_word(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_code(b: &mut test::Bencher) {
|
||||
bench_parse_word(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_words_corpus(b: &mut test::Bencher) {
|
||||
bench_parse_word(data::CORPUS, b);
|
||||
}
|
||||
|
||||
fn bench_check_file(data: &str, b: &mut test::Bencher) {
|
||||
let temp = assert_fs::TempDir::new().unwrap();
|
||||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data).unwrap();
|
||||
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_checks();
|
||||
b.iter(|| {
|
||||
checks.check_file(
|
||||
sample_path.path(),
|
||||
true,
|
||||
&parser,
|
||||
&corrections,
|
||||
&typos::report::PrintSilent,
|
||||
)
|
||||
});
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_empty(b: &mut test::Bencher) {
|
||||
bench_check_file(data::EMPTY, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_no_tokens(b: &mut test::Bencher) {
|
||||
bench_check_file(data::NO_TOKENS, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_single_token(b: &mut test::Bencher) {
|
||||
bench_check_file(data::SINGLE_TOKEN, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_sherlock(b: &mut test::Bencher) {
|
||||
bench_check_file(data::SHERLOCK, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_code(b: &mut test::Bencher) {
|
||||
bench_check_file(data::CODE, b);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn check_file_corpus(b: &mut test::Bencher) {
|
||||
bench_check_file(data::CORPUS, b);
|
||||
}
|
|
@ -5,6 +5,100 @@ use crate::tokens;
|
|||
use crate::Dictionary;
|
||||
use crate::Status;
|
||||
|
||||
pub trait Check: Send + Sync {
|
||||
fn check_str(
|
||||
&self,
|
||||
buffer: &str,
|
||||
parser: &tokens::Parser,
|
||||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<bool, crate::Error>;
|
||||
|
||||
fn check_bytes(
|
||||
&self,
|
||||
buffer: &[u8],
|
||||
parser: &tokens::Parser,
|
||||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<bool, crate::Error>;
|
||||
|
||||
fn check_filenames(&self) -> bool;
|
||||
|
||||
fn check_files(&self) -> bool;
|
||||
|
||||
fn binary(&self) -> bool;
|
||||
|
||||
fn check_filename(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
parser: &tokens::Parser,
|
||||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<bool, crate::Error> {
|
||||
let mut typos_found = false;
|
||||
|
||||
if !self.check_filenames() {
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||
let context_reporter = ReportContext {
|
||||
reporter,
|
||||
context: report::PathContext { path }.into(),
|
||||
};
|
||||
typos_found |= self.check_str(file_name, parser, dictionary, &context_reporter)?;
|
||||
}
|
||||
|
||||
Ok(typos_found)
|
||||
}
|
||||
|
||||
fn check_file(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
parser: &tokens::Parser,
|
||||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<bool, crate::Error> {
|
||||
let mut typos_found = false;
|
||||
|
||||
if !self.check_files() {
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
let buffer = read_file(path)?;
|
||||
let (buffer, content_type) = massage_data(buffer)?;
|
||||
if !explicit && !self.binary() && content_type.is_binary() {
|
||||
let msg = report::BinaryFile { path };
|
||||
reporter.report(msg.into());
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
for (line_idx, line) in buffer.lines().enumerate() {
|
||||
let line_num = line_idx + 1;
|
||||
let context_reporter = ReportContext {
|
||||
reporter,
|
||||
context: report::FileContext { path, line_num }.into(),
|
||||
};
|
||||
typos_found |= self.check_bytes(line, parser, dictionary, &context_reporter)?;
|
||||
}
|
||||
|
||||
Ok(typos_found)
|
||||
}
|
||||
}
|
||||
|
||||
struct ReportContext<'m, 'r> {
|
||||
reporter: &'r dyn report::Report,
|
||||
context: report::Context<'m>,
|
||||
}
|
||||
|
||||
impl<'m, 'r> report::Report for ReportContext<'m, 'r> {
|
||||
fn report(&self, msg: report::Message) -> bool {
|
||||
let msg = msg.context(self.context.clone());
|
||||
self.reporter.report(msg)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct TyposSettings {
|
||||
check_filenames: bool,
|
||||
|
@ -32,8 +126,8 @@ impl TyposSettings {
|
|||
self
|
||||
}
|
||||
|
||||
pub fn build_checks(&self) -> Checks {
|
||||
Checks {
|
||||
pub fn build_typos(&self) -> Typos {
|
||||
Typos {
|
||||
check_filenames: self.check_filenames,
|
||||
check_files: self.check_files,
|
||||
binary: self.binary,
|
||||
|
@ -74,64 +168,56 @@ pub struct ParseIdentifiers {
|
|||
binary: bool,
|
||||
}
|
||||
|
||||
impl ParseIdentifiers {
|
||||
pub fn check_filename(
|
||||
impl Check for ParseIdentifiers {
|
||||
fn check_str(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
buffer: &str,
|
||||
parser: &tokens::Parser,
|
||||
_dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<bool, crate::Error> {
|
||||
let typos_found = false;
|
||||
|
||||
if !self.check_filenames {
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
|
||||
let msg = report::Parse {
|
||||
context: report::PathContext { path }.into(),
|
||||
kind: report::ParseKind::Identifier,
|
||||
data: parser.parse(part).map(|i| i.token()).collect(),
|
||||
};
|
||||
reporter.report(msg.into());
|
||||
}
|
||||
let msg = report::Parse {
|
||||
context: report::Context::None,
|
||||
kind: report::ParseKind::Identifier,
|
||||
data: parser.parse_str(buffer).map(|i| i.token()).collect(),
|
||||
};
|
||||
reporter.report(msg.into());
|
||||
|
||||
Ok(typos_found)
|
||||
}
|
||||
|
||||
pub fn check_file(
|
||||
fn check_bytes(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
buffer: &[u8],
|
||||
parser: &tokens::Parser,
|
||||
_dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<bool, crate::Error> {
|
||||
let typos_found = false;
|
||||
|
||||
if !self.check_files {
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
let buffer = read_file(path)?;
|
||||
let (buffer, content_type) = massage_data(buffer)?;
|
||||
if !explicit && !self.binary && content_type.is_binary() {
|
||||
let msg = report::BinaryFile { path };
|
||||
reporter.report(msg.into());
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
for (line_idx, line) in buffer.lines().enumerate() {
|
||||
let line_num = line_idx + 1;
|
||||
let msg = report::Parse {
|
||||
context: report::FileContext { path, line_num }.into(),
|
||||
kind: report::ParseKind::Identifier,
|
||||
data: parser.parse_bytes(line).map(|i| i.token()).collect(),
|
||||
};
|
||||
reporter.report(msg.into());
|
||||
}
|
||||
let msg = report::Parse {
|
||||
context: report::Context::None,
|
||||
kind: report::ParseKind::Identifier,
|
||||
data: parser.parse_bytes(buffer).map(|i| i.token()).collect(),
|
||||
};
|
||||
reporter.report(msg.into());
|
||||
|
||||
Ok(typos_found)
|
||||
}
|
||||
|
||||
fn check_filenames(&self) -> bool {
|
||||
self.check_filenames
|
||||
}
|
||||
|
||||
fn check_files(&self) -> bool {
|
||||
self.check_files
|
||||
}
|
||||
|
||||
fn binary(&self) -> bool {
|
||||
self.binary
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
@ -141,124 +227,111 @@ pub struct ParseWords {
|
|||
binary: bool,
|
||||
}
|
||||
|
||||
impl ParseWords {
|
||||
pub fn check_filename(
|
||||
impl Check for ParseWords {
|
||||
fn check_str(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
buffer: &str,
|
||||
parser: &tokens::Parser,
|
||||
_dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<bool, crate::Error> {
|
||||
let typos_found = false;
|
||||
|
||||
if !self.check_filenames {
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
|
||||
let msg = report::Parse {
|
||||
context: report::PathContext { path }.into(),
|
||||
kind: report::ParseKind::Word,
|
||||
data: parser
|
||||
.parse(part)
|
||||
.flat_map(|ident| ident.split().map(|i| i.token()))
|
||||
.collect(),
|
||||
};
|
||||
reporter.report(msg.into());
|
||||
}
|
||||
let msg = report::Parse {
|
||||
context: report::Context::None,
|
||||
kind: report::ParseKind::Word,
|
||||
data: parser
|
||||
.parse_str(buffer)
|
||||
.flat_map(|ident| ident.split().map(|i| i.token()))
|
||||
.collect(),
|
||||
};
|
||||
reporter.report(msg.into());
|
||||
|
||||
Ok(typos_found)
|
||||
}
|
||||
|
||||
pub fn check_file(
|
||||
fn check_bytes(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
buffer: &[u8],
|
||||
parser: &tokens::Parser,
|
||||
_dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<bool, crate::Error> {
|
||||
let typos_found = false;
|
||||
|
||||
if !self.check_files {
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
let buffer = read_file(path)?;
|
||||
let (buffer, content_type) = massage_data(buffer)?;
|
||||
if !explicit && !self.binary && content_type.is_binary() {
|
||||
let msg = report::BinaryFile { path };
|
||||
reporter.report(msg.into());
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
for (line_idx, line) in buffer.lines().enumerate() {
|
||||
let line_num = line_idx + 1;
|
||||
let msg = report::Parse {
|
||||
context: report::FileContext { path, line_num }.into(),
|
||||
kind: report::ParseKind::Word,
|
||||
data: parser
|
||||
.parse_bytes(line)
|
||||
.flat_map(|ident| ident.split().map(|i| i.token()))
|
||||
.collect(),
|
||||
};
|
||||
reporter.report(msg.into());
|
||||
}
|
||||
let msg = report::Parse {
|
||||
context: report::Context::None,
|
||||
kind: report::ParseKind::Word,
|
||||
data: parser
|
||||
.parse_bytes(buffer)
|
||||
.flat_map(|ident| ident.split().map(|i| i.token()))
|
||||
.collect(),
|
||||
};
|
||||
reporter.report(msg.into());
|
||||
|
||||
Ok(typos_found)
|
||||
}
|
||||
|
||||
fn check_filenames(&self) -> bool {
|
||||
self.check_filenames
|
||||
}
|
||||
|
||||
fn check_files(&self) -> bool {
|
||||
self.check_files
|
||||
}
|
||||
|
||||
fn binary(&self) -> bool {
|
||||
self.binary
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Checks {
|
||||
pub struct Typos {
|
||||
check_filenames: bool,
|
||||
check_files: bool,
|
||||
binary: bool,
|
||||
}
|
||||
|
||||
impl Checks {
|
||||
pub fn check_filename(
|
||||
impl Check for Typos {
|
||||
fn check_str(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
buffer: &str,
|
||||
parser: &tokens::Parser,
|
||||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<bool, crate::Error> {
|
||||
if !self.check_filenames {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
let mut typos_found = false;
|
||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||
for ident in parser.parse(file_name) {
|
||||
match dictionary.correct_ident(ident) {
|
||||
Some(Status::Valid) => {}
|
||||
Some(corrections) => {
|
||||
let byte_offset = ident.offset();
|
||||
let msg = report::Typo {
|
||||
context: report::PathContext { path }.into(),
|
||||
buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
|
||||
byte_offset,
|
||||
typo: ident.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
}
|
||||
None => {
|
||||
for word in ident.split() {
|
||||
match dictionary.correct_word(word) {
|
||||
Some(Status::Valid) => {}
|
||||
Some(corrections) => {
|
||||
let byte_offset = word.offset();
|
||||
let msg = report::Typo {
|
||||
context: report::PathContext { path }.into(),
|
||||
buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
|
||||
byte_offset,
|
||||
typo: word.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
}
|
||||
None => {}
|
||||
|
||||
for ident in parser.parse_str(buffer) {
|
||||
match dictionary.correct_ident(ident) {
|
||||
Some(Status::Valid) => {}
|
||||
Some(corrections) => {
|
||||
let byte_offset = ident.offset();
|
||||
let msg = report::Typo {
|
||||
context: report::Context::None,
|
||||
buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()),
|
||||
byte_offset,
|
||||
typo: ident.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
}
|
||||
None => {
|
||||
for word in ident.split() {
|
||||
match dictionary.correct_word(word) {
|
||||
Some(Status::Valid) => {}
|
||||
Some(corrections) => {
|
||||
let byte_offset = word.offset();
|
||||
let msg = report::Typo {
|
||||
context: report::Context::None,
|
||||
buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()),
|
||||
byte_offset,
|
||||
typo: word.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -268,61 +341,45 @@ impl Checks {
|
|||
Ok(typos_found)
|
||||
}
|
||||
|
||||
pub fn check_file(
|
||||
fn check_bytes(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
buffer: &[u8],
|
||||
parser: &tokens::Parser,
|
||||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<bool, crate::Error> {
|
||||
let mut typos_found = false;
|
||||
|
||||
if !self.check_files {
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
let buffer = read_file(path)?;
|
||||
let (buffer, content_type) = massage_data(buffer)?;
|
||||
if !explicit && !self.binary && content_type.is_binary() {
|
||||
let msg = report::BinaryFile { path };
|
||||
reporter.report(msg.into());
|
||||
return Ok(typos_found);
|
||||
}
|
||||
|
||||
for (line_idx, line) in buffer.lines().enumerate() {
|
||||
let line_num = line_idx + 1;
|
||||
for ident in parser.parse_bytes(line) {
|
||||
match dictionary.correct_ident(ident) {
|
||||
Some(Status::Valid) => {}
|
||||
Some(corrections) => {
|
||||
let byte_offset = ident.offset();
|
||||
let msg = report::Typo {
|
||||
context: report::FileContext { path, line_num }.into(),
|
||||
buffer: std::borrow::Cow::Borrowed(line),
|
||||
byte_offset,
|
||||
typo: ident.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
}
|
||||
None => {
|
||||
for word in ident.split() {
|
||||
match dictionary.correct_word(word) {
|
||||
Some(Status::Valid) => {}
|
||||
Some(corrections) => {
|
||||
let byte_offset = word.offset();
|
||||
let msg = report::Typo {
|
||||
context: report::FileContext { path, line_num }.into(),
|
||||
buffer: std::borrow::Cow::Borrowed(line),
|
||||
byte_offset,
|
||||
typo: word.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
}
|
||||
None => {}
|
||||
for ident in parser.parse_bytes(buffer) {
|
||||
match dictionary.correct_ident(ident) {
|
||||
Some(Status::Valid) => {}
|
||||
Some(corrections) => {
|
||||
let byte_offset = ident.offset();
|
||||
let msg = report::Typo {
|
||||
context: report::Context::None,
|
||||
buffer: std::borrow::Cow::Borrowed(buffer),
|
||||
byte_offset,
|
||||
typo: ident.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
}
|
||||
None => {
|
||||
for word in ident.split() {
|
||||
match dictionary.correct_word(word) {
|
||||
Some(Status::Valid) => {}
|
||||
Some(corrections) => {
|
||||
let byte_offset = word.offset();
|
||||
let msg = report::Typo {
|
||||
context: report::Context::None,
|
||||
buffer: std::borrow::Cow::Borrowed(buffer),
|
||||
byte_offset,
|
||||
typo: word.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -331,6 +388,18 @@ impl Checks {
|
|||
|
||||
Ok(typos_found)
|
||||
}
|
||||
|
||||
fn check_filenames(&self) -> bool {
|
||||
self.check_filenames
|
||||
}
|
||||
|
||||
fn check_files(&self) -> bool {
|
||||
self.check_files
|
||||
}
|
||||
|
||||
fn binary(&self) -> bool {
|
||||
self.binary
|
||||
}
|
||||
}
|
||||
|
||||
fn read_file(path: &std::path::Path) -> Result<Vec<u8>, crate::Error> {
|
||||
|
|
|
@ -38,6 +38,20 @@ impl<'m> Message<'m> {
|
|||
Message::Error(_) => true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn context(self, context: Context<'m>) -> Self {
|
||||
match self {
|
||||
Message::Typo(typo) => {
|
||||
let typo = typo.context(context);
|
||||
Message::Typo(typo)
|
||||
}
|
||||
Message::Parse(parse) => {
|
||||
let parse = parse.context(context);
|
||||
Message::Parse(parse)
|
||||
}
|
||||
_ => self,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, serde::Serialize, derive_more::Display, derive_setters::Setters)]
|
||||
|
|
|
@ -102,7 +102,7 @@ impl Parser {
|
|||
ParserBuilder::default().build()
|
||||
}
|
||||
|
||||
pub fn parse<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
|
||||
pub fn parse_str<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
|
||||
self.words_str
|
||||
.find_iter(content)
|
||||
.filter(move |m| self.accept(m.as_str().as_bytes()))
|
||||
|
@ -390,7 +390,7 @@ mod test {
|
|||
let expected: Vec<Identifier> = vec![];
|
||||
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
|
||||
assert_eq!(expected, actual);
|
||||
let actual: Vec<_> = parser.parse(input).collect();
|
||||
let actual: Vec<_> = parser.parse_str(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
|
@ -402,7 +402,7 @@ mod test {
|
|||
let expected: Vec<Identifier> = vec![Identifier::new_unchecked("word", 0)];
|
||||
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
|
||||
assert_eq!(expected, actual);
|
||||
let actual: Vec<_> = parser.parse(input).collect();
|
||||
let actual: Vec<_> = parser.parse_str(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
|
@ -417,7 +417,7 @@ mod test {
|
|||
];
|
||||
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
|
||||
assert_eq!(expected, actual);
|
||||
let actual: Vec<_> = parser.parse(input).collect();
|
||||
let actual: Vec<_> = parser.parse_str(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
|
@ -432,7 +432,7 @@ mod test {
|
|||
];
|
||||
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
|
||||
assert_eq!(expected, actual);
|
||||
let actual: Vec<_> = parser.parse(input).collect();
|
||||
let actual: Vec<_> = parser.parse_str(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
|
@ -447,7 +447,7 @@ mod test {
|
|||
];
|
||||
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
|
||||
assert_eq!(expected, actual);
|
||||
let actual: Vec<_> = parser.parse(input).collect();
|
||||
let actual: Vec<_> = parser.parse_str(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
|
@ -459,7 +459,7 @@ mod test {
|
|||
let expected: Vec<Identifier> = vec![Identifier::new_unchecked("A_B", 0)];
|
||||
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
|
||||
assert_eq!(expected, actual);
|
||||
let actual: Vec<_> = parser.parse(input).collect();
|
||||
let actual: Vec<_> = parser.parse_str(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
|
@ -474,7 +474,7 @@ mod test {
|
|||
];
|
||||
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
|
||||
assert_eq!(expected, actual);
|
||||
let actual: Vec<_> = parser.parse(input).collect();
|
||||
let actual: Vec<_> = parser.parse_str(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
|
@ -493,7 +493,7 @@ mod test {
|
|||
];
|
||||
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
|
||||
assert_eq!(expected, actual);
|
||||
let actual: Vec<_> = parser.parse(input).collect();
|
||||
let actual: Vec<_> = parser.parse_str(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,96 +1,8 @@
|
|||
use std::sync::atomic;
|
||||
|
||||
pub(crate) trait Checks: Send + Sync {
|
||||
fn check_filename(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
parser: &typos::tokens::Parser,
|
||||
dictionary: &dyn typos::Dictionary,
|
||||
report: &dyn typos::report::Report,
|
||||
) -> Result<bool, typos::Error>;
|
||||
|
||||
fn check_file(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
parser: &typos::tokens::Parser,
|
||||
dictionary: &dyn typos::Dictionary,
|
||||
report: &dyn typos::report::Report,
|
||||
) -> Result<bool, typos::Error>;
|
||||
}
|
||||
|
||||
impl<'p> Checks for typos::checks::ParseIdentifiers {
|
||||
fn check_filename(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
parser: &typos::tokens::Parser,
|
||||
_dictionary: &dyn typos::Dictionary,
|
||||
report: &dyn typos::report::Report,
|
||||
) -> Result<bool, typos::Error> {
|
||||
self.check_filename(path, parser, report)
|
||||
}
|
||||
|
||||
fn check_file(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
parser: &typos::tokens::Parser,
|
||||
_dictionary: &dyn typos::Dictionary,
|
||||
report: &dyn typos::report::Report,
|
||||
) -> Result<bool, typos::Error> {
|
||||
self.check_file(path, explicit, parser, report)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'p> Checks for typos::checks::ParseWords {
|
||||
fn check_filename(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
parser: &typos::tokens::Parser,
|
||||
_dictionary: &dyn typos::Dictionary,
|
||||
report: &dyn typos::report::Report,
|
||||
) -> Result<bool, typos::Error> {
|
||||
self.check_filename(path, parser, report)
|
||||
}
|
||||
|
||||
fn check_file(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
parser: &typos::tokens::Parser,
|
||||
_dictionary: &dyn typos::Dictionary,
|
||||
report: &dyn typos::report::Report,
|
||||
) -> Result<bool, typos::Error> {
|
||||
self.check_file(path, explicit, parser, report)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'d, 'p> Checks for typos::checks::Checks {
|
||||
fn check_filename(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
parser: &typos::tokens::Parser,
|
||||
dictionary: &dyn typos::Dictionary,
|
||||
report: &dyn typos::report::Report,
|
||||
) -> Result<bool, typos::Error> {
|
||||
self.check_filename(path, parser, dictionary, report)
|
||||
}
|
||||
|
||||
fn check_file(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
parser: &typos::tokens::Parser,
|
||||
dictionary: &dyn typos::Dictionary,
|
||||
report: &dyn typos::report::Report,
|
||||
) -> Result<bool, typos::Error> {
|
||||
self.check_file(path, explicit, parser, dictionary, report)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn check_path(
|
||||
walk: ignore::Walk,
|
||||
checks: &dyn Checks,
|
||||
checks: &dyn typos::checks::Check,
|
||||
parser: &typos::tokens::Parser,
|
||||
dictionary: &dyn typos::Dictionary,
|
||||
reporter: &dyn typos::report::Report,
|
||||
|
@ -115,7 +27,7 @@ pub(crate) fn check_path(
|
|||
|
||||
pub(crate) fn check_path_parallel(
|
||||
walk: ignore::WalkParallel,
|
||||
checks: &dyn Checks,
|
||||
checks: &dyn typos::checks::Check,
|
||||
parser: &typos::tokens::Parser,
|
||||
dictionary: &dyn typos::Dictionary,
|
||||
reporter: &dyn typos::report::Report,
|
||||
|
@ -143,7 +55,7 @@ pub(crate) fn check_path_parallel(
|
|||
|
||||
fn check_entry(
|
||||
entry: Result<ignore::DirEntry, ignore::Error>,
|
||||
checks: &dyn Checks,
|
||||
checks: &dyn typos::checks::Check,
|
||||
parser: &typos::tokens::Parser,
|
||||
dictionary: &dyn typos::Dictionary,
|
||||
reporter: &dyn typos::report::Report,
|
||||
|
|
|
@ -128,14 +128,14 @@ fn run() -> Result<i32, anyhow::Error> {
|
|||
}
|
||||
} else {
|
||||
let (identifier_parser, word_parser, checks);
|
||||
let selected_checks: &dyn checks::Checks = if args.identifiers {
|
||||
let selected_checks: &dyn typos::checks::Check = if args.identifiers {
|
||||
identifier_parser = settings.build_identifier_parser();
|
||||
&identifier_parser
|
||||
} else if args.words {
|
||||
word_parser = settings.build_word_parser();
|
||||
&word_parser
|
||||
} else {
|
||||
checks = settings.build_checks();
|
||||
checks = settings.build_typos();
|
||||
&checks
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue