mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-26 02:51:08 -05:00
refactor: Switch Typos to check_file
This commit is contained in:
parent
6e53d7e719
commit
663eb94d32
2 changed files with 212 additions and 382 deletions
|
@ -7,129 +7,178 @@ mod data;
|
||||||
use assert_fs::prelude::*;
|
use assert_fs::prelude::*;
|
||||||
use typos_cli::checks::Check;
|
use typos_cli::checks::Check;
|
||||||
|
|
||||||
fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) {
|
fn bench_files(data: &str, b: &mut test::Bencher) {
|
||||||
|
let temp = assert_fs::TempDir::new().unwrap();
|
||||||
|
let sample_path = temp.child("sample");
|
||||||
|
sample_path.write_str(data).unwrap();
|
||||||
|
|
||||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||||
let parser = typos::tokens::Tokenizer::new();
|
let parser = typos::tokens::Tokenizer::new();
|
||||||
let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser();
|
let checks = typos_cli::checks::TyposSettings::new().build_files();
|
||||||
b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn parse_idents_empty_str(b: &mut test::Bencher) {
|
|
||||||
bench_parse_ident_str(data::EMPTY, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn parse_idents_no_tokens_str(b: &mut test::Bencher) {
|
|
||||||
bench_parse_ident_str(data::NO_TOKENS, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn parse_idents_single_token_str(b: &mut test::Bencher) {
|
|
||||||
bench_parse_ident_str(data::SINGLE_TOKEN, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn parse_idents_sherlock_str(b: &mut test::Bencher) {
|
|
||||||
bench_parse_ident_str(data::SHERLOCK, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn parse_idents_code_str(b: &mut test::Bencher) {
|
|
||||||
bench_parse_ident_str(data::CODE, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn parse_idents_corpus_str(b: &mut test::Bencher) {
|
|
||||||
bench_parse_ident_str(data::CORPUS, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) {
|
|
||||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
|
||||||
let parser = typos::tokens::Tokenizer::new();
|
|
||||||
let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser();
|
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
checks.check_bytes(
|
checks.check_file(
|
||||||
data.as_bytes(),
|
sample_path.path(),
|
||||||
|
true,
|
||||||
&parser,
|
&parser,
|
||||||
&corrections,
|
&corrections,
|
||||||
&typos_cli::report::PrintSilent,
|
&typos_cli::report::PrintSilent,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
|
||||||
|
temp.close().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn parse_idents_empty_bytes(b: &mut test::Bencher) {
|
fn files_empty(b: &mut test::Bencher) {
|
||||||
bench_parse_ident_bytes(data::EMPTY, b);
|
bench_files(data::EMPTY, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn parse_idents_no_tokens_bytes(b: &mut test::Bencher) {
|
fn files_no_tokens(b: &mut test::Bencher) {
|
||||||
bench_parse_ident_bytes(data::NO_TOKENS, b);
|
bench_files(data::NO_TOKENS, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn parse_idents_single_token_bytes(b: &mut test::Bencher) {
|
fn files_single_token(b: &mut test::Bencher) {
|
||||||
bench_parse_ident_bytes(data::SINGLE_TOKEN, b);
|
bench_files(data::SINGLE_TOKEN, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn parse_idents_sherlock_bytes(b: &mut test::Bencher) {
|
fn files_sherlock(b: &mut test::Bencher) {
|
||||||
bench_parse_ident_bytes(data::SHERLOCK, b);
|
bench_files(data::SHERLOCK, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn parse_idents_code_bytes(b: &mut test::Bencher) {
|
fn files_code(b: &mut test::Bencher) {
|
||||||
bench_parse_ident_bytes(data::CODE, b);
|
bench_files(data::CODE, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn parse_idents_corpus_bytes(b: &mut test::Bencher) {
|
fn files_corpus(b: &mut test::Bencher) {
|
||||||
bench_parse_ident_bytes(data::CORPUS, b);
|
bench_files(data::CORPUS, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn bench_parse_word_str(data: &str, b: &mut test::Bencher) {
|
fn bench_identifiers(data: &str, b: &mut test::Bencher) {
|
||||||
|
let temp = assert_fs::TempDir::new().unwrap();
|
||||||
|
let sample_path = temp.child("sample");
|
||||||
|
sample_path.write_str(data).unwrap();
|
||||||
|
|
||||||
|
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||||
|
let parser = typos::tokens::Tokenizer::new();
|
||||||
|
let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser();
|
||||||
|
b.iter(|| {
|
||||||
|
checks.check_file(
|
||||||
|
sample_path.path(),
|
||||||
|
true,
|
||||||
|
&parser,
|
||||||
|
&corrections,
|
||||||
|
&typos_cli::report::PrintSilent,
|
||||||
|
)
|
||||||
|
});
|
||||||
|
|
||||||
|
temp.close().unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn identifiers_empty(b: &mut test::Bencher) {
|
||||||
|
bench_identifiers(data::EMPTY, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn identifiers_no_tokens(b: &mut test::Bencher) {
|
||||||
|
bench_identifiers(data::NO_TOKENS, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn identifiers_single_token(b: &mut test::Bencher) {
|
||||||
|
bench_identifiers(data::SINGLE_TOKEN, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn identifiers_sherlock(b: &mut test::Bencher) {
|
||||||
|
bench_identifiers(data::SHERLOCK, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn identifiers_code(b: &mut test::Bencher) {
|
||||||
|
bench_identifiers(data::CODE, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn identifiers_corpus(b: &mut test::Bencher) {
|
||||||
|
bench_identifiers(data::CORPUS, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_words(data: &str, b: &mut test::Bencher) {
|
||||||
|
let temp = assert_fs::TempDir::new().unwrap();
|
||||||
|
let sample_path = temp.child("sample");
|
||||||
|
sample_path.write_str(data).unwrap();
|
||||||
|
|
||||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||||
let parser = typos::tokens::Tokenizer::new();
|
let parser = typos::tokens::Tokenizer::new();
|
||||||
let checks = typos_cli::checks::TyposSettings::new().build_word_parser();
|
let checks = typos_cli::checks::TyposSettings::new().build_word_parser();
|
||||||
b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent));
|
b.iter(|| {
|
||||||
|
checks.check_file(
|
||||||
|
sample_path.path(),
|
||||||
|
true,
|
||||||
|
&parser,
|
||||||
|
&corrections,
|
||||||
|
&typos_cli::report::PrintSilent,
|
||||||
|
)
|
||||||
|
});
|
||||||
|
|
||||||
|
temp.close().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn parse_words_empty(b: &mut test::Bencher) {
|
fn words_empty(b: &mut test::Bencher) {
|
||||||
bench_parse_word_str(data::EMPTY, b);
|
bench_words(data::EMPTY, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn parse_words_no_tokens(b: &mut test::Bencher) {
|
fn words_no_tokens(b: &mut test::Bencher) {
|
||||||
bench_parse_word_str(data::NO_TOKENS, b);
|
bench_words(data::NO_TOKENS, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn parse_words_single_token(b: &mut test::Bencher) {
|
fn words_single_token(b: &mut test::Bencher) {
|
||||||
bench_parse_word_str(data::SINGLE_TOKEN, b);
|
bench_words(data::SINGLE_TOKEN, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn parse_words_sherlock(b: &mut test::Bencher) {
|
fn words_sherlock(b: &mut test::Bencher) {
|
||||||
bench_parse_word_str(data::SHERLOCK, b);
|
bench_words(data::SHERLOCK, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn parse_words_code(b: &mut test::Bencher) {
|
fn words_code(b: &mut test::Bencher) {
|
||||||
bench_parse_word_str(data::CODE, b);
|
bench_words(data::CODE, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn parse_words_corpus(b: &mut test::Bencher) {
|
fn words_corpus(b: &mut test::Bencher) {
|
||||||
bench_parse_word_str(data::CORPUS, b);
|
bench_words(data::CORPUS, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn bench_typos(data: &str, b: &mut test::Bencher) {
|
fn bench_typos(data: &str, b: &mut test::Bencher) {
|
||||||
|
let temp = assert_fs::TempDir::new().unwrap();
|
||||||
|
let sample_path = temp.child("sample");
|
||||||
|
sample_path.write_str(data).unwrap();
|
||||||
|
|
||||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||||
let parser = typos::tokens::Tokenizer::new();
|
let parser = typos::tokens::Tokenizer::new();
|
||||||
let checks = typos_cli::checks::TyposSettings::new().build_typos();
|
let checks = typos_cli::checks::TyposSettings::new().build_typos();
|
||||||
b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent));
|
b.iter(|| {
|
||||||
|
checks.check_file(
|
||||||
|
sample_path.path(),
|
||||||
|
true,
|
||||||
|
&parser,
|
||||||
|
&corrections,
|
||||||
|
&typos_cli::report::PrintSilent,
|
||||||
|
)
|
||||||
|
});
|
||||||
|
|
||||||
|
temp.close().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
|
@ -161,54 +210,3 @@ fn typos_code(b: &mut test::Bencher) {
|
||||||
fn typos_corpus(b: &mut test::Bencher) {
|
fn typos_corpus(b: &mut test::Bencher) {
|
||||||
bench_typos(data::CORPUS, b);
|
bench_typos(data::CORPUS, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn bench_check_file(data: &str, b: &mut test::Bencher) {
|
|
||||||
let temp = assert_fs::TempDir::new().unwrap();
|
|
||||||
let sample_path = temp.child("sample");
|
|
||||||
sample_path.write_str(data).unwrap();
|
|
||||||
|
|
||||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
|
||||||
let parser = typos::tokens::Tokenizer::new();
|
|
||||||
let checks = typos_cli::checks::TyposSettings::new().build_typos();
|
|
||||||
b.iter(|| {
|
|
||||||
checks.check_file_content(
|
|
||||||
sample_path.path(),
|
|
||||||
true,
|
|
||||||
&parser,
|
|
||||||
&corrections,
|
|
||||||
&typos_cli::report::PrintSilent,
|
|
||||||
)
|
|
||||||
});
|
|
||||||
|
|
||||||
temp.close().unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn check_file_empty(b: &mut test::Bencher) {
|
|
||||||
bench_check_file(data::EMPTY, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn check_file_no_tokens(b: &mut test::Bencher) {
|
|
||||||
bench_check_file(data::NO_TOKENS, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn check_file_single_token(b: &mut test::Bencher) {
|
|
||||||
bench_check_file(data::SINGLE_TOKEN, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn check_file_sherlock(b: &mut test::Bencher) {
|
|
||||||
bench_check_file(data::SHERLOCK, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn check_file_code(b: &mut test::Bencher) {
|
|
||||||
bench_check_file(data::CODE, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn check_file_corpus(b: &mut test::Bencher) {
|
|
||||||
bench_check_file(data::CORPUS, b);
|
|
||||||
}
|
|
||||||
|
|
334
src/checks.rs
334
src/checks.rs
|
@ -5,82 +5,6 @@ use typos::tokens;
|
||||||
use typos::Dictionary;
|
use typos::Dictionary;
|
||||||
|
|
||||||
pub trait Check: Send + Sync {
|
pub trait Check: Send + Sync {
|
||||||
fn check_str(
|
|
||||||
&self,
|
|
||||||
buffer: &str,
|
|
||||||
parser: &tokens::Tokenizer,
|
|
||||||
dictionary: &dyn Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
|
||||||
) -> Result<(), std::io::Error>;
|
|
||||||
|
|
||||||
fn check_bytes(
|
|
||||||
&self,
|
|
||||||
buffer: &[u8],
|
|
||||||
parser: &tokens::Tokenizer,
|
|
||||||
dictionary: &dyn Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
|
||||||
) -> Result<(), std::io::Error>;
|
|
||||||
|
|
||||||
fn check_filenames(&self) -> bool;
|
|
||||||
|
|
||||||
fn check_files(&self) -> bool;
|
|
||||||
|
|
||||||
fn binary(&self) -> bool;
|
|
||||||
|
|
||||||
fn check_filename(
|
|
||||||
&self,
|
|
||||||
path: &std::path::Path,
|
|
||||||
parser: &tokens::Tokenizer,
|
|
||||||
dictionary: &dyn Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
|
||||||
) -> Result<(), std::io::Error> {
|
|
||||||
if !self.check_filenames() {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
|
||||||
let context_reporter = ReportContext {
|
|
||||||
reporter,
|
|
||||||
context: report::PathContext { path }.into(),
|
|
||||||
};
|
|
||||||
self.check_str(file_name, parser, dictionary, &context_reporter)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_file_content(
|
|
||||||
&self,
|
|
||||||
path: &std::path::Path,
|
|
||||||
explicit: bool,
|
|
||||||
parser: &tokens::Tokenizer,
|
|
||||||
dictionary: &dyn Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
|
||||||
) -> Result<(), std::io::Error> {
|
|
||||||
if !self.check_files() {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
let buffer = read_file(path, reporter)?;
|
|
||||||
let (buffer, content_type) = massage_data(buffer)?;
|
|
||||||
if !explicit && !self.binary() && content_type.is_binary() {
|
|
||||||
let msg = report::BinaryFile { path };
|
|
||||||
reporter.report(msg.into())?;
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (line_idx, line) in buffer.lines().enumerate() {
|
|
||||||
let line_num = line_idx + 1;
|
|
||||||
let context_reporter = ReportContext {
|
|
||||||
reporter,
|
|
||||||
context: report::FileContext { path, line_num }.into(),
|
|
||||||
};
|
|
||||||
self.check_bytes(line, parser, dictionary, &context_reporter)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_file(
|
fn check_file(
|
||||||
&self,
|
&self,
|
||||||
path: &std::path::Path,
|
path: &std::path::Path,
|
||||||
|
@ -88,23 +12,7 @@ pub trait Check: Send + Sync {
|
||||||
parser: &tokens::Tokenizer,
|
parser: &tokens::Tokenizer,
|
||||||
dictionary: &dyn Dictionary,
|
dictionary: &dyn Dictionary,
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), std::io::Error> {
|
) -> Result<(), std::io::Error>;
|
||||||
self.check_filename(path, parser, dictionary, reporter)?;
|
|
||||||
self.check_file_content(path, explicit, parser, dictionary, reporter)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ReportContext<'m, 'r> {
|
|
||||||
reporter: &'r dyn report::Report,
|
|
||||||
context: report::Context<'m>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'m, 'r> report::Report for ReportContext<'m, 'r> {
|
|
||||||
fn report(&self, msg: report::Message) -> Result<(), std::io::Error> {
|
|
||||||
let msg = msg.context(Some(self.context.clone()));
|
|
||||||
self.reporter.report(msg)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
@ -183,9 +91,10 @@ pub struct Typos {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Check for Typos {
|
impl Check for Typos {
|
||||||
fn check_str(
|
fn check_file(
|
||||||
&self,
|
&self,
|
||||||
buffer: &str,
|
path: &std::path::Path,
|
||||||
|
explicit: bool,
|
||||||
tokenizer: &tokens::Tokenizer,
|
tokenizer: &tokens::Tokenizer,
|
||||||
dictionary: &dyn Dictionary,
|
dictionary: &dyn Dictionary,
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
|
@ -194,54 +103,47 @@ impl Check for Typos {
|
||||||
.tokenizer(tokenizer)
|
.tokenizer(tokenizer)
|
||||||
.dictionary(dictionary)
|
.dictionary(dictionary)
|
||||||
.typos();
|
.typos();
|
||||||
for typo in parser.parse_str(buffer) {
|
|
||||||
|
if self.check_filenames {
|
||||||
|
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||||
|
for typo in parser.parse_str(file_name) {
|
||||||
let msg = report::Typo {
|
let msg = report::Typo {
|
||||||
context: None,
|
context: Some(report::PathContext { path }.into()),
|
||||||
buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()),
|
buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
|
||||||
byte_offset: typo.byte_offset,
|
byte_offset: typo.byte_offset,
|
||||||
typo: typo.typo,
|
typo: typo.typo,
|
||||||
corrections: typo.corrections,
|
corrections: typo.corrections,
|
||||||
};
|
};
|
||||||
reporter.report(msg.into())?;
|
reporter.report(msg.into())?;
|
||||||
}
|
}
|
||||||
Ok(())
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check_bytes(
|
if self.check_files {
|
||||||
&self,
|
let buffer = read_file(path, reporter)?;
|
||||||
buffer: &[u8],
|
let (buffer, content_type) = massage_data(buffer)?;
|
||||||
tokenizer: &tokens::Tokenizer,
|
if !explicit && !self.binary && content_type.is_binary() {
|
||||||
dictionary: &dyn Dictionary,
|
let msg = report::BinaryFile { path };
|
||||||
reporter: &dyn report::Report,
|
reporter.report(msg.into())?;
|
||||||
) -> Result<(), std::io::Error> {
|
} else {
|
||||||
let parser = typos::ParserBuilder::new()
|
let mut accum_line_num = AccumulateLineNum::new();
|
||||||
.tokenizer(tokenizer)
|
for typo in parser.parse_bytes(&buffer) {
|
||||||
.dictionary(dictionary)
|
let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
|
||||||
.typos();
|
let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
|
||||||
for typo in parser.parse_bytes(buffer) {
|
|
||||||
let msg = report::Typo {
|
let msg = report::Typo {
|
||||||
context: None,
|
context: Some(report::FileContext { path, line_num }.into()),
|
||||||
buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()),
|
buffer: std::borrow::Cow::Borrowed(line),
|
||||||
byte_offset: typo.byte_offset,
|
byte_offset: line_offset,
|
||||||
typo: typo.typo,
|
typo: typo.typo,
|
||||||
corrections: typo.corrections,
|
corrections: typo.corrections,
|
||||||
};
|
};
|
||||||
reporter.report(msg.into())?;
|
reporter.report(msg.into())?;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check_filenames(&self) -> bool {
|
|
||||||
self.check_filenames
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_files(&self) -> bool {
|
|
||||||
self.check_files
|
|
||||||
}
|
|
||||||
|
|
||||||
fn binary(&self) -> bool {
|
|
||||||
self.binary
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
|
@ -252,26 +154,6 @@ pub struct Identifiers {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Check for Identifiers {
|
impl Check for Identifiers {
|
||||||
fn check_str(
|
|
||||||
&self,
|
|
||||||
_buffer: &str,
|
|
||||||
_tokenizer: &tokens::Tokenizer,
|
|
||||||
_dictionary: &dyn Dictionary,
|
|
||||||
_reporter: &dyn report::Report,
|
|
||||||
) -> Result<(), std::io::Error> {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_bytes(
|
|
||||||
&self,
|
|
||||||
_buffer: &[u8],
|
|
||||||
_tokenizer: &tokens::Tokenizer,
|
|
||||||
_dictionary: &dyn Dictionary,
|
|
||||||
_reporter: &dyn report::Report,
|
|
||||||
) -> Result<(), std::io::Error> {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_file(
|
fn check_file(
|
||||||
&self,
|
&self,
|
||||||
path: &std::path::Path,
|
path: &std::path::Path,
|
||||||
|
@ -284,7 +166,7 @@ impl Check for Identifiers {
|
||||||
.tokenizer(tokenizer)
|
.tokenizer(tokenizer)
|
||||||
.identifiers();
|
.identifiers();
|
||||||
|
|
||||||
if self.check_filenames() {
|
if self.check_filenames {
|
||||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||||
for word in parser.parse_str(file_name) {
|
for word in parser.parse_str(file_name) {
|
||||||
let msg = report::Parse {
|
let msg = report::Parse {
|
||||||
|
@ -297,16 +179,20 @@ impl Check for Identifiers {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.check_files() {
|
if self.check_files {
|
||||||
let buffer = read_file(path, reporter)?;
|
let buffer = read_file(path, reporter)?;
|
||||||
let (buffer, content_type) = massage_data(buffer)?;
|
let (buffer, content_type) = massage_data(buffer)?;
|
||||||
if !explicit && !self.binary() && content_type.is_binary() {
|
if !explicit && !self.binary && content_type.is_binary() {
|
||||||
let msg = report::BinaryFile { path };
|
let msg = report::BinaryFile { path };
|
||||||
reporter.report(msg.into())?;
|
reporter.report(msg.into())?;
|
||||||
} else {
|
} else {
|
||||||
for word in parser.parse_bytes(&buffer) {
|
for word in parser.parse_bytes(&buffer) {
|
||||||
|
// HACK: Don't look up the line_num per entry to better match the performance
|
||||||
|
// of Typos for comparison purposes. We don't really get much out of it
|
||||||
|
// anyway.
|
||||||
|
let line_num = 0;
|
||||||
let msg = report::Parse {
|
let msg = report::Parse {
|
||||||
context: Some(report::FileContext { path, line_num: 0 }.into()),
|
context: Some(report::FileContext { path, line_num }.into()),
|
||||||
kind: report::ParseKind::Identifier,
|
kind: report::ParseKind::Identifier,
|
||||||
data: word.token(),
|
data: word.token(),
|
||||||
};
|
};
|
||||||
|
@ -317,18 +203,6 @@ impl Check for Identifiers {
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check_filenames(&self) -> bool {
|
|
||||||
self.check_filenames
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_files(&self) -> bool {
|
|
||||||
self.check_files
|
|
||||||
}
|
|
||||||
|
|
||||||
fn binary(&self) -> bool {
|
|
||||||
self.binary
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
|
@ -339,26 +213,6 @@ pub struct Words {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Check for Words {
|
impl Check for Words {
|
||||||
fn check_str(
|
|
||||||
&self,
|
|
||||||
_buffer: &str,
|
|
||||||
_tokenizer: &tokens::Tokenizer,
|
|
||||||
_dictionary: &dyn Dictionary,
|
|
||||||
_reporter: &dyn report::Report,
|
|
||||||
) -> Result<(), std::io::Error> {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_bytes(
|
|
||||||
&self,
|
|
||||||
_buffer: &[u8],
|
|
||||||
_tokenizer: &tokens::Tokenizer,
|
|
||||||
_dictionary: &dyn Dictionary,
|
|
||||||
_reporter: &dyn report::Report,
|
|
||||||
) -> Result<(), std::io::Error> {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_file(
|
fn check_file(
|
||||||
&self,
|
&self,
|
||||||
path: &std::path::Path,
|
path: &std::path::Path,
|
||||||
|
@ -369,7 +223,7 @@ impl Check for Words {
|
||||||
) -> Result<(), std::io::Error> {
|
) -> Result<(), std::io::Error> {
|
||||||
let parser = typos::ParserBuilder::new().tokenizer(tokenizer).words();
|
let parser = typos::ParserBuilder::new().tokenizer(tokenizer).words();
|
||||||
|
|
||||||
if self.check_filenames() {
|
if self.check_filenames {
|
||||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||||
for word in parser.parse_str(file_name) {
|
for word in parser.parse_str(file_name) {
|
||||||
let msg = report::Parse {
|
let msg = report::Parse {
|
||||||
|
@ -382,16 +236,20 @@ impl Check for Words {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.check_files() {
|
if self.check_files {
|
||||||
let buffer = read_file(path, reporter)?;
|
let buffer = read_file(path, reporter)?;
|
||||||
let (buffer, content_type) = massage_data(buffer)?;
|
let (buffer, content_type) = massage_data(buffer)?;
|
||||||
if !explicit && !self.binary() && content_type.is_binary() {
|
if !explicit && !self.binary && content_type.is_binary() {
|
||||||
let msg = report::BinaryFile { path };
|
let msg = report::BinaryFile { path };
|
||||||
reporter.report(msg.into())?;
|
reporter.report(msg.into())?;
|
||||||
} else {
|
} else {
|
||||||
for word in parser.parse_bytes(&buffer) {
|
for word in parser.parse_bytes(&buffer) {
|
||||||
|
// HACK: Don't look up the line_num per entry to better match the performance
|
||||||
|
// of Typos for comparison purposes. We don't really get much out of it
|
||||||
|
// anyway.
|
||||||
|
let line_num = 0;
|
||||||
let msg = report::Parse {
|
let msg = report::Parse {
|
||||||
context: Some(report::FileContext { path, line_num: 0 }.into()),
|
context: Some(report::FileContext { path, line_num }.into()),
|
||||||
kind: report::ParseKind::Word,
|
kind: report::ParseKind::Word,
|
||||||
data: word.token(),
|
data: word.token(),
|
||||||
};
|
};
|
||||||
|
@ -402,18 +260,6 @@ impl Check for Words {
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check_filenames(&self) -> bool {
|
|
||||||
self.check_filenames
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_files(&self) -> bool {
|
|
||||||
self.check_files
|
|
||||||
}
|
|
||||||
|
|
||||||
fn binary(&self) -> bool {
|
|
||||||
self.binary
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
|
@ -422,59 +268,6 @@ pub struct FoundFiles {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Check for FoundFiles {
|
impl Check for FoundFiles {
|
||||||
fn check_str(
|
|
||||||
&self,
|
|
||||||
_buffer: &str,
|
|
||||||
_parser: &tokens::Tokenizer,
|
|
||||||
_dictionary: &dyn Dictionary,
|
|
||||||
_reporter: &dyn report::Report,
|
|
||||||
) -> Result<(), std::io::Error> {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_bytes(
|
|
||||||
&self,
|
|
||||||
_buffer: &[u8],
|
|
||||||
_parser: &tokens::Tokenizer,
|
|
||||||
_dictionary: &dyn Dictionary,
|
|
||||||
_reporter: &dyn report::Report,
|
|
||||||
) -> Result<(), std::io::Error> {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_filenames(&self) -> bool {
|
|
||||||
true
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_files(&self) -> bool {
|
|
||||||
true
|
|
||||||
}
|
|
||||||
|
|
||||||
fn binary(&self) -> bool {
|
|
||||||
self.binary
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_filename(
|
|
||||||
&self,
|
|
||||||
_path: &std::path::Path,
|
|
||||||
_parser: &tokens::Tokenizer,
|
|
||||||
_dictionary: &dyn Dictionary,
|
|
||||||
_reporter: &dyn report::Report,
|
|
||||||
) -> Result<(), std::io::Error> {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_file_content(
|
|
||||||
&self,
|
|
||||||
_path: &std::path::Path,
|
|
||||||
_explicit: bool,
|
|
||||||
_parser: &tokens::Tokenizer,
|
|
||||||
_dictionary: &dyn Dictionary,
|
|
||||||
_reporter: &dyn report::Report,
|
|
||||||
) -> Result<(), std::io::Error> {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_file(
|
fn check_file(
|
||||||
&self,
|
&self,
|
||||||
path: &std::path::Path,
|
path: &std::path::Path,
|
||||||
|
@ -533,6 +326,45 @@ fn massage_data(
|
||||||
Ok((buffer, content_type))
|
Ok((buffer, content_type))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Incrementally maps byte offsets within a buffer to 1-indexed line numbers.
///
/// Offsets must be queried in non-decreasing order; each query only scans the
/// bytes between the previously queried offset and the new one.
struct AccumulateLineNum {
    /// Line number (1-indexed) of the position at `last_offset`.
    line_num: usize,
    /// Byte offset up to which newlines have already been counted.
    last_offset: usize,
}

impl AccumulateLineNum {
    /// Start at the beginning of the buffer, on line 1.
    fn new() -> Self {
        Self {
            // 1-indexed
            line_num: 1,
            last_offset: 0,
        }
    }

    /// Return the 1-indexed line number of `byte_offset` within `buffer`.
    ///
    /// # Panics
    ///
    /// Panics if `byte_offset` is smaller than the offset passed to the
    /// previous call, or if it is past the end of `buffer`.
    fn line_num(&mut self, buffer: &[u8], byte_offset: usize) -> usize {
        assert!(self.last_offset <= byte_offset);
        let slice = &buffer[self.last_offset..byte_offset];
        // Count newline bytes, not lines: a slice holding text without any
        // `\n` is still "one line" but must not advance the line number.
        let newlines = slice.iter().filter(|&&b| b == b'\n').count();
        let line_num = self.line_num + newlines;
        self.line_num = line_num;
        self.last_offset = byte_offset;
        line_num
    }
}
|
||||||
|
|
||||||
|
fn extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) {
|
||||||
|
let line_start = buffer[0..byte_offset]
|
||||||
|
.rfind_byte(b'\n')
|
||||||
|
// Skip the newline
|
||||||
|
.map(|s| s + 1)
|
||||||
|
.unwrap_or(0);
|
||||||
|
let line = buffer[line_start..]
|
||||||
|
.lines()
|
||||||
|
.next()
|
||||||
|
.expect("should always be at least a line");
|
||||||
|
let line_offset = byte_offset - line_start;
|
||||||
|
(line, line_offset)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn check_path(
|
pub fn check_path(
|
||||||
walk: ignore::Walk,
|
walk: ignore::Walk,
|
||||||
checks: &dyn Check,
|
checks: &dyn Check,
|
||||||
|
|
Loading…
Reference in a new issue