refactor: Layer files/filenames on buffer processing

Ed Page 2020-11-10 06:01:01 -06:00
parent eb20ba9f11
commit e12cd8ed55
7 changed files with 483 additions and 505 deletions
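The change layers file and filename handling on top of plain buffer checks: `Check` implementations now expose `check_str`/`check_bytes`, and the trait supplies default `check_filename`/`check_file` methods that do the path handling, file reading, and line splitting before delegating to those buffer entry points. A rough usage sketch, mirroring the new benchmarks below (the `scan` helper is hypothetical and the error handling is illustrative only):

use typos::checks::Check;

// Hypothetical helper: returns whether any typo was reported.
fn scan(path: &std::path::Path) -> Result<bool, typos::Error> {
    // Dictionary, parser, and silent reporter, constructed the same way the benchmarks do.
    let dictionary = typos_cli::dict::BuiltIn::new(Default::default());
    let parser = typos::tokens::Parser::new();
    let checks = typos::checks::TyposSettings::new().build_typos();

    // Buffer-level entry point: no file I/O involved.
    let mut typos_found =
        checks.check_str("hello wrold", &parser, &dictionary, &typos::report::PrintSilent)?;

    // File-level entry point: the trait's default method reads the file, splits it into
    // lines, and feeds each line to `check_bytes` with a `FileContext` attached.
    typos_found |= checks.check_file(path, true, &parser, &dictionary, &typos::report::PrintSilent)?;

    Ok(typos_found)
}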

benches/checks.rs (new file, 214 lines)

@@ -0,0 +1,214 @@
#![feature(test)]
extern crate test;
mod data;
use assert_fs::prelude::*;
use typos::checks::Check;
fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_identifier_parser();
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
}
#[bench]
fn parse_idents_empty_str(b: &mut test::Bencher) {
bench_parse_ident_str(data::EMPTY, b);
}
#[bench]
fn parse_idents_no_tokens_str(b: &mut test::Bencher) {
bench_parse_ident_str(data::NO_TOKENS, b);
}
#[bench]
fn parse_idents_single_token_str(b: &mut test::Bencher) {
bench_parse_ident_str(data::SINGLE_TOKEN, b);
}
#[bench]
fn parse_idents_sherlock_str(b: &mut test::Bencher) {
bench_parse_ident_str(data::SHERLOCK, b);
}
#[bench]
fn parse_idents_code_str(b: &mut test::Bencher) {
bench_parse_ident_str(data::CODE, b);
}
#[bench]
fn parse_idents_corpus_str(b: &mut test::Bencher) {
bench_parse_ident_str(data::CORPUS, b);
}
fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_identifier_parser();
b.iter(|| {
checks.check_bytes(
data.as_bytes(),
&parser,
&corrections,
&typos::report::PrintSilent,
)
});
}
#[bench]
fn parse_idents_empty_bytes(b: &mut test::Bencher) {
bench_parse_ident_bytes(data::EMPTY, b);
}
#[bench]
fn parse_idents_no_tokens_bytes(b: &mut test::Bencher) {
bench_parse_ident_bytes(data::NO_TOKENS, b);
}
#[bench]
fn parse_idents_single_token_bytes(b: &mut test::Bencher) {
bench_parse_ident_bytes(data::SINGLE_TOKEN, b);
}
#[bench]
fn parse_idents_sherlock_bytes(b: &mut test::Bencher) {
bench_parse_ident_bytes(data::SHERLOCK, b);
}
#[bench]
fn parse_idents_code_bytes(b: &mut test::Bencher) {
bench_parse_ident_bytes(data::CODE, b);
}
#[bench]
fn parse_idents_corpus_bytes(b: &mut test::Bencher) {
bench_parse_ident_bytes(data::CORPUS, b);
}
fn bench_parse_word_str(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_word_parser();
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
}
#[bench]
fn parse_words_empty(b: &mut test::Bencher) {
bench_parse_word_str(data::EMPTY, b);
}
#[bench]
fn parse_words_no_tokens(b: &mut test::Bencher) {
bench_parse_word_str(data::NO_TOKENS, b);
}
#[bench]
fn parse_words_single_token(b: &mut test::Bencher) {
bench_parse_word_str(data::SINGLE_TOKEN, b);
}
#[bench]
fn parse_words_sherlock(b: &mut test::Bencher) {
bench_parse_word_str(data::SHERLOCK, b);
}
#[bench]
fn parse_words_code(b: &mut test::Bencher) {
bench_parse_word_str(data::CODE, b);
}
#[bench]
fn parse_words_corpus(b: &mut test::Bencher) {
bench_parse_word_str(data::CORPUS, b);
}
fn bench_typos(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_typos();
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
}
#[bench]
fn typos_empty(b: &mut test::Bencher) {
bench_typos(data::EMPTY, b);
}
#[bench]
fn typos_no_tokens(b: &mut test::Bencher) {
bench_typos(data::NO_TOKENS, b);
}
#[bench]
fn typos_single_token(b: &mut test::Bencher) {
bench_typos(data::SINGLE_TOKEN, b);
}
#[bench]
fn typos_sherlock(b: &mut test::Bencher) {
bench_typos(data::SHERLOCK, b);
}
#[bench]
fn typos_code(b: &mut test::Bencher) {
bench_typos(data::CODE, b);
}
#[bench]
fn typos_corpus(b: &mut test::Bencher) {
bench_typos(data::CORPUS, b);
}
fn bench_check_file(data: &str, b: &mut test::Bencher) {
let temp = assert_fs::TempDir::new().unwrap();
let sample_path = temp.child("sample");
sample_path.write_str(data).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_typos();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&corrections,
&typos::report::PrintSilent,
)
});
temp.close().unwrap();
}
#[bench]
fn check_file_empty(b: &mut test::Bencher) {
bench_check_file(data::EMPTY, b);
}
#[bench]
fn check_file_no_tokens(b: &mut test::Bencher) {
bench_check_file(data::NO_TOKENS, b);
}
#[bench]
fn check_file_single_token(b: &mut test::Bencher) {
bench_check_file(data::SINGLE_TOKEN, b);
}
#[bench]
fn check_file_sherlock(b: &mut test::Bencher) {
bench_check_file(data::SHERLOCK, b);
}
#[bench]
fn check_file_code(b: &mut test::Bencher) {
bench_check_file(data::CODE, b);
}
#[bench]
fn check_file_corpus(b: &mut test::Bencher) {
bench_check_file(data::CORPUS, b);
}


@@ -1,231 +0,0 @@
#![feature(test)]
extern crate test;
mod data;
use assert_fs::prelude::*;
use bstr::ByteSlice;
fn bench_read(data: &str, b: &mut test::Bencher) {
let temp = assert_fs::TempDir::new().unwrap();
let sample_path = temp.child("sample");
sample_path.write_str(data).unwrap();
b.iter(|| std::fs::read(sample_path.path()));
temp.close().unwrap();
}
#[bench]
fn read_empty(b: &mut test::Bencher) {
bench_read(data::EMPTY, b);
}
#[bench]
fn read_no_tokens(b: &mut test::Bencher) {
bench_read(data::NO_TOKENS, b);
}
#[bench]
fn read_single_token(b: &mut test::Bencher) {
bench_read(data::SINGLE_TOKEN, b);
}
#[bench]
fn read_sherlock(b: &mut test::Bencher) {
bench_read(data::SHERLOCK, b);
}
#[bench]
fn read_code(b: &mut test::Bencher) {
bench_read(data::CODE, b);
}
#[bench]
fn read_corpus(b: &mut test::Bencher) {
bench_read(data::CORPUS, b);
}
fn bench_split_lines(data: &str, b: &mut test::Bencher) {
b.iter(|| data.as_bytes().lines().enumerate().last());
}
#[bench]
fn parse_lines_empty(b: &mut test::Bencher) {
bench_split_lines(data::EMPTY, b);
}
#[bench]
fn parse_lines_no_tokens(b: &mut test::Bencher) {
bench_split_lines(data::NO_TOKENS, b);
}
#[bench]
fn parse_lines_single_token(b: &mut test::Bencher) {
bench_split_lines(data::SINGLE_TOKEN, b);
}
#[bench]
fn parse_lines_sherlock(b: &mut test::Bencher) {
bench_split_lines(data::SHERLOCK, b);
}
#[bench]
fn parse_lines_code(b: &mut test::Bencher) {
bench_split_lines(data::CODE, b);
}
#[bench]
fn parse_lines_corpus(b: &mut test::Bencher) {
bench_split_lines(data::CORPUS, b);
}
fn bench_parse_ident(data: &str, b: &mut test::Bencher) {
let temp = assert_fs::TempDir::new().unwrap();
let sample_path = temp.child("sample");
sample_path.write_str(data).unwrap();
let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_identifier_parser();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&typos::report::PrintSilent,
)
});
temp.close().unwrap();
}
#[bench]
fn parse_idents_empty(b: &mut test::Bencher) {
bench_parse_ident(data::EMPTY, b);
}
#[bench]
fn parse_idents_no_tokens(b: &mut test::Bencher) {
bench_parse_ident(data::NO_TOKENS, b);
}
#[bench]
fn parse_idents_single_token(b: &mut test::Bencher) {
bench_parse_ident(data::SINGLE_TOKEN, b);
}
#[bench]
fn parse_idents_sherlock(b: &mut test::Bencher) {
bench_parse_ident(data::SHERLOCK, b);
}
#[bench]
fn parse_idents_code(b: &mut test::Bencher) {
bench_parse_ident(data::CODE, b);
}
#[bench]
fn parse_idents_corpus(b: &mut test::Bencher) {
bench_parse_ident(data::CORPUS, b);
}
fn bench_parse_word(data: &str, b: &mut test::Bencher) {
let temp = assert_fs::TempDir::new().unwrap();
let sample_path = temp.child("sample");
sample_path.write_str(data).unwrap();
let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_word_parser();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&typos::report::PrintSilent,
)
});
temp.close().unwrap();
}
#[bench]
fn parse_words_empty(b: &mut test::Bencher) {
bench_parse_word(data::EMPTY, b);
}
#[bench]
fn parse_words_no_tokens(b: &mut test::Bencher) {
bench_parse_word(data::NO_TOKENS, b);
}
#[bench]
fn parse_words_single_token(b: &mut test::Bencher) {
bench_parse_word(data::SINGLE_TOKEN, b);
}
#[bench]
fn parse_words_sherlock(b: &mut test::Bencher) {
bench_parse_word(data::SHERLOCK, b);
}
#[bench]
fn parse_words_code(b: &mut test::Bencher) {
bench_parse_word(data::CODE, b);
}
#[bench]
fn parse_words_corpus(b: &mut test::Bencher) {
bench_parse_word(data::CORPUS, b);
}
fn bench_check_file(data: &str, b: &mut test::Bencher) {
let temp = assert_fs::TempDir::new().unwrap();
let sample_path = temp.child("sample");
sample_path.write_str(data).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_checks();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&corrections,
&typos::report::PrintSilent,
)
});
temp.close().unwrap();
}
#[bench]
fn check_file_empty(b: &mut test::Bencher) {
bench_check_file(data::EMPTY, b);
}
#[bench]
fn check_file_no_tokens(b: &mut test::Bencher) {
bench_check_file(data::NO_TOKENS, b);
}
#[bench]
fn check_file_single_token(b: &mut test::Bencher) {
bench_check_file(data::SINGLE_TOKEN, b);
}
#[bench]
fn check_file_sherlock(b: &mut test::Bencher) {
bench_check_file(data::SHERLOCK, b);
}
#[bench]
fn check_file_code(b: &mut test::Bencher) {
bench_check_file(data::CODE, b);
}
#[bench]
fn check_file_corpus(b: &mut test::Bencher) {
bench_check_file(data::CORPUS, b);
}


@@ -5,6 +5,100 @@ use crate::tokens;
 use crate::Dictionary;
 use crate::Status;

+pub trait Check: Send + Sync {
+    fn check_str(
+        &self,
+        buffer: &str,
+        parser: &tokens::Parser,
+        dictionary: &dyn Dictionary,
+        reporter: &dyn report::Report,
+    ) -> Result<bool, crate::Error>;
+
+    fn check_bytes(
+        &self,
+        buffer: &[u8],
+        parser: &tokens::Parser,
+        dictionary: &dyn Dictionary,
+        reporter: &dyn report::Report,
+    ) -> Result<bool, crate::Error>;
+
+    fn check_filenames(&self) -> bool;
+
+    fn check_files(&self) -> bool;
+
+    fn binary(&self) -> bool;
+
+    fn check_filename(
+        &self,
+        path: &std::path::Path,
+        parser: &tokens::Parser,
+        dictionary: &dyn Dictionary,
+        reporter: &dyn report::Report,
+    ) -> Result<bool, crate::Error> {
+        let mut typos_found = false;
+
+        if !self.check_filenames() {
+            return Ok(typos_found);
+        }
+
+        if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
+            let context_reporter = ReportContext {
+                reporter,
+                context: report::PathContext { path }.into(),
+            };
+            typos_found |= self.check_str(file_name, parser, dictionary, &context_reporter)?;
+        }
+
+        Ok(typos_found)
+    }
+
+    fn check_file(
+        &self,
+        path: &std::path::Path,
+        explicit: bool,
+        parser: &tokens::Parser,
+        dictionary: &dyn Dictionary,
+        reporter: &dyn report::Report,
+    ) -> Result<bool, crate::Error> {
+        let mut typos_found = false;
+
+        if !self.check_files() {
+            return Ok(typos_found);
+        }
+
+        let buffer = read_file(path)?;
+        let (buffer, content_type) = massage_data(buffer)?;
+        if !explicit && !self.binary() && content_type.is_binary() {
+            let msg = report::BinaryFile { path };
+            reporter.report(msg.into());
+            return Ok(typos_found);
+        }
+
+        for (line_idx, line) in buffer.lines().enumerate() {
+            let line_num = line_idx + 1;
+            let context_reporter = ReportContext {
+                reporter,
+                context: report::FileContext { path, line_num }.into(),
+            };
+            typos_found |= self.check_bytes(line, parser, dictionary, &context_reporter)?;
+        }
+
+        Ok(typos_found)
+    }
+}
+
+struct ReportContext<'m, 'r> {
+    reporter: &'r dyn report::Report,
+    context: report::Context<'m>,
+}
+
+impl<'m, 'r> report::Report for ReportContext<'m, 'r> {
+    fn report(&self, msg: report::Message) -> bool {
+        let msg = msg.context(self.context.clone());
+        self.reporter.report(msg)
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct TyposSettings {
     check_filenames: bool,
@@ -32,8 +126,8 @@ impl TyposSettings
         self
     }

-    pub fn build_checks(&self) -> Checks {
-        Checks {
+    pub fn build_typos(&self) -> Typos {
+        Typos {
             check_filenames: self.check_filenames,
             check_files: self.check_files,
             binary: self.binary,
@@ -74,64 +168,56 @@ pub struct ParseIdentifiers
     binary: bool,
 }

-impl ParseIdentifiers {
-    pub fn check_filename(
+impl Check for ParseIdentifiers {
+    fn check_str(
         &self,
-        path: &std::path::Path,
+        buffer: &str,
         parser: &tokens::Parser,
+        _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<bool, crate::Error> {
         let typos_found = false;
-        if !self.check_filenames {
-            return Ok(typos_found);
-        }
-
-        for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
-            let msg = report::Parse {
-                context: report::PathContext { path }.into(),
-                kind: report::ParseKind::Identifier,
-                data: parser.parse(part).map(|i| i.token()).collect(),
-            };
-            reporter.report(msg.into());
-        }
+        let msg = report::Parse {
+            context: report::Context::None,
+            kind: report::ParseKind::Identifier,
+            data: parser.parse_str(buffer).map(|i| i.token()).collect(),
+        };
+        reporter.report(msg.into());

         Ok(typos_found)
     }

-    pub fn check_file(
+    fn check_bytes(
         &self,
-        path: &std::path::Path,
-        explicit: bool,
+        buffer: &[u8],
         parser: &tokens::Parser,
+        _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<bool, crate::Error> {
         let typos_found = false;
-        if !self.check_files {
-            return Ok(typos_found);
-        }
-
-        let buffer = read_file(path)?;
-        let (buffer, content_type) = massage_data(buffer)?;
-        if !explicit && !self.binary && content_type.is_binary() {
-            let msg = report::BinaryFile { path };
-            reporter.report(msg.into());
-            return Ok(typos_found);
-        }
-
-        for (line_idx, line) in buffer.lines().enumerate() {
-            let line_num = line_idx + 1;
-            let msg = report::Parse {
-                context: report::FileContext { path, line_num }.into(),
-                kind: report::ParseKind::Identifier,
-                data: parser.parse_bytes(line).map(|i| i.token()).collect(),
-            };
-            reporter.report(msg.into());
-        }
+        let msg = report::Parse {
+            context: report::Context::None,
+            kind: report::ParseKind::Identifier,
+            data: parser.parse_bytes(buffer).map(|i| i.token()).collect(),
+        };
+        reporter.report(msg.into());

         Ok(typos_found)
     }
+
+    fn check_filenames(&self) -> bool {
+        self.check_filenames
+    }
+
+    fn check_files(&self) -> bool {
+        self.check_files
+    }
+
+    fn binary(&self) -> bool {
+        self.binary
+    }
 }

 #[derive(Debug, Clone)]
@@ -141,124 +227,111 @@ pub struct ParseWords
     binary: bool,
 }

-impl ParseWords {
-    pub fn check_filename(
+impl Check for ParseWords {
+    fn check_str(
         &self,
-        path: &std::path::Path,
+        buffer: &str,
         parser: &tokens::Parser,
+        _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<bool, crate::Error> {
         let typos_found = false;
-        if !self.check_filenames {
-            return Ok(typos_found);
-        }
-
-        for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
-            let msg = report::Parse {
-                context: report::PathContext { path }.into(),
-                kind: report::ParseKind::Word,
-                data: parser
-                    .parse(part)
-                    .flat_map(|ident| ident.split().map(|i| i.token()))
-                    .collect(),
-            };
-            reporter.report(msg.into());
-        }
+        let msg = report::Parse {
+            context: report::Context::None,
+            kind: report::ParseKind::Word,
+            data: parser
+                .parse_str(buffer)
+                .flat_map(|ident| ident.split().map(|i| i.token()))
+                .collect(),
+        };
+        reporter.report(msg.into());

         Ok(typos_found)
     }

-    pub fn check_file(
+    fn check_bytes(
         &self,
-        path: &std::path::Path,
-        explicit: bool,
+        buffer: &[u8],
         parser: &tokens::Parser,
+        _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<bool, crate::Error> {
         let typos_found = false;
-        if !self.check_files {
-            return Ok(typos_found);
-        }
-
-        let buffer = read_file(path)?;
-        let (buffer, content_type) = massage_data(buffer)?;
-        if !explicit && !self.binary && content_type.is_binary() {
-            let msg = report::BinaryFile { path };
-            reporter.report(msg.into());
-            return Ok(typos_found);
-        }
-
-        for (line_idx, line) in buffer.lines().enumerate() {
-            let line_num = line_idx + 1;
-            let msg = report::Parse {
-                context: report::FileContext { path, line_num }.into(),
-                kind: report::ParseKind::Word,
-                data: parser
-                    .parse_bytes(line)
-                    .flat_map(|ident| ident.split().map(|i| i.token()))
-                    .collect(),
-            };
-            reporter.report(msg.into());
-        }
+        let msg = report::Parse {
+            context: report::Context::None,
+            kind: report::ParseKind::Word,
+            data: parser
+                .parse_bytes(buffer)
+                .flat_map(|ident| ident.split().map(|i| i.token()))
+                .collect(),
+        };
+        reporter.report(msg.into());

         Ok(typos_found)
     }
+
+    fn check_filenames(&self) -> bool {
+        self.check_filenames
+    }
+
+    fn check_files(&self) -> bool {
+        self.check_files
+    }
+
+    fn binary(&self) -> bool {
+        self.binary
+    }
 }

 #[derive(Debug, Clone)]
-pub struct Checks {
+pub struct Typos {
     check_filenames: bool,
     check_files: bool,
     binary: bool,
 }

-impl Checks {
-    pub fn check_filename(
+impl Check for Typos {
+    fn check_str(
         &self,
-        path: &std::path::Path,
+        buffer: &str,
         parser: &tokens::Parser,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<bool, crate::Error> {
-        if !self.check_filenames {
-            return Ok(false);
-        }
-
         let mut typos_found = false;
-        if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
-            for ident in parser.parse(file_name) {
-                match dictionary.correct_ident(ident) {
-                    Some(Status::Valid) => {}
-                    Some(corrections) => {
-                        let byte_offset = ident.offset();
-                        let msg = report::Typo {
-                            context: report::PathContext { path }.into(),
-                            buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
-                            byte_offset,
-                            typo: ident.token(),
-                            corrections,
-                        };
-                        typos_found |= reporter.report(msg.into());
-                    }
-                    None => {
-                        for word in ident.split() {
-                            match dictionary.correct_word(word) {
-                                Some(Status::Valid) => {}
-                                Some(corrections) => {
-                                    let byte_offset = word.offset();
-                                    let msg = report::Typo {
-                                        context: report::PathContext { path }.into(),
-                                        buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
-                                        byte_offset,
-                                        typo: word.token(),
-                                        corrections,
-                                    };
-                                    typos_found |= reporter.report(msg.into());
-                                }
-                                None => {}
-                            }
-                        }
-                    }
-                }
-            }
-        }
+        for ident in parser.parse_str(buffer) {
+            match dictionary.correct_ident(ident) {
+                Some(Status::Valid) => {}
+                Some(corrections) => {
+                    let byte_offset = ident.offset();
+                    let msg = report::Typo {
+                        context: report::Context::None,
+                        buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()),
+                        byte_offset,
+                        typo: ident.token(),
+                        corrections,
+                    };
+                    typos_found |= reporter.report(msg.into());
+                }
+                None => {
+                    for word in ident.split() {
+                        match dictionary.correct_word(word) {
+                            Some(Status::Valid) => {}
+                            Some(corrections) => {
+                                let byte_offset = word.offset();
+                                let msg = report::Typo {
+                                    context: report::Context::None,
+                                    buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()),
+                                    byte_offset,
+                                    typo: word.token(),
+                                    corrections,
+                                };
+                                typos_found |= reporter.report(msg.into());
+                            }
+                            None => {}
+                        }
+                    }
+                }
+            }
+        }
@@ -268,61 +341,45 @@ impl Checks
         Ok(typos_found)
     }

-    pub fn check_file(
+    fn check_bytes(
         &self,
-        path: &std::path::Path,
-        explicit: bool,
+        buffer: &[u8],
         parser: &tokens::Parser,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<bool, crate::Error> {
         let mut typos_found = false;
-        if !self.check_files {
-            return Ok(typos_found);
-        }
-
-        let buffer = read_file(path)?;
-        let (buffer, content_type) = massage_data(buffer)?;
-        if !explicit && !self.binary && content_type.is_binary() {
-            let msg = report::BinaryFile { path };
-            reporter.report(msg.into());
-            return Ok(typos_found);
-        }
-
-        for (line_idx, line) in buffer.lines().enumerate() {
-            let line_num = line_idx + 1;
-            for ident in parser.parse_bytes(line) {
-                match dictionary.correct_ident(ident) {
-                    Some(Status::Valid) => {}
-                    Some(corrections) => {
-                        let byte_offset = ident.offset();
-                        let msg = report::Typo {
-                            context: report::FileContext { path, line_num }.into(),
-                            buffer: std::borrow::Cow::Borrowed(line),
-                            byte_offset,
-                            typo: ident.token(),
-                            corrections,
-                        };
-                        typos_found |= reporter.report(msg.into());
-                    }
-                    None => {
-                        for word in ident.split() {
-                            match dictionary.correct_word(word) {
-                                Some(Status::Valid) => {}
-                                Some(corrections) => {
-                                    let byte_offset = word.offset();
-                                    let msg = report::Typo {
-                                        context: report::FileContext { path, line_num }.into(),
-                                        buffer: std::borrow::Cow::Borrowed(line),
-                                        byte_offset,
-                                        typo: word.token(),
-                                        corrections,
-                                    };
-                                    typos_found |= reporter.report(msg.into());
-                                }
-                                None => {}
-                            }
-                        }
-                    }
-                }
-            }
-        }
+        for ident in parser.parse_bytes(buffer) {
+            match dictionary.correct_ident(ident) {
+                Some(Status::Valid) => {}
+                Some(corrections) => {
+                    let byte_offset = ident.offset();
+                    let msg = report::Typo {
+                        context: report::Context::None,
+                        buffer: std::borrow::Cow::Borrowed(buffer),
+                        byte_offset,
+                        typo: ident.token(),
+                        corrections,
+                    };
+                    typos_found |= reporter.report(msg.into());
+                }
+                None => {
+                    for word in ident.split() {
+                        match dictionary.correct_word(word) {
+                            Some(Status::Valid) => {}
+                            Some(corrections) => {
+                                let byte_offset = word.offset();
+                                let msg = report::Typo {
+                                    context: report::Context::None,
+                                    buffer: std::borrow::Cow::Borrowed(buffer),
+                                    byte_offset,
+                                    typo: word.token(),
+                                    corrections,
+                                };
+                                typos_found |= reporter.report(msg.into());
+                            }
+                            None => {}
+                        }
+                    }
+                }
+            }
+        }
@@ -331,6 +388,18 @@ impl Checks
         Ok(typos_found)
     }
+
+    fn check_filenames(&self) -> bool {
+        self.check_filenames
+    }
+
+    fn check_files(&self) -> bool {
+        self.check_files
+    }
+
+    fn binary(&self) -> bool {
+        self.binary
+    }
 }

 fn read_file(path: &std::path::Path) -> Result<Vec<u8>, crate::Error> {


@@ -38,6 +38,20 @@ impl<'m> Message<'m>
             Message::Error(_) => true,
         }
     }
+
+    pub fn context(self, context: Context<'m>) -> Self {
+        match self {
+            Message::Typo(typo) => {
+                let typo = typo.context(context);
+                Message::Typo(typo)
+            }
+            Message::Parse(parse) => {
+                let parse = parse.context(context);
+                Message::Parse(parse)
+            }
+            _ => self,
+        }
+    }
 }

 #[derive(Clone, Debug, serde::Serialize, derive_more::Display, derive_setters::Setters)]


@@ -102,7 +102,7 @@ impl Parser
         ParserBuilder::default().build()
     }

-    pub fn parse<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
+    pub fn parse_str<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
         self.words_str
             .find_iter(content)
             .filter(move |m| self.accept(m.as_str().as_bytes()))
@@ -390,7 +390,7 @@ mod test
         let expected: Vec<Identifier> = vec![];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
         assert_eq!(expected, actual);
-        let actual: Vec<_> = parser.parse(input).collect();
+        let actual: Vec<_> = parser.parse_str(input).collect();
         assert_eq!(expected, actual);
     }

@@ -402,7 +402,7 @@ mod test
         let expected: Vec<Identifier> = vec![Identifier::new_unchecked("word", 0)];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
         assert_eq!(expected, actual);
-        let actual: Vec<_> = parser.parse(input).collect();
+        let actual: Vec<_> = parser.parse_str(input).collect();
         assert_eq!(expected, actual);
     }

@@ -417,7 +417,7 @@ mod test
         ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
         assert_eq!(expected, actual);
-        let actual: Vec<_> = parser.parse(input).collect();
+        let actual: Vec<_> = parser.parse_str(input).collect();
         assert_eq!(expected, actual);
     }

@@ -432,7 +432,7 @@ mod test
         ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
         assert_eq!(expected, actual);
-        let actual: Vec<_> = parser.parse(input).collect();
+        let actual: Vec<_> = parser.parse_str(input).collect();
         assert_eq!(expected, actual);
     }

@@ -447,7 +447,7 @@ mod test
         ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
         assert_eq!(expected, actual);
-        let actual: Vec<_> = parser.parse(input).collect();
+        let actual: Vec<_> = parser.parse_str(input).collect();
         assert_eq!(expected, actual);
     }

@@ -459,7 +459,7 @@ mod test
         let expected: Vec<Identifier> = vec![Identifier::new_unchecked("A_B", 0)];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
         assert_eq!(expected, actual);
-        let actual: Vec<_> = parser.parse(input).collect();
+        let actual: Vec<_> = parser.parse_str(input).collect();
         assert_eq!(expected, actual);
     }

@@ -474,7 +474,7 @@ mod test
         ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
         assert_eq!(expected, actual);
-        let actual: Vec<_> = parser.parse(input).collect();
+        let actual: Vec<_> = parser.parse_str(input).collect();
         assert_eq!(expected, actual);
     }

@@ -493,7 +493,7 @@ mod test
         ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
         assert_eq!(expected, actual);
-        let actual: Vec<_> = parser.parse(input).collect();
+        let actual: Vec<_> = parser.parse_str(input).collect();
         assert_eq!(expected, actual);
     }


@@ -1,96 +1,8 @@
 use std::sync::atomic;

-pub(crate) trait Checks: Send + Sync {
-    fn check_filename(
-        &self,
-        path: &std::path::Path,
-        parser: &typos::tokens::Parser,
-        dictionary: &dyn typos::Dictionary,
-        report: &dyn typos::report::Report,
-    ) -> Result<bool, typos::Error>;
-
-    fn check_file(
-        &self,
-        path: &std::path::Path,
-        explicit: bool,
-        parser: &typos::tokens::Parser,
-        dictionary: &dyn typos::Dictionary,
-        report: &dyn typos::report::Report,
-    ) -> Result<bool, typos::Error>;
-}
-
-impl<'p> Checks for typos::checks::ParseIdentifiers {
-    fn check_filename(
-        &self,
-        path: &std::path::Path,
-        parser: &typos::tokens::Parser,
-        _dictionary: &dyn typos::Dictionary,
-        report: &dyn typos::report::Report,
-    ) -> Result<bool, typos::Error> {
-        self.check_filename(path, parser, report)
-    }
-
-    fn check_file(
-        &self,
-        path: &std::path::Path,
-        explicit: bool,
-        parser: &typos::tokens::Parser,
-        _dictionary: &dyn typos::Dictionary,
-        report: &dyn typos::report::Report,
-    ) -> Result<bool, typos::Error> {
-        self.check_file(path, explicit, parser, report)
-    }
-}
-
-impl<'p> Checks for typos::checks::ParseWords {
-    fn check_filename(
-        &self,
-        path: &std::path::Path,
-        parser: &typos::tokens::Parser,
-        _dictionary: &dyn typos::Dictionary,
-        report: &dyn typos::report::Report,
-    ) -> Result<bool, typos::Error> {
-        self.check_filename(path, parser, report)
-    }
-
-    fn check_file(
-        &self,
-        path: &std::path::Path,
-        explicit: bool,
-        parser: &typos::tokens::Parser,
-        _dictionary: &dyn typos::Dictionary,
-        report: &dyn typos::report::Report,
-    ) -> Result<bool, typos::Error> {
-        self.check_file(path, explicit, parser, report)
-    }
-}
-
-impl<'d, 'p> Checks for typos::checks::Checks {
-    fn check_filename(
-        &self,
-        path: &std::path::Path,
-        parser: &typos::tokens::Parser,
-        dictionary: &dyn typos::Dictionary,
-        report: &dyn typos::report::Report,
-    ) -> Result<bool, typos::Error> {
-        self.check_filename(path, parser, dictionary, report)
-    }
-
-    fn check_file(
-        &self,
-        path: &std::path::Path,
-        explicit: bool,
-        parser: &typos::tokens::Parser,
-        dictionary: &dyn typos::Dictionary,
-        report: &dyn typos::report::Report,
-    ) -> Result<bool, typos::Error> {
-        self.check_file(path, explicit, parser, dictionary, report)
-    }
-}
-
 pub(crate) fn check_path(
     walk: ignore::Walk,
-    checks: &dyn Checks,
+    checks: &dyn typos::checks::Check,
     parser: &typos::tokens::Parser,
     dictionary: &dyn typos::Dictionary,
     reporter: &dyn typos::report::Report,
@@ -115,7 +27,7 @@ pub(crate) fn check_path(

 pub(crate) fn check_path_parallel(
     walk: ignore::WalkParallel,
-    checks: &dyn Checks,
+    checks: &dyn typos::checks::Check,
     parser: &typos::tokens::Parser,
     dictionary: &dyn typos::Dictionary,
     reporter: &dyn typos::report::Report,
@@ -143,7 +55,7 @@ pub(crate) fn check_path_parallel(

 fn check_entry(
     entry: Result<ignore::DirEntry, ignore::Error>,
-    checks: &dyn Checks,
+    checks: &dyn typos::checks::Check,
     parser: &typos::tokens::Parser,
     dictionary: &dyn typos::Dictionary,
     reporter: &dyn typos::report::Report,


@@ -128,14 +128,14 @@ fn run() -> Result<i32, anyhow::Error> {
             }
         } else {
             let (identifier_parser, word_parser, checks);
-            let selected_checks: &dyn checks::Checks = if args.identifiers {
+            let selected_checks: &dyn typos::checks::Check = if args.identifiers {
                 identifier_parser = settings.build_identifier_parser();
                 &identifier_parser
             } else if args.words {
                 word_parser = settings.build_word_parser();
                 &word_parser
             } else {
-                checks = settings.build_checks();
+                checks = settings.build_typos();
                 &checks
             };