Merge pull request #67 from epage/custom

Refactor in prep for custom file types
This commit is contained in:
Ed Page 2019-11-16 13:35:20 +00:00 committed by GitHub
commit 0767e52f77
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 138 additions and 121 deletions

View file

@ -87,8 +87,15 @@ fn bench_parse_ident(data: &str, b: &mut test::Bencher) {
sample_path.write_str(data).unwrap(); sample_path.write_str(data).unwrap();
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_identifier_parser(&parser); let checks = typos::checks::TyposSettings::new().build_identifier_parser();
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
typos::report::print_silent,
)
});
temp.close().unwrap(); temp.close().unwrap();
} }
@ -129,8 +136,15 @@ fn bench_parse_word(data: &str, b: &mut test::Bencher) {
sample_path.write_str(data).unwrap(); sample_path.write_str(data).unwrap();
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_word_parser(&parser); let checks = typos::checks::TyposSettings::new().build_word_parser();
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
typos::report::print_silent,
)
});
temp.close().unwrap(); temp.close().unwrap();
} }
@ -172,8 +186,16 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(); let corrections = typos_cli::dict::BuiltIn::new();
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_checks(&corrections, &parser); let checks = typos::checks::TyposSettings::new().build_checks();
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&corrections,
typos::report::print_silent,
)
});
temp.close().unwrap(); temp.close().unwrap();
} }

View file

@ -265,6 +265,8 @@ trait Checks {
fn check_filename( fn check_filename(
&self, &self,
path: &std::path::Path, path: &std::path::Path,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
report: typos::report::Report, report: typos::report::Report,
) -> Result<bool, typos::Error>; ) -> Result<bool, typos::Error>;
@ -272,64 +274,78 @@ trait Checks {
&self, &self,
path: &std::path::Path, path: &std::path::Path,
explicit: bool, explicit: bool,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
report: typos::report::Report, report: typos::report::Report,
) -> Result<bool, typos::Error>; ) -> Result<bool, typos::Error>;
} }
impl<'p> Checks for typos::checks::ParseIdentifiers<'p> { impl<'p> Checks for typos::checks::ParseIdentifiers {
fn check_filename( fn check_filename(
&self, &self,
path: &std::path::Path, path: &std::path::Path,
parser: &typos::tokens::Parser,
_dictionary: &dyn typos::Dictionary,
report: typos::report::Report, report: typos::report::Report,
) -> Result<bool, typos::Error> { ) -> Result<bool, typos::Error> {
self.check_filename(path, report) self.check_filename(path, parser, report)
} }
fn check_file( fn check_file(
&self, &self,
path: &std::path::Path, path: &std::path::Path,
explicit: bool, explicit: bool,
parser: &typos::tokens::Parser,
_dictionary: &dyn typos::Dictionary,
report: typos::report::Report, report: typos::report::Report,
) -> Result<bool, typos::Error> { ) -> Result<bool, typos::Error> {
self.check_file(path, explicit, report) self.check_file(path, explicit, parser, report)
} }
} }
impl<'p> Checks for typos::checks::ParseWords<'p> { impl<'p> Checks for typos::checks::ParseWords {
fn check_filename( fn check_filename(
&self, &self,
path: &std::path::Path, path: &std::path::Path,
parser: &typos::tokens::Parser,
_dictionary: &dyn typos::Dictionary,
report: typos::report::Report, report: typos::report::Report,
) -> Result<bool, typos::Error> { ) -> Result<bool, typos::Error> {
self.check_filename(path, report) self.check_filename(path, parser, report)
} }
fn check_file( fn check_file(
&self, &self,
path: &std::path::Path, path: &std::path::Path,
explicit: bool, explicit: bool,
parser: &typos::tokens::Parser,
_dictionary: &dyn typos::Dictionary,
report: typos::report::Report, report: typos::report::Report,
) -> Result<bool, typos::Error> { ) -> Result<bool, typos::Error> {
self.check_file(path, explicit, report) self.check_file(path, explicit, parser, report)
} }
} }
impl<'d, 'p> Checks for typos::checks::Checks<'d, 'p> { impl<'d, 'p> Checks for typos::checks::Checks {
fn check_filename( fn check_filename(
&self, &self,
path: &std::path::Path, path: &std::path::Path,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
report: typos::report::Report, report: typos::report::Report,
) -> Result<bool, typos::Error> { ) -> Result<bool, typos::Error> {
self.check_filename(path, report) self.check_filename(path, parser, dictionary, report)
} }
fn check_file( fn check_file(
&self, &self,
path: &std::path::Path, path: &std::path::Path,
explicit: bool, explicit: bool,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
report: typos::report::Report, report: typos::report::Report,
) -> Result<bool, typos::Error> { ) -> Result<bool, typos::Error> {
self.check_file(path, explicit, report) self.check_file(path, explicit, parser, dictionary, report)
} }
} }
@ -356,20 +372,47 @@ fn init_logging(level: Option<log::Level>) {
} }
} }
/// Checks every entry yielded by `walk` and summarizes the outcome.
///
/// Each entry is handed to `check_entry` together with `format`, `checks`,
/// `parser` and `dictionary`. A failing entry does not stop the walk: its
/// error is converted to a `typos::report::Error`, emitted through
/// `format.report()`, and the remaining entries are still processed.
///
/// # Returns
///
/// `Ok((typos_found, errors_found))` where
/// * `typos_found` is `true` once any entry came back as `Ok(true)`, and
/// * `errors_found` is `true` once any entry came back as `Err(_)`.
///
/// The body never produces an `Err` itself; the `Result` wrapper is part of
/// the signature callers already use with `?`.
fn check_path(
    walk: ignore::Walk,
    format: Format,
    checks: &dyn Checks,
    parser: &typos::tokens::Parser,
    dictionary: &dyn typos::Dictionary,
) -> Result<(bool, bool), anyhow::Error> {
    let mut typos_found = false;
    let mut errors_found = false;

    // `walk` is consumed by the loop, so the binding does not need `mut`.
    for entry in walk {
        match check_entry(entry, format, checks, parser, dictionary) {
            Ok(true) => typos_found = true,
            // Entry processed without finding anything: nothing to record.
            Ok(false) => {}
            Err(err) => {
                // Report the failure and keep going with the next entry.
                let msg = typos::report::Error::new(err.to_string());
                format.report()(msg.into());
                errors_found = true;
            }
        }
    }

    Ok((typos_found, errors_found))
}
fn check_entry( fn check_entry(
entry: Result<ignore::DirEntry, ignore::Error>, entry: Result<ignore::DirEntry, ignore::Error>,
format: Format, format: Format,
checks: &dyn Checks, checks: &dyn Checks,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
) -> Result<bool, anyhow::Error> { ) -> Result<bool, anyhow::Error> {
let mut typos_found = false; let mut typos_found = false;
let entry = entry?; let entry = entry?;
if entry.file_type().map(|t| t.is_file()).unwrap_or(true) { if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
let explicit = entry.depth() == 0; let explicit = entry.depth() == 0;
if checks.check_filename(entry.path(), format.report())? { if checks.check_filename(entry.path(), parser, dictionary, format.report())? {
typos_found = true; typos_found = true;
} }
if checks.check_file(entry.path(), explicit, format.report())? { if checks.check_file(entry.path(), explicit, parser, dictionary, format.report())? {
typos_found = true; typos_found = true;
} }
} }
@ -408,8 +451,6 @@ fn run() -> Result<i32, anyhow::Error> {
config.default.update(&args.overrides); config.default.update(&args.overrides);
let config = config; let config = config;
let dictionary = crate::dict::BuiltIn::new();
let parser = typos::tokens::ParserBuilder::new() let parser = typos::tokens::ParserBuilder::new()
.ignore_hex(config.default.ignore_hex()) .ignore_hex(config.default.ignore_hex())
.leading_digits(config.default.identifier_leading_digits()) .leading_digits(config.default.identifier_leading_digits())
@ -418,6 +459,8 @@ fn run() -> Result<i32, anyhow::Error> {
.include_chars(config.default.identifier_include_chars().to_owned()) .include_chars(config.default.identifier_include_chars().to_owned())
.build(); .build();
let dictionary = crate::dict::BuiltIn::new();
let mut settings = typos::checks::TyposSettings::new(); let mut settings = typos::checks::TyposSettings::new();
settings settings
.check_filenames(config.default.check_filename()) .check_filenames(config.default.check_filename())
@ -446,43 +489,34 @@ fn run() -> Result<i32, anyhow::Error> {
} }
} }
} else if args.identifiers { } else if args.identifiers {
let checks = settings.build_identifier_parser(&parser); let checks = settings.build_identifier_parser();
for entry in walk.build() { let (cur_typos, cur_errors) =
match check_entry(entry, args.format, &checks) { check_path(walk.build(), args.format, &checks, &parser, &dictionary)?;
Ok(true) => typos_found = true, if cur_typos {
Err(err) => { typos_found = true;
let msg = typos::report::Error::new(err.to_string());
args.format.report()(msg.into());
errors_found = true
}
_ => (),
} }
if cur_errors {
errors_found = true;
} }
} else if args.words { } else if args.words {
let checks = settings.build_word_parser(&parser); let checks = settings.build_word_parser();
for entry in walk.build() { let (cur_typos, cur_errors) =
match check_entry(entry, args.format, &checks) { check_path(walk.build(), args.format, &checks, &parser, &dictionary)?;
Ok(true) => typos_found = true, if cur_typos {
Err(err) => { typos_found = true;
let msg = typos::report::Error::new(err.to_string());
args.format.report()(msg.into());
errors_found = true
}
_ => (),
} }
if cur_errors {
errors_found = true;
} }
} else { } else {
let checks = settings.build_checks(&dictionary, &parser); let checks = settings.build_checks();
for entry in walk.build() { let (cur_typos, cur_errors) =
match check_entry(entry, args.format, &checks) { check_path(walk.build(), args.format, &checks, &parser, &dictionary)?;
Ok(true) => typos_found = true, if cur_typos {
Err(err) => { typos_found = true;
let msg = typos::report::Error::new(err.to_string());
args.format.report()(msg.into());
errors_found = true
}
_ => (),
} }
if cur_errors {
errors_found = true;
} }
} }
} }

View file

@ -31,32 +31,24 @@ impl TyposSettings {
self self
} }
pub fn build_checks<'d, 'p>( pub fn build_checks(&self) -> Checks {
&self,
dictionary: &'d dyn Dictionary,
parser: &'p tokens::Parser,
) -> Checks<'d, 'p> {
Checks { Checks {
dictionary,
parser,
check_filenames: self.check_filenames, check_filenames: self.check_filenames,
check_files: self.check_files, check_files: self.check_files,
binary: self.binary, binary: self.binary,
} }
} }
pub fn build_identifier_parser<'p>(&self, parser: &'p tokens::Parser) -> ParseIdentifiers<'p> { pub fn build_identifier_parser(&self) -> ParseIdentifiers {
ParseIdentifiers { ParseIdentifiers {
parser,
check_filenames: self.check_filenames, check_filenames: self.check_filenames,
check_files: self.check_files, check_files: self.check_files,
binary: self.binary, binary: self.binary,
} }
} }
pub fn build_word_parser<'p>(&self, parser: &'p tokens::Parser) -> ParseWords<'p> { pub fn build_word_parser(&self) -> ParseWords {
ParseWords { ParseWords {
parser,
check_filenames: self.check_filenames, check_filenames: self.check_filenames,
check_files: self.check_files, check_files: self.check_files,
binary: self.binary, binary: self.binary,
@ -74,18 +66,18 @@ impl Default for TyposSettings {
} }
} }
#[derive(Clone)] #[derive(Debug, Clone)]
pub struct ParseIdentifiers<'p> { pub struct ParseIdentifiers {
parser: &'p tokens::Parser,
check_filenames: bool, check_filenames: bool,
check_files: bool, check_files: bool,
binary: bool, binary: bool,
} }
impl<'p> ParseIdentifiers<'p> { impl ParseIdentifiers {
pub fn check_filename( pub fn check_filename(
&self, &self,
path: &std::path::Path, path: &std::path::Path,
parser: &tokens::Parser,
report: report::Report, report: report::Report,
) -> Result<bool, crate::Error> { ) -> Result<bool, crate::Error> {
let typos_found = false; let typos_found = false;
@ -98,7 +90,7 @@ impl<'p> ParseIdentifiers<'p> {
let msg = report::Parse { let msg = report::Parse {
path, path,
kind: report::ParseKind::Identifier, kind: report::ParseKind::Identifier,
data: self.parser.parse(part).map(|i| i.token()).collect(), data: parser.parse(part).map(|i| i.token()).collect(),
non_exhaustive: (), non_exhaustive: (),
}; };
report(msg.into()); report(msg.into());
@ -111,6 +103,7 @@ impl<'p> ParseIdentifiers<'p> {
&self, &self,
path: &std::path::Path, path: &std::path::Path,
explicit: bool, explicit: bool,
parser: &tokens::Parser,
report: report::Report, report: report::Report,
) -> Result<bool, crate::Error> { ) -> Result<bool, crate::Error> {
let typos_found = false; let typos_found = false;
@ -134,7 +127,7 @@ impl<'p> ParseIdentifiers<'p> {
let msg = report::Parse { let msg = report::Parse {
path, path,
kind: report::ParseKind::Identifier, kind: report::ParseKind::Identifier,
data: self.parser.parse_bytes(line).map(|i| i.token()).collect(), data: parser.parse_bytes(line).map(|i| i.token()).collect(),
non_exhaustive: (), non_exhaustive: (),
}; };
report(msg.into()); report(msg.into());
@ -144,29 +137,18 @@ impl<'p> ParseIdentifiers<'p> {
} }
} }
impl std::fmt::Debug for ParseIdentifiers<'_> { #[derive(Debug, Clone)]
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { pub struct ParseWords {
fmt.debug_struct("Checks")
.field("parser", self.parser)
.field("check_filenames", &self.check_filenames)
.field("check_files", &self.check_files)
.field("binary", &self.binary)
.finish()
}
}
#[derive(Clone)]
pub struct ParseWords<'p> {
parser: &'p tokens::Parser,
check_filenames: bool, check_filenames: bool,
check_files: bool, check_files: bool,
binary: bool, binary: bool,
} }
impl<'p> ParseWords<'p> { impl ParseWords {
pub fn check_filename( pub fn check_filename(
&self, &self,
path: &std::path::Path, path: &std::path::Path,
parser: &tokens::Parser,
report: report::Report, report: report::Report,
) -> Result<bool, crate::Error> { ) -> Result<bool, crate::Error> {
let typos_found = false; let typos_found = false;
@ -179,8 +161,7 @@ impl<'p> ParseWords<'p> {
let msg = report::Parse { let msg = report::Parse {
path, path,
kind: report::ParseKind::Word, kind: report::ParseKind::Word,
data: self data: parser
.parser
.parse(part) .parse(part)
.flat_map(|ident| ident.split().map(|i| i.token())) .flat_map(|ident| ident.split().map(|i| i.token()))
.collect(), .collect(),
@ -196,6 +177,7 @@ impl<'p> ParseWords<'p> {
&self, &self,
path: &std::path::Path, path: &std::path::Path,
explicit: bool, explicit: bool,
parser: &tokens::Parser,
report: report::Report, report: report::Report,
) -> Result<bool, crate::Error> { ) -> Result<bool, crate::Error> {
let typos_found = false; let typos_found = false;
@ -219,8 +201,7 @@ impl<'p> ParseWords<'p> {
let msg = report::Parse { let msg = report::Parse {
path, path,
kind: report::ParseKind::Word, kind: report::ParseKind::Word,
data: self data: parser
.parser
.parse_bytes(line) .parse_bytes(line)
.flat_map(|ident| ident.split().map(|i| i.token())) .flat_map(|ident| ident.split().map(|i| i.token()))
.collect(), .collect(),
@ -233,30 +214,19 @@ impl<'p> ParseWords<'p> {
} }
} }
impl std::fmt::Debug for ParseWords<'_> { #[derive(Debug, Clone)]
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { pub struct Checks {
fmt.debug_struct("Checks")
.field("parser", self.parser)
.field("check_filenames", &self.check_filenames)
.field("check_files", &self.check_files)
.field("binary", &self.binary)
.finish()
}
}
#[derive(Clone)]
pub struct Checks<'d, 'p> {
dictionary: &'d dyn Dictionary,
parser: &'p tokens::Parser,
check_filenames: bool, check_filenames: bool,
check_files: bool, check_files: bool,
binary: bool, binary: bool,
} }
impl<'d, 'p> Checks<'d, 'p> { impl Checks {
pub fn check_filename( pub fn check_filename(
&self, &self,
path: &std::path::Path, path: &std::path::Path,
parser: &tokens::Parser,
dictionary: &dyn Dictionary,
report: report::Report, report: report::Report,
) -> Result<bool, crate::Error> { ) -> Result<bool, crate::Error> {
let mut typos_found = false; let mut typos_found = false;
@ -266,8 +236,8 @@ impl<'d, 'p> Checks<'d, 'p> {
} }
for part in path.components().filter_map(|c| c.as_os_str().to_str()) { for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
for ident in self.parser.parse(part) { for ident in parser.parse(part) {
if let Some(correction) = self.dictionary.correct_ident(ident) { if let Some(correction) = dictionary.correct_ident(ident) {
let msg = report::FilenameCorrection { let msg = report::FilenameCorrection {
path, path,
typo: ident.token(), typo: ident.token(),
@ -278,7 +248,7 @@ impl<'d, 'p> Checks<'d, 'p> {
typos_found = true; typos_found = true;
} else { } else {
for word in ident.split() { for word in ident.split() {
if let Some(correction) = self.dictionary.correct_word(word) { if let Some(correction) = dictionary.correct_word(word) {
let msg = report::FilenameCorrection { let msg = report::FilenameCorrection {
path, path,
typo: word.token(), typo: word.token(),
@ -300,6 +270,8 @@ impl<'d, 'p> Checks<'d, 'p> {
&self, &self,
path: &std::path::Path, path: &std::path::Path,
explicit: bool, explicit: bool,
parser: &tokens::Parser,
dictionary: &dyn Dictionary,
report: report::Report, report: report::Report,
) -> Result<bool, crate::Error> { ) -> Result<bool, crate::Error> {
let mut typos_found = false; let mut typos_found = false;
@ -321,8 +293,8 @@ impl<'d, 'p> Checks<'d, 'p> {
for (line_idx, line) in buffer.lines().enumerate() { for (line_idx, line) in buffer.lines().enumerate() {
let line_num = line_idx + 1; let line_num = line_idx + 1;
for ident in self.parser.parse_bytes(line) { for ident in parser.parse_bytes(line) {
if let Some(correction) = self.dictionary.correct_ident(ident) { if let Some(correction) = dictionary.correct_ident(ident) {
let col_num = ident.offset(); let col_num = ident.offset();
let msg = report::Correction { let msg = report::Correction {
path, path,
@ -337,7 +309,7 @@ impl<'d, 'p> Checks<'d, 'p> {
report(msg.into()); report(msg.into());
} else { } else {
for word in ident.split() { for word in ident.split() {
if let Some(correction) = self.dictionary.correct_word(word) { if let Some(correction) = dictionary.correct_word(word) {
let col_num = word.offset(); let col_num = word.offset();
let msg = report::Correction { let msg = report::Correction {
path, path,
@ -360,17 +332,6 @@ impl<'d, 'p> Checks<'d, 'p> {
} }
} }
// Hand-written `Debug` implementation for `Checks<'_, '_>`.
// It prints under the struct name "Checks" and emits exactly four fields:
// `parser`, `check_filenames`, `check_files` and `binary`. No other field
// is written to the formatter here.
impl std::fmt::Debug for Checks<'_, '_> {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
fmt.debug_struct("Checks")
// `self.parser` is passed as-is (no `&`), unlike the three fields below.
.field("parser", self.parser)
.field("check_filenames", &self.check_filenames)
.field("check_files", &self.check_files)
.field("binary", &self.binary)
// `finish` closes the struct output and yields the formatter result.
.finish()
}
}
fn is_binary(buffer: &[u8]) -> bool { fn is_binary(buffer: &[u8]) -> bool {
let null_max = std::cmp::min(buffer.len(), 1024); let null_max = std::cmp::min(buffer.len(), 1024);
buffer[0..null_max].find_byte(b'\0').is_some() buffer[0..null_max].find_byte(b'\0').is_some()