Merge pull request #67 from epage/custom

Refactor in prep for custom file types
This commit is contained in:
Ed Page 2019-11-16 13:35:20 +00:00 committed by GitHub
commit 0767e52f77
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 138 additions and 121 deletions

View file

@ -87,8 +87,15 @@ fn bench_parse_ident(data: &str, b: &mut test::Bencher) {
sample_path.write_str(data).unwrap();
let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_identifier_parser(&parser);
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
let checks = typos::checks::TyposSettings::new().build_identifier_parser();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
typos::report::print_silent,
)
});
temp.close().unwrap();
}
@ -129,8 +136,15 @@ fn bench_parse_word(data: &str, b: &mut test::Bencher) {
sample_path.write_str(data).unwrap();
let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_word_parser(&parser);
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
let checks = typos::checks::TyposSettings::new().build_word_parser();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
typos::report::print_silent,
)
});
temp.close().unwrap();
}
@ -172,8 +186,16 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new();
let parser = typos::tokens::Parser::new();
let checks = typos::checks::TyposSettings::new().build_checks(&corrections, &parser);
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
let checks = typos::checks::TyposSettings::new().build_checks();
b.iter(|| {
checks.check_file(
sample_path.path(),
true,
&parser,
&corrections,
typos::report::print_silent,
)
});
temp.close().unwrap();
}

View file

@ -265,6 +265,8 @@ trait Checks {
fn check_filename(
&self,
path: &std::path::Path,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
report: typos::report::Report,
) -> Result<bool, typos::Error>;
@ -272,64 +274,78 @@ trait Checks {
&self,
path: &std::path::Path,
explicit: bool,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
report: typos::report::Report,
) -> Result<bool, typos::Error>;
}
impl<'p> Checks for typos::checks::ParseIdentifiers<'p> {
impl<'p> Checks for typos::checks::ParseIdentifiers {
fn check_filename(
&self,
path: &std::path::Path,
parser: &typos::tokens::Parser,
_dictionary: &dyn typos::Dictionary,
report: typos::report::Report,
) -> Result<bool, typos::Error> {
self.check_filename(path, report)
self.check_filename(path, parser, report)
}
fn check_file(
&self,
path: &std::path::Path,
explicit: bool,
parser: &typos::tokens::Parser,
_dictionary: &dyn typos::Dictionary,
report: typos::report::Report,
) -> Result<bool, typos::Error> {
self.check_file(path, explicit, report)
self.check_file(path, explicit, parser, report)
}
}
impl<'p> Checks for typos::checks::ParseWords<'p> {
impl<'p> Checks for typos::checks::ParseWords {
fn check_filename(
&self,
path: &std::path::Path,
parser: &typos::tokens::Parser,
_dictionary: &dyn typos::Dictionary,
report: typos::report::Report,
) -> Result<bool, typos::Error> {
self.check_filename(path, report)
self.check_filename(path, parser, report)
}
fn check_file(
&self,
path: &std::path::Path,
explicit: bool,
parser: &typos::tokens::Parser,
_dictionary: &dyn typos::Dictionary,
report: typos::report::Report,
) -> Result<bool, typos::Error> {
self.check_file(path, explicit, report)
self.check_file(path, explicit, parser, report)
}
}
impl<'d, 'p> Checks for typos::checks::Checks<'d, 'p> {
impl<'d, 'p> Checks for typos::checks::Checks {
fn check_filename(
&self,
path: &std::path::Path,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
report: typos::report::Report,
) -> Result<bool, typos::Error> {
self.check_filename(path, report)
self.check_filename(path, parser, dictionary, report)
}
fn check_file(
&self,
path: &std::path::Path,
explicit: bool,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
report: typos::report::Report,
) -> Result<bool, typos::Error> {
self.check_file(path, explicit, report)
self.check_file(path, explicit, parser, dictionary, report)
}
}
@ -356,20 +372,47 @@ fn init_logging(level: Option<log::Level>) {
}
}
fn check_path(
mut walk: ignore::Walk,
format: Format,
checks: &dyn Checks,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
) -> Result<(bool, bool), anyhow::Error> {
let mut typos_found = false;
let mut errors_found = false;
for entry in walk {
match check_entry(entry, format, checks, parser, dictionary) {
Ok(true) => typos_found = true,
Err(err) => {
let msg = typos::report::Error::new(err.to_string());
format.report()(msg.into());
errors_found = true
}
_ => (),
}
}
Ok((typos_found, errors_found))
}
fn check_entry(
entry: Result<ignore::DirEntry, ignore::Error>,
format: Format,
checks: &dyn Checks,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
) -> Result<bool, anyhow::Error> {
let mut typos_found = false;
let entry = entry?;
if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
let explicit = entry.depth() == 0;
if checks.check_filename(entry.path(), format.report())? {
if checks.check_filename(entry.path(), parser, dictionary, format.report())? {
typos_found = true;
}
if checks.check_file(entry.path(), explicit, format.report())? {
if checks.check_file(entry.path(), explicit, parser, dictionary, format.report())? {
typos_found = true;
}
}
@ -408,8 +451,6 @@ fn run() -> Result<i32, anyhow::Error> {
config.default.update(&args.overrides);
let config = config;
let dictionary = crate::dict::BuiltIn::new();
let parser = typos::tokens::ParserBuilder::new()
.ignore_hex(config.default.ignore_hex())
.leading_digits(config.default.identifier_leading_digits())
@ -418,6 +459,8 @@ fn run() -> Result<i32, anyhow::Error> {
.include_chars(config.default.identifier_include_chars().to_owned())
.build();
let dictionary = crate::dict::BuiltIn::new();
let mut settings = typos::checks::TyposSettings::new();
settings
.check_filenames(config.default.check_filename())
@ -446,43 +489,34 @@ fn run() -> Result<i32, anyhow::Error> {
}
}
} else if args.identifiers {
let checks = settings.build_identifier_parser(&parser);
for entry in walk.build() {
match check_entry(entry, args.format, &checks) {
Ok(true) => typos_found = true,
Err(err) => {
let msg = typos::report::Error::new(err.to_string());
args.format.report()(msg.into());
errors_found = true
}
_ => (),
}
let checks = settings.build_identifier_parser();
let (cur_typos, cur_errors) =
check_path(walk.build(), args.format, &checks, &parser, &dictionary)?;
if cur_typos {
typos_found = true;
}
if cur_errors {
errors_found = true;
}
} else if args.words {
let checks = settings.build_word_parser(&parser);
for entry in walk.build() {
match check_entry(entry, args.format, &checks) {
Ok(true) => typos_found = true,
Err(err) => {
let msg = typos::report::Error::new(err.to_string());
args.format.report()(msg.into());
errors_found = true
}
_ => (),
}
let checks = settings.build_word_parser();
let (cur_typos, cur_errors) =
check_path(walk.build(), args.format, &checks, &parser, &dictionary)?;
if cur_typos {
typos_found = true;
}
if cur_errors {
errors_found = true;
}
} else {
let checks = settings.build_checks(&dictionary, &parser);
for entry in walk.build() {
match check_entry(entry, args.format, &checks) {
Ok(true) => typos_found = true,
Err(err) => {
let msg = typos::report::Error::new(err.to_string());
args.format.report()(msg.into());
errors_found = true
}
_ => (),
}
let checks = settings.build_checks();
let (cur_typos, cur_errors) =
check_path(walk.build(), args.format, &checks, &parser, &dictionary)?;
if cur_typos {
typos_found = true;
}
if cur_errors {
errors_found = true;
}
}
}

View file

@ -31,32 +31,24 @@ impl TyposSettings {
self
}
pub fn build_checks<'d, 'p>(
&self,
dictionary: &'d dyn Dictionary,
parser: &'p tokens::Parser,
) -> Checks<'d, 'p> {
pub fn build_checks(&self) -> Checks {
Checks {
dictionary,
parser,
check_filenames: self.check_filenames,
check_files: self.check_files,
binary: self.binary,
}
}
pub fn build_identifier_parser<'p>(&self, parser: &'p tokens::Parser) -> ParseIdentifiers<'p> {
pub fn build_identifier_parser(&self) -> ParseIdentifiers {
ParseIdentifiers {
parser,
check_filenames: self.check_filenames,
check_files: self.check_files,
binary: self.binary,
}
}
pub fn build_word_parser<'p>(&self, parser: &'p tokens::Parser) -> ParseWords<'p> {
pub fn build_word_parser(&self) -> ParseWords {
ParseWords {
parser,
check_filenames: self.check_filenames,
check_files: self.check_files,
binary: self.binary,
@ -74,18 +66,18 @@ impl Default for TyposSettings {
}
}
#[derive(Clone)]
pub struct ParseIdentifiers<'p> {
parser: &'p tokens::Parser,
#[derive(Debug, Clone)]
pub struct ParseIdentifiers {
check_filenames: bool,
check_files: bool,
binary: bool,
}
impl<'p> ParseIdentifiers<'p> {
impl ParseIdentifiers {
pub fn check_filename(
&self,
path: &std::path::Path,
parser: &tokens::Parser,
report: report::Report,
) -> Result<bool, crate::Error> {
let typos_found = false;
@ -98,7 +90,7 @@ impl<'p> ParseIdentifiers<'p> {
let msg = report::Parse {
path,
kind: report::ParseKind::Identifier,
data: self.parser.parse(part).map(|i| i.token()).collect(),
data: parser.parse(part).map(|i| i.token()).collect(),
non_exhaustive: (),
};
report(msg.into());
@ -111,6 +103,7 @@ impl<'p> ParseIdentifiers<'p> {
&self,
path: &std::path::Path,
explicit: bool,
parser: &tokens::Parser,
report: report::Report,
) -> Result<bool, crate::Error> {
let typos_found = false;
@ -134,7 +127,7 @@ impl<'p> ParseIdentifiers<'p> {
let msg = report::Parse {
path,
kind: report::ParseKind::Identifier,
data: self.parser.parse_bytes(line).map(|i| i.token()).collect(),
data: parser.parse_bytes(line).map(|i| i.token()).collect(),
non_exhaustive: (),
};
report(msg.into());
@ -144,29 +137,18 @@ impl<'p> ParseIdentifiers<'p> {
}
}
impl std::fmt::Debug for ParseIdentifiers<'_> {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
fmt.debug_struct("Checks")
.field("parser", self.parser)
.field("check_filenames", &self.check_filenames)
.field("check_files", &self.check_files)
.field("binary", &self.binary)
.finish()
}
}
#[derive(Clone)]
pub struct ParseWords<'p> {
parser: &'p tokens::Parser,
#[derive(Debug, Clone)]
pub struct ParseWords {
check_filenames: bool,
check_files: bool,
binary: bool,
}
impl<'p> ParseWords<'p> {
impl ParseWords {
pub fn check_filename(
&self,
path: &std::path::Path,
parser: &tokens::Parser,
report: report::Report,
) -> Result<bool, crate::Error> {
let typos_found = false;
@ -179,8 +161,7 @@ impl<'p> ParseWords<'p> {
let msg = report::Parse {
path,
kind: report::ParseKind::Word,
data: self
.parser
data: parser
.parse(part)
.flat_map(|ident| ident.split().map(|i| i.token()))
.collect(),
@ -196,6 +177,7 @@ impl<'p> ParseWords<'p> {
&self,
path: &std::path::Path,
explicit: bool,
parser: &tokens::Parser,
report: report::Report,
) -> Result<bool, crate::Error> {
let typos_found = false;
@ -219,8 +201,7 @@ impl<'p> ParseWords<'p> {
let msg = report::Parse {
path,
kind: report::ParseKind::Word,
data: self
.parser
data: parser
.parse_bytes(line)
.flat_map(|ident| ident.split().map(|i| i.token()))
.collect(),
@ -233,30 +214,19 @@ impl<'p> ParseWords<'p> {
}
}
impl std::fmt::Debug for ParseWords<'_> {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
fmt.debug_struct("Checks")
.field("parser", self.parser)
.field("check_filenames", &self.check_filenames)
.field("check_files", &self.check_files)
.field("binary", &self.binary)
.finish()
}
}
#[derive(Clone)]
pub struct Checks<'d, 'p> {
dictionary: &'d dyn Dictionary,
parser: &'p tokens::Parser,
#[derive(Debug, Clone)]
pub struct Checks {
check_filenames: bool,
check_files: bool,
binary: bool,
}
impl<'d, 'p> Checks<'d, 'p> {
impl Checks {
pub fn check_filename(
&self,
path: &std::path::Path,
parser: &tokens::Parser,
dictionary: &dyn Dictionary,
report: report::Report,
) -> Result<bool, crate::Error> {
let mut typos_found = false;
@ -266,8 +236,8 @@ impl<'d, 'p> Checks<'d, 'p> {
}
for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
for ident in self.parser.parse(part) {
if let Some(correction) = self.dictionary.correct_ident(ident) {
for ident in parser.parse(part) {
if let Some(correction) = dictionary.correct_ident(ident) {
let msg = report::FilenameCorrection {
path,
typo: ident.token(),
@ -278,7 +248,7 @@ impl<'d, 'p> Checks<'d, 'p> {
typos_found = true;
} else {
for word in ident.split() {
if let Some(correction) = self.dictionary.correct_word(word) {
if let Some(correction) = dictionary.correct_word(word) {
let msg = report::FilenameCorrection {
path,
typo: word.token(),
@ -300,6 +270,8 @@ impl<'d, 'p> Checks<'d, 'p> {
&self,
path: &std::path::Path,
explicit: bool,
parser: &tokens::Parser,
dictionary: &dyn Dictionary,
report: report::Report,
) -> Result<bool, crate::Error> {
let mut typos_found = false;
@ -321,8 +293,8 @@ impl<'d, 'p> Checks<'d, 'p> {
for (line_idx, line) in buffer.lines().enumerate() {
let line_num = line_idx + 1;
for ident in self.parser.parse_bytes(line) {
if let Some(correction) = self.dictionary.correct_ident(ident) {
for ident in parser.parse_bytes(line) {
if let Some(correction) = dictionary.correct_ident(ident) {
let col_num = ident.offset();
let msg = report::Correction {
path,
@ -337,7 +309,7 @@ impl<'d, 'p> Checks<'d, 'p> {
report(msg.into());
} else {
for word in ident.split() {
if let Some(correction) = self.dictionary.correct_word(word) {
if let Some(correction) = dictionary.correct_word(word) {
let col_num = word.offset();
let msg = report::Correction {
path,
@ -360,17 +332,6 @@ impl<'d, 'p> Checks<'d, 'p> {
}
}
impl std::fmt::Debug for Checks<'_, '_> {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
fmt.debug_struct("Checks")
.field("parser", self.parser)
.field("check_filenames", &self.check_filenames)
.field("check_files", &self.check_files)
.field("binary", &self.binary)
.finish()
}
}
fn is_binary(buffer: &[u8]) -> bool {
let null_max = std::cmp::min(buffer.len(), 1024);
buffer[0..null_max].find_byte(b'\0').is_some()