// Patch overview (preamble text placed before the first `diff --git` header).
//
// This patch replaces the free function `typos::process_file`, which took the
// dictionary, parser and three boolean flags on every call, with a small
// builder API in a new module `src/checks.rs`.
//
// src/checks.rs (new file)
//   * `CheckSettings` holds three flags: `check_filenames`, `check_files` and
//     `binary`. Its `Default` impl sets them to `true`, `true` and `false`;
//     `new()` simply returns `Default::default()`. The setters take `&mut self`
//     and return `&mut Self` so calls can be chained.
//   * `CheckSettings::build` copies the flags into a `Checks<'d, 'p>` that
//     borrows a `Dictionary` (lifetime `'d`) and a `tokens::Parser` (`'p`).
//   * `Checks::check_filename` returns `Ok(false)` immediately when
//     `check_filenames` is off. Otherwise it parses every path component that
//     converts to `&str`, and reports a `FilenameCorrection` for each
//     identifier, and each word from `ident.split()`, that the dictionary
//     corrects.
//   * `Checks::check_file` returns `Ok(false)` immediately when `check_files`
//     is off. Otherwise it reads the whole file into memory. When `binary` is
//     false and the buffer contains a NUL byte, it reports `BinaryFile` and
//     returns without scanning. Otherwise it reports a `Correction` per
//     corrected identifier/word, with `line_num` being the zero-based line
//     index plus one and `col_num` taken from `offset()`.
//   * Both methods return whether at least one typo was reported.
//
// src/lib.rs
//   * Adds `pub mod checks;` and deletes `process_file` together with the
//     `File`, `Read` and `ByteSlice` imports that only it used.
//
// benches/file.rs
//   * Each benchmark now builds `Checks` once, outside `b.iter`, and times only
//     `check_file`. The removed call passed `true` for `check_filenames`, so
//     filename checking was part of the old measurement and is not part of the
//     new one.
//
// src/main.rs
//   * `Checks` is built once before walking the paths. Per entry,
//     `check_filename(...)?` runs first and `check_file(...)?` second, so an
//     error from the first propagates before the second is attempted.
//   * `options.format.report()` is now evaluated twice per entry (once per
//     call) instead of once.
//
// NOTE(review): several signatures below read `-> Result` with no type
// parameters. That is presumably an artifact of how this text was extracted;
// confirm the real return types against the repository before relying on it.
// NOTE(review): the hunks below are collapsed onto a few long lines, so
// this text is not appliable as a patch in its current form.
diff --git a/benches/file.rs b/benches/file.rs index 0cfea12..d25c7d1 100644 --- a/benches/file.rs +++ b/benches/file.rs @@ -14,17 +14,8 @@ fn process_empty(b: &mut test::Bencher) { let corrections = typos::Dictionary::new(); let parser = typos::tokens::Parser::new(); - b.iter(|| { - typos::process_file( - sample_path.path(), - &corrections, - true, - true, - &parser, - false, - typos::report::print_silent, - ) - }); + let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); + b.iter(|| checks.check_file(sample_path.path(), typos::report::print_silent)); temp.close().unwrap(); } @@ -37,17 +28,8 @@ fn process_no_tokens(b: &mut test::Bencher) { let corrections = typos::Dictionary::new(); let parser = typos::tokens::Parser::new(); - b.iter(|| { - typos::process_file( - sample_path.path(), - &corrections, - true, - true, - &parser, - false, - typos::report::print_silent, - ) - }); + let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); + b.iter(|| checks.check_file(sample_path.path(), typos::report::print_silent)); temp.close().unwrap(); } @@ -60,17 +42,8 @@ fn process_single_token(b: &mut test::Bencher) { let corrections = typos::Dictionary::new(); let parser = typos::tokens::Parser::new(); - b.iter(|| { - typos::process_file( - sample_path.path(), - &corrections, - true, - true, - &parser, - false, - typos::report::print_silent, - ) - }); + let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); + b.iter(|| checks.check_file(sample_path.path(), typos::report::print_silent)); temp.close().unwrap(); } @@ -83,17 +56,8 @@ fn process_sherlock(b: &mut test::Bencher) { let corrections = typos::Dictionary::new(); let parser = typos::tokens::Parser::new(); - b.iter(|| { - typos::process_file( - sample_path.path(), - &corrections, - true, - true, - &parser, - false, - typos::report::print_silent, - ) - }); + let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); + b.iter(|| 
checks.check_file(sample_path.path(), typos::report::print_silent)); temp.close().unwrap(); } @@ -106,17 +70,8 @@ fn process_code(b: &mut test::Bencher) { let corrections = typos::Dictionary::new(); let parser = typos::tokens::Parser::new(); - b.iter(|| { - typos::process_file( - sample_path.path(), - &corrections, - true, - true, - &parser, - false, - typos::report::print_silent, - ) - }); + let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); + b.iter(|| checks.check_file(sample_path.path(), typos::report::print_silent)); temp.close().unwrap(); } @@ -129,17 +84,8 @@ fn process_corpus(b: &mut test::Bencher) { let corrections = typos::Dictionary::new(); let parser = typos::tokens::Parser::new(); - b.iter(|| { - typos::process_file( - sample_path.path(), - &corrections, - true, - true, - &parser, - false, - typos::report::print_silent, - ) - }); + let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); + b.iter(|| checks.check_file(sample_path.path(), typos::report::print_silent)); temp.close().unwrap(); } diff --git a/src/checks.rs b/src/checks.rs new file mode 100644 index 0000000..308d724 --- /dev/null +++ b/src/checks.rs @@ -0,0 +1,171 @@ +use std::fs::File; +use std::io::Read; + +use bstr::ByteSlice; + +use crate::report; +use crate::tokens; +use crate::Dictionary; + +pub struct CheckSettings { + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl CheckSettings { + pub fn new() -> Self { + Default::default() + } + + pub fn check_filenames(&mut self, yes: bool) -> &mut Self { + self.check_filenames = yes; + self + } + + pub fn check_files(&mut self, yes: bool) -> &mut Self { + self.check_files = yes; + self + } + + pub fn binary(&mut self, yes: bool) -> &mut Self { + self.binary = yes; + self + } + + pub fn build<'d, 'p>( + &self, + dictionary: &'d Dictionary, + parser: &'p tokens::Parser, + ) -> Checks<'d, 'p> { + Checks { + dictionary, + parser, + check_filenames: self.check_filenames, + 
check_files: self.check_files, + binary: self.binary, + } + } +} + +impl Default for CheckSettings { + fn default() -> Self { + Self { + check_filenames: true, + check_files: true, + binary: false, + } + } +} + +pub struct Checks<'d, 'p> { + dictionary: &'d Dictionary, + parser: &'p tokens::Parser, + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl<'d, 'p> Checks<'d, 'p> { + pub fn check_filename( + &self, + path: &std::path::Path, + report: report::Report, + ) -> Result { + let mut typos_found = false; + + if !self.check_filenames { + return Ok(typos_found); + } + + for part in path.components().filter_map(|c| c.as_os_str().to_str()) { + for ident in self.parser.parse(part) { + if let Some(correction) = self.dictionary.correct_ident(ident) { + let msg = report::FilenameCorrection { + path, + typo: ident.token(), + correction, + non_exhaustive: (), + }; + report(msg.into()); + typos_found = true; + } + for word in ident.split() { + if let Some(correction) = self.dictionary.correct_word(word) { + let msg = report::FilenameCorrection { + path, + typo: word.token(), + correction, + non_exhaustive: (), + }; + report(msg.into()); + typos_found = true; + } + } + } + } + + Ok(typos_found) + } + + pub fn check_file( + &self, + path: &std::path::Path, + report: report::Report, + ) -> Result { + let mut typos_found = false; + + if !self.check_files { + return Ok(typos_found); + } + + let mut buffer = Vec::new(); + File::open(path)?.read_to_end(&mut buffer)?; + if !self.binary && buffer.find_byte(b'\0').is_some() { + let msg = report::BinaryFile { + path, + non_exhaustive: (), + }; + report(msg.into()); + return Ok(typos_found); + } + + for (line_idx, line) in buffer.lines().enumerate() { + let line_num = line_idx + 1; + for ident in self.parser.parse_bytes(line) { + if let Some(correction) = self.dictionary.correct_ident(ident) { + let col_num = ident.offset(); + let msg = report::Correction { + path, + line, + line_num, + col_num, + typo: 
ident.token(), + correction, + non_exhaustive: (), + }; + typos_found = true; + report(msg.into()); + } + for word in ident.split() { + if let Some(correction) = self.dictionary.correct_word(word) { + let col_num = word.offset(); + let msg = report::Correction { + path, + line, + line_num, + col_num, + typo: word.token(), + correction, + non_exhaustive: (), + }; + typos_found = true; + report(msg.into()); + } + } + } + } + + Ok(typos_found) + } +} diff --git a/src/lib.rs b/src/lib.rs index fb61469..b5201df 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,104 +4,8 @@ extern crate serde_derive; mod dict; mod dict_codegen; +pub mod checks; pub mod report; pub mod tokens; pub use crate::dict::*; - -use std::fs::File; -use std::io::Read; - -use bstr::ByteSlice; - -pub fn process_file( - path: &std::path::Path, - dictionary: &Dictionary, - check_filenames: bool, - check_files: bool, - parser: &tokens::Parser, - binary: bool, - report: report::Report, -) -> Result { - let mut typos_found = false; - - if check_filenames { - for part in path.components().filter_map(|c| c.as_os_str().to_str()) { - for ident in parser.parse(part) { - if let Some(correction) = dictionary.correct_ident(ident) { - let msg = report::FilenameCorrection { - path, - typo: ident.token(), - correction, - non_exhaustive: (), - }; - report(msg.into()); - typos_found = true; - } - for word in ident.split() { - if let Some(correction) = dictionary.correct_word(word) { - let msg = report::FilenameCorrection { - path, - typo: word.token(), - correction, - non_exhaustive: (), - }; - report(msg.into()); - typos_found = true; - } - } - } - } - } - - if check_files { - let mut buffer = Vec::new(); - File::open(path)?.read_to_end(&mut buffer)?; - if !binary && buffer.find_byte(b'\0').is_some() { - let msg = report::BinaryFile { - path, - non_exhaustive: (), - }; - report(msg.into()); - return Ok(typos_found); - } - - for (line_idx, line) in buffer.lines().enumerate() { - let line_num = line_idx + 1; - for ident in 
parser.parse_bytes(line) { - if let Some(correction) = dictionary.correct_ident(ident) { - let col_num = ident.offset(); - let msg = report::Correction { - path, - line, - line_num, - col_num, - typo: ident.token(), - correction, - non_exhaustive: (), - }; - typos_found = true; - report(msg.into()); - } - for word in ident.split() { - if let Some(correction) = dictionary.correct_word(word) { - let col_num = word.offset(); - let msg = report::Correction { - path, - line, - line_num, - col_num, - typo: word.token(), - correction, - non_exhaustive: (), - }; - typos_found = true; - report(msg.into()); - } - } - } - } - } - - Ok(typos_found) -} diff --git a/src/main.rs b/src/main.rs index a512db9..efe688e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -255,16 +255,23 @@ fn run() -> Result { let mut builder = get_logging(options.verbose.log_level()); builder.init(); - let dictionary = typos::Dictionary::new(); let check_filenames = options.check_filenames().unwrap_or(true); let check_files = options.check_files().unwrap_or(true); let ignore_hex = options.ignore_hex().unwrap_or(true); let binary = options.binary().unwrap_or(false); + let dictionary = typos::Dictionary::new(); + let parser = typos::tokens::ParserBuilder::new() .ignore_hex(ignore_hex) .build(); + let checks = typos::checks::CheckSettings::new() + .check_filenames(check_filenames) + .check_files(check_files) + .binary(binary) + .build(&dictionary, &parser); + let first_path = &options .path .get(0) @@ -283,15 +290,10 @@ fn run() -> Result { for entry in walk.build() { let entry = entry?; if entry.file_type().map(|t| t.is_file()).unwrap_or(true) { - if typos::process_file( - entry.path(), - &dictionary, - check_filenames, - check_files, - &parser, - binary, - options.format.report(), - )? { + if checks.check_filename(entry.path(), options.format.report())? { + typos_found = true; + } + if checks.check_file(entry.path(), options.format.report())? { typos_found = true; } }