From ce365ae12e12fddfb6fc42a7f1e5ea71834d6051 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 30 Oct 2019 07:26:59 -0600 Subject: [PATCH] feat: Dump files, identifiers, and words This will help people debug their configurations. Fixes #41 --- benches/file.rs | 391 +++++++++++++------------------------------- src/main.rs | 159 ++++++++++++++++-- typos/src/checks.rs | 204 ++++++++++++++++++++++- typos/src/report.rs | 55 +++++++ typos/src/tokens.rs | 6 +- 5 files changed, 509 insertions(+), 306 deletions(-) diff --git a/benches/file.rs b/benches/file.rs index e8d667b..ab9e21b 100644 --- a/benches/file.rs +++ b/benches/file.rs @@ -7,374 +7,203 @@ mod data; use assert_fs::prelude::*; use bstr::ByteSlice; -#[bench] -fn check_file_empty(b: &mut test::Bencher) { +fn bench_read(data: &str, b: &mut test::Bencher) { let temp = assert_fs::TempDir::new().unwrap(); let sample_path = temp.child("sample"); - sample_path.write_str(data::EMPTY).unwrap(); + sample_path.write_str(data).unwrap(); - let corrections = typos_cli::dict::BuiltIn::new(); - let parser = typos::tokens::Parser::new(); - let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); - b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); - - temp.close().unwrap(); -} - -#[bench] -fn check_file_no_tokens(b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data::NO_TOKENS).unwrap(); - - let corrections = typos_cli::dict::BuiltIn::new(); - let parser = typos::tokens::Parser::new(); - let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); - b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); - - temp.close().unwrap(); -} - -#[bench] -fn check_file_single_token(b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data::SINGLE_TOKEN).unwrap(); - - let corrections = typos_cli::dict::BuiltIn::new(); - let parser = typos::tokens::Parser::new(); - let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); - b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); - - temp.close().unwrap(); -} - -#[bench] -fn check_file_sherlock(b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data::SHERLOCK).unwrap(); - - let corrections = typos_cli::dict::BuiltIn::new(); - let parser = typos::tokens::Parser::new(); - let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); - b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); - - temp.close().unwrap(); -} - -#[bench] -fn check_file_code(b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data::CODE).unwrap(); - - let corrections = typos_cli::dict::BuiltIn::new(); - let parser = typos::tokens::Parser::new(); - let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); - b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); - - temp.close().unwrap(); -} - -#[bench] -fn check_file_corpus(b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data::CORPUS).unwrap(); - - let corrections = typos_cli::dict::BuiltIn::new(); - let parser = typos::tokens::Parser::new(); - let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); - b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); + b.iter(|| std::fs::read(sample_path.path())); temp.close().unwrap(); } #[bench] fn read_empty(b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data::EMPTY).unwrap(); - - b.iter(|| std::fs::read(sample_path.path())); - - temp.close().unwrap(); + bench_read(data::EMPTY, b); } #[bench] fn read_no_tokens(b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data::NO_TOKENS).unwrap(); - - b.iter(|| std::fs::read(sample_path.path())); - - temp.close().unwrap(); + bench_read(data::NO_TOKENS, b); } #[bench] fn read_single_token(b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data::SINGLE_TOKEN).unwrap(); - - b.iter(|| std::fs::read(sample_path.path())); - - temp.close().unwrap(); + bench_read(data::SINGLE_TOKEN, b); } #[bench] fn read_sherlock(b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data::SHERLOCK).unwrap(); - - b.iter(|| std::fs::read(sample_path.path())); - - temp.close().unwrap(); + bench_read(data::SHERLOCK, b); } #[bench] fn read_code(b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data::CODE).unwrap(); - - b.iter(|| std::fs::read(sample_path.path())); - - temp.close().unwrap(); + bench_read(data::CODE, b); } #[bench] fn read_corpus(b: &mut test::Bencher) { + bench_read(data::CORPUS, b); +} + +fn bench_split_lines(data: &str, b: &mut test::Bencher) { + b.iter(|| data.as_bytes().lines().enumerate().last()); +} + +#[bench] +fn parse_words_lines_empty(b: &mut test::Bencher) { + bench_split_lines(data::EMPTY, b); +} + +#[bench] +fn parse_words_lines_no_tokens(b: &mut test::Bencher) { + bench_split_lines(data::NO_TOKENS, b); +} + +#[bench] +fn parse_words_lines_single_token(b: &mut test::Bencher) { + bench_split_lines(data::SINGLE_TOKEN, b); +} + +#[bench] +fn parse_words_lines_sherlock(b: &mut test::Bencher) { + bench_split_lines(data::SHERLOCK, b); +} + +#[bench] +fn parse_words_lines_code(b: &mut test::Bencher) { + bench_split_lines(data::CODE, b); +} + +#[bench] +fn parse_words_lines_corpus(b: &mut test::Bencher) { + bench_split_lines(data::CORPUS, b); +} + +fn bench_parse_ident(data: &str, b: &mut test::Bencher) { let temp = assert_fs::TempDir::new().unwrap(); let sample_path = temp.child("sample"); - sample_path.write_str(data::CORPUS).unwrap(); + sample_path.write_str(data).unwrap(); - b.iter(|| std::fs::read(sample_path.path())); + let parser = typos::tokens::Parser::new(); + let checks = typos::checks::TyposSettings::new().build_identifier_parser(&parser); + b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); temp.close().unwrap(); } #[bench] -fn split_lines_empty(b: &mut test::Bencher) { - b.iter(|| data::EMPTY.as_bytes().lines().enumerate().last()); +fn parse_idents_empty(b: &mut test::Bencher) { + bench_parse_ident(data::EMPTY, b); } #[bench] -fn split_lines_no_tokens(b: &mut test::Bencher) { - b.iter(|| data::NO_TOKENS.as_bytes().lines().enumerate().last()); +fn parse_idents_no_tokens(b: &mut test::Bencher) { + bench_parse_ident(data::NO_TOKENS, b); } #[bench] -fn split_lines_single_token(b: &mut test::Bencher) { - b.iter(|| data::SINGLE_TOKEN.as_bytes().lines().enumerate().last()); +fn parse_idents_single_token(b: &mut test::Bencher) { + bench_parse_ident(data::SINGLE_TOKEN, b); } #[bench] -fn split_lines_sherlock(b: &mut test::Bencher) { - b.iter(|| data::SHERLOCK.as_bytes().lines().enumerate().last()); +fn parse_idents_sherlock(b: &mut test::Bencher) { + bench_parse_ident(data::SHERLOCK, b); } #[bench] -fn split_lines_code(b: &mut test::Bencher) { - b.iter(|| data::CODE.as_bytes().lines().enumerate().last()); +fn parse_idents_code(b: &mut test::Bencher) { + bench_parse_ident(data::CODE, b); } #[bench] -fn split_lines_corpus(b: &mut test::Bencher) { - b.iter(|| data::CORPUS.as_bytes().lines().enumerate().last()); +fn parse_idents_corpus(b: &mut test::Bencher) { + bench_parse_ident(data::CORPUS, b); } -#[bench] -fn parse_empty(b: &mut test::Bencher) { +fn bench_parse_word(data: &str, b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data).unwrap(); + let parser = typos::tokens::Parser::new(); - b.iter(|| { - data::EMPTY - .as_bytes() - .lines() - .enumerate() - .for_each(|(_idx, l)| { - parser.parse_bytes(l).last(); - () - }) - }); + let checks = typos::checks::TyposSettings::new().build_word_parser(&parser); + b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); + + temp.close().unwrap(); } #[bench] -fn parse_no_tokens(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); - b.iter(|| { - data::NO_TOKENS - .as_bytes() - .lines() - .enumerate() - .for_each(|(_idx, l)| { - parser.parse_bytes(l).last(); - () - }) - }); +fn parse_words_empty(b: &mut test::Bencher) { + bench_parse_word(data::EMPTY, b); } #[bench] -fn parse_single_token(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); - b.iter(|| { - data::SINGLE_TOKEN - .as_bytes() - .lines() - .enumerate() - .for_each(|(_idx, l)| { - parser.parse_bytes(l).last(); - () - }) - }); +fn parse_words_no_tokens(b: &mut test::Bencher) { + bench_parse_word(data::NO_TOKENS, b); } #[bench] -fn parse_sherlock(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); - b.iter(|| { - data::SHERLOCK - .as_bytes() - .lines() - .enumerate() - .for_each(|(_idx, l)| { - parser.parse_bytes(l).last(); - () - }) - }); +fn parse_words_single_token(b: &mut test::Bencher) { + bench_parse_word(data::SINGLE_TOKEN, b); } #[bench] -fn parse_code(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); - b.iter(|| { - data::CODE - .as_bytes() - .lines() - .enumerate() - .for_each(|(_idx, l)| { - parser.parse_bytes(l).last(); - () - }) - }); +fn parse_words_sherlock(b: &mut test::Bencher) { + bench_parse_word(data::SHERLOCK, b); } #[bench] -fn parse_corpus(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); - b.iter(|| { - data::CORPUS - .as_bytes() - .lines() - .enumerate() - .for_each(|(_idx, l)| { - parser.parse_bytes(l).last(); - () - }) - }); +fn parse_words_code(b: &mut test::Bencher) { + bench_parse_word(data::CODE, b); } #[bench] -fn split_empty(b: &mut test::Bencher) { +fn parse_words_corpus(b: &mut test::Bencher) { + bench_parse_word(data::CORPUS, b); +} + +fn bench_check_file(data: &str, b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data).unwrap(); + + let corrections = typos_cli::dict::BuiltIn::new(); let parser = typos::tokens::Parser::new(); - b.iter(|| { - data::EMPTY - .as_bytes() - .lines() - .enumerate() - .for_each(|(_idx, l)| { - parser.parse_bytes(l).for_each(|l| { - l.split().last(); - () - }) - }) - }); + let checks = typos::checks::TyposSettings::new().build_checks(&corrections, &parser); + b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); + + temp.close().unwrap(); } #[bench] -fn split_no_tokens(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); - b.iter(|| { - data::NO_TOKENS - .as_bytes() - .lines() - .enumerate() - .for_each(|(_idx, l)| { - parser.parse_bytes(l).for_each(|l| { - l.split().last(); - () - }) - }) - }); +fn check_file_empty(b: &mut test::Bencher) { + bench_check_file(data::EMPTY, b); } #[bench] -fn split_single_token(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); - b.iter(|| { - data::SINGLE_TOKEN - .as_bytes() - .lines() - .enumerate() - .for_each(|(_idx, l)| { - parser.parse_bytes(l).for_each(|l| { - l.split().last(); - () - }) - }) - }); +fn check_file_no_tokens(b: &mut test::Bencher) { + bench_check_file(data::NO_TOKENS, b); } #[bench] -fn split_sherlock(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); - b.iter(|| { - data::SHERLOCK - .as_bytes() - .lines() - .enumerate() - .for_each(|(_idx, l)| { - parser.parse_bytes(l).for_each(|l| { - l.split().last(); - () - }) - }) - }); +fn check_file_single_token(b: &mut test::Bencher) { + bench_check_file(data::SINGLE_TOKEN, b); } #[bench] -fn split_code(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); - b.iter(|| { - data::CODE - .as_bytes() - .lines() - .enumerate() - .for_each(|(_idx, l)| { - parser.parse_bytes(l).for_each(|l| { - l.split().last(); - () - }) - }) - }); +fn check_file_sherlock(b: &mut test::Bencher) { + bench_check_file(data::SHERLOCK, b); } #[bench] -fn split_corpus(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); - b.iter(|| { - data::CORPUS - .as_bytes() - .lines() - .enumerate() - .for_each(|(_idx, l)| { - parser.parse_bytes(l).for_each(|l| { - l.split().last(); - () - }) - }) - }); +fn check_file_code(b: &mut test::Bencher) { + bench_check_file(data::CODE, b); +} + +#[bench] +fn check_file_corpus(b: &mut test::Bencher) { + bench_check_file(data::CORPUS, b); } diff --git a/src/main.rs b/src/main.rs index ad97085..bab7e13 100644 --- a/src/main.rs +++ b/src/main.rs @@ -51,6 +51,18 @@ struct Args { /// Ignore implicit configuration files. isolated: bool, + #[structopt(long)] + /// Print each file that would be spellchecked. + files: bool, + + #[structopt(long)] + /// Print each identifier that would be spellchecked. + identifiers: bool, + + #[structopt(long)] + /// Print each word that would be spellchecked. + words: bool, + #[structopt(flatten)] overrides: FileArgs, @@ -244,7 +256,79 @@ impl config::WalkSource for WalkArgs { } } -pub fn init_logging(level: Option) { +trait Checks { + fn check_filename( + &self, + path: &std::path::Path, + report: typos::report::Report, + ) -> Result; + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + report: typos::report::Report, + ) -> Result; +} + +impl<'p> Checks for typos::checks::ParseIdentifiers<'p> { + fn check_filename( + &self, + path: &std::path::Path, + report: typos::report::Report, + ) -> Result { + self.check_filename(path, report) + } + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + report: typos::report::Report, + ) -> Result { + self.check_file(path, explicit, report) + } +} + +impl<'p> Checks for typos::checks::ParseWords<'p> { + fn check_filename( + &self, + path: &std::path::Path, + report: typos::report::Report, + ) -> Result { + self.check_filename(path, report) + } + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + report: typos::report::Report, + ) -> Result { + self.check_file(path, explicit, report) + } +} + +impl<'d, 'p> Checks for typos::checks::Checks<'d, 'p> { + fn check_filename( + &self, + path: &std::path::Path, + report: typos::report::Report, + ) -> Result { + self.check_filename(path, report) + } + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + report: typos::report::Report, + ) -> Result { + self.check_file(path, explicit, report) + } +} + +fn init_logging(level: Option) { if let Some(level) = level { let mut builder = env_logger::Builder::new(); @@ -269,18 +353,18 @@ pub fn init_logging(level: Option) { fn check_entry( entry: Result, - args: &Args, - checks: &typos::checks::Checks, + format: Format, + checks: &dyn Checks, ) -> Result { let mut typos_found = false; let entry = entry?; if entry.file_type().map(|t| t.is_file()).unwrap_or(true) { let explicit = entry.depth() == 0; - if checks.check_filename(entry.path(), args.format.report())? { + if checks.check_filename(entry.path(), format.report())? { typos_found = true; } - if checks.check_file(entry.path(), explicit, args.format.report())? { + if checks.check_file(entry.path(), explicit, format.report())? { typos_found = true; } } @@ -327,11 +411,11 @@ fn run() -> Result { .include_chars(config.default.identifier_include_chars().to_owned()) .build(); - let checks = typos::checks::CheckSettings::new() + let mut settings = typos::checks::TyposSettings::new(); + settings .check_filenames(config.default.check_filename()) .check_files(config.default.check_file()) - .binary(config.files.binary()) - .build(&dictionary, &parser); + .binary(config.files.binary()); let mut walk = ignore::WalkBuilder::new(path); walk.hidden(config.files.ignore_hidden()) @@ -340,15 +424,58 @@ fn run() -> Result { .git_ignore(config.files.ignore_vcs()) .git_exclude(config.files.ignore_vcs()) .parents(config.files.ignore_parent()); - for entry in walk.build() { - match check_entry(entry, &args, &checks) { - Ok(true) => typos_found = true, - Err(err) => { - let msg = typos::report::Error::new(err.to_string()); - args.format.report()(msg.into()); - errors_found = true + if args.files { + for entry in walk.build() { + match entry { + Ok(entry) => { + let msg = typos::report::File::new(entry.path()); + args.format.report()(msg.into()); + } + Err(err) => { + let msg = typos::report::Error::new(err.to_string()); + args.format.report()(msg.into()); + errors_found = true + } + } + } + } else if args.identifiers { + let checks = settings.build_identifier_parser(&parser); + for entry in walk.build() { + match check_entry(entry, args.format, &checks) { + Ok(true) => typos_found = true, + Err(err) => { + let msg = typos::report::Error::new(err.to_string()); + args.format.report()(msg.into()); + errors_found = true + } + _ => (), + } + } + } else if args.words { + let checks = settings.build_word_parser(&parser); + for entry in walk.build() { + match check_entry(entry, args.format, &checks) { + Ok(true) => typos_found = true, + Err(err) => { + let msg = typos::report::Error::new(err.to_string()); + args.format.report()(msg.into()); + errors_found = true + } + _ => (), + } + } + } else { + let checks = settings.build_checks(&dictionary, &parser); + for entry in walk.build() { + match check_entry(entry, args.format, &checks) { + Ok(true) => typos_found = true, + Err(err) => { + let msg = typos::report::Error::new(err.to_string()); + args.format.report()(msg.into()); + errors_found = true + } + _ => (), } - _ => (), } } } diff --git a/typos/src/checks.rs b/typos/src/checks.rs index 4d101bf..00bf19c 100644 --- a/typos/src/checks.rs +++ b/typos/src/checks.rs @@ -5,13 +5,13 @@ use crate::tokens; use crate::Dictionary; #[derive(Debug, Clone, PartialEq, Eq)] -pub struct CheckSettings { +pub struct TyposSettings { check_filenames: bool, check_files: bool, binary: bool, } -impl CheckSettings { +impl TyposSettings { pub fn new() -> Self { Default::default() } @@ -31,7 +31,7 @@ impl CheckSettings { self } - pub fn build<'d, 'p>( + pub fn build_checks<'d, 'p>( &self, dictionary: &'d dyn Dictionary, parser: &'p tokens::Parser, @@ -44,9 +44,27 @@ impl CheckSettings { binary: self.binary, } } + + pub fn build_identifier_parser<'p>(&self, parser: &'p tokens::Parser) -> ParseIdentifiers<'p> { + ParseIdentifiers { + parser, + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + } + } + + pub fn build_word_parser<'p>(&self, parser: &'p tokens::Parser) -> ParseWords<'p> { + ParseWords { + parser, + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + } + } } -impl Default for CheckSettings { +impl Default for TyposSettings { fn default() -> Self { Self { check_filenames: true, @@ -56,6 +74,176 @@ impl Default for CheckSettings { } } +#[derive(Clone)] +pub struct ParseIdentifiers<'p> { + parser: &'p tokens::Parser, + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl<'p> ParseIdentifiers<'p> { + pub fn check_filename( + &self, + path: &std::path::Path, + report: report::Report, + ) -> Result { + let typos_found = false; + + if !self.check_filenames { + return Ok(typos_found); + } + + for part in path.components().filter_map(|c| c.as_os_str().to_str()) { + let msg = report::Parse { + path, + kind: report::ParseKind::Identifier, + data: self.parser.parse(part).map(|i| i.token()).collect(), + non_exhaustive: (), + }; + report(msg.into()); + } + + Ok(typos_found) + } + + pub fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + report: report::Report, + ) -> Result { + let typos_found = false; + + if !self.check_files { + return Ok(typos_found); + } + + let buffer = std::fs::read(path) + .map_err(|e| crate::ErrorKind::IoError.into_error().with_source(e))?; + if !explicit && !self.binary && is_binary(&buffer) { + let msg = report::BinaryFile { + path, + non_exhaustive: (), + }; + report(msg.into()); + return Ok(typos_found); + } + + for line in buffer.lines() { + let msg = report::Parse { + path, + kind: report::ParseKind::Identifier, + data: self.parser.parse_bytes(line).map(|i| i.token()).collect(), + non_exhaustive: (), + }; + report(msg.into()); + } + + Ok(typos_found) + } +} + +impl std::fmt::Debug for ParseIdentifiers<'_> { + fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { + fmt.debug_struct("Checks") + .field("parser", self.parser) + .field("check_filenames", &self.check_filenames) + .field("check_files", &self.check_files) + .field("binary", &self.binary) + .finish() + } +} + +#[derive(Clone)] +pub struct ParseWords<'p> { + parser: &'p tokens::Parser, + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl<'p> ParseWords<'p> { + pub fn check_filename( + &self, + path: &std::path::Path, + report: report::Report, + ) -> Result { + let typos_found = false; + + if !self.check_filenames { + return Ok(typos_found); + } + + for part in path.components().filter_map(|c| c.as_os_str().to_str()) { + let msg = report::Parse { + path, + kind: report::ParseKind::Word, + data: self + .parser + .parse(part) + .flat_map(|ident| ident.split().map(|i| i.token())) + .collect(), + non_exhaustive: (), + }; + report(msg.into()); + } + + Ok(typos_found) + } + + pub fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + report: report::Report, + ) -> Result { + let typos_found = false; + + if !self.check_files { + return Ok(typos_found); + } + + let buffer = std::fs::read(path) + .map_err(|e| crate::ErrorKind::IoError.into_error().with_source(e))?; + if !explicit && !self.binary && is_binary(&buffer) { + let msg = report::BinaryFile { + path, + non_exhaustive: (), + }; + report(msg.into()); + return Ok(typos_found); + } + + for line in buffer.lines() { + let msg = report::Parse { + path, + kind: report::ParseKind::Word, + data: self + .parser + .parse_bytes(line) + .flat_map(|ident| ident.split().map(|i| i.token())) + .collect(), + non_exhaustive: (), + }; + report(msg.into()); + } + + Ok(typos_found) + } +} + +impl std::fmt::Debug for ParseWords<'_> { + fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { + fmt.debug_struct("Checks") + .field("parser", self.parser) + .field("check_filenames", &self.check_filenames) + .field("check_files", &self.check_files) + .field("binary", &self.binary) + .finish() + } +} + #[derive(Clone)] pub struct Checks<'d, 'p> { dictionary: &'d dyn Dictionary, @@ -122,8 +310,7 @@ impl<'d, 'p> Checks<'d, 'p> { let buffer = std::fs::read(path) .map_err(|e| crate::ErrorKind::IoError.into_error().with_source(e))?; - let null_max = std::cmp::min(buffer.len(), 1024); - if !explicit && !self.binary && buffer[0..null_max].find_byte(b'\0').is_some() { + if !explicit && !self.binary && is_binary(&buffer) { let msg = report::BinaryFile { path, non_exhaustive: (), @@ -183,3 +370,8 @@ impl std::fmt::Debug for Checks<'_, '_> { .finish() } } + +fn is_binary(buffer: &[u8]) -> bool { + let null_max = std::cmp::min(buffer.len(), 1024); + buffer[0..null_max].find_byte(b'\0').is_some() +} diff --git a/typos/src/report.rs b/typos/src/report.rs index a3d6a96..451e445 100644 --- a/typos/src/report.rs +++ b/typos/src/report.rs @@ -8,8 +8,12 @@ pub enum Message<'m> { BinaryFile(BinaryFile<'m>), Correction(Correction<'m>), FilenameCorrection(FilenameCorrection<'m>), + File(File<'m>), + Parse(Parse<'m>), PathError(PathError<'m>), Error(Error), + #[serde(skip)] + __NonExhaustive, } #[derive(Clone, Debug, serde::Serialize, derive_more::Display)] @@ -42,6 +46,39 @@ pub struct FilenameCorrection<'m> { pub(crate) non_exhaustive: (), } +#[derive(Copy, Clone, Debug, serde::Serialize)] +pub enum ParseKind { + Identifier, + Word, + #[doc(hidden)] + __NonExhaustive, +} + +#[derive(Clone, Debug, serde::Serialize)] +pub struct File<'m> { + pub path: &'m std::path::Path, + #[serde(skip)] + pub(crate) non_exhaustive: (), +} + +impl<'m> File<'m> { + pub fn new(path: &'m std::path::Path) -> Self { + Self { + path, + non_exhaustive: (), + } + } +} + +#[derive(Clone, Debug, serde::Serialize)] +pub struct Parse<'m> { + pub path: &'m std::path::Path, + pub kind: ParseKind, + pub data: Vec<&'m str>, + #[serde(skip)] + pub(crate) non_exhaustive: (), +} + #[derive(Clone, Debug, serde::Serialize)] pub struct PathError<'m> { pub path: &'m std::path::Path, @@ -88,12 +125,21 @@ pub fn print_brief(msg: Message) { Message::FilenameCorrection(msg) => { println!("{}: {} -> {}", msg.path.display(), msg.typo, msg.correction); } + Message::File(msg) => { + println!("{}", msg.path.display()); + } + Message::Parse(msg) => { + println!("{}", itertools::join(msg.data.iter(), " ")); + } Message::PathError(msg) => { println!("{}: {}", msg.path.display(), msg.msg); } Message::Error(msg) => { println!("{}", msg.msg); } + Message::__NonExhaustive => { + unreachable!("Non-creatable case"); + } } } @@ -111,12 +157,21 @@ pub fn print_long(msg: Message) { msg.correction ); } + Message::File(msg) => { + println!("{}", msg.path.display()); + } + Message::Parse(msg) => { + println!("{}", itertools::join(msg.data.iter(), " ")); + } Message::PathError(msg) => { println!("{}: {}", msg.path.display(), msg.msg); } Message::Error(msg) => { println!("{}", msg.msg); } + Message::__NonExhaustive => { + unreachable!("Non-creatable case"); + } } } diff --git a/typos/src/tokens.rs b/typos/src/tokens.rs index c2c8c61..acc1a36 100644 --- a/typos/src/tokens.rs +++ b/typos/src/tokens.rs @@ -123,7 +123,7 @@ impl<'t> Identifier<'t> { Self { token, offset } } - pub fn token(&self) -> &str { + pub fn token(&self) -> &'t str { self.token } @@ -135,7 +135,7 @@ impl<'t> Identifier<'t> { self.offset } - pub fn split(&self) -> impl Iterator> { + pub fn split(&self) -> impl Iterator> { split_ident(self.token, self.offset) } } @@ -177,7 +177,7 @@ impl<'t> Word<'t> { } } - pub fn token(&self) -> &str { + pub fn token(&self) -> &'t str { self.token }