From 7fdd0dee164f74ca4d0dd642254419b02639fdc8 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 28 Dec 2020 21:45:19 -0600 Subject: [PATCH 01/16] style(typos): Make parser ordering clearer --- crates/typos/src/tokens.rs | 112 ++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 9f2728c..0a9e7f2 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -1,11 +1,3 @@ -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Case { - Title, - Lower, - Scream, - None, -} - #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ParserBuilder { ignore_hex: bool, @@ -237,52 +229,8 @@ impl<'t> Word<'t> { } } -/// Tracks the current 'mode' of the transformation algorithm as it scans the input string. -/// -/// The mode is a tri-state which tracks the case of the last cased character of the current -/// word. If there is no cased character (either lowercase or uppercase) since the previous -/// word boundary, than the mode is `Boundary`. If the last cased character is lowercase, then -/// the mode is `Lowercase`. Otherrwise, the mode is `Uppercase`. -#[derive(Clone, Copy, PartialEq, Debug)] -enum WordMode { - /// There have been no lowercase or uppercase characters in the current word. - Boundary, - /// The previous cased character in the current word is lowercase. - Lowercase, - /// The previous cased character in the current word is uppercase. - Uppercase, - Number, -} - -impl WordMode { - fn classify(c: char) -> Self { - if c.is_lowercase() { - WordMode::Lowercase - } else if c.is_uppercase() { - WordMode::Uppercase - } else if c.is_ascii_digit() { - WordMode::Number - } else { - // This assumes all characters are either lower or upper case. 
- WordMode::Boundary - } - } - - fn case(self, last: WordMode) -> Case { - match (self, last) { - (WordMode::Uppercase, WordMode::Uppercase) => Case::Scream, - (WordMode::Uppercase, WordMode::Lowercase) => Case::Title, - (WordMode::Lowercase, WordMode::Lowercase) => Case::Lower, - (WordMode::Number, WordMode::Number) => Case::None, - (WordMode::Number, _) - | (_, WordMode::Number) - | (WordMode::Boundary, _) - | (_, WordMode::Boundary) - | (WordMode::Lowercase, WordMode::Uppercase) => { - unreachable!("Invalid case combination: ({:?}, {:?})", self, last) - } - } - } +fn split_ident(ident: &str, offset: usize) -> impl Iterator> { + SplitIdent::new(ident, offset) } struct SplitIdent<'s> { @@ -377,8 +325,60 @@ impl<'s> Iterator for SplitIdent<'s> { } } -fn split_ident(ident: &str, offset: usize) -> impl Iterator> { - SplitIdent::new(ident, offset) +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Case { + Title, + Lower, + Scream, + None, +} + +/// Tracks the current 'mode' of the transformation algorithm as it scans the input string. +/// +/// The mode is a tri-state which tracks the case of the last cased character of the current +/// word. If there is no cased character (either lowercase or uppercase) since the previous +/// word boundary, than the mode is `Boundary`. If the last cased character is lowercase, then +/// the mode is `Lowercase`. Otherrwise, the mode is `Uppercase`. +#[derive(Clone, Copy, PartialEq, Debug)] +enum WordMode { + /// There have been no lowercase or uppercase characters in the current word. + Boundary, + /// The previous cased character in the current word is lowercase. + Lowercase, + /// The previous cased character in the current word is uppercase. 
+ Uppercase, + Number, +} + +impl WordMode { + fn classify(c: char) -> Self { + if c.is_lowercase() { + WordMode::Lowercase + } else if c.is_uppercase() { + WordMode::Uppercase + } else if c.is_ascii_digit() { + WordMode::Number + } else { + // This assumes all characters are either lower or upper case. + WordMode::Boundary + } + } + + fn case(self, last: WordMode) -> Case { + match (self, last) { + (WordMode::Uppercase, WordMode::Uppercase) => Case::Scream, + (WordMode::Uppercase, WordMode::Lowercase) => Case::Title, + (WordMode::Lowercase, WordMode::Lowercase) => Case::Lower, + (WordMode::Number, WordMode::Number) => Case::None, + (WordMode::Number, _) + | (_, WordMode::Number) + | (WordMode::Boundary, _) + | (_, WordMode::Boundary) + | (WordMode::Lowercase, WordMode::Uppercase) => { + unreachable!("Invalid case combination: ({:?}, {:?})", self, last) + } + } + } } #[cfg(test)] From 1e64080c0534038a05c52f2cecb19dd40a7195c1 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 28 Dec 2020 21:51:44 -0600 Subject: [PATCH 02/16] refactor(typos): Open up the name Parser --- benches/checks.rs | 10 +++++----- benches/tokenize.rs | 12 ++++++------ crates/typos/src/checks.rs | 28 ++++++++++++++-------------- crates/typos/src/tokens.rs | 38 +++++++++++++++++++------------------- src/checks.rs | 6 +++--- src/main.rs | 2 +- 6 files changed, 48 insertions(+), 48 deletions(-) diff --git a/benches/checks.rs b/benches/checks.rs index 8f8695e..f6aa46c 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -9,7 +9,7 @@ use typos::checks::Check; fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); let checks = typos::checks::TyposSettings::new().build_identifier_parser(); b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent)); } @@ -46,7 +46,7 @@ fn parse_idents_corpus_str(b: &mut 
test::Bencher) { fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); let checks = typos::checks::TyposSettings::new().build_identifier_parser(); b.iter(|| { checks.check_bytes( @@ -90,7 +90,7 @@ fn parse_idents_corpus_bytes(b: &mut test::Bencher) { fn bench_parse_word_str(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); let checks = typos::checks::TyposSettings::new().build_word_parser(); b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent)); } @@ -127,7 +127,7 @@ fn parse_words_corpus(b: &mut test::Bencher) { fn bench_typos(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); let checks = typos::checks::TyposSettings::new().build_typos(); b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent)); } @@ -168,7 +168,7 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) { sample_path.write_str(data).unwrap(); let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); let checks = typos::checks::TyposSettings::new().build_typos(); b.iter(|| { checks.check_file( diff --git a/benches/tokenize.rs b/benches/tokenize.rs index 32e6a74..efcce0b 100644 --- a/benches/tokenize.rs +++ b/benches/tokenize.rs @@ -6,19 +6,19 @@ mod data; #[bench] fn ident_parse_empty(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); b.iter(|| parser.parse_bytes(data::EMPTY.as_bytes()).last()); } #[bench] fn 
ident_parse_no_tokens(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); b.iter(|| parser.parse_bytes(data::NO_TOKENS.as_bytes()).last()); } #[bench] fn ident_parse_single_token(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); b.iter(|| { parser.parse_bytes(data::SINGLE_TOKEN.as_bytes()).last(); }); @@ -26,19 +26,19 @@ fn ident_parse_single_token(b: &mut test::Bencher) { #[bench] fn ident_parse_sherlock(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); b.iter(|| parser.parse_bytes(data::SHERLOCK.as_bytes()).last()); } #[bench] fn ident_parse_code(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); b.iter(|| parser.parse_bytes(data::CODE.as_bytes()).last()); } #[bench] fn ident_parse_corpus(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); b.iter(|| parser.parse_bytes(data::CORPUS.as_bytes()).last()); } diff --git a/crates/typos/src/checks.rs b/crates/typos/src/checks.rs index 6040440..62bae7d 100644 --- a/crates/typos/src/checks.rs +++ b/crates/typos/src/checks.rs @@ -9,7 +9,7 @@ pub trait Check: Send + Sync { fn check_str( &self, buffer: &str, - parser: &tokens::Parser, + parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error>; @@ -17,7 +17,7 @@ pub trait Check: Send + Sync { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Parser, + parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error>; @@ -31,7 +31,7 @@ pub trait Check: Send + Sync { fn check_filename( &self, path: &std::path::Path, - parser: &tokens::Parser, + parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn 
report::Report, ) -> Result<(), std::io::Error> { @@ -54,7 +54,7 @@ pub trait Check: Send + Sync { &self, path: &std::path::Path, explicit: bool, - parser: &tokens::Parser, + parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -172,7 +172,7 @@ impl Check for Typos { fn check_str( &self, buffer: &str, - parser: &tokens::Parser, + parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -217,7 +217,7 @@ impl Check for Typos { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Parser, + parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -284,7 +284,7 @@ impl Check for ParseIdentifiers { fn check_str( &self, buffer: &str, - parser: &tokens::Parser, + parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -303,7 +303,7 @@ impl Check for ParseIdentifiers { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Parser, + parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -343,7 +343,7 @@ impl Check for ParseWords { fn check_str( &self, buffer: &str, - parser: &tokens::Parser, + parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -365,7 +365,7 @@ impl Check for ParseWords { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Parser, + parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -404,7 +404,7 @@ impl Check for Files { fn check_str( &self, _buffer: &str, - _parser: &tokens::Parser, + _parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, _reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -414,7 +414,7 @@ impl Check for Files { fn 
check_bytes( &self, _buffer: &[u8], - _parser: &tokens::Parser, + _parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, _reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -436,7 +436,7 @@ impl Check for Files { fn check_filename( &self, _path: &std::path::Path, - _parser: &tokens::Parser, + _parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, _reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -447,7 +447,7 @@ impl Check for Files { &self, path: &std::path::Path, _explicit: bool, - _parser: &tokens::Parser, + _parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 0a9e7f2..f372c96 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -1,5 +1,5 @@ #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ParserBuilder { +pub struct TokenizerBuilder { ignore_hex: bool, leading_digits: bool, leading_chars: String, @@ -7,7 +7,7 @@ pub struct ParserBuilder { include_chars: String, } -impl ParserBuilder { +impl TokenizerBuilder { pub fn new() -> Self { Default::default() } @@ -37,7 +37,7 @@ impl ParserBuilder { self } - pub fn build(&self) -> Parser { + pub fn build(&self) -> Tokenizer { let mut pattern = r#"\b("#.to_owned(); Self::push_pattern(&mut pattern, self.leading_digits, &self.leading_chars); Self::push_pattern(&mut pattern, self.include_digits, &self.include_chars); @@ -46,7 +46,7 @@ impl ParserBuilder { let words_str = regex::Regex::new(&pattern).unwrap(); let words_bytes = regex::bytes::Regex::new(&pattern).unwrap(); - Parser { + Tokenizer { words_str, words_bytes, // `leading_digits` let's us bypass the regexes since you can't have a decimal or @@ -69,7 +69,7 @@ impl ParserBuilder { } } -impl Default for ParserBuilder { +impl Default for TokenizerBuilder { fn default() -> Self { Self { ignore_hex: true, @@ -82,16 +82,16 @@ impl Default for ParserBuilder 
{ } #[derive(Debug, Clone)] -pub struct Parser { +pub struct Tokenizer { words_str: regex::Regex, words_bytes: regex::bytes::Regex, ignore_numbers: bool, ignore_hex: bool, } -impl Parser { +impl Tokenizer { pub fn new() -> Self { - ParserBuilder::default().build() + TokenizerBuilder::default().build() } pub fn parse_str<'c>(&'c self, content: &'c str) -> impl Iterator> { @@ -124,7 +124,7 @@ impl Parser { } } -impl Default for Parser { +impl Default for Tokenizer { fn default() -> Self { Self::new() } @@ -387,7 +387,7 @@ mod test { #[test] fn tokenize_empty_is_empty() { - let parser = Parser::new(); + let parser = Tokenizer::new(); let input = ""; let expected: Vec = vec![]; @@ -399,7 +399,7 @@ mod test { #[test] fn tokenize_word_is_word() { - let parser = Parser::new(); + let parser = Tokenizer::new(); let input = "word"; let expected: Vec = vec![Identifier::new_unchecked("word", 0)]; @@ -411,7 +411,7 @@ mod test { #[test] fn tokenize_space_separated_words() { - let parser = Parser::new(); + let parser = Tokenizer::new(); let input = "A B"; let expected: Vec = vec![ @@ -426,7 +426,7 @@ mod test { #[test] fn tokenize_dot_separated_words() { - let parser = Parser::new(); + let parser = Tokenizer::new(); let input = "A.B"; let expected: Vec = vec![ @@ -441,7 +441,7 @@ mod test { #[test] fn tokenize_namespace_separated_words() { - let parser = Parser::new(); + let parser = Tokenizer::new(); let input = "A::B"; let expected: Vec = vec![ @@ -456,7 +456,7 @@ mod test { #[test] fn tokenize_underscore_doesnt_separate() { - let parser = Parser::new(); + let parser = Tokenizer::new(); let input = "A_B"; let expected: Vec = vec![Identifier::new_unchecked("A_B", 0)]; @@ -468,7 +468,7 @@ mod test { #[test] fn tokenize_ignore_hex_enabled() { - let parser = ParserBuilder::new().ignore_hex(true).build(); + let parser = TokenizerBuilder::new().ignore_hex(true).build(); let input = "Hello 0xDEADBEEF World"; let expected: Vec = vec![ @@ -483,7 +483,7 @@ mod test { #[test] fn 
tokenize_ignore_hex_disabled() { - let parser = ParserBuilder::new() + let parser = TokenizerBuilder::new() .ignore_hex(false) .leading_digits(true) .build(); @@ -523,11 +523,11 @@ mod test { &[("A", Case::Scream, 0), ("String", Case::Title, 1)], ), ( - "SimpleXMLParser", + "SimpleXMLTokenizer", &[ ("Simple", Case::Title, 0), ("XML", Case::Scream, 6), - ("Parser", Case::Title, 9), + ("Tokenizer", Case::Title, 9), ], ), ( diff --git a/src/checks.rs b/src/checks.rs index 35d51c6..78b9718 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -1,7 +1,7 @@ pub(crate) fn check_path( walk: ignore::Walk, checks: &dyn typos::checks::Check, - parser: &typos::tokens::Parser, + parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, reporter: &dyn typos::report::Report, ) -> Result<(), ignore::Error> { @@ -14,7 +14,7 @@ pub(crate) fn check_path( pub(crate) fn check_path_parallel( walk: ignore::WalkParallel, checks: &dyn typos::checks::Check, - parser: &typos::tokens::Parser, + parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, reporter: &dyn typos::report::Report, ) -> Result<(), ignore::Error> { @@ -37,7 +37,7 @@ pub(crate) fn check_path_parallel( fn check_entry( entry: Result, checks: &dyn typos::checks::Check, - parser: &typos::tokens::Parser, + parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, reporter: &dyn typos::report::Report, ) -> Result<(), ignore::Error> { diff --git a/src/main.rs b/src/main.rs index 80d0b99..6275b95 100644 --- a/src/main.rs +++ b/src/main.rs @@ -61,7 +61,7 @@ fn run() -> proc_exit::ExitResult { config.default.update(&args.overrides); let config = config; - let parser = typos::tokens::ParserBuilder::new() + let parser = typos::tokens::TokenizerBuilder::new() .ignore_hex(config.default.ignore_hex()) .leading_digits(config.default.identifier_leading_digits()) .leading_chars(config.default.identifier_leading_chars().to_owned()) From e741f96de33c05731328221b156d46ba59767a21 Mon Sep 17 00:00:00 2001 
From: Ed Page Date: Wed, 30 Dec 2020 18:58:35 -0600 Subject: [PATCH 03/16] refactor(typos): Decouple parsing from checks --- crates/typos/src/checks.rs | 170 ++++++++++++++----------------------- crates/typos/src/dict.rs | 15 ++++ crates/typos/src/lib.rs | 4 +- crates/typos/src/parser.rs | 164 +++++++++++++++++++++++++++++++++++ crates/typos/src/report.rs | 8 +- 5 files changed, 250 insertions(+), 111 deletions(-) create mode 100644 crates/typos/src/parser.rs diff --git a/crates/typos/src/checks.rs b/crates/typos/src/checks.rs index 62bae7d..027857c 100644 --- a/crates/typos/src/checks.rs +++ b/crates/typos/src/checks.rs @@ -3,7 +3,6 @@ use bstr::ByteSlice; use crate::report; use crate::tokens; use crate::Dictionary; -use crate::Status; pub trait Check: Send + Sync { fn check_str( @@ -172,44 +171,23 @@ impl Check for Typos { fn check_str( &self, buffer: &str, - parser: &tokens::Tokenizer, + tokenizer: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - for ident in parser.parse_str(buffer) { - match dictionary.correct_ident(ident) { - Some(Status::Valid) => {} - Some(corrections) => { - let byte_offset = ident.offset(); - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), - byte_offset, - typo: ident.token(), - corrections, - }; - reporter.report(msg.into())?; - } - None => { - for word in ident.split() { - match dictionary.correct_word(word) { - Some(Status::Valid) => {} - Some(corrections) => { - let byte_offset = word.offset(); - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), - byte_offset, - typo: word.token(), - corrections, - }; - reporter.report(msg.into())?; - } - None => {} - } - } - } - } + let parser = crate::ParserBuilder::new() + .tokenizer(tokenizer) + .dictionary(dictionary) + .typos(); + for typo in parser.parse_str(buffer) { + let msg = report::Typo { + context: None, + buffer: 
std::borrow::Cow::Borrowed(buffer.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo, + corrections: typo.corrections, + }; + reporter.report(msg.into())?; } Ok(()) } @@ -217,46 +195,24 @@ impl Check for Typos { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Tokenizer, + tokenizer: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - for ident in parser.parse_bytes(buffer) { - match dictionary.correct_ident(ident) { - Some(Status::Valid) => {} - Some(corrections) => { - let byte_offset = ident.offset(); - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer), - byte_offset, - typo: ident.token(), - corrections, - }; - reporter.report(msg.into())?; - } - None => { - for word in ident.split() { - match dictionary.correct_word(word) { - Some(Status::Valid) => {} - Some(corrections) => { - let byte_offset = word.offset(); - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer), - byte_offset, - typo: word.token(), - corrections, - }; - reporter.report(msg.into())?; - } - None => {} - } - } - } - } + let parser = crate::ParserBuilder::new() + .tokenizer(tokenizer) + .dictionary(dictionary) + .typos(); + for typo in parser.parse_bytes(buffer) { + let msg = report::Typo { + context: None, + buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo, + corrections: typo.corrections, + }; + reporter.report(msg.into())?; } - Ok(()) } @@ -284,16 +240,19 @@ impl Check for ParseIdentifiers { fn check_str( &self, buffer: &str, - parser: &tokens::Tokenizer, + tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Identifier, - data: parser.parse_str(buffer).map(|i| i.token()).collect(), - }; - if !msg.data.is_empty() { + let parser = 
crate::ParserBuilder::new() + .tokenizer(tokenizer) + .identifiers(); + for word in parser.parse_str(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; reporter.report(msg.into())?; } @@ -303,16 +262,19 @@ impl Check for ParseIdentifiers { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Tokenizer, + tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Identifier, - data: parser.parse_bytes(buffer).map(|i| i.token()).collect(), - }; - if !msg.data.is_empty() { + let parser = crate::ParserBuilder::new() + .tokenizer(tokenizer) + .identifiers(); + for word in parser.parse_bytes(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; reporter.report(msg.into())?; } @@ -343,19 +305,17 @@ impl Check for ParseWords { fn check_str( &self, buffer: &str, - parser: &tokens::Tokenizer, + tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: parser - .parse_str(buffer) - .flat_map(|ident| ident.split().map(|i| i.token())) - .collect(), - }; - if !msg.data.is_empty() { + let word_parser = crate::ParserBuilder::new().tokenizer(tokenizer).words(); + for word in word_parser.parse_str(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; reporter.report(msg.into())?; } @@ -365,19 +325,17 @@ impl Check for ParseWords { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Tokenizer, + tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: 
parser - .parse_bytes(buffer) - .flat_map(|ident| ident.split().map(|i| i.token())) - .collect(), - }; - if !msg.data.is_empty() { + let parser = crate::ParserBuilder::new().tokenizer(tokenizer).words(); + for word in parser.parse_bytes(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; reporter.report(msg.into())?; } diff --git a/crates/typos/src/dict.rs b/crates/typos/src/dict.rs index 2fded93..083ebe2 100644 --- a/crates/typos/src/dict.rs +++ b/crates/typos/src/dict.rs @@ -47,3 +47,18 @@ pub trait Dictionary: Send + Sync { fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option>; } + +pub(crate) struct NullDictionary; + +impl Dictionary for NullDictionary { + fn correct_ident<'s, 'w>( + &'s self, + _ident: crate::tokens::Identifier<'w>, + ) -> Option> { + None + } + + fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option> { + None + } +} diff --git a/crates/typos/src/lib.rs b/crates/typos/src/lib.rs index 1cb77c9..7c09efb 100644 --- a/crates/typos/src/lib.rs +++ b/crates/typos/src/lib.rs @@ -1,7 +1,9 @@ mod dict; +mod parser; pub mod checks; pub mod report; pub mod tokens; -pub use crate::dict::*; +pub use dict::*; +pub use parser::*; diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs new file mode 100644 index 0000000..613fdad --- /dev/null +++ b/crates/typos/src/parser.rs @@ -0,0 +1,164 @@ +use crate::tokens; +use crate::Dictionary; + +#[derive(Clone)] +pub struct ParserBuilder<'p, 'd> { + tokenizer: Option<&'p tokens::Tokenizer>, + dictionary: &'d dyn Dictionary, +} + +impl<'p> ParserBuilder<'p, 'static> { + pub fn new() -> Self { + Default::default() + } +} + +impl<'p, 'd> ParserBuilder<'p, 'd> { + pub fn tokenizer(mut self, tokenizer: &'p tokens::Tokenizer) -> Self { + self.tokenizer = Some(tokenizer); + self + } + + pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> { + ParserBuilder { + tokenizer: 
self.tokenizer, + dictionary: dictionary, + } + } + + pub fn typos(&self) -> TyposParser<'p, 'd> { + TyposParser { + tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), + dictionary: self.dictionary, + } + } + + pub fn identifiers(&self) -> IdentifiersParser<'p> { + IdentifiersParser { + tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), + } + } + + pub fn words(&self) -> WordsParser<'p> { + WordsParser { + tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), + } + } +} + +impl<'p> Default for ParserBuilder<'p, 'static> { + fn default() -> Self { + Self { + tokenizer: None, + dictionary: &crate::NullDictionary, + } + } +} + +static DEFAULT_TOKENIZER: once_cell::sync::Lazy = + once_cell::sync::Lazy::new(|| tokens::Tokenizer::new()); + +#[derive(Clone)] +pub struct TyposParser<'p, 'd> { + tokenizer: &'p tokens::Tokenizer, + dictionary: &'d dyn Dictionary, +} + +impl<'p, 'd> TyposParser<'p, 'd> { + pub fn parse_str<'b, 's: 'b>(&'s self, buffer: &'b str) -> impl Iterator> { + self.tokenizer + .parse_str(buffer) + .flat_map(move |ident| self.process_ident(ident)) + } + + pub fn parse_bytes<'b, 's: 'b>(&'s self, buffer: &'b [u8]) -> impl Iterator> { + self.tokenizer + .parse_bytes(buffer) + .flat_map(move |ident| self.process_ident(ident)) + } + + fn process_ident<'i, 's: 'i>( + &'s self, + ident: tokens::Identifier<'i>, + ) -> impl Iterator> { + match self.dictionary.correct_ident(ident) { + Some(crate::Status::Valid) => itertools::Either::Left(None.into_iter()), + Some(corrections) => { + let typo = Typo { + byte_offset: ident.offset(), + typo: ident.token(), + corrections, + }; + itertools::Either::Left(Some(typo).into_iter()) + } + None => itertools::Either::Right( + ident + .split() + .filter_map(move |word| self.process_word(word)), + ), + } + } + + fn process_word<'w, 's: 'w>(&'s self, word: tokens::Word<'w>) -> Option> { + match self.dictionary.correct_word(word) { + Some(crate::Status::Valid) => None, + Some(corrections) => 
{ + let typo = Typo { + byte_offset: word.offset(), + typo: word.token(), + corrections, + }; + Some(typo) + } + None => None, + } + } +} + +#[derive(Clone, Debug, derive_setters::Setters)] +#[non_exhaustive] +pub struct Typo<'m> { + pub byte_offset: usize, + pub typo: &'m str, + pub corrections: crate::Status<'m>, +} + +impl<'m> Default for Typo<'m> { + fn default() -> Self { + Self { + byte_offset: 0, + typo: "", + corrections: crate::Status::Invalid, + } + } +} + +#[derive(Debug, Clone)] +pub struct IdentifiersParser<'p> { + tokenizer: &'p tokens::Tokenizer, +} + +impl<'p> IdentifiersParser<'p> { + pub fn parse_str(&self, buffer: &'p str) -> impl Iterator> { + self.tokenizer.parse_str(buffer) + } + + pub fn parse_bytes(&self, buffer: &'p [u8]) -> impl Iterator> { + self.tokenizer.parse_bytes(buffer) + } +} + +#[derive(Debug, Clone)] +pub struct WordsParser<'p> { + tokenizer: &'p tokens::Tokenizer, +} + +impl<'p> WordsParser<'p> { + pub fn parse_str(&self, buffer: &'p str) -> impl Iterator> { + self.tokenizer.parse_str(buffer).flat_map(|i| i.split()) + } + + pub fn parse_bytes(&self, buffer: &'p [u8]) -> impl Iterator> { + self.tokenizer.parse_bytes(buffer).flat_map(|i| i.split()) + } +} diff --git a/crates/typos/src/report.rs b/crates/typos/src/report.rs index d2d7ce9..bce2d3d 100644 --- a/crates/typos/src/report.rs +++ b/crates/typos/src/report.rs @@ -168,7 +168,7 @@ pub struct Parse<'m> { #[serde(flatten)] pub context: Option>, pub kind: ParseKind, - pub data: Vec<&'m str>, + pub data: &'m str, } impl<'m> Default for Parse<'m> { @@ -176,7 +176,7 @@ impl<'m> Default for Parse<'m> { Self { context: None, kind: ParseKind::Identifier, - data: vec![], + data: "", } } } @@ -265,7 +265,7 @@ impl Report for PrintBrief { writeln!(io::stdout(), "{}", msg.path.display())?; } Message::Parse(msg) => { - writeln!(io::stdout(), "{}", itertools::join(msg.data.iter(), " "))?; + writeln!(io::stdout(), "{}", msg.data)?; } Message::Error(msg) => { log::error!("{}: {}", 
context_display(&msg.context), msg.msg); @@ -289,7 +289,7 @@ impl Report for PrintLong { writeln!(io::stdout(), "{}", msg.path.display())?; } Message::Parse(msg) => { - writeln!(io::stdout(), "{}", itertools::join(msg.data.iter(), " "))?; + writeln!(io::stdout(), "{}", msg.data)?; } Message::Error(msg) => { log::error!("{}: {}", context_display(&msg.context), msg.msg); From bc90bacff2f6e8502f6ed761b068e3033da629e5 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 30 Dec 2020 19:41:08 -0600 Subject: [PATCH 04/16] refactor(typos): Pull out file logic --- Cargo.lock | 144 +++++---- Cargo.toml | 6 + benches/checks.rs | 22 +- crates/typos/Cargo.toml | 5 - crates/typos/src/checks.rs | 447 --------------------------- crates/typos/src/dict.rs | 2 +- crates/typos/src/lib.rs | 2 - crates/typos/src/parser.rs | 2 +- src/args.rs | 10 +- src/checks.rs | 463 +++++++++++++++++++++++++++- src/diff.rs | 12 +- src/lib.rs | 4 + src/main.rs | 19 +- src/replace.rs | 40 +-- {crates/typos/src => src}/report.rs | 16 +- 15 files changed, 596 insertions(+), 598 deletions(-) delete mode 100644 crates/typos/src/checks.rs rename {crates/typos/src => src}/report.rs (96%) diff --git a/Cargo.lock b/Cargo.lock index 6630c4e..e624421 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,9 +2,9 @@ # It is not intended for manual editing. 
[[package]] name = "addr2line" -version = "0.14.0" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c0929d69e78dd9bf5408269919fcbcaeb2e35e5d43e5815517cdc6a8e11a423" +checksum = "a55f82cfe485775d02112886f4169bde0c5894d75e79ead7eafe7e40a25e45f7" dependencies = [ "gimli", ] @@ -17,9 +17,9 @@ checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" [[package]] name = "ahash" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "865f8b0b3fced577b7df82e9b0eb7609595d7209c0b39e78d0646672e244b1b1" +checksum = "a75b7e6a93ecd6dbd2c225154d0fa7f86205574ecaa6c87429fb5f66ee677c44" dependencies = [ "getrandom 0.2.0", "lazy_static", @@ -46,9 +46,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.34" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf8dcb5b4bbaa28653b647d8c77bd4ed40183b48882e130c1f1ffb73de069fd7" +checksum = "ee67c11feeac938fae061b232e38e0b6d94f97a9df10e6271319325ac4c56a86" [[package]] name = "arrayvec" @@ -207,12 +207,6 @@ dependencies = [ "unicase", ] -[[package]] -name = "const_fn" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c478836e029dcef17fb47c89023448c64f781a046e0300e257ad8225ae59afab" - [[package]] name = "content_inspector" version = "0.2.4" @@ -224,13 +218,12 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec91540d98355f690a86367e566ecad2e9e579f230230eb7c21398372be73ea5" +checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d" dependencies = [ "autocfg", "cfg-if 1.0.0", - "const_fn", "lazy_static", ] @@ -275,9 +268,9 @@ dependencies = [ "fnv", "ident_case", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "strsim 0.9.3", - "syn 1.0.50", + "syn 1.0.57", ] 
[[package]] @@ -287,8 +280,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" dependencies = [ "darling_core", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", ] [[package]] @@ -312,8 +305,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41cb0e6161ad61ed084a36ba71fbba9e3ac5aee3606fb607fe08da6acbcf3d8c" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", ] [[package]] @@ -324,8 +317,8 @@ checksum = "6604612c19dd3bb353650b715b61f09bcb089dd17bdca1a9a42637079bf5e428" dependencies = [ "darling", "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", ] [[package]] @@ -374,8 +367,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "946ee94e3dbf58fdd324f9ce245c7b238d46a66f00e86a020b71996349e46cce" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", ] [[package]] @@ -421,17 +414,17 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "funty" -version = "1.0.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ba62103ce691c2fd80fbae2213dfdda9ce60804973ac6b6e97de818ea7f52c8" +checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" [[package]] name = "getrandom" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", "libc", "wasi", ] @@ -478,9 +471,9 @@ dependencies = [ [[package]] name = "heck" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" +checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" dependencies = [ "unicode-segmentation", ] @@ -559,9 +552,9 @@ dependencies = [ [[package]] name = "itoa" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" +checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" [[package]] name = "lazy_static" @@ -584,9 +577,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.80" +version = "0.2.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614" +checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb" [[package]] name = "log" @@ -730,9 +723,9 @@ checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" [[package]] name = "predicates" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96bfead12e90dccead362d62bb2c90a5f6fc4584963645bc7f71a735e0b0735a" +checksum = "73dd9b7b200044694dfede9edf907c1ca19630908443e9447e624993700c6932" dependencies = [ "difference", "float-cmp", @@ -743,15 +736,15 @@ dependencies = [ [[package]] name = "predicates-core" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06075c3a3e92559ff8929e7a280684489ea27fe44805174c3ebd9328dcb37178" +checksum = "fb3dbeaaf793584e29c58c7e3a82bbb3c7c06b63cea68d13b0e3cddc124104dc" [[package]] name = "predicates-tree" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e63c4859013b38a76eca2414c64911fba30def9e3202ac461a2d22831220124" +checksum = "aee95d988ee893cb35c06b148c80ed2cd52c8eea927f50ba7a0be1a786aeab73" dependencies = [ 
"predicates-core", "treeline", @@ -771,8 +764,8 @@ checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", "version_check", ] @@ -783,7 +776,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "version_check", ] @@ -822,9 +815,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" dependencies = [ "proc-macro2 1.0.24", ] @@ -841,7 +834,7 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" dependencies = [ - "getrandom 0.1.15", + "getrandom 0.1.16", "libc", "rand_chacha", "rand_core", @@ -865,7 +858,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" dependencies = [ - "getrandom 0.1.15", + "getrandom 0.1.16", ] [[package]] @@ -975,29 +968,29 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.117" +version = "1.0.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a" +checksum = "06c64263859d87aa2eb554587e2d23183398d617427327cf2b3d0ed8c69e4800" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.117" +version = "1.0.118" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e" +checksum = "c84d3526699cd55261af4b941e4e725444df67aa4f9e6a3564f18030d12672df" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", ] [[package]] name = "serde_json" -version = "1.0.59" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcac07dbffa1c65e7f816ab9eba78eb142c6d44410f4eeba1e26e4f5dfa56b95" +checksum = "4fceb2595057b6891a4ee808f70054bd2d12f0e97f1cbb78689b59f676df325a" dependencies = [ "itoa", "ryu", @@ -1048,8 +1041,8 @@ dependencies = [ "heck", "proc-macro-error", "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", ] [[package]] @@ -1065,12 +1058,12 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.50" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443b4178719c5a851e1bde36ce12da21d74a0e60b4d982ec3385a933c812f0f6" +checksum = "4211ce9909eb971f111059df92c45640aad50a619cf55cd76476be803c4c68e6" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "unicode-xid 0.2.1", ] @@ -1114,22 +1107,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e9ae34b84616eedaaf1e9dd6026dbe00dcafa92aa0c8077cb69df1fcfe5e53e" +checksum = "76cc616c6abf8c8928e2fdcc0dbfab37175edd8fb49a4641066ad1364fdab146" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ba20f23e85b10754cd195504aebf6a27e2e6cbe28c17778a0c930724628dd56" +checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 
1.0.8", + "syn 1.0.57", ] [[package]] @@ -1143,9 +1136,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75cf45bb0bef80604d001caaec0d09da99611b3c0fd39d3080468875cdb65645" +checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa" dependencies = [ "serde", ] @@ -1161,16 +1154,11 @@ name = "typos" version = "0.3.0" dependencies = [ "anyhow", - "bstr", - "content_inspector", - "derive_more 0.99.11", - "derive_setters", "itertools", "log", "once_cell", "regex", "serde", - "serde_json", "thiserror", "unicode-segmentation", ] @@ -1185,21 +1173,27 @@ dependencies = [ "bstr", "clap", "clap-verbosity-flag", + "content_inspector", + "derive_more 0.99.11", + "derive_setters", "difflib", "env_logger 0.8.2", "human-panic", "ignore", + "itertools", "log", "phf", "predicates", "proc-exit", "serde", + "serde_json", "structopt", "toml", "typos", "typos-dict", "typos-vars", "unicase", + "unicode-segmentation", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index bd427a7..32a6f22 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,12 @@ ahash = "0.6.1" difflib = "0.4" proc-exit = "1.0" human-panic = "1.0.3" +content_inspector = "0.2.4" +unicode-segmentation = "1.6.0" +derive_more = "0.99.11" +derive_setters = "0.1" +itertools = "0.9" +serde_json = "1.0" [dev-dependencies] assert_fs = "1.0" diff --git a/benches/checks.rs b/benches/checks.rs index f6aa46c..92e4eb8 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -5,13 +5,13 @@ extern crate test; mod data; use assert_fs::prelude::*; -use typos::checks::Check; +use typos_cli::checks::Check; fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); - let checks = typos::checks::TyposSettings::new().build_identifier_parser(); - b.iter(|| checks.check_str(data, &parser, 
&corrections, &typos::report::PrintSilent)); + let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser(); + b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent)); } #[bench] @@ -47,13 +47,13 @@ fn parse_idents_corpus_str(b: &mut test::Bencher) { fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); - let checks = typos::checks::TyposSettings::new().build_identifier_parser(); + let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser(); b.iter(|| { checks.check_bytes( data.as_bytes(), &parser, &corrections, - &typos::report::PrintSilent, + &typos_cli::report::PrintSilent, ) }); } @@ -91,8 +91,8 @@ fn parse_idents_corpus_bytes(b: &mut test::Bencher) { fn bench_parse_word_str(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); - let checks = typos::checks::TyposSettings::new().build_word_parser(); - b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent)); + let checks = typos_cli::checks::TyposSettings::new().build_word_parser(); + b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent)); } #[bench] @@ -128,8 +128,8 @@ fn parse_words_corpus(b: &mut test::Bencher) { fn bench_typos(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); - let checks = typos::checks::TyposSettings::new().build_typos(); - b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent)); + let checks = typos_cli::checks::TyposSettings::new().build_typos(); + b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent)); } #[bench] @@ -169,14 +169,14 @@ fn bench_check_file(data: &str, b: 
&mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); - let checks = typos::checks::TyposSettings::new().build_typos(); + let checks = typos_cli::checks::TyposSettings::new().build_typos(); b.iter(|| { checks.check_file( sample_path.path(), true, &parser, &corrections, - &typos::report::PrintSilent, + &typos_cli::report::PrintSilent, ) }); diff --git a/crates/typos/Cargo.toml b/crates/typos/Cargo.toml index 15a317e..a128a7c 100644 --- a/crates/typos/Cargo.toml +++ b/crates/typos/Cargo.toml @@ -20,11 +20,6 @@ thiserror = "1.0" regex = "1.3" once_cell = "1.2.0" serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" itertools = "0.9" -bstr = "0.2" log = "0.4" unicode-segmentation = "1.7.1" -derive_more = "0.99.11" -derive_setters = "0.1" -content_inspector = "0.2.4" diff --git a/crates/typos/src/checks.rs b/crates/typos/src/checks.rs deleted file mode 100644 index 027857c..0000000 --- a/crates/typos/src/checks.rs +++ /dev/null @@ -1,447 +0,0 @@ -use bstr::ByteSlice; - -use crate::report; -use crate::tokens; -use crate::Dictionary; - -pub trait Check: Send + Sync { - fn check_str( - &self, - buffer: &str, - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error>; - - fn check_bytes( - &self, - buffer: &[u8], - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error>; - - fn check_filenames(&self) -> bool; - - fn check_files(&self) -> bool; - - fn binary(&self) -> bool; - - fn check_filename( - &self, - path: &std::path::Path, - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - if !self.check_filenames() { - return Ok(()); - } - - if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - let context_reporter = ReportContext { - reporter, - 
context: report::PathContext { path }.into(), - }; - self.check_str(file_name, parser, dictionary, &context_reporter)?; - } - - Ok(()) - } - - fn check_file( - &self, - path: &std::path::Path, - explicit: bool, - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - if !self.check_files() { - return Ok(()); - } - - let buffer = read_file(path, reporter)?; - let (buffer, content_type) = massage_data(buffer)?; - if !explicit && !self.binary() && content_type.is_binary() { - let msg = report::BinaryFile { path }; - reporter.report(msg.into())?; - return Ok(()); - } - - for (line_idx, line) in buffer.lines().enumerate() { - let line_num = line_idx + 1; - let context_reporter = ReportContext { - reporter, - context: report::FileContext { path, line_num }.into(), - }; - self.check_bytes(line, parser, dictionary, &context_reporter)?; - } - - Ok(()) - } -} - -struct ReportContext<'m, 'r> { - reporter: &'r dyn report::Report, - context: report::Context<'m>, -} - -impl<'m, 'r> report::Report for ReportContext<'m, 'r> { - fn report(&self, msg: report::Message) -> Result<(), std::io::Error> { - let msg = msg.context(Some(self.context.clone())); - self.reporter.report(msg) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct TyposSettings { - check_filenames: bool, - check_files: bool, - binary: bool, -} - -impl TyposSettings { - pub fn new() -> Self { - Default::default() - } - - pub fn check_filenames(&mut self, yes: bool) -> &mut Self { - self.check_filenames = yes; - self - } - - pub fn check_files(&mut self, yes: bool) -> &mut Self { - self.check_files = yes; - self - } - - pub fn binary(&mut self, yes: bool) -> &mut Self { - self.binary = yes; - self - } - - pub fn build_typos(&self) -> Typos { - Typos { - check_filenames: self.check_filenames, - check_files: self.check_files, - binary: self.binary, - } - } - - pub fn build_identifier_parser(&self) -> ParseIdentifiers { - ParseIdentifiers 
{ - check_filenames: self.check_filenames, - check_files: self.check_files, - binary: self.binary, - } - } - - pub fn build_word_parser(&self) -> ParseWords { - ParseWords { - check_filenames: self.check_filenames, - check_files: self.check_files, - binary: self.binary, - } - } - - pub fn build_files(&self) -> Files { - Files {} - } -} - -impl Default for TyposSettings { - fn default() -> Self { - Self { - check_filenames: true, - check_files: true, - binary: false, - } - } -} - -#[derive(Debug, Clone)] -pub struct Typos { - check_filenames: bool, - check_files: bool, - binary: bool, -} - -impl Check for Typos { - fn check_str( - &self, - buffer: &str, - tokenizer: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let parser = crate::ParserBuilder::new() - .tokenizer(tokenizer) - .dictionary(dictionary) - .typos(); - for typo in parser.parse_str(buffer) { - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), - byte_offset: typo.byte_offset, - typo: typo.typo, - corrections: typo.corrections, - }; - reporter.report(msg.into())?; - } - Ok(()) - } - - fn check_bytes( - &self, - buffer: &[u8], - tokenizer: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let parser = crate::ParserBuilder::new() - .tokenizer(tokenizer) - .dictionary(dictionary) - .typos(); - for typo in parser.parse_bytes(buffer) { - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), - byte_offset: typo.byte_offset, - typo: typo.typo, - corrections: typo.corrections, - }; - reporter.report(msg.into())?; - } - Ok(()) - } - - fn check_filenames(&self) -> bool { - self.check_filenames - } - - fn check_files(&self) -> bool { - self.check_files - } - - fn binary(&self) -> bool { - self.binary - } -} - -#[derive(Debug, Clone)] -pub struct ParseIdentifiers { - 
check_filenames: bool, - check_files: bool, - binary: bool, -} - -impl Check for ParseIdentifiers { - fn check_str( - &self, - buffer: &str, - tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let parser = crate::ParserBuilder::new() - .tokenizer(tokenizer) - .identifiers(); - for word in parser.parse_str(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; - } - - Ok(()) - } - - fn check_bytes( - &self, - buffer: &[u8], - tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let parser = crate::ParserBuilder::new() - .tokenizer(tokenizer) - .identifiers(); - for word in parser.parse_bytes(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; - } - - Ok(()) - } - - fn check_filenames(&self) -> bool { - self.check_filenames - } - - fn check_files(&self) -> bool { - self.check_files - } - - fn binary(&self) -> bool { - self.binary - } -} - -#[derive(Debug, Clone)] -pub struct ParseWords { - check_filenames: bool, - check_files: bool, - binary: bool, -} - -impl Check for ParseWords { - fn check_str( - &self, - buffer: &str, - tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let word_parser = crate::ParserBuilder::new().tokenizer(tokenizer).words(); - for word in word_parser.parse_str(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; - } - - Ok(()) - } - - fn check_bytes( - &self, - buffer: &[u8], - tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let parser = 
crate::ParserBuilder::new().tokenizer(tokenizer).words(); - for word in parser.parse_bytes(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; - } - - Ok(()) - } - - fn check_filenames(&self) -> bool { - self.check_filenames - } - - fn check_files(&self) -> bool { - self.check_files - } - - fn binary(&self) -> bool { - self.binary - } -} - -#[derive(Debug, Clone)] -pub struct Files {} - -impl Check for Files { - fn check_str( - &self, - _buffer: &str, - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_bytes( - &self, - _buffer: &[u8], - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_filenames(&self) -> bool { - true - } - - fn check_files(&self) -> bool { - true - } - - fn binary(&self) -> bool { - true - } - - fn check_filename( - &self, - _path: &std::path::Path, - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_file( - &self, - path: &std::path::Path, - _explicit: bool, - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let msg = report::File::new(path); - reporter.report(msg.into())?; - - Ok(()) - } -} - -fn read_file( - path: &std::path::Path, - reporter: &dyn report::Report, -) -> Result, std::io::Error> { - let buffer = match std::fs::read(path) { - Ok(buffer) => buffer, - Err(err) => { - let msg = report::Error::new(err.to_string()); - reporter.report(msg.into())?; - Vec::new() - } - }; - Ok(buffer) -} - -fn massage_data( - buffer: Vec, -) -> Result<(Vec, content_inspector::ContentType), std::io::Error> { - let mut content_type = 
content_inspector::inspect(&buffer); - - // HACK: We only support UTF-8 at the moment - if content_type != content_inspector::ContentType::UTF_8_BOM - && content_type != content_inspector::ContentType::UTF_8 - { - content_type = content_inspector::ContentType::BINARY; - } - - Ok((buffer, content_type)) -} diff --git a/crates/typos/src/dict.rs b/crates/typos/src/dict.rs index 083ebe2..6e0a7f8 100644 --- a/crates/typos/src/dict.rs +++ b/crates/typos/src/dict.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize, derive_more::From)] +#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)] #[serde(rename_all = "snake_case")] #[serde(untagged)] pub enum Status<'c> { diff --git a/crates/typos/src/lib.rs b/crates/typos/src/lib.rs index 7c09efb..93ba77d 100644 --- a/crates/typos/src/lib.rs +++ b/crates/typos/src/lib.rs @@ -1,8 +1,6 @@ mod dict; mod parser; -pub mod checks; -pub mod report; pub mod tokens; pub use dict::*; diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs index 613fdad..d427da1 100644 --- a/crates/typos/src/parser.rs +++ b/crates/typos/src/parser.rs @@ -115,7 +115,7 @@ impl<'p, 'd> TyposParser<'p, 'd> { } } -#[derive(Clone, Debug, derive_setters::Setters)] +#[derive(Clone, Debug)] #[non_exhaustive] pub struct Typo<'m> { pub byte_offset: usize, diff --git a/src/args.rs b/src/args.rs index 1b1d153..4a3398a 100644 --- a/src/args.rs +++ b/src/args.rs @@ -12,13 +12,13 @@ arg_enum! 
{ } } -pub const PRINT_SILENT: typos::report::PrintSilent = typos::report::PrintSilent; -pub const PRINT_BRIEF: typos::report::PrintBrief = typos::report::PrintBrief; -pub const PRINT_LONG: typos::report::PrintLong = typos::report::PrintLong; -pub const PRINT_JSON: typos::report::PrintJson = typos::report::PrintJson; +pub const PRINT_SILENT: typos_cli::report::PrintSilent = typos_cli::report::PrintSilent; +pub const PRINT_BRIEF: typos_cli::report::PrintBrief = typos_cli::report::PrintBrief; +pub const PRINT_LONG: typos_cli::report::PrintLong = typos_cli::report::PrintLong; +pub const PRINT_JSON: typos_cli::report::PrintJson = typos_cli::report::PrintJson; impl Format { - pub(crate) fn reporter(self) -> &'static dyn typos::report::Report { + pub(crate) fn reporter(self) -> &'static dyn typos_cli::report::Report { match self { Format::Silent => &PRINT_SILENT, Format::Brief => &PRINT_BRIEF, diff --git a/src/checks.rs b/src/checks.rs index 78b9718..7692cc0 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -1,9 +1,456 @@ -pub(crate) fn check_path( +use bstr::ByteSlice; + +use crate::report; +use typos::tokens; +use typos::Dictionary; + +pub trait Check: Send + Sync { + fn check_str( + &self, + buffer: &str, + parser: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error>; + + fn check_bytes( + &self, + buffer: &[u8], + parser: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error>; + + fn check_filenames(&self) -> bool; + + fn check_files(&self) -> bool; + + fn binary(&self) -> bool; + + fn check_filename( + &self, + path: &std::path::Path, + parser: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + if !self.check_filenames() { + return Ok(()); + } + + if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { + let context_reporter = ReportContext { + reporter, + 
context: report::PathContext { path }.into(), + }; + self.check_str(file_name, parser, dictionary, &context_reporter)?; + } + + Ok(()) + } + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + parser: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + if !self.check_files() { + return Ok(()); + } + + let buffer = read_file(path, reporter)?; + let (buffer, content_type) = massage_data(buffer)?; + if !explicit && !self.binary() && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + return Ok(()); + } + + for (line_idx, line) in buffer.lines().enumerate() { + let line_num = line_idx + 1; + let context_reporter = ReportContext { + reporter, + context: report::FileContext { path, line_num }.into(), + }; + self.check_bytes(line, parser, dictionary, &context_reporter)?; + } + + Ok(()) + } +} + +struct ReportContext<'m, 'r> { + reporter: &'r dyn report::Report, + context: report::Context<'m>, +} + +impl<'m, 'r> report::Report for ReportContext<'m, 'r> { + fn report(&self, msg: report::Message) -> Result<(), std::io::Error> { + let msg = msg.context(Some(self.context.clone())); + self.reporter.report(msg) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TyposSettings { + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl TyposSettings { + pub fn new() -> Self { + Default::default() + } + + pub fn check_filenames(&mut self, yes: bool) -> &mut Self { + self.check_filenames = yes; + self + } + + pub fn check_files(&mut self, yes: bool) -> &mut Self { + self.check_files = yes; + self + } + + pub fn binary(&mut self, yes: bool) -> &mut Self { + self.binary = yes; + self + } + + pub fn build_typos(&self) -> Typos { + Typos { + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + } + } + + pub fn build_identifier_parser(&self) -> ParseIdentifiers { + ParseIdentifiers 
{ + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + } + } + + pub fn build_word_parser(&self) -> ParseWords { + ParseWords { + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + } + } + + pub fn build_files(&self) -> Files { + Files {} + } +} + +impl Default for TyposSettings { + fn default() -> Self { + Self { + check_filenames: true, + check_files: true, + binary: false, + } + } +} + +#[derive(Debug, Clone)] +pub struct Typos { + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl Check for Typos { + fn check_str( + &self, + buffer: &str, + tokenizer: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = typos::ParserBuilder::new() + .tokenizer(tokenizer) + .dictionary(dictionary) + .typos(); + for typo in parser.parse_str(buffer) { + let msg = report::Typo { + context: None, + buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo, + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + Ok(()) + } + + fn check_bytes( + &self, + buffer: &[u8], + tokenizer: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = typos::ParserBuilder::new() + .tokenizer(tokenizer) + .dictionary(dictionary) + .typos(); + for typo in parser.parse_bytes(buffer) { + let msg = report::Typo { + context: None, + buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo, + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + Ok(()) + } + + fn check_filenames(&self) -> bool { + self.check_filenames + } + + fn check_files(&self) -> bool { + self.check_files + } + + fn binary(&self) -> bool { + self.binary + } +} + +#[derive(Debug, Clone)] +pub struct ParseIdentifiers { + 
check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl Check for ParseIdentifiers { + fn check_str( + &self, + buffer: &str, + tokenizer: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = typos::ParserBuilder::new() + .tokenizer(tokenizer) + .identifiers(); + for word in parser.parse_str(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; + reporter.report(msg.into())?; + } + + Ok(()) + } + + fn check_bytes( + &self, + buffer: &[u8], + tokenizer: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = typos::ParserBuilder::new() + .tokenizer(tokenizer) + .identifiers(); + for word in parser.parse_bytes(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; + reporter.report(msg.into())?; + } + + Ok(()) + } + + fn check_filenames(&self) -> bool { + self.check_filenames + } + + fn check_files(&self) -> bool { + self.check_files + } + + fn binary(&self) -> bool { + self.binary + } +} + +#[derive(Debug, Clone)] +pub struct ParseWords { + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl Check for ParseWords { + fn check_str( + &self, + buffer: &str, + tokenizer: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let word_parser = typos::ParserBuilder::new().tokenizer(tokenizer).words(); + for word in word_parser.parse_str(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; + reporter.report(msg.into())?; + } + + Ok(()) + } + + fn check_bytes( + &self, + buffer: &[u8], + tokenizer: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = 
typos::ParserBuilder::new().tokenizer(tokenizer).words(); + for word in parser.parse_bytes(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; + reporter.report(msg.into())?; + } + + Ok(()) + } + + fn check_filenames(&self) -> bool { + self.check_filenames + } + + fn check_files(&self) -> bool { + self.check_files + } + + fn binary(&self) -> bool { + self.binary + } +} + +#[derive(Debug, Clone)] +pub struct Files {} + +impl Check for Files { + fn check_str( + &self, + _buffer: &str, + _parser: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + _reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + Ok(()) + } + + fn check_bytes( + &self, + _buffer: &[u8], + _parser: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + _reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + Ok(()) + } + + fn check_filenames(&self) -> bool { + true + } + + fn check_files(&self) -> bool { + true + } + + fn binary(&self) -> bool { + true + } + + fn check_filename( + &self, + _path: &std::path::Path, + _parser: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + _reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + Ok(()) + } + + fn check_file( + &self, + path: &std::path::Path, + _explicit: bool, + _parser: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let msg = report::File::new(path); + reporter.report(msg.into())?; + + Ok(()) + } +} + +fn read_file( + path: &std::path::Path, + reporter: &dyn report::Report, +) -> Result, std::io::Error> { + let buffer = match std::fs::read(path) { + Ok(buffer) => buffer, + Err(err) => { + let msg = report::Error::new(err.to_string()); + reporter.report(msg.into())?; + Vec::new() + } + }; + Ok(buffer) +} + +fn massage_data( + buffer: Vec, +) -> Result<(Vec, content_inspector::ContentType), std::io::Error> { + let mut content_type = 
content_inspector::inspect(&buffer); + + // HACK: We only support UTF-8 at the moment + if content_type != content_inspector::ContentType::UTF_8_BOM + && content_type != content_inspector::ContentType::UTF_8 + { + content_type = content_inspector::ContentType::BINARY; + } + + Ok((buffer, content_type)) +} +pub fn check_path( walk: ignore::Walk, - checks: &dyn typos::checks::Check, + checks: &dyn Check, parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, - reporter: &dyn typos::report::Report, + reporter: &dyn report::Report, ) -> Result<(), ignore::Error> { for entry in walk { check_entry(entry, checks, parser, dictionary, reporter)?; @@ -11,12 +458,12 @@ pub(crate) fn check_path( Ok(()) } -pub(crate) fn check_path_parallel( +pub fn check_path_parallel( walk: ignore::WalkParallel, - checks: &dyn typos::checks::Check, + checks: &dyn Check, parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, - reporter: &dyn typos::report::Report, + reporter: &dyn report::Report, ) -> Result<(), ignore::Error> { let error: std::sync::Mutex> = std::sync::Mutex::new(Ok(())); walk.run(|| { @@ -36,10 +483,10 @@ pub(crate) fn check_path_parallel( fn check_entry( entry: Result, - checks: &dyn typos::checks::Check, + checks: &dyn Check, parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, - reporter: &dyn typos::report::Report, + reporter: &dyn report::Report, ) -> Result<(), ignore::Error> { let entry = entry?; if entry.file_type().map(|t| t.is_file()).unwrap_or(true) { diff --git a/src/diff.rs b/src/diff.rs index c99ac4e..8457588 100644 --- a/src/diff.rs +++ b/src/diff.rs @@ -4,12 +4,12 @@ use std::sync; use bstr::ByteSlice; pub struct Diff<'r> { - reporter: &'r dyn typos::report::Report, + reporter: &'r dyn crate::report::Report, deferred: sync::Mutex, } impl<'r> Diff<'r> { - pub(crate) fn new(reporter: &'r dyn typos::report::Report) -> Self { + pub fn new(reporter: &'r dyn crate::report::Report) -> Self { Self { reporter, deferred: 
sync::Mutex::new(crate::replace::Deferred::default()), @@ -56,10 +56,10 @@ impl<'r> Diff<'r> { } } -impl<'r> typos::report::Report for Diff<'r> { - fn report(&self, msg: typos::report::Message<'_>) -> Result<(), std::io::Error> { +impl<'r> crate::report::Report for Diff<'r> { + fn report(&self, msg: crate::report::Message<'_>) -> Result<(), std::io::Error> { let typo = match &msg { - typos::report::Message::Typo(typo) => typo, + crate::report::Message::Typo(typo) => typo, _ => return self.reporter.report(msg), }; @@ -69,7 +69,7 @@ impl<'r> typos::report::Report for Diff<'r> { }; match &typo.context { - Some(typos::report::Context::File(file)) => { + Some(crate::report::Context::File(file)) => { let path = file.path.to_owned(); let line_num = file.line_num; let correction = crate::replace::Correction::new( diff --git a/src/lib.rs b/src/lib.rs index db703d1..632c1b0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,6 @@ +pub mod checks; pub mod config; pub mod dict; +pub mod diff; +pub mod replace; +pub mod report; diff --git a/src/main.rs b/src/main.rs index 6275b95..d417642 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,11 +7,12 @@ use std::io::Write; use structopt::StructOpt; mod args; -mod checks; -mod config; -mod dict; -mod diff; -mod replace; +use typos_cli::checks; +use typos_cli::config; +use typos_cli::dict; +use typos_cli::diff; +use typos_cli::replace; +use typos_cli::report; use proc_exit::WithCodeResultExt; @@ -74,7 +75,7 @@ fn run() -> proc_exit::ExitResult { dictionary.identifiers(config.default.extend_identifiers()); dictionary.words(config.default.extend_words()); - let mut settings = typos::checks::TyposSettings::new(); + let mut settings = checks::TyposSettings::new(); settings .check_filenames(config.default.check_filename()) .check_files(config.default.check_file()) @@ -98,8 +99,8 @@ fn run() -> proc_exit::ExitResult { } else { args.format.reporter() }; - let status_reporter = typos::report::MessageStatus::new(output_reporter); - let mut 
reporter: &dyn typos::report::Report = &status_reporter; + let status_reporter = report::MessageStatus::new(output_reporter); + let mut reporter: &dyn report::Report = &status_reporter; let replace_reporter = replace::Replace::new(reporter); let diff_reporter = diff::Diff::new(reporter); if args.diff { @@ -109,7 +110,7 @@ fn run() -> proc_exit::ExitResult { } let (files, identifier_parser, word_parser, checks); - let selected_checks: &dyn typos::checks::Check = if args.files { + let selected_checks: &dyn checks::Check = if args.files { files = settings.build_files(); &files } else if args.identifiers { diff --git a/src/replace.rs b/src/replace.rs index 1ac129a..4bec030 100644 --- a/src/replace.rs +++ b/src/replace.rs @@ -6,12 +6,12 @@ use std::sync; use bstr::ByteSlice; pub struct Replace<'r> { - reporter: &'r dyn typos::report::Report, + reporter: &'r dyn crate::report::Report, deferred: sync::Mutex, } impl<'r> Replace<'r> { - pub(crate) fn new(reporter: &'r dyn typos::report::Report) -> Self { + pub fn new(reporter: &'r dyn crate::report::Report) -> Self { Self { reporter, deferred: sync::Mutex::new(Deferred::default()), @@ -54,10 +54,10 @@ impl<'r> Replace<'r> { } } -impl<'r> typos::report::Report for Replace<'r> { - fn report(&self, msg: typos::report::Message<'_>) -> Result<(), std::io::Error> { +impl<'r> crate::report::Report for Replace<'r> { + fn report(&self, msg: crate::report::Message<'_>) -> Result<(), std::io::Error> { let typo = match &msg { - typos::report::Message::Typo(typo) => typo, + crate::report::Message::Typo(typo) => typo, _ => return self.reporter.report(msg), }; @@ -67,7 +67,7 @@ impl<'r> typos::report::Report for Replace<'r> { }; match &typo.context { - Some(typos::report::Context::File(file)) => { + Some(crate::report::Context::File(file)) => { let path = file.path.to_owned(); let line_num = file.line_num; let correction = @@ -82,7 +82,7 @@ impl<'r> typos::report::Report for Replace<'r> { content.push(correction); Ok(()) } - 
Some(typos::report::Context::Path(path)) => { + Some(crate::report::Context::Path(path)) => { let path = path.path.to_owned(); let correction = Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref()); @@ -97,20 +97,20 @@ impl<'r> typos::report::Report for Replace<'r> { } #[derive(Clone, Debug, Default)] -pub(crate) struct Deferred { - pub(crate) content: BTreeMap>>, - pub(crate) paths: BTreeMap>, +pub struct Deferred { + pub content: BTreeMap>>, + pub paths: BTreeMap>, } #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] -pub(crate) struct Correction { +pub struct Correction { pub byte_offset: usize, pub typo: Vec, pub correction: Vec, } impl Correction { - pub(crate) fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { + pub fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { Self { byte_offset, typo: typo.as_bytes().to_vec(), @@ -119,7 +119,7 @@ impl Correction { } } -pub(crate) fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { +pub fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { let mut corrections: Vec<_> = corrections.iter().collect(); corrections.sort_unstable(); corrections.reverse(); @@ -137,8 +137,8 @@ pub(crate) fn correct(mut line: Vec, corrections: &[Correction]) -> Vec mod test { use super::*; + use crate::report::Report; use assert_fs::prelude::*; - use typos::report::Report; fn simple_correct(line: &str, corrections: Vec<(usize, &str, &str)>) -> String { let line = line.as_bytes().to_vec(); @@ -205,13 +205,13 @@ mod test { let input_file = temp.child("foo.txt"); input_file.write_str("1 foo 2\n3 4 5").unwrap(); - let primary = typos::report::PrintSilent; + let primary = crate::report::PrintSilent; let replace = Replace::new(&primary); replace .report( - typos::report::Typo::default() + crate::report::Typo::default() .context(Some( - typos::report::FileContext::default() + crate::report::FileContext::default() .path(input_file.path()) .line_num(1) .into(), @@ -236,13 +236,13 @@ 
mod test { let input_file = temp.child("foo.txt"); input_file.write_str("foo foo foo").unwrap(); - let primary = typos::report::PrintSilent; + let primary = crate::report::PrintSilent; let replace = Replace::new(&primary); replace .report( - typos::report::Typo::default() + crate::report::Typo::default() .context(Some( - typos::report::PathContext::default() + crate::report::PathContext::default() .path(input_file.path()) .into(), )) diff --git a/crates/typos/src/report.rs b/src/report.rs similarity index 96% rename from crates/typos/src/report.rs rename to src/report.rs index bce2d3d..3d213cd 100644 --- a/crates/typos/src/report.rs +++ b/src/report.rs @@ -72,7 +72,7 @@ pub struct Typo<'m> { pub buffer: Cow<'m, [u8]>, pub byte_offset: usize, pub typo: &'m str, - pub corrections: crate::Status<'m>, + pub corrections: typos::Status<'m>, } impl<'m> Default for Typo<'m> { @@ -82,7 +82,7 @@ impl<'m> Default for Typo<'m> { buffer: Cow::Borrowed(&[]), byte_offset: 0, typo: "", - corrections: crate::Status::Invalid, + corrections: typos::Status::Invalid, } } } @@ -308,8 +308,8 @@ fn print_brief_correction(msg: &Typo) -> Result<(), std::io::Error> { ) .count(); match &msg.corrections { - crate::Status::Valid => {} - crate::Status::Invalid => { + typos::Status::Valid => {} + typos::Status::Invalid => { writeln!( io::stdout(), "{}:{}: `{}` is disallowed", @@ -318,7 +318,7 @@ fn print_brief_correction(msg: &Typo) -> Result<(), std::io::Error> { msg.typo, )?; } - crate::Status::Corrections(corrections) => { + typos::Status::Corrections(corrections) => { writeln!( io::stdout(), "{}:{}: `{}` -> {}", @@ -345,11 +345,11 @@ fn print_long_correction(msg: &Typo) -> Result<(), std::io::Error> { ) .count(); match &msg.corrections { - crate::Status::Valid => {} - crate::Status::Invalid => { + typos::Status::Valid => {} + typos::Status::Invalid => { writeln!(handle, "error: `{}` is disallowed`", msg.typo,)?; } - crate::Status::Corrections(corrections) => { + 
typos::Status::Corrections(corrections) => { writeln!( handle, "error: `{}` should be {}", From 220a79ff300e002a99e663e224d8667ebf84ee6c Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 30 Dec 2020 21:13:20 -0600 Subject: [PATCH 05/16] refactor: Make room for parent function --- benches/checks.rs | 2 +- src/checks.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/benches/checks.rs b/benches/checks.rs index 92e4eb8..3379392 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -171,7 +171,7 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) { let parser = typos::tokens::Tokenizer::new(); let checks = typos_cli::checks::TyposSettings::new().build_typos(); b.iter(|| { - checks.check_file( + checks.check_file_content( sample_path.path(), true, &parser, diff --git a/src/checks.rs b/src/checks.rs index 7692cc0..01bfd91 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -49,7 +49,7 @@ pub trait Check: Send + Sync { Ok(()) } - fn check_file( + fn check_file_content( &self, path: &std::path::Path, explicit: bool, @@ -401,7 +401,7 @@ impl Check for Files { Ok(()) } - fn check_file( + fn check_file_content( &self, path: &std::path::Path, _explicit: bool, @@ -492,7 +492,7 @@ fn check_entry( if entry.file_type().map(|t| t.is_file()).unwrap_or(true) { let explicit = entry.depth() == 0; checks.check_filename(entry.path(), parser, dictionary, reporter)?; - checks.check_file(entry.path(), explicit, parser, dictionary, reporter)?; + checks.check_file_content(entry.path(), explicit, parser, dictionary, reporter)?; } Ok(()) From 6c28376e503ab48f230d06b2e41456e6bb3b12ff Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 30 Dec 2020 21:17:28 -0600 Subject: [PATCH 06/16] refactor: Give checks full control --- src/checks.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/checks.rs b/src/checks.rs index 01bfd91..c84c76a 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -80,6 +80,19 @@ pub trait Check: Send + Sync { 
Ok(()) } + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + parser: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + self.check_filename(path, parser, dictionary, reporter)?; + self.check_file_content(path, explicit, parser, dictionary, reporter)?; + Ok(()) + } } struct ReportContext<'m, 'r> { @@ -491,8 +504,7 @@ fn check_entry( let entry = entry?; if entry.file_type().map(|t| t.is_file()).unwrap_or(true) { let explicit = entry.depth() == 0; - checks.check_filename(entry.path(), parser, dictionary, reporter)?; - checks.check_file_content(entry.path(), explicit, parser, dictionary, reporter)?; + checks.check_file(entry.path(), explicit, parser, dictionary, reporter)?; } Ok(()) From d28174439b512a18d799a3116d15b9b8152863f5 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 30 Dec 2020 21:26:48 -0600 Subject: [PATCH 07/16] refactor: Switch FoundFiles to check_file --- src/checks.rs | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/src/checks.rs b/src/checks.rs index c84c76a..a6ef3d1 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -158,8 +158,10 @@ impl TyposSettings { } } - pub fn build_files(&self) -> Files { - Files {} + pub fn build_files(&self) -> FoundFiles { + FoundFiles { + binary: self.binary, + } } } @@ -369,9 +371,11 @@ impl Check for ParseWords { } #[derive(Debug, Clone)] -pub struct Files {} +pub struct FoundFiles { + binary: bool, +} -impl Check for Files { +impl Check for FoundFiles { fn check_str( &self, _buffer: &str, @@ -401,7 +405,7 @@ impl Check for Files { } fn binary(&self) -> bool { - true + self.binary } fn check_filename( @@ -416,14 +420,38 @@ impl Check for Files { fn check_file_content( &self, - path: &std::path::Path, + _path: &std::path::Path, _explicit: bool, _parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, + _reporter: &dyn report::Report, + ) -> Result<(), 
std::io::Error> { + Ok(()) + } + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + _parser: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let msg = report::File::new(path); - reporter.report(msg.into())?; + // Check `self.binary` first so we can easily check performance of walking vs reading + if self.binary { + let msg = report::File::new(path); + reporter.report(msg.into())?; + } else { + let buffer = read_file(path, reporter)?; + let (_buffer, content_type) = massage_data(buffer)?; + if !explicit && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + } else { + let msg = report::File::new(path); + reporter.report(msg.into())?; + } + } Ok(()) } @@ -458,6 +486,7 @@ fn massage_data( Ok((buffer, content_type)) } + pub fn check_path( walk: ignore::Walk, checks: &dyn Check, From 6e53d7e7196682a10933260bc1b7e669f1ae4f4e Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 30 Dec 2020 21:42:30 -0600 Subject: [PATCH 08/16] refactor: Switch Words/Identifiers to check_file --- src/checks.rs | 150 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 98 insertions(+), 52 deletions(-) diff --git a/src/checks.rs b/src/checks.rs index a6ef3d1..ea8e5cb 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -142,16 +142,16 @@ impl TyposSettings { } } - pub fn build_identifier_parser(&self) -> ParseIdentifiers { - ParseIdentifiers { + pub fn build_identifier_parser(&self) -> Identifiers { + Identifiers { check_filenames: self.check_filenames, check_files: self.check_files, binary: self.binary, } } - pub fn build_word_parser(&self) -> ParseWords { - ParseWords { + pub fn build_word_parser(&self) -> Words { + Words { check_filenames: self.check_filenames, check_files: self.check_files, binary: self.binary, @@ -245,38 +245,37 @@ impl Check for Typos { } #[derive(Debug, Clone)] -pub struct ParseIdentifiers { +pub struct Identifiers { 
check_filenames: bool, check_files: bool, binary: bool, } -impl Check for ParseIdentifiers { +impl Check for Identifiers { fn check_str( &self, - buffer: &str, - tokenizer: &tokens::Tokenizer, + _buffer: &str, + _tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, + _reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let parser = typos::ParserBuilder::new() - .tokenizer(tokenizer) - .identifiers(); - for word in parser.parse_str(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; - } - Ok(()) } fn check_bytes( &self, - buffer: &[u8], + _buffer: &[u8], + _tokenizer: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + _reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + Ok(()) + } + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, @@ -284,13 +283,36 @@ impl Check for ParseIdentifiers { let parser = typos::ParserBuilder::new() .tokenizer(tokenizer) .identifiers(); - for word in parser.parse_bytes(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; + + if self.check_filenames() { + if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { + for word in parser.parse_str(file_name) { + let msg = report::Parse { + context: Some(report::PathContext { path }.into()), + kind: report::ParseKind::Identifier, + data: word.token(), + }; + reporter.report(msg.into())?; + } + } + } + + if self.check_files() { + let buffer = read_file(path, reporter)?; + let (buffer, content_type) = massage_data(buffer)?; + if !explicit && !self.binary() && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + } else { + for word in parser.parse_bytes(&buffer) { + let 
msg = report::Parse { + context: Some(report::FileContext { path, line_num: 0 }.into()), + kind: report::ParseKind::Identifier, + data: word.token(), + }; + reporter.report(msg.into())?; + } + } } Ok(()) @@ -310,48 +332,72 @@ impl Check for ParseIdentifiers { } #[derive(Debug, Clone)] -pub struct ParseWords { +pub struct Words { check_filenames: bool, check_files: bool, binary: bool, } -impl Check for ParseWords { +impl Check for Words { fn check_str( &self, - buffer: &str, - tokenizer: &tokens::Tokenizer, + _buffer: &str, + _tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, + _reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let word_parser = typos::ParserBuilder::new().tokenizer(tokenizer).words(); - for word in word_parser.parse_str(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; - } - Ok(()) } fn check_bytes( &self, - buffer: &[u8], + _buffer: &[u8], + _tokenizer: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + _reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + Ok(()) + } + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { let parser = typos::ParserBuilder::new().tokenizer(tokenizer).words(); - for word in parser.parse_bytes(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; + + if self.check_filenames() { + if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { + for word in parser.parse_str(file_name) { + let msg = report::Parse { + context: Some(report::PathContext { path }.into()), + kind: report::ParseKind::Word, + data: word.token(), + }; + reporter.report(msg.into())?; + } + } + } + + if self.check_files() { + let buffer 
= read_file(path, reporter)?; + let (buffer, content_type) = massage_data(buffer)?; + if !explicit && !self.binary() && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + } else { + for word in parser.parse_bytes(&buffer) { + let msg = report::Parse { + context: Some(report::FileContext { path, line_num: 0 }.into()), + kind: report::ParseKind::Word, + data: word.token(), + }; + reporter.report(msg.into())?; + } + } } Ok(()) From 663eb94d32a980ede97039a67cbe416e736f06b1 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 31 Dec 2020 17:41:32 -0600 Subject: [PATCH 09/16] refactor: Switch Typos to check_file --- benches/checks.rs | 236 +++++++++++++++--------------- src/checks.rs | 358 ++++++++++++---------------------------------- 2 files changed, 212 insertions(+), 382 deletions(-) diff --git a/benches/checks.rs b/benches/checks.rs index 3379392..dcb8dc2 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -7,129 +7,178 @@ mod data; use assert_fs::prelude::*; use typos_cli::checks::Check; -fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) { +fn bench_files(data: &str, b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data).unwrap(); + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); - let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser(); - b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent)); -} - -#[bench] -fn parse_idents_empty_str(b: &mut test::Bencher) { - bench_parse_ident_str(data::EMPTY, b); -} - -#[bench] -fn parse_idents_no_tokens_str(b: &mut test::Bencher) { - bench_parse_ident_str(data::NO_TOKENS, b); -} - -#[bench] -fn parse_idents_single_token_str(b: &mut test::Bencher) { - bench_parse_ident_str(data::SINGLE_TOKEN, b); -} - -#[bench] -fn parse_idents_sherlock_str(b: &mut 
test::Bencher) { - bench_parse_ident_str(data::SHERLOCK, b); -} - -#[bench] -fn parse_idents_code_str(b: &mut test::Bencher) { - bench_parse_ident_str(data::CODE, b); -} - -#[bench] -fn parse_idents_corpus_str(b: &mut test::Bencher) { - bench_parse_ident_str(data::CORPUS, b); -} - -fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) { - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Tokenizer::new(); - let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser(); + let checks = typos_cli::checks::TyposSettings::new().build_files(); b.iter(|| { - checks.check_bytes( - data.as_bytes(), + checks.check_file( + sample_path.path(), + true, &parser, &corrections, &typos_cli::report::PrintSilent, ) }); + + temp.close().unwrap(); } #[bench] -fn parse_idents_empty_bytes(b: &mut test::Bencher) { - bench_parse_ident_bytes(data::EMPTY, b); +fn files_empty(b: &mut test::Bencher) { + bench_files(data::EMPTY, b); } #[bench] -fn parse_idents_no_tokens_bytes(b: &mut test::Bencher) { - bench_parse_ident_bytes(data::NO_TOKENS, b); +fn files_no_tokens(b: &mut test::Bencher) { + bench_files(data::NO_TOKENS, b); } #[bench] -fn parse_idents_single_token_bytes(b: &mut test::Bencher) { - bench_parse_ident_bytes(data::SINGLE_TOKEN, b); +fn files_single_token(b: &mut test::Bencher) { + bench_files(data::SINGLE_TOKEN, b); } #[bench] -fn parse_idents_sherlock_bytes(b: &mut test::Bencher) { - bench_parse_ident_bytes(data::SHERLOCK, b); +fn files_sherlock(b: &mut test::Bencher) { + bench_files(data::SHERLOCK, b); } #[bench] -fn parse_idents_code_bytes(b: &mut test::Bencher) { - bench_parse_ident_bytes(data::CODE, b); +fn files_code(b: &mut test::Bencher) { + bench_files(data::CODE, b); } #[bench] -fn parse_idents_corpus_bytes(b: &mut test::Bencher) { - bench_parse_ident_bytes(data::CORPUS, b); +fn files_corpus(b: &mut test::Bencher) { + bench_files(data::CORPUS, b); } -fn bench_parse_word_str(data: &str, b: &mut 
test::Bencher) { +fn bench_identifiers(data: &str, b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data).unwrap(); + + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); + let parser = typos::tokens::Tokenizer::new(); + let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser(); + b.iter(|| { + checks.check_file( + sample_path.path(), + true, + &parser, + &corrections, + &typos_cli::report::PrintSilent, + ) + }); + + temp.close().unwrap(); +} + +#[bench] +fn identifiers_empty(b: &mut test::Bencher) { + bench_identifiers(data::EMPTY, b); +} + +#[bench] +fn identifiers_no_tokens(b: &mut test::Bencher) { + bench_identifiers(data::NO_TOKENS, b); +} + +#[bench] +fn identifiers_single_token(b: &mut test::Bencher) { + bench_identifiers(data::SINGLE_TOKEN, b); +} + +#[bench] +fn identifiers_sherlock(b: &mut test::Bencher) { + bench_identifiers(data::SHERLOCK, b); +} + +#[bench] +fn identifiers_code(b: &mut test::Bencher) { + bench_identifiers(data::CODE, b); +} + +#[bench] +fn identifiers_corpus(b: &mut test::Bencher) { + bench_identifiers(data::CORPUS, b); +} + +fn bench_words(data: &str, b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data).unwrap(); + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); let checks = typos_cli::checks::TyposSettings::new().build_word_parser(); - b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent)); + b.iter(|| { + checks.check_file( + sample_path.path(), + true, + &parser, + &corrections, + &typos_cli::report::PrintSilent, + ) + }); + + temp.close().unwrap(); } #[bench] -fn parse_words_empty(b: &mut test::Bencher) { - bench_parse_word_str(data::EMPTY, b); +fn words_empty(b: &mut test::Bencher) { + bench_words(data::EMPTY, 
b); } #[bench] -fn parse_words_no_tokens(b: &mut test::Bencher) { - bench_parse_word_str(data::NO_TOKENS, b); +fn words_no_tokens(b: &mut test::Bencher) { + bench_words(data::NO_TOKENS, b); } #[bench] -fn parse_words_single_token(b: &mut test::Bencher) { - bench_parse_word_str(data::SINGLE_TOKEN, b); +fn words_single_token(b: &mut test::Bencher) { + bench_words(data::SINGLE_TOKEN, b); } #[bench] -fn parse_words_sherlock(b: &mut test::Bencher) { - bench_parse_word_str(data::SHERLOCK, b); +fn words_sherlock(b: &mut test::Bencher) { + bench_words(data::SHERLOCK, b); } #[bench] -fn parse_words_code(b: &mut test::Bencher) { - bench_parse_word_str(data::CODE, b); +fn words_code(b: &mut test::Bencher) { + bench_words(data::CODE, b); } #[bench] -fn parse_words_corpus(b: &mut test::Bencher) { - bench_parse_word_str(data::CORPUS, b); +fn words_corpus(b: &mut test::Bencher) { + bench_words(data::CORPUS, b); } fn bench_typos(data: &str, b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data).unwrap(); + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); let checks = typos_cli::checks::TyposSettings::new().build_typos(); - b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent)); + b.iter(|| { + checks.check_file( + sample_path.path(), + true, + &parser, + &corrections, + &typos_cli::report::PrintSilent, + ) + }); + + temp.close().unwrap(); } #[bench] @@ -161,54 +210,3 @@ fn typos_code(b: &mut test::Bencher) { fn typos_corpus(b: &mut test::Bencher) { bench_typos(data::CORPUS, b); } - -fn bench_check_file(data: &str, b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data).unwrap(); - - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Tokenizer::new(); - let 
checks = typos_cli::checks::TyposSettings::new().build_typos(); - b.iter(|| { - checks.check_file_content( - sample_path.path(), - true, - &parser, - &corrections, - &typos_cli::report::PrintSilent, - ) - }); - - temp.close().unwrap(); -} - -#[bench] -fn check_file_empty(b: &mut test::Bencher) { - bench_check_file(data::EMPTY, b); -} - -#[bench] -fn check_file_no_tokens(b: &mut test::Bencher) { - bench_check_file(data::NO_TOKENS, b); -} - -#[bench] -fn check_file_single_token(b: &mut test::Bencher) { - bench_check_file(data::SINGLE_TOKEN, b); -} - -#[bench] -fn check_file_sherlock(b: &mut test::Bencher) { - bench_check_file(data::SHERLOCK, b); -} - -#[bench] -fn check_file_code(b: &mut test::Bencher) { - bench_check_file(data::CODE, b); -} - -#[bench] -fn check_file_corpus(b: &mut test::Bencher) { - bench_check_file(data::CORPUS, b); -} diff --git a/src/checks.rs b/src/checks.rs index ea8e5cb..65d7e48 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -5,82 +5,6 @@ use typos::tokens; use typos::Dictionary; pub trait Check: Send + Sync { - fn check_str( - &self, - buffer: &str, - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error>; - - fn check_bytes( - &self, - buffer: &[u8], - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error>; - - fn check_filenames(&self) -> bool; - - fn check_files(&self) -> bool; - - fn binary(&self) -> bool; - - fn check_filename( - &self, - path: &std::path::Path, - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - if !self.check_filenames() { - return Ok(()); - } - - if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - let context_reporter = ReportContext { - reporter, - context: report::PathContext { path }.into(), - }; - self.check_str(file_name, parser, dictionary, &context_reporter)?; - } - - 
Ok(()) - } - - fn check_file_content( - &self, - path: &std::path::Path, - explicit: bool, - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - if !self.check_files() { - return Ok(()); - } - - let buffer = read_file(path, reporter)?; - let (buffer, content_type) = massage_data(buffer)?; - if !explicit && !self.binary() && content_type.is_binary() { - let msg = report::BinaryFile { path }; - reporter.report(msg.into())?; - return Ok(()); - } - - for (line_idx, line) in buffer.lines().enumerate() { - let line_num = line_idx + 1; - let context_reporter = ReportContext { - reporter, - context: report::FileContext { path, line_num }.into(), - }; - self.check_bytes(line, parser, dictionary, &context_reporter)?; - } - - Ok(()) - } - fn check_file( &self, path: &std::path::Path, @@ -88,23 +12,7 @@ pub trait Check: Send + Sync { parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - self.check_filename(path, parser, dictionary, reporter)?; - self.check_file_content(path, explicit, parser, dictionary, reporter)?; - Ok(()) - } -} - -struct ReportContext<'m, 'r> { - reporter: &'r dyn report::Report, - context: report::Context<'m>, -} - -impl<'m, 'r> report::Report for ReportContext<'m, 'r> { - fn report(&self, msg: report::Message) -> Result<(), std::io::Error> { - let msg = msg.context(Some(self.context.clone())); - self.reporter.report(msg) - } + ) -> Result<(), std::io::Error>; } #[derive(Debug, Clone, PartialEq, Eq)] @@ -183,9 +91,10 @@ pub struct Typos { } impl Check for Typos { - fn check_str( + fn check_file( &self, - buffer: &str, + path: &std::path::Path, + explicit: bool, tokenizer: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, @@ -194,54 +103,47 @@ impl Check for Typos { .tokenizer(tokenizer) .dictionary(dictionary) .typos(); - for typo in parser.parse_str(buffer) { - let msg = 
report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), - byte_offset: typo.byte_offset, - typo: typo.typo, - corrections: typo.corrections, - }; - reporter.report(msg.into())?; + + if self.check_filenames { + if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { + for typo in parser.parse_str(file_name) { + let msg = report::Typo { + context: Some(report::PathContext { path }.into()), + buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo, + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } } - Ok(()) - } - fn check_bytes( - &self, - buffer: &[u8], - tokenizer: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let parser = typos::ParserBuilder::new() - .tokenizer(tokenizer) - .dictionary(dictionary) - .typos(); - for typo in parser.parse_bytes(buffer) { - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), - byte_offset: typo.byte_offset, - typo: typo.typo, - corrections: typo.corrections, - }; - reporter.report(msg.into())?; + if self.check_files { + let buffer = read_file(path, reporter)?; + let (buffer, content_type) = massage_data(buffer)?; + if !explicit && !self.binary && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + } else { + let mut accum_line_num = AccumulateLineNum::new(); + for typo in parser.parse_bytes(&buffer) { + let line_num = accum_line_num.line_num(&buffer, typo.byte_offset); + let (line, line_offset) = extract_line(&buffer, typo.byte_offset); + let msg = report::Typo { + context: Some(report::FileContext { path, line_num }.into()), + buffer: std::borrow::Cow::Borrowed(line), + byte_offset: line_offset, + typo: typo.typo, + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } } + Ok(()) } - - fn check_filenames(&self) -> 
bool { - self.check_filenames - } - - fn check_files(&self) -> bool { - self.check_files - } - - fn binary(&self) -> bool { - self.binary - } } #[derive(Debug, Clone)] @@ -252,26 +154,6 @@ pub struct Identifiers { } impl Check for Identifiers { - fn check_str( - &self, - _buffer: &str, - _tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_bytes( - &self, - _buffer: &[u8], - _tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - fn check_file( &self, path: &std::path::Path, @@ -284,7 +166,7 @@ impl Check for Identifiers { .tokenizer(tokenizer) .identifiers(); - if self.check_filenames() { + if self.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { for word in parser.parse_str(file_name) { let msg = report::Parse { @@ -297,16 +179,20 @@ impl Check for Identifiers { } } - if self.check_files() { + if self.check_files { let buffer = read_file(path, reporter)?; let (buffer, content_type) = massage_data(buffer)?; - if !explicit && !self.binary() && content_type.is_binary() { + if !explicit && !self.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; } else { for word in parser.parse_bytes(&buffer) { + // HACK: Don't look up the line_num per entry to better match the performance + // of Typos for comparison purposes. We don't really get much out of it + // anyway. 
+ let line_num = 0; let msg = report::Parse { - context: Some(report::FileContext { path, line_num: 0 }.into()), + context: Some(report::FileContext { path, line_num }.into()), kind: report::ParseKind::Identifier, data: word.token(), }; @@ -317,18 +203,6 @@ impl Check for Identifiers { Ok(()) } - - fn check_filenames(&self) -> bool { - self.check_filenames - } - - fn check_files(&self) -> bool { - self.check_files - } - - fn binary(&self) -> bool { - self.binary - } } #[derive(Debug, Clone)] @@ -339,26 +213,6 @@ pub struct Words { } impl Check for Words { - fn check_str( - &self, - _buffer: &str, - _tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_bytes( - &self, - _buffer: &[u8], - _tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - fn check_file( &self, path: &std::path::Path, @@ -369,7 +223,7 @@ impl Check for Words { ) -> Result<(), std::io::Error> { let parser = typos::ParserBuilder::new().tokenizer(tokenizer).words(); - if self.check_filenames() { + if self.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { for word in parser.parse_str(file_name) { let msg = report::Parse { @@ -382,16 +236,20 @@ impl Check for Words { } } - if self.check_files() { + if self.check_files { let buffer = read_file(path, reporter)?; let (buffer, content_type) = massage_data(buffer)?; - if !explicit && !self.binary() && content_type.is_binary() { + if !explicit && !self.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; } else { for word in parser.parse_bytes(&buffer) { + // HACK: Don't look up the line_num per entry to better match the performance + // of Typos for comparison purposes. We don't really get much out of it + // anyway. 
+ let line_num = 0; let msg = report::Parse { - context: Some(report::FileContext { path, line_num: 0 }.into()), + context: Some(report::FileContext { path, line_num }.into()), kind: report::ParseKind::Word, data: word.token(), }; @@ -402,18 +260,6 @@ impl Check for Words { Ok(()) } - - fn check_filenames(&self) -> bool { - self.check_filenames - } - - fn check_files(&self) -> bool { - self.check_files - } - - fn binary(&self) -> bool { - self.binary - } } #[derive(Debug, Clone)] @@ -422,59 +268,6 @@ pub struct FoundFiles { } impl Check for FoundFiles { - fn check_str( - &self, - _buffer: &str, - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_bytes( - &self, - _buffer: &[u8], - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_filenames(&self) -> bool { - true - } - - fn check_files(&self) -> bool { - true - } - - fn binary(&self) -> bool { - self.binary - } - - fn check_filename( - &self, - _path: &std::path::Path, - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_file_content( - &self, - _path: &std::path::Path, - _explicit: bool, - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - fn check_file( &self, path: &std::path::Path, @@ -533,6 +326,45 @@ fn massage_data( Ok((buffer, content_type)) } +struct AccumulateLineNum { + line_num: usize, + last_offset: usize, +} + +impl AccumulateLineNum { + fn new() -> Self { + Self { + // 1-indexed + line_num: 1, + last_offset: 0, + } + } + + fn line_num(&mut self, buffer: &[u8], byte_offset: usize) -> usize { + assert!(self.last_offset <= byte_offset); + let slice = &buffer[self.last_offset..byte_offset]; + let 
newlines = slice.lines().count(); + let line_num = self.line_num + newlines; + self.line_num = line_num; + self.last_offset = byte_offset; + line_num + } +} + +fn extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) { + let line_start = buffer[0..byte_offset] + .rfind_byte(b'\n') + // Skip the newline + .map(|s| s + 1) + .unwrap_or(0); + let line = buffer[line_start..] + .lines() + .next() + .expect("should always be at least a line"); + let line_offset = byte_offset - line_start; + (line, line_offset) +} + pub fn check_path( walk: ignore::Walk, checks: &dyn Check, From 48112a47e92d892c6dd8665315923daf90de96df Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 31 Dec 2020 19:29:45 -0600 Subject: [PATCH 10/16] refactor(parser): Abstract over lifetimes --- crates/typos/src/dict.rs | 14 ++++++++++++++ crates/typos/src/parser.rs | 27 +++++++++++++++++++++++---- src/checks.rs | 4 ++-- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/crates/typos/src/dict.rs b/crates/typos/src/dict.rs index 6e0a7f8..7c92d5b 100644 --- a/crates/typos/src/dict.rs +++ b/crates/typos/src/dict.rs @@ -27,6 +27,20 @@ impl<'c> Status<'c> { } } + pub fn into_owned(self) -> Status<'static> { + match self { + Status::Valid => Status::Valid, + Status::Invalid => Status::Invalid, + Status::Corrections(corrections) => { + let corrections = corrections + .into_iter() + .map(|c| Cow::Owned(c.into_owned())) + .collect(); + Status::Corrections(corrections) + } + } + } + pub fn borrow(&self) -> Status<'_> { match self { Status::Corrections(corrections) => { diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs index d427da1..95d0d4c 100644 --- a/crates/typos/src/parser.rs +++ b/crates/typos/src/parser.rs @@ -1,5 +1,6 @@ use crate::tokens; use crate::Dictionary; +use std::borrow::Cow; #[derive(Clone)] pub struct ParserBuilder<'p, 'd> { @@ -86,7 +87,7 @@ impl<'p, 'd> TyposParser<'p, 'd> { Some(corrections) => { let typo = Typo { byte_offset: ident.offset(), - typo: 
ident.token(), + typo: ident.token().into(), corrections, }; itertools::Either::Left(Some(typo).into_iter()) @@ -105,7 +106,7 @@ impl<'p, 'd> TyposParser<'p, 'd> { Some(corrections) => { let typo = Typo { byte_offset: word.offset(), - typo: word.token(), + typo: word.token().into(), corrections, }; Some(typo) @@ -119,15 +120,33 @@ impl<'p, 'd> TyposParser<'p, 'd> { #[non_exhaustive] pub struct Typo<'m> { pub byte_offset: usize, - pub typo: &'m str, + pub typo: Cow<'m, str>, pub corrections: crate::Status<'m>, } +impl<'m> Typo<'m> { + pub fn into_owned(self) -> Typo<'static> { + Typo { + byte_offset: self.byte_offset, + typo: Cow::Owned(self.typo.into_owned()), + corrections: self.corrections.into_owned(), + } + } + + pub fn borrow(&self) -> Typo<'_> { + Typo { + byte_offset: self.byte_offset, + typo: Cow::Borrowed(self.typo.as_ref()), + corrections: self.corrections.borrow(), + } + } +} + impl<'m> Default for Typo<'m> { fn default() -> Self { Self { byte_offset: 0, - typo: "", + typo: "".into(), corrections: crate::Status::Invalid, } } diff --git a/src/checks.rs b/src/checks.rs index 65d7e48..84e9c83 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -111,7 +111,7 @@ impl Check for Typos { context: Some(report::PathContext { path }.into()), buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()), byte_offset: typo.byte_offset, - typo: typo.typo, + typo: typo.typo.as_ref(), corrections: typo.corrections, }; reporter.report(msg.into())?; @@ -134,7 +134,7 @@ impl Check for Typos { context: Some(report::FileContext { path, line_num }.into()), buffer: std::borrow::Cow::Borrowed(line), byte_offset: line_offset, - typo: typo.typo, + typo: typo.typo.as_ref(), corrections: typo.corrections, }; reporter.report(msg.into())?; From c900e485938e0d46aefc88b76735ef87af510af4 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 1 Jan 2021 18:25:48 -0600 Subject: [PATCH 11/16] fix: Arg write-changes reports immediately --- src/checks.rs | 161 
+++++++++++++++++++++++++++++++++++++++----- src/lib.rs | 2 +- src/main.rs | 13 ++-- src/replace.rs | 178 +++---------------------------------------------- 4 files changed, 159 insertions(+), 195 deletions(-) diff --git a/src/checks.rs b/src/checks.rs index 84e9c83..7053d24 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -50,6 +50,14 @@ impl TyposSettings { } } + pub fn build_fix_typos(&self) -> FixTypos { + FixTypos { + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + } + } + pub fn build_identifier_parser(&self) -> Identifiers { Identifiers { check_filenames: self.check_filenames, @@ -120,8 +128,7 @@ impl Check for Typos { } if self.check_files { - let buffer = read_file(path, reporter)?; - let (buffer, content_type) = massage_data(buffer)?; + let (buffer, content_type) = read_file(path, reporter)?; if !explicit && !self.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; @@ -146,6 +153,91 @@ impl Check for Typos { } } +#[derive(Debug, Clone)] +pub struct FixTypos { + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl Check for FixTypos { + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + tokenizer: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = typos::ParserBuilder::new() + .tokenizer(tokenizer) + .dictionary(dictionary) + .typos(); + + if self.check_files { + let (buffer, content_type) = read_file(path, reporter)?; + if !explicit && !self.binary && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + } else { + let mut fixes = Vec::new(); + let mut accum_line_num = AccumulateLineNum::new(); + for typo in parser.parse_bytes(&buffer) { + if is_fixable(&typo) { + fixes.push(typo.into_owned()); + } else { + let line_num = accum_line_num.line_num(&buffer, typo.byte_offset); + let (line, 
line_offset) = extract_line(&buffer, typo.byte_offset); + let msg = report::Typo { + context: Some(report::FileContext { path, line_num }.into()), + buffer: std::borrow::Cow::Borrowed(line), + byte_offset: line_offset, + typo: typo.typo.as_ref(), + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } + if !fixes.is_empty() { + let buffer = fix_buffer(buffer, fixes.into_iter()); + write_file(path, content_type, &buffer, reporter)?; + } + } + } + + // Ensure the above write can happen before renaming the file. + if self.check_filenames { + if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { + let mut fixes = Vec::new(); + for typo in parser.parse_str(file_name) { + if is_fixable(&typo) { + fixes.push(typo.into_owned()); + } else { + let msg = report::Typo { + context: Some(report::PathContext { path }.into()), + buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo.as_ref(), + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } + if !fixes.is_empty() { + let file_name = file_name.to_owned().into_bytes(); + let new_name = fix_buffer(file_name, fixes.into_iter()); + let new_name = + String::from_utf8(new_name).expect("corrections are valid utf-8"); + let new_path = path.with_file_name(new_name); + std::fs::rename(path, new_path)?; + } + } + } + + Ok(()) + } +} + #[derive(Debug, Clone)] pub struct Identifiers { check_filenames: bool, @@ -180,8 +272,7 @@ impl Check for Identifiers { } if self.check_files { - let buffer = read_file(path, reporter)?; - let (buffer, content_type) = massage_data(buffer)?; + let (buffer, content_type) = read_file(path, reporter)?; if !explicit && !self.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; @@ -237,8 +328,7 @@ impl Check for Words { } if self.check_files { - let buffer = read_file(path, reporter)?; - let (buffer, content_type) = massage_data(buffer)?; + let 
(buffer, content_type) = read_file(path, reporter)?; if !explicit && !self.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; @@ -281,8 +371,7 @@ impl Check for FoundFiles { let msg = report::File::new(path); reporter.report(msg.into())?; } else { - let buffer = read_file(path, reporter)?; - let (_buffer, content_type) = massage_data(buffer)?; + let (_buffer, content_type) = read_file(path, reporter)?; if !explicit && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; @@ -296,10 +385,10 @@ impl Check for FoundFiles { } } -fn read_file( +pub fn read_file( path: &std::path::Path, reporter: &dyn report::Report, -) -> Result, std::io::Error> { +) -> Result<(Vec, content_inspector::ContentType), std::io::Error> { let buffer = match std::fs::read(path) { Ok(buffer) => buffer, Err(err) => { @@ -308,14 +397,8 @@ fn read_file( Vec::new() } }; - Ok(buffer) -} -fn massage_data( - buffer: Vec, -) -> Result<(Vec, content_inspector::ContentType), std::io::Error> { let mut content_type = content_inspector::inspect(&buffer); - // HACK: We only support UTF-8 at the moment if content_type != content_inspector::ContentType::UTF_8_BOM && content_type != content_inspector::ContentType::UTF_8 @@ -326,6 +409,27 @@ fn massage_data( Ok((buffer, content_type)) } +pub fn write_file( + path: &std::path::Path, + content_type: content_inspector::ContentType, + buffer: &[u8], + reporter: &dyn report::Report, +) -> Result<(), std::io::Error> { + assert!( + content_type == content_inspector::ContentType::UTF_8_BOM + || content_type == content_inspector::ContentType::UTF_8 + || content_type == content_inspector::ContentType::BINARY + ); + match std::fs::write(path, buffer) { + Ok(()) => (), + Err(err) => { + let msg = report::Error::new(err.to_string()); + reporter.report(msg.into())?; + } + }; + Ok(()) +} + struct AccumulateLineNum { line_num: usize, last_offset: usize, @@ -365,6 +469,31 @@ fn 
extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) { (line, line_offset) } +fn extract_fix<'t>(typo: &'t typos::Typo<'t>) -> Option<&'t str> { + match &typo.corrections { + typos::Status::Corrections(c) if c.len() == 1 => Some(c[0].as_ref()), + _ => None, + } +} + +fn is_fixable<'t>(typo: &typos::Typo<'t>) -> bool { + extract_fix(typo).is_some() +} + +fn fix_buffer(mut buffer: Vec, typos: impl Iterator>) -> Vec { + let mut offset = 0isize; + for typo in typos { + let fix = extract_fix(&typo).expect("Caller only provides fixable typos"); + let start = ((typo.byte_offset as isize) + offset) as usize; + let end = start + typo.typo.len(); + + buffer.splice(start..end, fix.as_bytes().iter().copied()); + + offset += (fix.len() as isize) - (typo.typo.len() as isize); + } + buffer +} + pub fn check_path( walk: ignore::Walk, checks: &dyn Check, diff --git a/src/lib.rs b/src/lib.rs index 632c1b0..2584359 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,5 +2,5 @@ pub mod checks; pub mod config; pub mod dict; pub mod diff; -pub mod replace; +pub(crate) mod replace; pub mod report; diff --git a/src/main.rs b/src/main.rs index d417642..5e33fd5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,6 @@ use typos_cli::checks; use typos_cli::config; use typos_cli::dict; use typos_cli::diff; -use typos_cli::replace; use typos_cli::report; use proc_exit::WithCodeResultExt; @@ -101,15 +100,12 @@ fn run() -> proc_exit::ExitResult { }; let status_reporter = report::MessageStatus::new(output_reporter); let mut reporter: &dyn report::Report = &status_reporter; - let replace_reporter = replace::Replace::new(reporter); let diff_reporter = diff::Diff::new(reporter); if args.diff { reporter = &diff_reporter; - } else if args.write_changes { - reporter = &replace_reporter; } - let (files, identifier_parser, word_parser, checks); + let (files, identifier_parser, word_parser, checks, fixer); let selected_checks: &dyn checks::Check = if args.files { files = settings.build_files(); 
&files @@ -119,6 +115,9 @@ fn run() -> proc_exit::ExitResult { } else if args.words { word_parser = settings.build_word_parser(); &word_parser + } else if args.write_changes { + fixer = settings.build_fix_typos(); + &fixer } else { checks = settings.build_typos(); &checks @@ -156,10 +155,6 @@ fn run() -> proc_exit::ExitResult { if args.diff { diff_reporter.show().with_code(proc_exit::Code::FAILURE)?; - } else if args.write_changes { - replace_reporter - .write() - .with_code(proc_exit::Code::FAILURE)?; } } diff --git a/src/replace.rs b/src/replace.rs index 4bec030..78f2a17 100644 --- a/src/replace.rs +++ b/src/replace.rs @@ -1,116 +1,21 @@ use std::collections::BTreeMap; -use std::io::Write; use std::path; -use std::sync; - -use bstr::ByteSlice; - -pub struct Replace<'r> { - reporter: &'r dyn crate::report::Report, - deferred: sync::Mutex, -} - -impl<'r> Replace<'r> { - pub fn new(reporter: &'r dyn crate::report::Report) -> Self { - Self { - reporter, - deferred: sync::Mutex::new(Deferred::default()), - } - } - - pub fn write(&self) -> Result<(), std::io::Error> { - let deferred = self.deferred.lock().unwrap(); - - for (path, corrections) in deferred.content.iter() { - let buffer = std::fs::read(path)?; - - let mut file = std::fs::File::create(path)?; - for (line_idx, line) in buffer.lines_with_terminator().enumerate() { - let line_num = line_idx + 1; - if let Some(corrections) = corrections.get(&line_num) { - let line = line.to_vec(); - let line = correct(line, &corrections); - file.write_all(&line)?; - } else { - file.write_all(&line)?; - } - } - } - - for (path, corrections) in deferred.paths.iter() { - let orig_name = path - .file_name() - .and_then(|s| s.to_str()) - .expect("generating a correction requires the filename to be valid.") - .to_owned() - .into_bytes(); - let new_name = correct(orig_name, &corrections); - let new_name = String::from_utf8(new_name).expect("corrections are valid utf-8"); - let new_path = path.with_file_name(new_name); - 
std::fs::rename(path, new_path)?; - } - - Ok(()) - } -} - -impl<'r> crate::report::Report for Replace<'r> { - fn report(&self, msg: crate::report::Message<'_>) -> Result<(), std::io::Error> { - let typo = match &msg { - crate::report::Message::Typo(typo) => typo, - _ => return self.reporter.report(msg), - }; - - let corrections = match &typo.corrections { - typos::Status::Corrections(corrections) if corrections.len() == 1 => corrections, - _ => return self.reporter.report(msg), - }; - - match &typo.context { - Some(crate::report::Context::File(file)) => { - let path = file.path.to_owned(); - let line_num = file.line_num; - let correction = - Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref()); - let mut deferred = self.deferred.lock().unwrap(); - let content = deferred - .content - .entry(path) - .or_insert_with(BTreeMap::new) - .entry(line_num) - .or_insert_with(Vec::new); - content.push(correction); - Ok(()) - } - Some(crate::report::Context::Path(path)) => { - let path = path.path.to_owned(); - let correction = - Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref()); - let mut deferred = self.deferred.lock().unwrap(); - let content = deferred.paths.entry(path).or_insert_with(Vec::new); - content.push(correction); - Ok(()) - } - _ => self.reporter.report(msg), - } - } -} #[derive(Clone, Debug, Default)] -pub struct Deferred { - pub content: BTreeMap>>, - pub paths: BTreeMap>, +pub(crate) struct Deferred { + pub(crate) content: BTreeMap>>, + pub(crate) paths: BTreeMap>, } #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] -pub struct Correction { - pub byte_offset: usize, - pub typo: Vec, - pub correction: Vec, +pub(crate) struct Correction { + pub(crate) byte_offset: usize, + pub(crate) typo: Vec, + pub(crate) correction: Vec, } impl Correction { - pub fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { + pub(crate) fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { Self { byte_offset, typo: 
typo.as_bytes().to_vec(), @@ -119,7 +24,7 @@ impl Correction { } } -pub fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { +pub(crate) fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { let mut corrections: Vec<_> = corrections.iter().collect(); corrections.sort_unstable(); corrections.reverse(); @@ -137,9 +42,6 @@ pub fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { mod test { use super::*; - use crate::report::Report; - use assert_fs::prelude::*; - fn simple_correct(line: &str, corrections: Vec<(usize, &str, &str)>) -> String { let line = line.as_bytes().to_vec(); let corrections: Vec<_> = corrections @@ -198,66 +100,4 @@ mod test { ); assert_eq!(actual, "foo happy world"); } - - #[test] - fn test_replace_content() { - let temp = assert_fs::TempDir::new().unwrap(); - let input_file = temp.child("foo.txt"); - input_file.write_str("1 foo 2\n3 4 5").unwrap(); - - let primary = crate::report::PrintSilent; - let replace = Replace::new(&primary); - replace - .report( - crate::report::Typo::default() - .context(Some( - crate::report::FileContext::default() - .path(input_file.path()) - .line_num(1) - .into(), - )) - .buffer(std::borrow::Cow::Borrowed(b"1 foo 2\n3 4 5")) - .byte_offset(2) - .typo("foo") - .corrections(typos::Status::Corrections(vec![ - std::borrow::Cow::Borrowed("bar"), - ])) - .into(), - ) - .unwrap(); - replace.write().unwrap(); - - input_file.assert("1 bar 2\n3 4 5"); - } - - #[test] - fn test_replace_path() { - let temp = assert_fs::TempDir::new().unwrap(); - let input_file = temp.child("foo.txt"); - input_file.write_str("foo foo foo").unwrap(); - - let primary = crate::report::PrintSilent; - let replace = Replace::new(&primary); - replace - .report( - crate::report::Typo::default() - .context(Some( - crate::report::PathContext::default() - .path(input_file.path()) - .into(), - )) - .buffer(std::borrow::Cow::Borrowed(b"foo.txt")) - .byte_offset(0) - .typo("foo") - .corrections(typos::Status::Corrections(vec![ - 
std::borrow::Cow::Borrowed("bar"), - ])) - .into(), - ) - .unwrap(); - replace.write().unwrap(); - - input_file.assert(predicates::path::missing()); - temp.child("bar.txt").assert("foo foo foo"); - } } From 5f82dd60176e8a1378a8b9d068a38f988e5f4370 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 1 Jan 2021 21:16:20 -0600 Subject: [PATCH 12/16] fix: Arg diff reports immediately --- src/checks.rs | 125 +++++++++++++++++++++++++++++++++++++++++++++++++ src/diff.rs | 93 ------------------------------------ src/lib.rs | 2 - src/main.rs | 16 ++----- src/replace.rs | 103 ---------------------------------------- 5 files changed, 130 insertions(+), 209 deletions(-) delete mode 100644 src/diff.rs delete mode 100644 src/replace.rs diff --git a/src/checks.rs b/src/checks.rs index 7053d24..9349437 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -58,6 +58,14 @@ impl TyposSettings { } } + pub fn build_diff_typos(&self) -> DiffTypos { + DiffTypos { + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + } + } + pub fn build_identifier_parser(&self) -> Identifiers { Identifiers { check_filenames: self.check_filenames, @@ -238,6 +246,123 @@ impl Check for FixTypos { } } +#[derive(Debug, Clone)] +pub struct DiffTypos { + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl Check for DiffTypos { + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + tokenizer: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = typos::ParserBuilder::new() + .tokenizer(tokenizer) + .dictionary(dictionary) + .typos(); + + let mut content = Vec::new(); + let mut new_content = Vec::new(); + if self.check_files { + let (buffer, content_type) = read_file(path, reporter)?; + if !explicit && !self.binary && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + } else { + let mut fixes = Vec::new(); + 
let mut accum_line_num = AccumulateLineNum::new(); + for typo in parser.parse_bytes(&buffer) { + if is_fixable(&typo) { + fixes.push(typo.into_owned()); + } else { + let line_num = accum_line_num.line_num(&buffer, typo.byte_offset); + let (line, line_offset) = extract_line(&buffer, typo.byte_offset); + let msg = report::Typo { + context: Some(report::FileContext { path, line_num }.into()), + buffer: std::borrow::Cow::Borrowed(line), + byte_offset: line_offset, + typo: typo.typo.as_ref(), + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } + if !fixes.is_empty() { + new_content = fix_buffer(buffer.clone(), fixes.into_iter()); + content = buffer + } + } + } + + // Match FixTypos ordering for easy diffing. + let mut new_path = None; + if self.check_filenames { + if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { + let mut fixes = Vec::new(); + for typo in parser.parse_str(file_name) { + if is_fixable(&typo) { + fixes.push(typo.into_owned()); + } else { + let msg = report::Typo { + context: Some(report::PathContext { path }.into()), + buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo.as_ref(), + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } + if !fixes.is_empty() { + let file_name = file_name.to_owned().into_bytes(); + let new_name = fix_buffer(file_name, fixes.into_iter()); + let new_name = + String::from_utf8(new_name).expect("corrections are valid utf-8"); + new_path = Some(path.with_file_name(new_name)); + } + } + } + + if new_path.is_some() || !content.is_empty() { + let original_path = path.display().to_string(); + let fixed_path = new_path + .as_ref() + .map(|p| p.as_path()) + .unwrap_or(path) + .display() + .to_string(); + let original_content: Vec<_> = content + .lines_with_terminator() + .map(|s| String::from_utf8_lossy(s).into_owned()) + .collect(); + let fixed_content: Vec<_> = new_content + .lines_with_terminator() + .map(|s| 
String::from_utf8_lossy(s).into_owned()) + .collect(); + let diff = difflib::unified_diff( + &original_content, + &fixed_content, + original_path.as_str(), + fixed_path.as_str(), + "original", + "fixed", + 0, + ); + for line in diff { + print!("{}", line); + } + } + + Ok(()) + } +} + #[derive(Debug, Clone)] pub struct Identifiers { check_filenames: bool, diff --git a/src/diff.rs b/src/diff.rs deleted file mode 100644 index 8457588..0000000 --- a/src/diff.rs +++ /dev/null @@ -1,93 +0,0 @@ -use std::collections::BTreeMap; -use std::sync; - -use bstr::ByteSlice; - -pub struct Diff<'r> { - reporter: &'r dyn crate::report::Report, - deferred: sync::Mutex, -} - -impl<'r> Diff<'r> { - pub fn new(reporter: &'r dyn crate::report::Report) -> Self { - Self { - reporter, - deferred: sync::Mutex::new(crate::replace::Deferred::default()), - } - } - - pub fn show(&self) -> Result<(), std::io::Error> { - let deferred = self.deferred.lock().unwrap(); - - for (path, corrections) in deferred.content.iter() { - let buffer = std::fs::read(path)?; - - let mut original = Vec::new(); - let mut corrected = Vec::new(); - for (line_idx, line) in buffer.lines_with_terminator().enumerate() { - original.push(String::from_utf8_lossy(line).into_owned()); - - let line_num = line_idx + 1; - let line = if let Some(corrections) = corrections.get(&line_num) { - let line = line.to_vec(); - crate::replace::correct(line, &corrections) - } else { - line.to_owned() - }; - corrected.push(String::from_utf8_lossy(&line).into_owned()) - } - - let display_path = path.display().to_string(); - let diff = difflib::unified_diff( - &original, - &corrected, - display_path.as_str(), - display_path.as_str(), - "original", - "corrected", - 0, - ); - for line in diff { - print!("{}", line); - } - } - - Ok(()) - } -} - -impl<'r> crate::report::Report for Diff<'r> { - fn report(&self, msg: crate::report::Message<'_>) -> Result<(), std::io::Error> { - let typo = match &msg { - crate::report::Message::Typo(typo) => typo, - _ 
=> return self.reporter.report(msg), - }; - - let corrections = match &typo.corrections { - typos::Status::Corrections(corrections) if corrections.len() == 1 => corrections, - _ => return self.reporter.report(msg), - }; - - match &typo.context { - Some(crate::report::Context::File(file)) => { - let path = file.path.to_owned(); - let line_num = file.line_num; - let correction = crate::replace::Correction::new( - typo.byte_offset, - typo.typo, - corrections[0].as_ref(), - ); - let mut deferred = self.deferred.lock().unwrap(); - let content = deferred - .content - .entry(path) - .or_insert_with(BTreeMap::new) - .entry(line_num) - .or_insert_with(Vec::new); - content.push(correction); - Ok(()) - } - _ => self.reporter.report(msg), - } - } -} diff --git a/src/lib.rs b/src/lib.rs index 2584359..4d0e01e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,4 @@ pub mod checks; pub mod config; pub mod dict; -pub mod diff; -pub(crate) mod replace; pub mod report; diff --git a/src/main.rs b/src/main.rs index 5e33fd5..ade1fee 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,6 @@ mod args; use typos_cli::checks; use typos_cli::config; use typos_cli::dict; -use typos_cli::diff; use typos_cli::report; use proc_exit::WithCodeResultExt; @@ -99,13 +98,9 @@ fn run() -> proc_exit::ExitResult { args.format.reporter() }; let status_reporter = report::MessageStatus::new(output_reporter); - let mut reporter: &dyn report::Report = &status_reporter; - let diff_reporter = diff::Diff::new(reporter); - if args.diff { - reporter = &diff_reporter; - } + let reporter: &dyn report::Report = &status_reporter; - let (files, identifier_parser, word_parser, checks, fixer); + let (files, identifier_parser, word_parser, checks, fixer, differ); let selected_checks: &dyn checks::Check = if args.files { files = settings.build_files(); &files @@ -118,6 +113,9 @@ fn run() -> proc_exit::ExitResult { } else if args.write_changes { fixer = settings.build_fix_typos(); &fixer + } else if args.diff { + 
differ = settings.build_diff_typos(); + &differ } else { checks = settings.build_typos(); &checks @@ -152,10 +150,6 @@ fn run() -> proc_exit::ExitResult { if status_reporter.errors_found() { errors_found = true; } - - if args.diff { - diff_reporter.show().with_code(proc_exit::Code::FAILURE)?; - } } if errors_found { diff --git a/src/replace.rs b/src/replace.rs deleted file mode 100644 index 78f2a17..0000000 --- a/src/replace.rs +++ /dev/null @@ -1,103 +0,0 @@ -use std::collections::BTreeMap; -use std::path; - -#[derive(Clone, Debug, Default)] -pub(crate) struct Deferred { - pub(crate) content: BTreeMap>>, - pub(crate) paths: BTreeMap>, -} - -#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] -pub(crate) struct Correction { - pub(crate) byte_offset: usize, - pub(crate) typo: Vec, - pub(crate) correction: Vec, -} - -impl Correction { - pub(crate) fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { - Self { - byte_offset, - typo: typo.as_bytes().to_vec(), - correction: correction.as_bytes().to_vec(), - } - } -} - -pub(crate) fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { - let mut corrections: Vec<_> = corrections.iter().collect(); - corrections.sort_unstable(); - corrections.reverse(); - - for correction in corrections { - let start = correction.byte_offset; - let end = start + correction.typo.len(); - line.splice(start..end, correction.correction.iter().copied()); - } - - line -} - -#[cfg(test)] -mod test { - use super::*; - - fn simple_correct(line: &str, corrections: Vec<(usize, &str, &str)>) -> String { - let line = line.as_bytes().to_vec(); - let corrections: Vec<_> = corrections - .into_iter() - .map(|(byte_offset, typo, correction)| Correction { - byte_offset, - typo: typo.as_bytes().to_vec(), - correction: correction.as_bytes().to_vec(), - }) - .collect(); - let actual = correct(line, &corrections); - String::from_utf8(actual).unwrap() - } - - #[test] - fn test_correct_single() { - let actual = simple_correct("foo foo foo", 
vec![(4, "foo", "bar")]); - assert_eq!(actual, "foo bar foo"); - } - - #[test] - fn test_correct_single_grow() { - let actual = simple_correct("foo foo foo", vec![(4, "foo", "happy")]); - assert_eq!(actual, "foo happy foo"); - } - - #[test] - fn test_correct_single_shrink() { - let actual = simple_correct("foo foo foo", vec![(4, "foo", "if")]); - assert_eq!(actual, "foo if foo"); - } - - #[test] - fn test_correct_start() { - let actual = simple_correct("foo foo foo", vec![(0, "foo", "bar")]); - assert_eq!(actual, "bar foo foo"); - } - - #[test] - fn test_correct_end() { - let actual = simple_correct("foo foo foo", vec![(8, "foo", "bar")]); - assert_eq!(actual, "foo foo bar"); - } - - #[test] - fn test_correct_end_grow() { - let actual = simple_correct("foo foo foo", vec![(8, "foo", "happy")]); - assert_eq!(actual, "foo foo happy"); - } - - #[test] - fn test_correct_multiple() { - let actual = simple_correct( - "foo foo foo", - vec![(4, "foo", "happy"), (8, "foo", "world")], - ); - assert_eq!(actual, "foo happy world"); - } -} From aba85df4350c7bbbf127448f84c1b61ebcd6c3f3 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 1 Jan 2021 21:35:49 -0600 Subject: [PATCH 13/16] docs(typos): Clarify intent --- crates/typos/src/dict.rs | 51 ++++++++++++++++++++++---------------- crates/typos/src/parser.rs | 9 +++++++ crates/typos/src/tokens.rs | 11 ++++++++ 3 files changed, 49 insertions(+), 22 deletions(-) diff --git a/crates/typos/src/dict.rs b/crates/typos/src/dict.rs index 7c92d5b..971ca86 100644 --- a/crates/typos/src/dict.rs +++ b/crates/typos/src/dict.rs @@ -1,5 +1,34 @@ use std::borrow::Cow; +/// Look up the validity of a term. +pub trait Dictionary: Send + Sync { + /// Look up the validity of an Identifier. + /// + /// `None` if the status is unknown. + fn correct_ident<'s, 'w>(&'s self, ident: crate::tokens::Identifier<'w>) -> Option>; + + /// Look up the validity of a Word. + /// + /// `None` if the status is unknown. 
+ fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option>; +} + +pub(crate) struct NullDictionary; + +impl Dictionary for NullDictionary { + fn correct_ident<'s, 'w>( + &'s self, + _ident: crate::tokens::Identifier<'w>, + ) -> Option> { + None + } + + fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option> { + None + } +} + +/// Validity of a term in a Dictionary. #[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)] #[serde(rename_all = "snake_case")] #[serde(untagged)] @@ -54,25 +83,3 @@ impl<'c> Status<'c> { } } } - -pub trait Dictionary: Send + Sync { - fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>) - -> Option>; - - fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option>; -} - -pub(crate) struct NullDictionary; - -impl Dictionary for NullDictionary { - fn correct_ident<'s, 'w>( - &'s self, - _ident: crate::tokens::Identifier<'w>, - ) -> Option> { - None - } - - fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option> { - None - } -} diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs index 95d0d4c..ae23bfd 100644 --- a/crates/typos/src/parser.rs +++ b/crates/typos/src/parser.rs @@ -15,11 +15,13 @@ impl<'p> ParserBuilder<'p, 'static> { } impl<'p, 'd> ParserBuilder<'p, 'd> { + /// Set the Tokenizer used when parsing. pub fn tokenizer(mut self, tokenizer: &'p tokens::Tokenizer) -> Self { self.tokenizer = Some(tokenizer); self } + /// Set the dictionary used when parsing. pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> { ParserBuilder { tokenizer: self.tokenizer, @@ -27,6 +29,7 @@ impl<'p, 'd> ParserBuilder<'p, 'd> { } } + /// Extract typos from the buffer. pub fn typos(&self) -> TyposParser<'p, 'd> { TyposParser { tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), @@ -34,12 +37,14 @@ impl<'p, 'd> ParserBuilder<'p, 'd> { } } + /// Parse for Identifiers. 
pub fn identifiers(&self) -> IdentifiersParser<'p> { IdentifiersParser { tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), } } + /// Parse for Words. pub fn words(&self) -> WordsParser<'p> { WordsParser { tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), } } @@ -59,6 +64,7 @@ impl<'p> Default for ParserBuilder<'p, 'static> { static DEFAULT_TOKENIZER: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| tokens::Tokenizer::new()); +/// Extract typos from the buffer. #[derive(Clone)] pub struct TyposParser<'p, 'd> { tokenizer: &'p tokens::Tokenizer, @@ -116,6 +122,7 @@ impl<'p, 'd> TyposParser<'p, 'd> { } } +/// An invalid term found in the buffer. #[derive(Clone, Debug)] #[non_exhaustive] pub struct Typo<'m> { @@ -152,6 +159,7 @@ impl<'m> Default for Typo<'m> { } } +/// Parse for Identifiers. #[derive(Debug, Clone)] pub struct IdentifiersParser<'p> { tokenizer: &'p tokens::Tokenizer, @@ -167,6 +175,7 @@ impl<'p> IdentifiersParser<'p> { } } +/// Parse for Words. #[derive(Debug, Clone)] pub struct WordsParser<'p> { tokenizer: &'p tokens::Tokenizer, diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index f372c96..3f5aefc 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -1,3 +1,4 @@ +/// Define rules for tokenizing a buffer. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TokenizerBuilder { ignore_hex: bool, @@ -12,26 +13,31 @@ impl TokenizerBuilder { Default::default() } + /// Specify that hexadecimal numbers should be ignored. pub fn ignore_hex(&mut self, yes: bool) -> &mut Self { self.ignore_hex = yes; self } + /// Specify that leading digits are allowed for Identifiers. pub fn leading_digits(&mut self, yes: bool) -> &mut Self { self.leading_digits = yes; self } + /// Extend accepted leading characters for Identifiers. pub fn leading_chars(&mut self, chars: String) -> &mut Self { self.leading_chars = chars; self } + /// Specify that digits can be included in Identifiers. 
pub fn include_digits(&mut self, yes: bool) -> &mut Self { self.include_digits = yes; self } + /// Extend accepted characters for Identifiers. pub fn include_chars(&mut self, chars: String) -> &mut Self { self.include_chars = chars; self @@ -81,6 +87,7 @@ impl Default for TokenizerBuilder { } } +/// Extract Identifiers from a buffer. #[derive(Debug, Clone)] pub struct Tokenizer { words_str: regex::Regex, @@ -148,6 +155,7 @@ fn is_hex(ident: &[u8]) -> bool { HEX.is_match(ident) } +/// A term composed of Words. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Identifier<'t> { token: &'t str, @@ -171,11 +179,13 @@ impl<'t> Identifier<'t> { self.offset } + /// Split into individual Words. pub fn split(&self) -> impl Iterator> { split_ident(self.token, self.offset) } } +/// An indivisible term. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Word<'t> { token: &'t str, @@ -325,6 +335,7 @@ impl<'s> Iterator for SplitIdent<'s> { } } +/// Format of the term. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Case { Title, From 692f0ac095948f58d80fcf29fefd383838087ae8 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Sat, 2 Jan 2021 12:51:35 -0600 Subject: [PATCH 14/16] refactor(typos): Focus API on primary use case --- crates/typos/src/parser.rs | 49 ++------------------------------------ src/checks.rs | 20 ++++++---------- 2 files changed, 9 insertions(+), 60 deletions(-) diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs index ae23bfd..f62bffb 100644 --- a/crates/typos/src/parser.rs +++ b/crates/typos/src/parser.rs @@ -2,6 +2,7 @@ use crate::tokens; use crate::Dictionary; use std::borrow::Cow; +/// Extract typos from the buffer. #[derive(Clone)] pub struct ParserBuilder<'p, 'd> { tokenizer: Option<&'p tokens::Tokenizer>, @@ -30,26 +31,12 @@ impl<'p, 'd> ParserBuilder<'p, 'd> { } /// Extract typos from the buffer. 
- pub fn typos(&self) -> TyposParser<'p, 'd> { + pub fn build(&self) -> TyposParser<'p, 'd> { TyposParser { tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), dictionary: self.dictionary, } } - - /// Parse for Identifiers. - pub fn identifiers(&self) -> IdentifiersParser<'p> { - IdentifiersParser { - tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), - } - } - - /// Parse for Words. - pub fn words(&self) -> WordsParser<'p> { - WordsParser { - tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), - } - } } impl<'p> Default for ParserBuilder<'p, 'static> { @@ -158,35 +145,3 @@ impl<'m> Default for Typo<'m> { } } } - -/// Parse for Identifiers. -#[derive(Debug, Clone)] -pub struct IdentifiersParser<'p> { - tokenizer: &'p tokens::Tokenizer, -} - -impl<'p> IdentifiersParser<'p> { - pub fn parse_str(&self, buffer: &'p str) -> impl Iterator> { - self.tokenizer.parse_str(buffer) - } - - pub fn parse_bytes(&self, buffer: &'p [u8]) -> impl Iterator> { - self.tokenizer.parse_bytes(buffer) - } -} - -/// Parse for Words. 
-#[derive(Debug, Clone)] -pub struct WordsParser<'p> { - tokenizer: &'p tokens::Tokenizer, -} - -impl<'p> WordsParser<'p> { - pub fn parse_str(&self, buffer: &'p str) -> impl Iterator> { - self.tokenizer.parse_str(buffer).flat_map(|i| i.split()) - } - - pub fn parse_bytes(&self, buffer: &'p [u8]) -> impl Iterator> { - self.tokenizer.parse_bytes(buffer).flat_map(|i| i.split()) - } -} diff --git a/src/checks.rs b/src/checks.rs index 9349437..3778396 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -118,7 +118,7 @@ impl Check for Typos { let parser = typos::ParserBuilder::new() .tokenizer(tokenizer) .dictionary(dictionary) - .typos(); + .build(); if self.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { @@ -180,7 +180,7 @@ impl Check for FixTypos { let parser = typos::ParserBuilder::new() .tokenizer(tokenizer) .dictionary(dictionary) - .typos(); + .build(); if self.check_files { let (buffer, content_type) = read_file(path, reporter)?; @@ -265,7 +265,7 @@ impl Check for DiffTypos { let parser = typos::ParserBuilder::new() .tokenizer(tokenizer) .dictionary(dictionary) - .typos(); + .build(); let mut content = Vec::new(); let mut new_content = Vec::new(); @@ -379,13 +379,9 @@ impl Check for Identifiers { _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let parser = typos::ParserBuilder::new() - .tokenizer(tokenizer) - .identifiers(); - if self.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - for word in parser.parse_str(file_name) { + for word in tokenizer.parse_str(file_name) { let msg = report::Parse { context: Some(report::PathContext { path }.into()), kind: report::ParseKind::Identifier, @@ -402,7 +398,7 @@ impl Check for Identifiers { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; } else { - for word in parser.parse_bytes(&buffer) { + for word in tokenizer.parse_bytes(&buffer) { // HACK: Don't look up the line_num per 
entry to better match the performance // of Typos for comparison purposes. We don't really get much out of it // anyway. @@ -437,11 +433,9 @@ impl Check for Words { _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let parser = typos::ParserBuilder::new().tokenizer(tokenizer).words(); - if self.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - for word in parser.parse_str(file_name) { + for word in tokenizer.parse_str(file_name).flat_map(|i| i.split()) { let msg = report::Parse { context: Some(report::PathContext { path }.into()), kind: report::ParseKind::Word, @@ -458,7 +452,7 @@ impl Check for Words { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; } else { - for word in parser.parse_bytes(&buffer) { + for word in tokenizer.parse_bytes(&buffer).flat_map(|i| i.split()) { // HACK: Don't look up the line_num per entry to better match the performance // of Typos for comparison purposes. We don't really get much out of it // anyway. 
From e6a4f49eb54eefcd34e815be536137fdac345758 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Sat, 2 Jan 2021 12:56:20 -0600 Subject: [PATCH 15/16] refactor: Clarify names --- benches/checks.rs | 2 +- src/checks.rs | 30 +++++++++++++++--------------- src/main.rs | 6 +++--- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/benches/checks.rs b/benches/checks.rs index dcb8dc2..fbf1f42 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -5,7 +5,7 @@ extern crate test; mod data; use assert_fs::prelude::*; -use typos_cli::checks::Check; +use typos_cli::checks::FileChecker; fn bench_files(data: &str, b: &mut test::Bencher) { let temp = assert_fs::TempDir::new().unwrap(); diff --git a/src/checks.rs b/src/checks.rs index 3778396..36d8980 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -4,7 +4,7 @@ use crate::report; use typos::tokens; use typos::Dictionary; -pub trait Check: Send + Sync { +pub trait FileChecker: Send + Sync { fn check_file( &self, path: &std::path::Path, @@ -106,7 +106,7 @@ pub struct Typos { binary: bool, } -impl Check for Typos { +impl FileChecker for Typos { fn check_file( &self, path: &std::path::Path, @@ -168,7 +168,7 @@ pub struct FixTypos { binary: bool, } -impl Check for FixTypos { +impl FileChecker for FixTypos { fn check_file( &self, path: &std::path::Path, @@ -253,7 +253,7 @@ pub struct DiffTypos { binary: bool, } -impl Check for DiffTypos { +impl FileChecker for DiffTypos { fn check_file( &self, path: &std::path::Path, @@ -370,7 +370,7 @@ pub struct Identifiers { binary: bool, } -impl Check for Identifiers { +impl FileChecker for Identifiers { fn check_file( &self, path: &std::path::Path, @@ -424,7 +424,7 @@ pub struct Words { binary: bool, } -impl Check for Words { +impl FileChecker for Words { fn check_file( &self, path: &std::path::Path, @@ -476,7 +476,7 @@ pub struct FoundFiles { binary: bool, } -impl Check for FoundFiles { +impl FileChecker for FoundFiles { fn check_file( &self, path: &std::path::Path, @@ -613,22 +613,22 
@@ fn fix_buffer(mut buffer: Vec, typos: impl Iterator Result<(), ignore::Error> { for entry in walk { - check_entry(entry, checks, parser, dictionary, reporter)?; + walk_entry(entry, checks, parser, dictionary, reporter)?; } Ok(()) } -pub fn check_path_parallel( +pub fn walk_path_parallel( walk: ignore::WalkParallel, - checks: &dyn Check, + checks: &dyn FileChecker, parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, reporter: &dyn report::Report, @@ -636,7 +636,7 @@ pub fn check_path_parallel( let error: std::sync::Mutex> = std::sync::Mutex::new(Ok(())); walk.run(|| { Box::new(|entry: Result| { - match check_entry(entry, checks, parser, dictionary, reporter) { + match walk_entry(entry, checks, parser, dictionary, reporter) { Ok(()) => ignore::WalkState::Continue, Err(err) => { *error.lock().unwrap() = Err(err); @@ -649,9 +649,9 @@ pub fn check_path_parallel( error.into_inner().unwrap() } -fn check_entry( +fn walk_entry( entry: Result, - checks: &dyn Check, + checks: &dyn FileChecker, parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, reporter: &dyn report::Report, diff --git a/src/main.rs b/src/main.rs index ade1fee..f5e206d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -101,7 +101,7 @@ fn run() -> proc_exit::ExitResult { let reporter: &dyn report::Report = &status_reporter; let (files, identifier_parser, word_parser, checks, fixer, differ); - let selected_checks: &dyn checks::Check = if args.files { + let selected_checks: &dyn checks::FileChecker = if args.files { files = settings.build_files(); &files } else if args.identifiers { @@ -122,7 +122,7 @@ fn run() -> proc_exit::ExitResult { }; if single_threaded { - checks::check_path( + checks::walk_path( walk.build(), selected_checks, &parser, @@ -130,7 +130,7 @@ fn run() -> proc_exit::ExitResult { reporter, ) } else { - checks::check_path_parallel( + checks::walk_path_parallel( walk.build_parallel(), selected_checks, &parser, From 67222e9338c695cf7ff24822076fa7af2851be09 Mon 
Sep 17 00:00:00 2001 From: Ed Page Date: Sat, 2 Jan 2021 13:17:15 -0600 Subject: [PATCH 16/16] style: Address clippy --- crates/typos/src/parser.rs | 6 +++--- src/checks.rs | 9 ++------- src/report.rs | 19 +++++++++++++++---- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs index f62bffb..883a730 100644 --- a/crates/typos/src/parser.rs +++ b/crates/typos/src/parser.rs @@ -26,14 +26,14 @@ impl<'p, 'd> ParserBuilder<'p, 'd> { pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> { ParserBuilder { tokenizer: self.tokenizer, - dictionary: dictionary, + dictionary, } } /// Extract typos from the buffer. pub fn build(&self) -> TyposParser<'p, 'd> { TyposParser { - tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), + tokenizer: self.tokenizer.unwrap_or(&DEFAULT_TOKENIZER), dictionary: self.dictionary, } } @@ -49,7 +49,7 @@ impl<'p> Default for ParserBuilder<'p, 'static> { } static DEFAULT_TOKENIZER: once_cell::sync::Lazy = - once_cell::sync::Lazy::new(|| tokens::Tokenizer::new()); + once_cell::sync::Lazy::new(tokens::Tokenizer::new); /// Extract typos from the buffer. 
#[derive(Clone)] diff --git a/src/checks.rs b/src/checks.rs index 36d8980..3bfb25b 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -331,12 +331,7 @@ impl FileChecker for DiffTypos { if new_path.is_some() || !content.is_empty() { let original_path = path.display().to_string(); - let fixed_path = new_path - .as_ref() - .map(|p| p.as_path()) - .unwrap_or(path) - .display() - .to_string(); + let fixed_path = new_path.as_deref().unwrap_or(path).display().to_string(); let original_content: Vec<_> = content .lines_with_terminator() .map(|s| String::from_utf8_lossy(s).into_owned()) @@ -595,7 +590,7 @@ fn extract_fix<'t>(typo: &'t typos::Typo<'t>) -> Option<&'t str> { } } -fn is_fixable<'t>(typo: &typos::Typo<'t>) -> bool { +fn is_fixable(typo: &typos::Typo<'_>) -> bool { extract_fix(typo).is_some() } diff --git a/src/report.rs b/src/report.rs index 3d213cd..08b2f28 100644 --- a/src/report.rs +++ b/src/report.rs @@ -234,10 +234,21 @@ impl<'r> MessageStatus<'r> { impl<'r> Report for MessageStatus<'r> { fn report(&self, msg: Message) -> Result<(), std::io::Error> { - self.typos_found - .compare_and_swap(false, msg.is_correction(), atomic::Ordering::Relaxed); - self.errors_found - .compare_and_swap(false, msg.is_error(), atomic::Ordering::Relaxed); + let _ = self.typos_found.compare_exchange( + false, + msg.is_correction(), + atomic::Ordering::Relaxed, + atomic::Ordering::Relaxed, + ); + let _ = self + .errors_found + .compare_exchange( + false, + msg.is_error(), + atomic::Ordering::Relaxed, + atomic::Ordering::Relaxed, + ) + .unwrap(); self.reporter.report(msg) } }