From 482d3204072f365ec0c5c3e48cfdd3e27d3b3fa2 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 11 Nov 2020 12:22:23 -0600 Subject: [PATCH 1/4] fix(dict): Ensure we fall through to built-in dict --- src/dict.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/dict.rs b/src/dict.rs index d0e0aa9..a6993d6 100644 --- a/src/dict.rs +++ b/src/dict.rs @@ -212,16 +212,14 @@ impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> { fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Option> { // Skip hashing if we can - if !self.words.is_empty() { + let custom = if !self.words.is_empty() { let w = UniCase::new(word.token()); // HACK: couldn't figure out the lifetime issue with replacing `cloned` with `borrow` - self.words - .get(&w) - .cloned() - .or_else(|| self.inner.correct_word(word)) + self.words.get(&w).cloned() } else { None - } + }; + custom.or_else(|| self.inner.correct_word(word)) } } From 7a1fac7fabed6f039f3d81d99bcdcc7189eabac5 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 11 Nov 2020 12:19:22 -0600 Subject: [PATCH 2/4] refactor(report): Use native types --- crates/typos/src/checks.rs | 18 +++++++-------- crates/typos/src/report.rs | 45 +++++++++++++++++++++++--------------- src/replace.rs | 12 +++++----- 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/crates/typos/src/checks.rs b/crates/typos/src/checks.rs index 56a689d..922fc9d 100644 --- a/crates/typos/src/checks.rs +++ b/crates/typos/src/checks.rs @@ -94,7 +94,7 @@ struct ReportContext<'m, 'r> { impl<'m, 'r> report::Report for ReportContext<'m, 'r> { fn report(&self, msg: report::Message) -> bool { - let msg = msg.context(self.context.clone()); + let msg = msg.context(Some(self.context.clone())); self.reporter.report(msg) } } @@ -188,7 +188,7 @@ impl Check for Typos { Some(corrections) => { let byte_offset = ident.offset(); let msg = report::Typo { - context: report::Context::None, + context: None, buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), byte_offset, typo: ident.token(), @@ -203,7 +203,7 @@ impl Check for Typos { Some(corrections) => { let byte_offset = word.offset(); let msg = report::Typo { - context: report::Context::None, + context: None, buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), byte_offset, typo: word.token(), @@ -236,7 +236,7 @@ impl Check for Typos { Some(corrections) => { let byte_offset = ident.offset(); let msg = report::Typo { - context: report::Context::None, + context: None, buffer: std::borrow::Cow::Borrowed(buffer), byte_offset, typo: ident.token(), @@ -251,7 +251,7 @@ impl Check for Typos { Some(corrections) => { let byte_offset = word.offset(); let msg = report::Typo { - context: report::Context::None, + context: None, buffer: std::borrow::Cow::Borrowed(buffer), byte_offset, typo: word.token(), @@ -300,7 +300,7 @@ impl Check for ParseIdentifiers { let typos_found = false; let msg = report::Parse { - context: report::Context::None, + context: None, kind: report::ParseKind::Identifier, data: parser.parse_str(buffer).map(|i| i.token()).collect(), }; @@ -321,7 +321,7 @@ impl Check for ParseIdentifiers { let typos_found = false; let msg = report::Parse { - context: report::Context::None, + context: None, kind: report::ParseKind::Identifier, data: parser.parse_bytes(buffer).map(|i| i.token()).collect(), }; @@ -363,7 +363,7 @@ impl Check for ParseWords { let typos_found = false; let msg = report::Parse { - context: report::Context::None, + context: None, kind: report::ParseKind::Word, data: parser .parse_str(buffer) @@ -387,7 +387,7 @@ impl Check for ParseWords { let typos_found = false; let msg = report::Parse { - context: report::Context::None, + context: None, kind: report::ParseKind::Word, data: parser .parse_bytes(buffer) diff --git a/crates/typos/src/report.rs b/crates/typos/src/report.rs index a1386ca..bd45753 100644 --- a/crates/typos/src/report.rs +++ b/crates/typos/src/report.rs @@ -39,7 +39,7 @@ impl<'m> Message<'m> { } } - pub fn context(self, context: Context<'m>) -> Self { + pub fn context(self, context: Option>) -> Self { match self { Message::Typo(typo) => { let typo = typo.context(context); @@ -65,7 +65,7 @@ pub struct BinaryFile<'m> { #[non_exhaustive] pub struct Typo<'m> { #[serde(flatten)] - pub context: Context<'m>, + pub context: Option>, #[serde(skip)] pub buffer: Cow<'m, [u8]>, pub byte_offset: usize, @@ -76,7 +76,7 @@ pub struct Typo<'m> { impl<'m> Default for Typo<'m> { fn default() -> Self { Self { - context: Context::None, + context: None, buffer: Cow::Borrowed(&[]), byte_offset: 0, typo: "", @@ -91,13 +91,6 @@ impl<'m> Default for Typo<'m> { pub enum Context<'m> { File(FileContext<'m>), Path(PathContext<'m>), - None, -} - -impl<'m> Default for Context<'m> { - fn default() -> Self { - Context::None - } } impl<'m> std::fmt::Display for Context<'m> { @@ -105,7 +98,6 @@ impl<'m> std::fmt::Display for Context<'m> { match self { Context::File(c) => write!(f, "{}:{}", c.path.display(), c.line_num), Context::Path(c) => write!(f, "{}", c.path.display()), - Context::None => Ok(()), } } } @@ -172,7 +164,7 @@ impl<'m> Default for File<'m> { #[non_exhaustive] pub struct Parse<'m> { #[serde(flatten)] - pub context: Context<'m>, + pub context: Option>, pub kind: ParseKind, pub data: Vec<&'m str>, } @@ -180,7 +172,7 @@ pub struct Parse<'m> { impl<'m> Default for Parse<'m> { fn default() -> Self { Self { - context: Context::None, + context: None, kind: ParseKind::Identifier, data: vec![], } @@ -294,13 +286,15 @@ fn print_brief_correction(msg: &Typo) { crate::Status::Invalid => { println!( "{}:{}: {} is disallowed", - msg.context, msg.byte_offset, msg.typo, + context_display(&msg.context), + msg.byte_offset, + msg.typo, ); } crate::Status::Corrections(corrections) => { println!( "{}:{}: {} -> {}", - msg.context, + context_display(&msg.context), msg.byte_offset, msg.typo, itertools::join(corrections.iter(), ", ") @@ -318,7 +312,9 @@ fn print_long_correction(msg: &Typo) { writeln!( handle, "{}:{}: {} is disallowed", - msg.context, msg.byte_offset, msg.typo, + context_display(&msg.context), + msg.byte_offset, + msg.typo, ) .unwrap(); } @@ -332,9 +328,15 @@ fn print_long_correction(msg: &Typo) { .unwrap(); } } - writeln!(handle, " --> {}:{}", msg.context, msg.byte_offset).unwrap(); + writeln!( + handle, + " --> {}:{}", + context_display(&msg.context), + msg.byte_offset + ) + .unwrap(); - if let Context::File(context) = &msg.context { + if let Some(Context::File(context)) = &msg.context { let line_num = context.line_num.to_string(); let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect(); @@ -350,6 +352,13 @@ fn print_long_correction(msg: &Typo) { } } +fn context_display<'c>(context: &'c Option>) -> &'c dyn std::fmt::Display { + context + .as_ref() + .map(|c| c as &dyn std::fmt::Display) + .unwrap_or(&"") +} + #[derive(Copy, Clone, Debug)] pub struct PrintJson; diff --git a/src/replace.rs b/src/replace.rs index 22a6376..ae1f101 100644 --- a/src/replace.rs +++ b/src/replace.rs @@ -67,7 +67,7 @@ impl<'r> typos::report::Report for Replace<'r> { }; match &typo.context { - typos::report::Context::File(file) => { + Some(typos::report::Context::File(file)) => { let path = file.path.to_owned(); let line_num = file.line_num; let correction = @@ -82,7 +82,7 @@ impl<'r> typos::report::Report for Replace<'r> { content.push(correction); false } - typos::report::Context::Path(path) => { + Some(typos::report::Context::Path(path)) => { let path = path.path.to_owned(); let correction = Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref()); @@ -209,12 +209,12 @@ mod test { let replace = Replace::new(&primary); replace.report( typos::report::Typo::default() - .context( + .context(Some( typos::report::FileContext::default() .path(input_file.path()) .line_num(1) .into(), - ) + )) .buffer(std::borrow::Cow::Borrowed(b"1 foo 2\n3 4 5")) .byte_offset(2) .typo("foo") @@ -238,11 +238,11 @@ mod test { let replace = Replace::new(&primary); replace.report( typos::report::Typo::default() - .context( + .context(Some( typos::report::PathContext::default() .path(input_file.path()) .into(), - ) + )) .buffer(std::borrow::Cow::Borrowed(b"foo.txt")) .byte_offset(0) .typo("foo") From d258e62f433395138892655b78864d83690355e7 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 11 Nov 2020 18:19:26 -0600 Subject: [PATCH 3/4] feat(report): Diff output mode --- Cargo.lock | 7 ++++ Cargo.toml | 1 + src/args.rs | 4 +++ src/diff.rs | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 10 ++++-- src/replace.rs | 12 +++---- 6 files changed, 119 insertions(+), 8 deletions(-) create mode 100644 src/diff.rs diff --git a/Cargo.lock b/Cargo.lock index 27f7a71..696db77 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -291,6 +291,12 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + [[package]] name = "doc-comment" version = "0.3.3" @@ -1078,6 +1084,7 @@ dependencies = [ "bstr", "clap", "clap-verbosity-flag", + "difflib", "env_logger 0.8.1", "ignore", "log", diff --git a/Cargo.toml b/Cargo.toml index 0ffd1af..f87e14c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ log = "0.4" env_logger = "0.8" bstr = "0.2" ahash = "0.5.8" +difflib = "0.4" [dev-dependencies] assert_fs = "1.0" diff --git a/src/args.rs b/src/args.rs index 2bab458..f9d2b0f 100644 --- a/src/args.rs +++ b/src/args.rs @@ -54,6 +54,10 @@ pub(crate) struct Args { /// Ignore implicit configuration files. pub(crate) isolated: bool, + #[structopt(long)] + /// Print a diff of what would change + pub(crate) diff: bool, + #[structopt(long, short = "w")] /// Write corrections out pub(crate) write_changes: bool, diff --git a/src/diff.rs b/src/diff.rs new file mode 100644 index 0000000..a49ae13 --- /dev/null +++ b/src/diff.rs @@ -0,0 +1,93 @@ +use std::collections::BTreeMap; +use std::sync; + +use bstr::ByteSlice; + +pub struct Diff<'r> { + reporter: &'r dyn typos::report::Report, + deferred: sync::Mutex, +} + +impl<'r> Diff<'r> { + pub(crate) fn new(reporter: &'r dyn typos::report::Report) -> Self { + Self { + reporter, + deferred: sync::Mutex::new(crate::replace::Deferred::default()), + } + } + + pub fn show(&self) -> Result<(), std::io::Error> { + let deferred = self.deferred.lock().unwrap(); + + for (path, corrections) in deferred.content.iter() { + let buffer = std::fs::read(path)?; + + let mut original = Vec::new(); + let mut corrected = Vec::new(); + for (line_idx, line) in buffer.lines_with_terminator().enumerate() { + original.push(String::from_utf8_lossy(line).into_owned()); + + let line_num = line_idx + 1; + let line = if let Some(corrections) = corrections.get(&line_num) { + let line = line.to_vec(); + crate::replace::correct(line, &corrections) + } else { + line.to_owned() + }; + corrected.push(String::from_utf8_lossy(&line).into_owned()) + } + + let display_path = path.display().to_string(); + let diff = difflib::unified_diff( + &original, + &corrected, + display_path.as_str(), + display_path.as_str(), + "original", + "corrected", + 0, + ); + for line in diff { + print!("{}", line); + } + } + + Ok(()) + } +} + +impl<'r> typos::report::Report for Diff<'r> { + fn report(&self, msg: typos::report::Message<'_>) -> bool { + let typo = match &msg { + typos::report::Message::Typo(typo) => typo, + _ => return self.reporter.report(msg), + }; + + let corrections = match &typo.corrections { + typos::Status::Corrections(corrections) if corrections.len() == 1 => corrections, + _ => return self.reporter.report(msg), + }; + + match &typo.context { + Some(typos::report::Context::File(file)) => { + let path = file.path.to_owned(); + let line_num = file.line_num; + let correction = crate::replace::Correction::new( + typo.byte_offset, + typo.typo, + corrections[0].as_ref(), + ); + let mut deferred = self.deferred.lock().unwrap(); + let content = deferred + .content + .entry(path) + .or_insert_with(BTreeMap::new) + .entry(line_num) + .or_insert_with(Vec::new); + content.push(correction); + false + } + _ => msg.is_correction(), + } + } +} diff --git a/src/main.rs b/src/main.rs index 5206c26..00ce2e5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,6 +10,7 @@ mod args; mod checks; mod config; mod dict; +mod diff; mod replace; fn main() { @@ -86,7 +87,10 @@ fn run() -> Result { let mut reporter = args.format.reporter(); let replace_reporter = replace::Replace::new(reporter); - if args.write_changes { + let diff_reporter = diff::Diff::new(reporter); + if args.diff { + reporter = &diff_reporter; + } else if args.write_changes { reporter = &replace_reporter; } @@ -129,7 +133,9 @@ fn run() -> Result { errors_found = true; } - if args.write_changes { + if args.diff { + diff_reporter.show()?; + } else if args.write_changes { replace_reporter.write()?; } } diff --git a/src/replace.rs b/src/replace.rs index ae1f101..aef418a 100644 --- a/src/replace.rs +++ b/src/replace.rs @@ -97,20 +97,20 @@ impl<'r> typos::report::Report for Replace<'r> { } #[derive(Clone, Debug, Default)] -struct Deferred { - content: BTreeMap>>, - paths: BTreeMap>, +pub(crate) struct Deferred { + pub(crate) content: BTreeMap>>, + pub(crate) paths: BTreeMap>, } #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] -struct Correction { +pub(crate) struct Correction { pub byte_offset: usize, pub typo: Vec, pub correction: Vec, } impl Correction { - fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { + pub(crate) fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { Self { byte_offset, typo: typo.as_bytes().to_vec(), @@ -119,7 +119,7 @@ impl Correction { } } -fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { +pub(crate) fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { let mut corrections: Vec<_> = corrections.iter().collect(); corrections.sort_unstable(); corrections.reverse(); From ce16d38cfd852302696c4e2aed34fb08c1890346 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 11 Nov 2020 18:24:51 -0600 Subject: [PATCH 4/4] perf(dict): Skip checking numbers --- src/dict.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/dict.rs b/src/dict.rs index a6993d6..ab602d6 100644 --- a/src/dict.rs +++ b/src/dict.rs @@ -29,6 +29,10 @@ impl BuiltIn { &'s self, word_token: typos::tokens::Word<'w>, ) -> Option> { + if word_token.case() == typos::tokens::Case::None { + return None; + } + let word = word_token.token(); let mut corrections = if let Some(correction) = self.correct_with_dict(word) { self.correct_with_vars(word) @@ -211,6 +215,10 @@ impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> { } fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Option> { + if word.case() == typos::tokens::Case::None { + return None; + } + // Skip hashing if we can let custom = if !self.words.is_empty() { let w = UniCase::new(word.token());