diff --git a/src/checks.rs b/src/checks.rs index 84e9c83..7053d24 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -50,6 +50,14 @@ impl TyposSettings { } } + pub fn build_fix_typos(&self) -> FixTypos { + FixTypos { + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + } + } + pub fn build_identifier_parser(&self) -> Identifiers { Identifiers { check_filenames: self.check_filenames, @@ -120,8 +128,7 @@ impl Check for Typos { } if self.check_files { - let buffer = read_file(path, reporter)?; - let (buffer, content_type) = massage_data(buffer)?; + let (buffer, content_type) = read_file(path, reporter)?; if !explicit && !self.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; @@ -146,6 +153,91 @@ impl Check for Typos { } } +#[derive(Debug, Clone)] +pub struct FixTypos { + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl Check for FixTypos { + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + tokenizer: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = typos::ParserBuilder::new() + .tokenizer(tokenizer) + .dictionary(dictionary) + .typos(); + + if self.check_files { + let (buffer, content_type) = read_file(path, reporter)?; + if !explicit && !self.binary && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + } else { + let mut fixes = Vec::new(); + let mut accum_line_num = AccumulateLineNum::new(); + for typo in parser.parse_bytes(&buffer) { + if is_fixable(&typo) { + fixes.push(typo.into_owned()); + } else { + let line_num = accum_line_num.line_num(&buffer, typo.byte_offset); + let (line, line_offset) = extract_line(&buffer, typo.byte_offset); + let msg = report::Typo { + context: Some(report::FileContext { path, line_num }.into()), + buffer: std::borrow::Cow::Borrowed(line), + byte_offset: line_offset, + typo: typo.typo.as_ref(), + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } + if !fixes.is_empty() { + let buffer = fix_buffer(buffer, fixes.into_iter()); + write_file(path, content_type, &buffer, reporter)?; + } + } + } + + // Ensure the above write can happen before renaming the file. + if self.check_filenames { + if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { + let mut fixes = Vec::new(); + for typo in parser.parse_str(file_name) { + if is_fixable(&typo) { + fixes.push(typo.into_owned()); + } else { + let msg = report::Typo { + context: Some(report::PathContext { path }.into()), + buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo.as_ref(), + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } + if !fixes.is_empty() { + let file_name = file_name.to_owned().into_bytes(); + let new_name = fix_buffer(file_name, fixes.into_iter()); + let new_name = + String::from_utf8(new_name).expect("corrections are valid utf-8"); + let new_path = path.with_file_name(new_name); + std::fs::rename(path, new_path)?; + } + } + } + + Ok(()) + } +} + #[derive(Debug, Clone)] pub struct Identifiers { check_filenames: bool, @@ -180,8 +272,7 @@ impl Check for Identifiers { } if self.check_files { - let buffer = read_file(path, reporter)?; - let (buffer, content_type) = massage_data(buffer)?; + let (buffer, content_type) = read_file(path, reporter)?; if !explicit && !self.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; @@ -237,8 +328,7 @@ impl Check for Words { } if self.check_files { - let buffer = read_file(path, reporter)?; - let (buffer, content_type) = massage_data(buffer)?; + let (buffer, content_type) = read_file(path, reporter)?; if !explicit && !self.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; @@ -281,8 +371,7 @@ impl Check for FoundFiles { let msg = report::File::new(path); reporter.report(msg.into())?; } else { - let buffer = read_file(path, reporter)?; - let (_buffer, content_type) = massage_data(buffer)?; + let (_buffer, content_type) = read_file(path, reporter)?; if !explicit && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; @@ -296,10 +385,10 @@ impl Check for FoundFiles { } } -fn read_file( +pub fn read_file( path: &std::path::Path, reporter: &dyn report::Report, -) -> Result, std::io::Error> { +) -> Result<(Vec, content_inspector::ContentType), std::io::Error> { let buffer = match std::fs::read(path) { Ok(buffer) => buffer, Err(err) => { @@ -308,14 +397,8 @@ fn read_file( Vec::new() } }; - Ok(buffer) -} -fn massage_data( - buffer: Vec, -) -> Result<(Vec, content_inspector::ContentType), std::io::Error> { let mut content_type = content_inspector::inspect(&buffer); - // HACK: We only support UTF-8 at the moment if content_type != content_inspector::ContentType::UTF_8_BOM && content_type != content_inspector::ContentType::UTF_8 @@ -326,6 +409,27 @@ fn massage_data( Ok((buffer, content_type)) } +pub fn write_file( + path: &std::path::Path, + content_type: content_inspector::ContentType, + buffer: &[u8], + reporter: &dyn report::Report, +) -> Result<(), std::io::Error> { + assert!( + content_type == content_inspector::ContentType::UTF_8_BOM + || content_type == content_inspector::ContentType::UTF_8 + || content_type == content_inspector::ContentType::BINARY + ); + match std::fs::write(path, buffer) { + Ok(()) => (), + Err(err) => { + let msg = report::Error::new(err.to_string()); + reporter.report(msg.into())?; + } + }; + Ok(()) +} + struct AccumulateLineNum { line_num: usize, last_offset: usize, @@ -365,6 +469,31 @@ fn extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) { (line, line_offset) } +fn extract_fix<'t>(typo: &'t typos::Typo<'t>) -> Option<&'t str> { + match &typo.corrections { + typos::Status::Corrections(c) if c.len() == 1 => Some(c[0].as_ref()), + _ => None, + } +} + +fn is_fixable<'t>(typo: &typos::Typo<'t>) -> bool { + extract_fix(typo).is_some() +} + +fn fix_buffer(mut buffer: Vec, typos: impl Iterator>) -> Vec { + let mut offset = 0isize; + for typo in typos { + let fix = extract_fix(&typo).expect("Caller only provides fixable typos"); + let start = ((typo.byte_offset as isize) + offset) as usize; + let end = start + typo.typo.len(); + + buffer.splice(start..end, fix.as_bytes().iter().copied()); + + offset += (fix.len() as isize) - (typo.typo.len() as isize); + } + buffer +} + pub fn check_path( walk: ignore::Walk, checks: &dyn Check, diff --git a/src/lib.rs b/src/lib.rs index 632c1b0..2584359 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,5 +2,5 @@ pub mod checks; pub mod config; pub mod dict; pub mod diff; -pub mod replace; +pub(crate) mod replace; pub mod report; diff --git a/src/main.rs b/src/main.rs index d417642..5e33fd5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,6 @@ use typos_cli::checks; use typos_cli::config; use typos_cli::dict; use typos_cli::diff; -use typos_cli::replace; use typos_cli::report; use proc_exit::WithCodeResultExt; @@ -101,15 +100,12 @@ fn run() -> proc_exit::ExitResult { }; let status_reporter = report::MessageStatus::new(output_reporter); let mut reporter: &dyn report::Report = &status_reporter; - let replace_reporter = replace::Replace::new(reporter); let diff_reporter = diff::Diff::new(reporter); if args.diff { reporter = &diff_reporter; - } else if args.write_changes { - reporter = &replace_reporter; } - let (files, identifier_parser, word_parser, checks); + let (files, identifier_parser, word_parser, checks, fixer); let selected_checks: &dyn checks::Check = if args.files { files = settings.build_files(); &files @@ -119,6 +115,9 @@ fn run() -> proc_exit::ExitResult { } else if args.words { word_parser = settings.build_word_parser(); &word_parser + } else if args.write_changes { + fixer = settings.build_fix_typos(); + &fixer } else { checks = settings.build_typos(); &checks @@ -156,10 +155,6 @@ fn run() -> proc_exit::ExitResult { if args.diff { diff_reporter.show().with_code(proc_exit::Code::FAILURE)?; - } else if args.write_changes { - replace_reporter - .write() - .with_code(proc_exit::Code::FAILURE)?; } } diff --git a/src/replace.rs b/src/replace.rs index 4bec030..78f2a17 100644 --- a/src/replace.rs +++ b/src/replace.rs @@ -1,116 +1,21 @@ use std::collections::BTreeMap; -use std::io::Write; use std::path; -use std::sync; - -use bstr::ByteSlice; - -pub struct Replace<'r> { - reporter: &'r dyn crate::report::Report, - deferred: sync::Mutex, -} - -impl<'r> Replace<'r> { - pub fn new(reporter: &'r dyn crate::report::Report) -> Self { - Self { - reporter, - deferred: sync::Mutex::new(Deferred::default()), - } - } - - pub fn write(&self) -> Result<(), std::io::Error> { - let deferred = self.deferred.lock().unwrap(); - - for (path, corrections) in deferred.content.iter() { - let buffer = std::fs::read(path)?; - - let mut file = std::fs::File::create(path)?; - for (line_idx, line) in buffer.lines_with_terminator().enumerate() { - let line_num = line_idx + 1; - if let Some(corrections) = corrections.get(&line_num) { - let line = line.to_vec(); - let line = correct(line, &corrections); - file.write_all(&line)?; - } else { - file.write_all(&line)?; - } - } - } - - for (path, corrections) in deferred.paths.iter() { - let orig_name = path - .file_name() - .and_then(|s| s.to_str()) - .expect("generating a correction requires the filename to be valid.") - .to_owned() - .into_bytes(); - let new_name = correct(orig_name, &corrections); - let new_name = String::from_utf8(new_name).expect("corrections are valid utf-8"); - let new_path = path.with_file_name(new_name); - std::fs::rename(path, new_path)?; - } - - Ok(()) - } -} - -impl<'r> crate::report::Report for Replace<'r> { - fn report(&self, msg: crate::report::Message<'_>) -> Result<(), std::io::Error> { - let typo = match &msg { - crate::report::Message::Typo(typo) => typo, - _ => return self.reporter.report(msg), - }; - - let corrections = match &typo.corrections { - typos::Status::Corrections(corrections) if corrections.len() == 1 => corrections, - _ => return self.reporter.report(msg), - }; - - match &typo.context { - Some(crate::report::Context::File(file)) => { - let path = file.path.to_owned(); - let line_num = file.line_num; - let correction = - Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref()); - let mut deferred = self.deferred.lock().unwrap(); - let content = deferred - .content - .entry(path) - .or_insert_with(BTreeMap::new) - .entry(line_num) - .or_insert_with(Vec::new); - content.push(correction); - Ok(()) - } - Some(crate::report::Context::Path(path)) => { - let path = path.path.to_owned(); - let correction = - Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref()); - let mut deferred = self.deferred.lock().unwrap(); - let content = deferred.paths.entry(path).or_insert_with(Vec::new); - content.push(correction); - Ok(()) - } - _ => self.reporter.report(msg), - } - } -} #[derive(Clone, Debug, Default)] -pub struct Deferred { - pub content: BTreeMap>>, - pub paths: BTreeMap>, +pub(crate) struct Deferred { + pub(crate) content: BTreeMap>>, + pub(crate) paths: BTreeMap>, } #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] -pub struct Correction { - pub byte_offset: usize, - pub typo: Vec, - pub correction: Vec, +pub(crate) struct Correction { + pub(crate) byte_offset: usize, + pub(crate) typo: Vec, + pub(crate) correction: Vec, } impl Correction { - pub fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { + pub(crate) fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { Self { byte_offset, typo: typo.as_bytes().to_vec(), @@ -119,7 +24,7 @@ impl Correction { } } -pub fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { +pub(crate) fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { let mut corrections: Vec<_> = corrections.iter().collect(); corrections.sort_unstable(); corrections.reverse(); @@ -137,9 +42,6 @@ pub fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { mod test { use super::*; - use crate::report::Report; - use assert_fs::prelude::*; - fn simple_correct(line: &str, corrections: Vec<(usize, &str, &str)>) -> String { let line = line.as_bytes().to_vec(); let corrections: Vec<_> = corrections @@ -198,66 +100,4 @@ mod test { ); assert_eq!(actual, "foo happy world"); } - - #[test] - fn test_replace_content() { - let temp = assert_fs::TempDir::new().unwrap(); - let input_file = temp.child("foo.txt"); - input_file.write_str("1 foo 2\n3 4 5").unwrap(); - - let primary = crate::report::PrintSilent; - let replace = Replace::new(&primary); - replace - .report( - crate::report::Typo::default() - .context(Some( - crate::report::FileContext::default() - .path(input_file.path()) - .line_num(1) - .into(), - )) - .buffer(std::borrow::Cow::Borrowed(b"1 foo 2\n3 4 5")) - .byte_offset(2) - .typo("foo") - .corrections(typos::Status::Corrections(vec![ - std::borrow::Cow::Borrowed("bar"), - ])) - .into(), - ) - .unwrap(); - replace.write().unwrap(); - - input_file.assert("1 bar 2\n3 4 5"); - } - - #[test] - fn test_replace_path() { - let temp = assert_fs::TempDir::new().unwrap(); - let input_file = temp.child("foo.txt"); - input_file.write_str("foo foo foo").unwrap(); - - let primary = crate::report::PrintSilent; - let replace = Replace::new(&primary); - replace - .report( - crate::report::Typo::default() - .context(Some( - crate::report::PathContext::default() - .path(input_file.path()) - .into(), - )) - .buffer(std::borrow::Cow::Borrowed(b"foo.txt")) - .byte_offset(0) - .typo("foo") - .corrections(typos::Status::Corrections(vec![ - std::borrow::Cow::Borrowed("bar"), - ])) - .into(), - ) - .unwrap(); - replace.write().unwrap(); - - input_file.assert(predicates::path::missing()); - temp.child("bar.txt").assert("foo foo foo"); - } }