fix: Arg write-changes reports immediately

This commit is contained in:
Ed Page 2021-01-01 18:25:48 -06:00
parent 48112a47e9
commit c900e48593
4 changed files with 159 additions and 195 deletions

View file

@ -50,6 +50,14 @@ impl TyposSettings {
} }
} }
/// Build a [`FixTypos`] check that carries over this configuration's
/// `check_filenames`, `check_files` and `binary` flags.
pub fn build_fix_typos(&self) -> FixTypos {
    let (check_filenames, check_files, binary) =
        (self.check_filenames, self.check_files, self.binary);
    FixTypos {
        check_filenames,
        check_files,
        binary,
    }
}
pub fn build_identifier_parser(&self) -> Identifiers { pub fn build_identifier_parser(&self) -> Identifiers {
Identifiers { Identifiers {
check_filenames: self.check_filenames, check_filenames: self.check_filenames,
@ -120,8 +128,7 @@ impl Check for Typos {
} }
if self.check_files { if self.check_files {
let buffer = read_file(path, reporter)?; let (buffer, content_type) = read_file(path, reporter)?;
let (buffer, content_type) = massage_data(buffer)?;
if !explicit && !self.binary && content_type.is_binary() { if !explicit && !self.binary && content_type.is_binary() {
let msg = report::BinaryFile { path }; let msg = report::BinaryFile { path };
reporter.report(msg.into())?; reporter.report(msg.into())?;
@ -146,6 +153,91 @@ impl Check for Typos {
} }
} }
/// Check that rewrites fixable typos in place instead of only reporting them.
///
/// Typos that cannot be fixed automatically are still sent to the reporter.
#[derive(Debug, Clone)]
pub struct FixTypos {
/// When set, the file name is scanned and the file is renamed if fixes apply.
check_filenames: bool,
/// When set, the file content is scanned and rewritten if fixes apply.
check_files: bool,
/// When unset, non-explicit files detected as binary are reported and skipped.
binary: bool,
}
impl Check for FixTypos {
/// Scan `path` for typos, applying the fixable ones and reporting the rest.
///
/// A typo counts as fixable when `is_fixable` accepts it. Content fixes are
/// written back to `path`; file-name fixes are applied by renaming the file.
///
/// # Errors
///
/// Returns any error produced by the reporter, by `read_file` / `write_file`,
/// or by `std::fs::rename`.
///
/// # Panics
///
/// Panics if the corrected file name is not valid UTF-8.
fn check_file(
&self,
path: &std::path::Path,
explicit: bool,
tokenizer: &tokens::Tokenizer,
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
// One parser serves both the content pass and the file-name pass.
let parser = typos::ParserBuilder::new()
.tokenizer(tokenizer)
.dictionary(dictionary)
.typos();
if self.check_files {
let (buffer, content_type) = read_file(path, reporter)?;
// Binary files are only processed when passed explicitly or when `binary` is set.
if !explicit && !self.binary && content_type.is_binary() {
let msg = report::BinaryFile { path };
reporter.report(msg.into())?;
} else {
// Fixable typos are collected and applied in one pass after scanning.
let mut fixes = Vec::new();
let mut accum_line_num = AccumulateLineNum::new();
for typo in parser.parse_bytes(&buffer) {
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {
// Not fixable: report it immediately with its line as context.
let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
let msg = report::Typo {
context: Some(report::FileContext { path, line_num }.into()),
buffer: std::borrow::Cow::Borrowed(line),
byte_offset: line_offset,
typo: typo.typo.as_ref(),
corrections: typo.corrections,
};
reporter.report(msg.into())?;
}
}
// Only touch the file on disk when something actually changed.
if !fixes.is_empty() {
let buffer = fix_buffer(buffer, fixes.into_iter());
write_file(path, content_type, &buffer, reporter)?;
}
}
}
// Ensure the above write can happen before renaming the file.
if self.check_filenames {
// Paths without a UTF-8 file name are skipped silently.
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
let mut fixes = Vec::new();
for typo in parser.parse_str(file_name) {
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {
// Not fixable: report it with the file name itself as the buffer.
let msg = report::Typo {
context: Some(report::PathContext { path }.into()),
buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
byte_offset: typo.byte_offset,
typo: typo.typo.as_ref(),
corrections: typo.corrections,
};
reporter.report(msg.into())?;
}
}
if !fixes.is_empty() {
// Apply the fixes to the name's bytes, then rename within the same directory.
let file_name = file_name.to_owned().into_bytes();
let new_name = fix_buffer(file_name, fixes.into_iter());
let new_name =
String::from_utf8(new_name).expect("corrections are valid utf-8");
let new_path = path.with_file_name(new_name);
// NOTE(review): a rename failure is returned via `?` rather than sent to `reporter`.
std::fs::rename(path, new_path)?;
}
}
}
Ok(())
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Identifiers { pub struct Identifiers {
check_filenames: bool, check_filenames: bool,
@ -180,8 +272,7 @@ impl Check for Identifiers {
} }
if self.check_files { if self.check_files {
let buffer = read_file(path, reporter)?; let (buffer, content_type) = read_file(path, reporter)?;
let (buffer, content_type) = massage_data(buffer)?;
if !explicit && !self.binary && content_type.is_binary() { if !explicit && !self.binary && content_type.is_binary() {
let msg = report::BinaryFile { path }; let msg = report::BinaryFile { path };
reporter.report(msg.into())?; reporter.report(msg.into())?;
@ -237,8 +328,7 @@ impl Check for Words {
} }
if self.check_files { if self.check_files {
let buffer = read_file(path, reporter)?; let (buffer, content_type) = read_file(path, reporter)?;
let (buffer, content_type) = massage_data(buffer)?;
if !explicit && !self.binary && content_type.is_binary() { if !explicit && !self.binary && content_type.is_binary() {
let msg = report::BinaryFile { path }; let msg = report::BinaryFile { path };
reporter.report(msg.into())?; reporter.report(msg.into())?;
@ -281,8 +371,7 @@ impl Check for FoundFiles {
let msg = report::File::new(path); let msg = report::File::new(path);
reporter.report(msg.into())?; reporter.report(msg.into())?;
} else { } else {
let buffer = read_file(path, reporter)?; let (_buffer, content_type) = read_file(path, reporter)?;
let (_buffer, content_type) = massage_data(buffer)?;
if !explicit && content_type.is_binary() { if !explicit && content_type.is_binary() {
let msg = report::BinaryFile { path }; let msg = report::BinaryFile { path };
reporter.report(msg.into())?; reporter.report(msg.into())?;
@ -296,10 +385,10 @@ impl Check for FoundFiles {
} }
} }
fn read_file( pub fn read_file(
path: &std::path::Path, path: &std::path::Path,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<Vec<u8>, std::io::Error> { ) -> Result<(Vec<u8>, content_inspector::ContentType), std::io::Error> {
let buffer = match std::fs::read(path) { let buffer = match std::fs::read(path) {
Ok(buffer) => buffer, Ok(buffer) => buffer,
Err(err) => { Err(err) => {
@ -308,14 +397,8 @@ fn read_file(
Vec::new() Vec::new()
} }
}; };
Ok(buffer)
}
fn massage_data(
buffer: Vec<u8>,
) -> Result<(Vec<u8>, content_inspector::ContentType), std::io::Error> {
let mut content_type = content_inspector::inspect(&buffer); let mut content_type = content_inspector::inspect(&buffer);
// HACK: We only support UTF-8 at the moment // HACK: We only support UTF-8 at the moment
if content_type != content_inspector::ContentType::UTF_8_BOM if content_type != content_inspector::ContentType::UTF_8_BOM
&& content_type != content_inspector::ContentType::UTF_8 && content_type != content_inspector::ContentType::UTF_8
@ -326,6 +409,27 @@ fn massage_data(
Ok((buffer, content_type)) Ok((buffer, content_type))
} }
pub fn write_file(
path: &std::path::Path,
content_type: content_inspector::ContentType,
buffer: &[u8],
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
assert!(
content_type == content_inspector::ContentType::UTF_8_BOM
|| content_type == content_inspector::ContentType::UTF_8
|| content_type == content_inspector::ContentType::BINARY
);
match std::fs::write(path, buffer) {
Ok(()) => (),
Err(err) => {
let msg = report::Error::new(err.to_string());
reporter.report(msg.into())?;
}
};
Ok(())
}
struct AccumulateLineNum { struct AccumulateLineNum {
line_num: usize, line_num: usize,
last_offset: usize, last_offset: usize,
@ -365,6 +469,31 @@ fn extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) {
(line, line_offset) (line, line_offset)
} }
/// Return the replacement text for `typo` when it has exactly one suggested
/// correction; otherwise return `None`.
fn extract_fix<'t>(typo: &'t typos::Typo<'t>) -> Option<&'t str> {
    if let typos::Status::Corrections(candidates) = &typo.corrections {
        // A single-element slice pattern means the choice is unambiguous.
        if let [only] = candidates.as_slice() {
            return Some(only.as_ref());
        }
    }
    None
}
fn is_fixable<'t>(typo: &typos::Typo<'t>) -> bool {
extract_fix(typo).is_some()
}
/// Replace every typo in `buffer` with its single correction and return the
/// edited buffer.
///
/// Each typo's `byte_offset` refers to the unedited buffer; a running shift
/// accounts for length changes caused by earlier replacements, so typos must
/// be supplied in ascending offset order.
///
/// # Panics
///
/// Panics if a typo does not have exactly one correction.
fn fix_buffer(mut buffer: Vec<u8>, typos: impl Iterator<Item = typos::Typo<'static>>) -> Vec<u8> {
    // Difference between the edited and original length so far.
    let mut shift: isize = 0;
    for typo in typos {
        let replacement = extract_fix(&typo).expect("Caller only provides fixable typos");
        let typo_len = typo.typo.len();
        let begin = (typo.byte_offset as isize + shift) as usize;
        buffer.splice(begin..begin + typo_len, replacement.bytes());
        shift += replacement.len() as isize - typo_len as isize;
    }
    buffer
}
pub fn check_path( pub fn check_path(
walk: ignore::Walk, walk: ignore::Walk,
checks: &dyn Check, checks: &dyn Check,

View file

@ -2,5 +2,5 @@ pub mod checks;
pub mod config; pub mod config;
pub mod dict; pub mod dict;
pub mod diff; pub mod diff;
pub mod replace; pub(crate) mod replace;
pub mod report; pub mod report;

View file

@ -11,7 +11,6 @@ use typos_cli::checks;
use typos_cli::config; use typos_cli::config;
use typos_cli::dict; use typos_cli::dict;
use typos_cli::diff; use typos_cli::diff;
use typos_cli::replace;
use typos_cli::report; use typos_cli::report;
use proc_exit::WithCodeResultExt; use proc_exit::WithCodeResultExt;
@ -101,15 +100,12 @@ fn run() -> proc_exit::ExitResult {
}; };
let status_reporter = report::MessageStatus::new(output_reporter); let status_reporter = report::MessageStatus::new(output_reporter);
let mut reporter: &dyn report::Report = &status_reporter; let mut reporter: &dyn report::Report = &status_reporter;
let replace_reporter = replace::Replace::new(reporter);
let diff_reporter = diff::Diff::new(reporter); let diff_reporter = diff::Diff::new(reporter);
if args.diff { if args.diff {
reporter = &diff_reporter; reporter = &diff_reporter;
} else if args.write_changes {
reporter = &replace_reporter;
} }
let (files, identifier_parser, word_parser, checks); let (files, identifier_parser, word_parser, checks, fixer);
let selected_checks: &dyn checks::Check = if args.files { let selected_checks: &dyn checks::Check = if args.files {
files = settings.build_files(); files = settings.build_files();
&files &files
@ -119,6 +115,9 @@ fn run() -> proc_exit::ExitResult {
} else if args.words { } else if args.words {
word_parser = settings.build_word_parser(); word_parser = settings.build_word_parser();
&word_parser &word_parser
} else if args.write_changes {
fixer = settings.build_fix_typos();
&fixer
} else { } else {
checks = settings.build_typos(); checks = settings.build_typos();
&checks &checks
@ -156,10 +155,6 @@ fn run() -> proc_exit::ExitResult {
if args.diff { if args.diff {
diff_reporter.show().with_code(proc_exit::Code::FAILURE)?; diff_reporter.show().with_code(proc_exit::Code::FAILURE)?;
} else if args.write_changes {
replace_reporter
.write()
.with_code(proc_exit::Code::FAILURE)?;
} }
} }

View file

@ -1,116 +1,21 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::io::Write;
use std::path; use std::path;
use std::sync;
use bstr::ByteSlice;
/// Reporter wrapper that holds back fixable typos so they can be applied later.
pub struct Replace<'r> {
/// Inner reporter that receives every message this wrapper does not defer.
reporter: &'r dyn crate::report::Report,
/// Corrections collected so far; behind a mutex because `report` takes `&self`.
deferred: sync::Mutex<Deferred>,
}
impl<'r> Replace<'r> {
    /// Create a wrapper around `reporter` with no deferred corrections.
    pub fn new(reporter: &'r dyn crate::report::Report) -> Self {
        Self {
            reporter,
            deferred: sync::Mutex::new(Deferred::default()),
        }
    }

    /// Apply every deferred correction: rewrite file contents first, then
    /// rename files whose names contained typos.
    ///
    /// # Errors
    ///
    /// Returns the first I/O error from reading, writing or renaming a file.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex is poisoned, if a deferred path has no
    /// valid UTF-8 file name, or if a corrected name is not valid UTF-8.
    pub fn write(&self) -> Result<(), std::io::Error> {
        let deferred = self.deferred.lock().unwrap();

        for (path, corrections) in deferred.content.iter() {
            // Read the whole file up front; `File::create` truncates it.
            let buffer = std::fs::read(path)?;
            // Buffer the output so each line is not its own write syscall.
            let mut file = std::io::BufWriter::new(std::fs::File::create(path)?);
            for (line_idx, line) in buffer.lines_with_terminator().enumerate() {
                // Corrections are keyed by 1-based line number.
                let line_num = line_idx + 1;
                match corrections.get(&line_num) {
                    Some(corrections) => {
                        file.write_all(&correct(line.to_vec(), corrections))?
                    }
                    None => file.write_all(line)?,
                }
            }
            // Dropping a `BufWriter` swallows flush errors, so flush explicitly.
            file.flush()?;
        }

        for (path, corrections) in deferred.paths.iter() {
            let orig_name = path
                .file_name()
                .and_then(|s| s.to_str())
                .expect("generating a correction requires the filename to be valid.")
                .to_owned()
                .into_bytes();
            let new_name = correct(orig_name, corrections);
            let new_name = String::from_utf8(new_name).expect("corrections are valid utf-8");
            let new_path = path.with_file_name(new_name);
            std::fs::rename(path, new_path)?;
        }

        Ok(())
    }
}
impl<'r> crate::report::Report for Replace<'r> {
/// Defer typos that have exactly one correction and a file or path context;
/// forward every other message to the wrapped reporter unchanged.
///
/// # Panics
///
/// Panics if the internal mutex is poisoned.
fn report(&self, msg: crate::report::Message<'_>) -> Result<(), std::io::Error> {
// Only typo messages are candidates for deferral.
let typo = match &msg {
crate::report::Message::Typo(typo) => typo,
_ => return self.reporter.report(msg),
};
// A typo is only deferred when there is a single, unambiguous correction.
let corrections = match &typo.corrections {
typos::Status::Corrections(corrections) if corrections.len() == 1 => corrections,
_ => return self.reporter.report(msg),
};
match &typo.context {
Some(crate::report::Context::File(file)) => {
// Content typo: store it under its path and line number.
let path = file.path.to_owned();
let line_num = file.line_num;
let correction =
Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref());
let mut deferred = self.deferred.lock().unwrap();
let content = deferred
.content
.entry(path)
.or_insert_with(BTreeMap::new)
.entry(line_num)
.or_insert_with(Vec::new);
content.push(correction);
Ok(())
}
Some(crate::report::Context::Path(path)) => {
// File-name typo: store it under the path so the file can be renamed later.
let path = path.path.to_owned();
let correction =
Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref());
let mut deferred = self.deferred.lock().unwrap();
let content = deferred.paths.entry(path).or_insert_with(Vec::new);
content.push(correction);
Ok(())
}
// No usable context: nothing to apply the fix to, so pass it through.
_ => self.reporter.report(msg),
}
}
}
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub struct Deferred { pub(crate) struct Deferred {
pub content: BTreeMap<path::PathBuf, BTreeMap<usize, Vec<Correction>>>, pub(crate) content: BTreeMap<path::PathBuf, BTreeMap<usize, Vec<Correction>>>,
pub paths: BTreeMap<path::PathBuf, Vec<Correction>>, pub(crate) paths: BTreeMap<path::PathBuf, Vec<Correction>>,
} }
#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)]
pub struct Correction { pub(crate) struct Correction {
pub byte_offset: usize, pub(crate) byte_offset: usize,
pub typo: Vec<u8>, pub(crate) typo: Vec<u8>,
pub correction: Vec<u8>, pub(crate) correction: Vec<u8>,
} }
impl Correction { impl Correction {
pub fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { pub(crate) fn new(byte_offset: usize, typo: &str, correction: &str) -> Self {
Self { Self {
byte_offset, byte_offset,
typo: typo.as_bytes().to_vec(), typo: typo.as_bytes().to_vec(),
@ -119,7 +24,7 @@ impl Correction {
} }
} }
pub fn correct(mut line: Vec<u8>, corrections: &[Correction]) -> Vec<u8> { pub(crate) fn correct(mut line: Vec<u8>, corrections: &[Correction]) -> Vec<u8> {
let mut corrections: Vec<_> = corrections.iter().collect(); let mut corrections: Vec<_> = corrections.iter().collect();
corrections.sort_unstable(); corrections.sort_unstable();
corrections.reverse(); corrections.reverse();
@ -137,9 +42,6 @@ pub fn correct(mut line: Vec<u8>, corrections: &[Correction]) -> Vec<u8> {
mod test { mod test {
use super::*; use super::*;
use crate::report::Report;
use assert_fs::prelude::*;
fn simple_correct(line: &str, corrections: Vec<(usize, &str, &str)>) -> String { fn simple_correct(line: &str, corrections: Vec<(usize, &str, &str)>) -> String {
let line = line.as_bytes().to_vec(); let line = line.as_bytes().to_vec();
let corrections: Vec<_> = corrections let corrections: Vec<_> = corrections
@ -198,66 +100,4 @@ mod test {
); );
assert_eq!(actual, "foo happy world"); assert_eq!(actual, "foo happy world");
} }
// Reporting a single-correction typo with a file context and then calling
// `write` must rewrite the typo inside the file's content.
#[test]
fn test_replace_content() {
// Fixture: a two-line file with the typo "foo" on line 1 at byte offset 2.
let temp = assert_fs::TempDir::new().unwrap();
let input_file = temp.child("foo.txt");
input_file.write_str("1 foo 2\n3 4 5").unwrap();
let primary = crate::report::PrintSilent;
let replace = Replace::new(&primary);
replace
.report(
crate::report::Typo::default()
.context(Some(
crate::report::FileContext::default()
.path(input_file.path())
.line_num(1)
.into(),
))
.buffer(std::borrow::Cow::Borrowed(b"1 foo 2\n3 4 5"))
.byte_offset(2)
.typo("foo")
.corrections(typos::Status::Corrections(vec![
std::borrow::Cow::Borrowed("bar"),
]))
.into(),
)
.unwrap();
replace.write().unwrap();
// Only the typo on line 1 changes; the rest of the file is untouched.
input_file.assert("1 bar 2\n3 4 5");
}
// Reporting a single-correction typo with a path context and then calling
// `write` must rename the file and leave its content alone.
#[test]
fn test_replace_path() {
// Fixture: the typo "foo" appears in the file name and in the content.
let temp = assert_fs::TempDir::new().unwrap();
let input_file = temp.child("foo.txt");
input_file.write_str("foo foo foo").unwrap();
let primary = crate::report::PrintSilent;
let replace = Replace::new(&primary);
replace
.report(
crate::report::Typo::default()
.context(Some(
crate::report::PathContext::default()
.path(input_file.path())
.into(),
))
.buffer(std::borrow::Cow::Borrowed(b"foo.txt"))
.byte_offset(0)
.typo("foo")
.corrections(typos::Status::Corrections(vec![
std::borrow::Cow::Borrowed("bar"),
]))
.into(),
)
.unwrap();
replace.write().unwrap();
// The old name is gone and the renamed file keeps the original content.
input_file.assert(predicates::path::missing());
temp.child("bar.txt").assert("foo foo foo");
}
} }