2020-12-30 20:41:08 -05:00
|
|
|
use bstr::ByteSlice;
|
2020-11-03 20:52:39 -05:00
|
|
|
use encoding::Encoding;
|
2021-01-02 23:17:00 -05:00
|
|
|
use std::io::Read;
|
|
|
|
use std::io::Write;
|
2020-12-30 20:41:08 -05:00
|
|
|
|
|
|
|
use crate::report;
|
|
|
|
|
2021-01-02 13:56:20 -05:00
|
|
|
/// A single file-level check pass (report typos, fix typos, dump identifiers, ...).
///
/// Implementations are shared across directory-walking worker threads
/// (see `walk_path_parallel`), hence the `Send + Sync` bound.
pub trait FileChecker: Send + Sync {
    /// Run this check against `path`.
    ///
    /// * `explicit` - the user named this file directly (binary-skipping is
    ///   relaxed for explicit paths).
    /// * `policy` - per-path configuration: tokenizer, dictionary, and the
    ///   `check_filenames` / `check_files` / `binary` flags.
    /// * `reporter` - sink for findings; recoverable problems are reported
    ///   through it rather than returned.
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        policy: &crate::policy::Policy,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error>;
}
|
|
|
|
|
2021-01-05 22:29:33 -05:00
|
|
|
/// Read-only checker: reports typos in file names and file contents without
/// modifying anything.
#[derive(Debug, Clone, Copy)]
pub struct Typos;

impl FileChecker for Typos {
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        policy: &crate::policy::Policy,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        if policy.check_filenames {
            // Only UTF-8 file names can be tokenized; others are skipped.
            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
                for typo in typos::check_str(file_name, policy.tokenizer, policy.dict) {
                    let msg = report::Typo {
                        context: Some(report::PathContext { path }.into()),
                        buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
                        byte_offset: typo.byte_offset,
                        typo: typo.typo.as_ref(),
                        corrections: typo.corrections,
                    };
                    reporter.report(msg.into())?;
                }
            }
        }

        if policy.check_files {
            let (buffer, content_type) = read_file(path, reporter)?;
            // Skip binary content unless the user named the file explicitly or
            // opted into binary checking.
            if !explicit && !policy.binary && content_type.is_binary() {
                let msg = report::BinaryFile { path };
                reporter.report(msg.into())?;
            } else {
                let mut accum_line_num = AccumulateLineNum::new();
                for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
                    // Typos come back in offset order, so line numbers can be
                    // accumulated incrementally.
                    let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
                    let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
                    let msg = report::Typo {
                        context: Some(report::FileContext { path, line_num }.into()),
                        buffer: std::borrow::Cow::Borrowed(line),
                        byte_offset: line_offset,
                        typo: typo.typo.as_ref(),
                        corrections: typo.corrections,
                    };
                    reporter.report(msg.into())?;
                }
            }
        }

        Ok(())
    }
}
|
|
|
|
|
2021-01-05 22:29:33 -05:00
|
|
|
/// Checker that automatically applies unambiguous corrections (exactly one
/// candidate) and reports the rest.
#[derive(Debug, Clone, Copy)]
pub struct FixTypos;

impl FileChecker for FixTypos {
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        policy: &crate::policy::Policy,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        if policy.check_files {
            let (buffer, content_type) = read_file(path, reporter)?;
            if !explicit && !policy.binary && content_type.is_binary() {
                let msg = report::BinaryFile { path };
                reporter.report(msg.into())?;
            } else {
                let mut fixes = Vec::new();
                let mut accum_line_num = AccumulateLineNum::new();
                for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
                    if is_fixable(&typo) {
                        fixes.push(typo.into_owned());
                    } else {
                        // Ambiguous corrections can't be auto-applied; report them.
                        let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
                        let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
                        let msg = report::Typo {
                            context: Some(report::FileContext { path, line_num }.into()),
                            buffer: std::borrow::Cow::Borrowed(line),
                            byte_offset: line_offset,
                            typo: typo.typo.as_ref(),
                            corrections: typo.corrections,
                        };
                        reporter.report(msg.into())?;
                    }
                }
                // stdin ("-") is always written back out, even with no fixes,
                // so the caller still receives the content.
                if !fixes.is_empty() || path == std::path::Path::new("-") {
                    let buffer = fix_buffer(buffer, fixes.into_iter());
                    write_file(path, content_type, buffer, reporter)?;
                }
            }
        }

        // Ensure the above write can happen before renaming the file.
        if policy.check_filenames {
            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
                let mut fixes = Vec::new();
                for typo in typos::check_str(file_name, policy.tokenizer, policy.dict) {
                    if is_fixable(&typo) {
                        fixes.push(typo.into_owned());
                    } else {
                        let msg = report::Typo {
                            context: Some(report::PathContext { path }.into()),
                            buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
                            byte_offset: typo.byte_offset,
                            typo: typo.typo.as_ref(),
                            corrections: typo.corrections,
                        };
                        reporter.report(msg.into())?;
                    }
                }
                if !fixes.is_empty() {
                    let file_name = file_name.to_owned().into_bytes();
                    let new_name = fix_buffer(file_name, fixes.into_iter());
                    let new_name =
                        String::from_utf8(new_name).expect("corrections are valid utf-8");
                    let new_path = path.with_file_name(new_name);
                    std::fs::rename(path, new_path)?;
                }
            }
        }

        Ok(())
    }
}
|
|
|
|
|
2021-01-05 22:29:33 -05:00
|
|
|
/// Checker that computes the same fixes as `FixTypos` but, instead of writing
/// them, prints a unified diff (content and/or rename) to stdout.
#[derive(Debug, Clone, Copy)]
pub struct DiffTypos;

impl FileChecker for DiffTypos {
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        policy: &crate::policy::Policy,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        // `content` stays empty unless there is at least one applied fix; that
        // emptiness is used below to decide whether a diff is needed.
        let mut content = Vec::new();
        let mut new_content = Vec::new();
        if policy.check_files {
            let (buffer, content_type) = read_file(path, reporter)?;
            if !explicit && !policy.binary && content_type.is_binary() {
                let msg = report::BinaryFile { path };
                reporter.report(msg.into())?;
            } else {
                let mut fixes = Vec::new();
                let mut accum_line_num = AccumulateLineNum::new();
                for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
                    if is_fixable(&typo) {
                        fixes.push(typo.into_owned());
                    } else {
                        // Ambiguous typos are reported, not diffed.
                        let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
                        let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
                        let msg = report::Typo {
                            context: Some(report::FileContext { path, line_num }.into()),
                            buffer: std::borrow::Cow::Borrowed(line),
                            byte_offset: line_offset,
                            typo: typo.typo.as_ref(),
                            corrections: typo.corrections,
                        };
                        reporter.report(msg.into())?;
                    }
                }
                if !fixes.is_empty() {
                    // `fix_buffer` consumes its input, so keep the original
                    // around via a clone for the "before" side of the diff.
                    new_content = fix_buffer(buffer.clone(), fixes.into_iter());
                    content = buffer
                }
            }
        }

        // Match FixTypos ordering for easy diffing.
        let mut new_path = None;
        if policy.check_filenames {
            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
                let mut fixes = Vec::new();
                for typo in typos::check_str(file_name, policy.tokenizer, policy.dict) {
                    if is_fixable(&typo) {
                        fixes.push(typo.into_owned());
                    } else {
                        let msg = report::Typo {
                            context: Some(report::PathContext { path }.into()),
                            buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
                            byte_offset: typo.byte_offset,
                            typo: typo.typo.as_ref(),
                            corrections: typo.corrections,
                        };
                        reporter.report(msg.into())?;
                    }
                }
                if !fixes.is_empty() {
                    let file_name = file_name.to_owned().into_bytes();
                    let new_name = fix_buffer(file_name, fixes.into_iter());
                    let new_name =
                        String::from_utf8(new_name).expect("corrections are valid utf-8");
                    new_path = Some(path.with_file_name(new_name));
                }
            }
        }

        if new_path.is_some() || !content.is_empty() {
            let original_path = path.display().to_string();
            let fixed_path = new_path.as_deref().unwrap_or(path).display().to_string();
            // difflib works on lines; decode lossily so binary-ish bytes still
            // produce a usable diff.
            let original_content: Vec<_> = content
                .lines_with_terminator()
                .map(|s| String::from_utf8_lossy(s).into_owned())
                .collect();
            let fixed_content: Vec<_> = new_content
                .lines_with_terminator()
                .map(|s| String::from_utf8_lossy(s).into_owned())
                .collect();
            let diff = difflib::unified_diff(
                &original_content,
                &fixed_content,
                original_path.as_str(),
                fixed_path.as_str(),
                "original",
                "fixed",
                0,
            );
            for line in diff {
                write!(std::io::stdout(), "{}", line)?;
            }
        }

        Ok(())
    }
}
|
|
|
|
|
2021-01-05 22:29:33 -05:00
|
|
|
/// Debug checker: dumps every identifier the tokenizer finds, without running
/// them through the dictionary.
#[derive(Debug, Clone, Copy)]
pub struct Identifiers;

impl FileChecker for Identifiers {
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        policy: &crate::policy::Policy,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        if policy.check_filenames {
            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
                for word in policy.tokenizer.parse_str(file_name) {
                    let msg = report::Parse {
                        context: Some(report::PathContext { path }.into()),
                        kind: report::ParseKind::Identifier,
                        data: word.token(),
                    };
                    reporter.report(msg.into())?;
                }
            }
        }

        if policy.check_files {
            let (buffer, content_type) = read_file(path, reporter)?;
            if !explicit && !policy.binary && content_type.is_binary() {
                let msg = report::BinaryFile { path };
                reporter.report(msg.into())?;
            } else {
                for word in policy.tokenizer.parse_bytes(&buffer) {
                    // HACK: Don't look up the line_num per entry to better match the performance
                    // of Typos for comparison purposes. We don't really get much out of it
                    // anyway.
                    let line_num = 0;
                    let msg = report::Parse {
                        context: Some(report::FileContext { path, line_num }.into()),
                        kind: report::ParseKind::Identifier,
                        data: word.token(),
                    };
                    reporter.report(msg.into())?;
                }
            }
        }

        Ok(())
    }
}
|
|
|
|
|
2021-01-05 22:29:33 -05:00
|
|
|
/// Debug checker: dumps every word (identifiers split into their components),
/// without running them through the dictionary.
#[derive(Debug, Clone, Copy)]
pub struct Words;

impl FileChecker for Words {
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        policy: &crate::policy::Policy,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        if policy.check_filenames {
            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
                // `split()` breaks each identifier into its case-delimited words.
                for word in policy
                    .tokenizer
                    .parse_str(file_name)
                    .flat_map(|i| i.split())
                {
                    let msg = report::Parse {
                        context: Some(report::PathContext { path }.into()),
                        kind: report::ParseKind::Word,
                        data: word.token(),
                    };
                    reporter.report(msg.into())?;
                }
            }
        }

        if policy.check_files {
            let (buffer, content_type) = read_file(path, reporter)?;
            if !explicit && !policy.binary && content_type.is_binary() {
                let msg = report::BinaryFile { path };
                reporter.report(msg.into())?;
            } else {
                for word in policy
                    .tokenizer
                    .parse_bytes(&buffer)
                    .flat_map(|i| i.split())
                {
                    // HACK: Don't look up the line_num per entry to better match the performance
                    // of Typos for comparison purposes. We don't really get much out of it
                    // anyway.
                    let line_num = 0;
                    let msg = report::Parse {
                        context: Some(report::FileContext { path, line_num }.into()),
                        kind: report::ParseKind::Word,
                        data: word.token(),
                    };
                    reporter.report(msg.into())?;
                }
            }
        }

        Ok(())
    }
}
|
|
|
|
|
2021-01-05 22:29:33 -05:00
|
|
|
/// Debug checker: reports which files would be checked, honoring the same
/// binary-file filtering as the real checkers.
#[derive(Debug, Clone, Copy)]
pub struct FoundFiles;

impl FileChecker for FoundFiles {
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        policy: &crate::policy::Policy,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        // Check `policy.binary` first so we can easily check performance of walking vs reading
        if policy.binary {
            let msg = report::File::new(path);
            reporter.report(msg.into())?;
        } else {
            // Content must be sniffed to classify the file as binary.
            let (_buffer, content_type) = read_file(path, reporter)?;
            if !explicit && content_type.is_binary() {
                let msg = report::BinaryFile { path };
                reporter.report(msg.into())?;
            } else {
                let msg = report::File::new(path);
                reporter.report(msg.into())?;
            }
        }

        Ok(())
    }
}
|
|
|
|
|
2021-05-13 14:34:21 -04:00
|
|
|
/// Reads `path` (or stdin for `"-"`) and classifies/normalizes its encoding.
///
/// UTF-16 content is transcoded to UTF-8 bytes (the returned `ContentType`
/// still says UTF-16 so `write_file` can round-trip it). BINARY and the
/// unsupported UTF-32 encodings are coerced to `BINARY`. Read/decode failures
/// are reported through `reporter` and yield an empty buffer rather than an
/// error, so a single bad file doesn't abort the run.
fn read_file(
    path: &std::path::Path,
    reporter: &dyn report::Report,
) -> Result<(Vec<u8>, content_inspector::ContentType), std::io::Error> {
    let buffer = if path == std::path::Path::new("-") {
        let mut buffer = Vec::new();
        report_result(std::io::stdin().read_to_end(&mut buffer), reporter)?;
        buffer
    } else {
        report_result(std::fs::read(path), reporter)?
    };

    let content_type = content_inspector::inspect(&buffer);

    let (buffer, content_type) = match content_type {
        content_inspector::ContentType::BINARY |
        // HACK: We don't support UTF-32 yet
        content_inspector::ContentType::UTF_32LE |
        content_inspector::ContentType::UTF_32BE => {
            (buffer, content_inspector::ContentType::BINARY)
        },
        content_inspector::ContentType::UTF_8 |
        content_inspector::ContentType::UTF_8_BOM => {
            (buffer, content_type)
        },
        content_inspector::ContentType::UTF_16LE => {
            // Decode failure is reported and leaves an empty String (see
            // `report_result`), i.e. the file is treated as empty.
            let buffer = report_result(encoding::all::UTF_16LE.decode(&buffer, encoding::DecoderTrap::Strict), reporter)?;
            (buffer.into_bytes(), content_type)
        }
        content_inspector::ContentType::UTF_16BE => {
            let buffer = report_result(encoding::all::UTF_16BE.decode(&buffer, encoding::DecoderTrap::Strict), reporter)?;
            (buffer.into_bytes(), content_type)
        },
    };

    Ok((buffer, content_type))
}
|
2020-12-30 22:26:48 -05:00
|
|
|
|
2021-05-13 14:34:21 -04:00
|
|
|
/// Writes `buffer` (UTF-8 bytes, as produced by `read_file`) back to `path`
/// (or stdout for `"-"`), re-encoding to the original `content_type` first.
///
/// Encode failures are reported, not returned; on a failed UTF-8 re-parse the
/// function bails out early so a file is never truncated to nothing.
fn write_file(
    path: &std::path::Path,
    content_type: content_inspector::ContentType,
    buffer: Vec<u8>,
    reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
    let buffer = match content_type {
        // HACK: We don't support UTF-32 yet
        content_inspector::ContentType::UTF_32LE | content_inspector::ContentType::UTF_32BE => {
            unreachable!("read_file should prevent these from being passed along");
        }
        content_inspector::ContentType::BINARY
        | content_inspector::ContentType::UTF_8
        | content_inspector::ContentType::UTF_8_BOM => buffer,
        content_inspector::ContentType::UTF_16LE => {
            // `report_result` turns a from_utf8 failure into an empty String;
            // NOTE(review): a legitimately empty buffer is indistinguishable
            // from that failure and also skips the write.
            let buffer = report_result(String::from_utf8(buffer), reporter)?;
            if buffer.is_empty() {
                // Error occurred, don't clear out the file
                return Ok(());
            }
            report_result(
                encoding::all::UTF_16LE.encode(&buffer, encoding::EncoderTrap::Strict),
                reporter,
            )?
        }
        content_inspector::ContentType::UTF_16BE => {
            let buffer = report_result(String::from_utf8(buffer), reporter)?;
            if buffer.is_empty() {
                // Error occurred, don't clear out the file
                return Ok(());
            }
            report_result(
                encoding::all::UTF_16BE.encode(&buffer, encoding::EncoderTrap::Strict),
                reporter,
            )?
        }
    };

    if path == std::path::Path::new("-") {
        report_result(std::io::stdout().write_all(&buffer), reporter)?;
    } else {
        report_result(std::fs::write(path, buffer), reporter)?;
    }

    Ok(())
}
|
|
|
|
|
2021-01-02 23:17:00 -05:00
|
|
|
fn report_result<T: Default, E: ToString>(
|
2020-11-03 20:52:39 -05:00
|
|
|
value: Result<T, E>,
|
|
|
|
reporter: &dyn report::Report,
|
|
|
|
) -> Result<T, std::io::Error> {
|
|
|
|
let buffer = match value {
|
|
|
|
Ok(value) => value,
|
2021-01-01 19:25:48 -05:00
|
|
|
Err(err) => {
|
2021-01-02 23:17:00 -05:00
|
|
|
report_error(err, reporter)?;
|
2020-11-03 20:52:39 -05:00
|
|
|
Default::default()
|
2021-01-01 19:25:48 -05:00
|
|
|
}
|
|
|
|
};
|
2020-11-03 20:52:39 -05:00
|
|
|
Ok(buffer)
|
2021-01-01 19:25:48 -05:00
|
|
|
}
|
|
|
|
|
2021-01-02 23:17:00 -05:00
|
|
|
/// Forwards `err` to the reporter as a `report::Error` message.
fn report_error<E: ToString>(err: E, reporter: &dyn report::Report) -> Result<(), std::io::Error> {
    let msg = report::Error::new(err.to_string());
    reporter.report(msg.into())?;
    Ok(())
}
|
|
|
|
|
2020-12-31 18:41:32 -05:00
|
|
|
/// Incrementally maps byte offsets to 1-indexed line numbers.
///
/// `line_num` must be called with monotonically non-decreasing offsets; each
/// call only scans the bytes since the previous call, keeping a whole-file
/// pass O(file size) rather than O(file size * typo count).
struct AccumulateLineNum {
    // Line number (1-indexed) of the position `last_offset`.
    line_num: usize,
    // Byte offset scanned so far.
    last_offset: usize,
}

impl AccumulateLineNum {
    fn new() -> Self {
        Self {
            // 1-indexed
            line_num: 1,
            last_offset: 0,
        }
    }

    /// Returns the 1-indexed line number containing `byte_offset`.
    ///
    /// # Panics
    ///
    /// Panics if `byte_offset` is smaller than an offset from a previous call.
    fn line_num(&mut self, buffer: &[u8], byte_offset: usize) -> usize {
        assert!(self.last_offset <= byte_offset);
        let slice = &buffer[self.last_offset..byte_offset];
        // Plain std newline count; equivalent to the previous
        // `slice.find_iter(b"\n").count()` from `bstr` without needing the
        // `ByteSlice` extension trait here.
        let newlines = slice.iter().filter(|&&b| b == b'\n').count();
        let line_num = self.line_num + newlines;
        self.line_num = line_num;
        self.last_offset = byte_offset;
        line_num
    }
}
|
|
|
|
|
|
|
|
/// Returns the line containing `byte_offset` (terminator stripped) and the
/// offset of that byte within the returned line.
///
/// Lines are terminated by `\n` or `\r\n`, matching the previous
/// `bstr`-based implementation; this version uses only std byte searches and
/// no longer panics when `byte_offset` points past the final newline.
fn extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) {
    let line_start = buffer[0..byte_offset]
        .iter()
        .rposition(|&b| b == b'\n')
        // Skip the newline
        .map(|s| s + 1)
        .unwrap_or(0);
    let line_end = buffer[line_start..]
        .iter()
        .position(|&b| b == b'\n')
        .map(|e| line_start + e)
        .unwrap_or(buffer.len());
    let mut line = &buffer[line_start..line_end];
    // The line ended at a `\n`; treat a preceding `\r` as part of the
    // terminator (`\r\n`), like `bstr`'s `lines()` does.
    if line_end < buffer.len() && line.last() == Some(&b'\r') {
        line = &line[..line.len() - 1];
    }
    let line_offset = byte_offset - line_start;
    (line, line_offset)
}
|
|
|
|
|
2021-01-01 19:25:48 -05:00
|
|
|
/// Returns the correction to auto-apply, but only when the dictionary offers
/// exactly one candidate; ambiguous or invalid typos yield `None`.
fn extract_fix<'t>(typo: &'t typos::Typo<'t>) -> Option<&'t str> {
    if let typos::Status::Corrections(corrections) = &typo.corrections {
        if corrections.len() == 1 {
            return Some(corrections[0].as_ref());
        }
    }
    None
}
|
|
|
|
|
2021-01-02 14:17:15 -05:00
|
|
|
/// Whether this typo has an unambiguous correction that `fix_buffer` can
/// apply automatically.
fn is_fixable(typo: &typos::Typo<'_>) -> bool {
    extract_fix(typo).is_some()
}
|
|
|
|
|
|
|
|
fn fix_buffer(mut buffer: Vec<u8>, typos: impl Iterator<Item = typos::Typo<'static>>) -> Vec<u8> {
|
|
|
|
let mut offset = 0isize;
|
|
|
|
for typo in typos {
|
|
|
|
let fix = extract_fix(&typo).expect("Caller only provides fixable typos");
|
|
|
|
let start = ((typo.byte_offset as isize) + offset) as usize;
|
|
|
|
let end = start + typo.typo.len();
|
|
|
|
|
|
|
|
buffer.splice(start..end, fix.as_bytes().iter().copied());
|
|
|
|
|
|
|
|
offset += (fix.len() as isize) - (typo.typo.len() as isize);
|
|
|
|
}
|
|
|
|
buffer
|
|
|
|
}
|
|
|
|
|
2021-01-02 13:56:20 -05:00
|
|
|
/// Runs `checks` against every file produced by a serial directory walk.
///
/// Per-entry errors are reported via `walk_entry`; only walk-level failures
/// propagate.
pub fn walk_path(
    walk: ignore::Walk,
    checks: &dyn FileChecker,
    engine: &crate::policy::ConfigEngine,
    reporter: &dyn report::Report,
) -> Result<(), ignore::Error> {
    for entry in walk {
        walk_entry(entry, checks, engine, reporter)?;
    }
    Ok(())
}
|
|
|
|
|
2021-01-02 13:56:20 -05:00
|
|
|
/// Parallel counterpart of `walk_path`.
///
/// The first error observed by any worker is stashed in a mutex and stops the
/// walk; it is returned once all workers have quit.
pub fn walk_path_parallel(
    walk: ignore::WalkParallel,
    checks: &dyn FileChecker,
    engine: &crate::policy::ConfigEngine,
    reporter: &dyn report::Report,
) -> Result<(), ignore::Error> {
    let error: std::sync::Mutex<Result<(), ignore::Error>> = std::sync::Mutex::new(Ok(()));
    walk.run(|| {
        Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
            match walk_entry(entry, checks, engine, reporter) {
                Ok(()) => ignore::WalkState::Continue,
                Err(err) => {
                    // Keep only the latest error; quitting makes further
                    // overwrites unlikely.
                    *error.lock().unwrap() = Err(err);
                    ignore::WalkState::Quit
                }
            }
        })
    });

    error.into_inner().unwrap()
}
|
|
|
|
|
2021-01-02 13:56:20 -05:00
|
|
|
/// Processes one walk entry: resolves the policy for the path and runs the
/// checker on it.
///
/// Walker errors for the entry itself are reported and swallowed so the walk
/// continues.
fn walk_entry(
    entry: Result<ignore::DirEntry, ignore::Error>,
    checks: &dyn FileChecker,
    engine: &crate::policy::ConfigEngine,
    reporter: &dyn report::Report,
) -> Result<(), ignore::Error> {
    let entry = match entry {
        Ok(entry) => entry,
        Err(err) => {
            report_error(err, reporter)?;
            return Ok(());
        }
    };
    // `file_type()` is `None` for stdin; treat that as a file too.
    if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
        let explicit = entry.depth() == 0;
        // Policy lookup needs a real filesystem path, so stdin ("-") falls
        // back to the current directory.
        let (path, lookup_path) = if entry.is_stdin() {
            let path = std::path::Path::new("-");
            (path, std::env::current_dir()?)
        } else {
            let path = entry.path();
            (path, path.canonicalize()?)
        };
        let policy = engine.policy(&lookup_path);
        checks.check_file(path, explicit, &policy, reporter)?;
    }

    Ok(())
}
|
2021-04-10 20:13:48 -04:00
|
|
|
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;

    // Convenience wrapper: builds `typos::Typo` values from
    // `(byte_offset, typo, correction)` triples and runs them through
    // `fix_buffer` on `line`.
    fn fix_simple(line: &str, corrections: Vec<(usize, &'static str, &'static str)>) -> String {
        let line = line.as_bytes().to_vec();
        let corrections: Vec<_> = corrections
            .into_iter()
            .map(|(byte_offset, typo, correction)| typos::Typo {
                byte_offset,
                typo: typo.into(),
                corrections: typos::Status::Corrections(vec![correction.into()]),
            })
            .collect();
        let actual = fix_buffer(line, corrections.into_iter());
        String::from_utf8(actual).unwrap()
    }

    #[test]
    fn test_fix_buffer_single() {
        let actual = fix_simple("foo foo foo", vec![(4, "foo", "bar")]);
        assert_eq!(actual, "foo bar foo");
    }

    #[test]
    fn test_fix_buffer_single_grow() {
        let actual = fix_simple("foo foo foo", vec![(4, "foo", "happy")]);
        assert_eq!(actual, "foo happy foo");
    }

    #[test]
    fn test_fix_buffer_single_shrink() {
        let actual = fix_simple("foo foo foo", vec![(4, "foo", "if")]);
        assert_eq!(actual, "foo if foo");
    }

    #[test]
    fn test_fix_buffer_start() {
        let actual = fix_simple("foo foo foo", vec![(0, "foo", "bar")]);
        assert_eq!(actual, "bar foo foo");
    }

    #[test]
    fn test_fix_buffer_end() {
        let actual = fix_simple("foo foo foo", vec![(8, "foo", "bar")]);
        assert_eq!(actual, "foo foo bar");
    }

    #[test]
    fn test_fix_buffer_end_grow() {
        let actual = fix_simple("foo foo foo", vec![(8, "foo", "happy")]);
        assert_eq!(actual, "foo foo happy");
    }

    // Mixed-length corrections exercise the offset-shifting logic.
    #[test]
    fn test_fix_buffer_multiple() {
        let actual = fix_simple(
            "foo foo foo",
            vec![(4, "foo", "happy"), (8, "foo", "world")],
        );
        assert_eq!(actual, "foo happy world");
    }

    #[test]
    fn test_line_count_first() {
        let mut accum_line_num = AccumulateLineNum::new();
        let line_num = accum_line_num.line_num(b"hello world", 6);
        assert_eq!(line_num, 1);
    }

    #[test]
    fn test_line_count_second() {
        let mut accum_line_num = AccumulateLineNum::new();
        let line_num = accum_line_num.line_num(b"1\n2\n3", 2);
        assert_eq!(line_num, 2);
    }

    // Successive calls must accumulate rather than rescan from the start.
    #[test]
    fn test_line_count_multiple() {
        let mut accum_line_num = AccumulateLineNum::new();
        let line_num = accum_line_num.line_num(b"1\n2\n3", 0);
        assert_eq!(line_num, 1);
        let line_num = accum_line_num.line_num(b"1\n2\n3", 2);
        assert_eq!(line_num, 2);
        let line_num = accum_line_num.line_num(b"1\n2\n3", 4);
        assert_eq!(line_num, 3);
    }

    #[test]
    fn test_extract_line_single_line() {
        let buffer = b"hello world";
        let buffer_offset = 6;
        let expected_line = b"hello world";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 6);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    #[test]
    fn test_extract_line_first() {
        let buffer = b"1\n2\n3";
        let buffer_offset = 0;
        let expected_line = b"1";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 0);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    #[test]
    fn test_extract_line_middle() {
        let buffer = b"1\n2\n3";
        let buffer_offset = 2;
        let expected_line = b"2";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 0);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    #[test]
    fn test_extract_line_end() {
        let buffer = b"1\n2\n3";
        let buffer_offset = 4;
        let expected_line = b"3";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 0);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    #[test]
    fn test_extract_line_offset_change() {
        let buffer = b"1\nhello world\n2";
        let buffer_offset = 8;
        let expected_line = b"hello world";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 6);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    // `\r\n` terminators must be stripped from the extracted line.
    #[test]
    fn test_extract_line_windows() {
        let buffer = b"1\r\nhello world\r\n2";
        let buffer_offset = 9;
        let expected_line = b"hello world";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 6);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    // Non-UTF-8 (legacy single-byte encoded) content must still round-trip
    // through line extraction byte-for-byte.
    #[test]
    fn test_extract_line_slovak() {
        let buffer = b"LastErrorMessage=%1.%n%nChyba %2: %3\r\nSetupFileMissing=In\x9Atala\xE8n\xFD adres\xE1r neobsahuje s\xFAbor %1. Opravte, pros\xEDm, t\xFAto chybu alebo si zaobstarajte nov\xFA k\xF3piu tohto produktu.\r\nSetupFileCorrupt=S\xFAbory sprievodcu in\x9Atal\xE1ciou s\xFA po\x9Akoden\xE9. Zaobstarajte si, pros\xEDm, nov\xFA k\xF3piu tohto produktu.";
        let buffer_offset = 66;
        let expected_line = b"SetupFileMissing=In\x9Atala\xE8n\xFD adres\xE1r neobsahuje s\xFAbor %1. Opravte, pros\xEDm, t\xFAto chybu alebo si zaobstarajte nov\xFA k\xF3piu tohto produktu.";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 28);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }
}
|