Merge pull request #158 from epage/refactor

Polish internals and json output
This commit is contained in:
Ed Page 2020-11-10 06:57:46 -06:00 committed by GitHub
commit deca842341
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 800 additions and 759 deletions

8
Cargo.lock generated
View file

@ -523,6 +523,12 @@ dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "260e51e7efe62b592207e9e13a68e43692a7a279171d6ba57abd208bf23645ad"
[[package]]
name = "phf"
version = "0.8.0"
@ -982,8 +988,8 @@ dependencies = [
"derive_more 0.99.11",
"derive_setters",
"itertools",
"lazy_static",
"log",
"once_cell",
"regex",
"serde",
"serde_json",

214
benches/checks.rs Normal file
View file

@ -0,0 +1,214 @@
#![feature(test)]
extern crate test;
mod data;
use assert_fs::prelude::*;
use typos::checks::Check;
/// Benchmarks the identifier-parsing check over an in-memory `&str`.
///
/// The dictionary, tokenizer and check are built once, outside the timed
/// closure, so only `check_str` itself is measured.
fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) {
    let dictionary = typos_cli::dict::BuiltIn::new(Default::default());
    let tokenizer = typos::tokens::Parser::new();
    let identifier_check = typos::checks::TyposSettings::new().build_identifier_parser();
    let silent = typos::report::PrintSilent;
    b.iter(|| identifier_check.check_str(data, &tokenizer, &dictionary, &silent));
}
// Identifier-parsing benchmarks over `&str` input: each one runs
// `bench_parse_ident_str` against a single fixture from the `data` module.
#[bench]
fn parse_idents_empty_str(b: &mut test::Bencher) {
bench_parse_ident_str(data::EMPTY, b);
}
#[bench]
fn parse_idents_no_tokens_str(b: &mut test::Bencher) {
bench_parse_ident_str(data::NO_TOKENS, b);
}
#[bench]
fn parse_idents_single_token_str(b: &mut test::Bencher) {
bench_parse_ident_str(data::SINGLE_TOKEN, b);
}
#[bench]
fn parse_idents_sherlock_str(b: &mut test::Bencher) {
bench_parse_ident_str(data::SHERLOCK, b);
}
#[bench]
fn parse_idents_code_str(b: &mut test::Bencher) {
bench_parse_ident_str(data::CODE, b);
}
#[bench]
fn parse_idents_corpus_str(b: &mut test::Bencher) {
bench_parse_ident_str(data::CORPUS, b);
}
/// Benchmarks the identifier-parsing check over the raw bytes of `data`.
///
/// Mirrors the `&str` variant but goes through `check_bytes`, so the two
/// entry points can be compared on the same fixtures.
fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) {
    let dictionary = typos_cli::dict::BuiltIn::new(Default::default());
    let tokenizer = typos::tokens::Parser::new();
    let identifier_check = typos::checks::TyposSettings::new().build_identifier_parser();
    let silent = typos::report::PrintSilent;
    let bytes = data.as_bytes();
    b.iter(|| identifier_check.check_bytes(bytes, &tokenizer, &dictionary, &silent));
}
// Identifier-parsing benchmarks over byte input: each one runs
// `bench_parse_ident_bytes` against a single fixture from the `data` module.
#[bench]
fn parse_idents_empty_bytes(b: &mut test::Bencher) {
bench_parse_ident_bytes(data::EMPTY, b);
}
#[bench]
fn parse_idents_no_tokens_bytes(b: &mut test::Bencher) {
bench_parse_ident_bytes(data::NO_TOKENS, b);
}
#[bench]
fn parse_idents_single_token_bytes(b: &mut test::Bencher) {
bench_parse_ident_bytes(data::SINGLE_TOKEN, b);
}
#[bench]
fn parse_idents_sherlock_bytes(b: &mut test::Bencher) {
bench_parse_ident_bytes(data::SHERLOCK, b);
}
#[bench]
fn parse_idents_code_bytes(b: &mut test::Bencher) {
bench_parse_ident_bytes(data::CODE, b);
}
#[bench]
fn parse_idents_corpus_bytes(b: &mut test::Bencher) {
bench_parse_ident_bytes(data::CORPUS, b);
}
/// Benchmarks the word-parsing check over an in-memory `&str`.
///
/// Setup (dictionary, tokenizer, check) happens outside the timed closure.
fn bench_parse_word_str(data: &str, b: &mut test::Bencher) {
    let dictionary = typos_cli::dict::BuiltIn::new(Default::default());
    let tokenizer = typos::tokens::Parser::new();
    let word_check = typos::checks::TyposSettings::new().build_word_parser();
    let silent = typos::report::PrintSilent;
    b.iter(|| word_check.check_str(data, &tokenizer, &dictionary, &silent));
}
// Word-parsing benchmarks: each one runs `bench_parse_word_str` against a
// single fixture from the `data` module.
#[bench]
fn parse_words_empty(b: &mut test::Bencher) {
bench_parse_word_str(data::EMPTY, b);
}
#[bench]
fn parse_words_no_tokens(b: &mut test::Bencher) {
bench_parse_word_str(data::NO_TOKENS, b);
}
#[bench]
fn parse_words_single_token(b: &mut test::Bencher) {
bench_parse_word_str(data::SINGLE_TOKEN, b);
}
#[bench]
fn parse_words_sherlock(b: &mut test::Bencher) {
bench_parse_word_str(data::SHERLOCK, b);
}
#[bench]
fn parse_words_code(b: &mut test::Bencher) {
bench_parse_word_str(data::CODE, b);
}
#[bench]
fn parse_words_corpus(b: &mut test::Bencher) {
bench_parse_word_str(data::CORPUS, b);
}
/// Benchmarks the typo check (built via `build_typos`) over an in-memory `&str`.
///
/// Messages go to `PrintSilent`, and all setup is done before the timed closure.
fn bench_typos(data: &str, b: &mut test::Bencher) {
    let dictionary = typos_cli::dict::BuiltIn::new(Default::default());
    let tokenizer = typos::tokens::Parser::new();
    let typo_check = typos::checks::TyposSettings::new().build_typos();
    let silent = typos::report::PrintSilent;
    b.iter(|| typo_check.check_str(data, &tokenizer, &dictionary, &silent));
}
// Typo-check benchmarks on in-memory strings: each one runs `bench_typos`
// against a single fixture from the `data` module.
#[bench]
fn typos_empty(b: &mut test::Bencher) {
bench_typos(data::EMPTY, b);
}
#[bench]
fn typos_no_tokens(b: &mut test::Bencher) {
bench_typos(data::NO_TOKENS, b);
}
#[bench]
fn typos_single_token(b: &mut test::Bencher) {
bench_typos(data::SINGLE_TOKEN, b);
}
#[bench]
fn typos_sherlock(b: &mut test::Bencher) {
bench_typos(data::SHERLOCK, b);
}
#[bench]
fn typos_code(b: &mut test::Bencher) {
bench_typos(data::CODE, b);
}
#[bench]
fn typos_corpus(b: &mut test::Bencher) {
bench_typos(data::CORPUS, b);
}
/// Benchmarks the typo check against a file on disk.
///
/// `data` is written to a `sample` file inside a fresh temporary directory;
/// the timed closure then runs `check_file` on that path with `explicit`
/// set to `true`. The temporary directory is closed once timing is done.
fn bench_check_file(data: &str, b: &mut test::Bencher) {
    let workspace = assert_fs::TempDir::new().unwrap();
    let sample = workspace.child("sample");
    sample.write_str(data).unwrap();

    let dictionary = typos_cli::dict::BuiltIn::new(Default::default());
    let tokenizer = typos::tokens::Parser::new();
    let typo_check = typos::checks::TyposSettings::new().build_typos();
    let silent = typos::report::PrintSilent;

    b.iter(|| typo_check.check_file(sample.path(), true, &tokenizer, &dictionary, &silent));

    workspace.close().unwrap();
}
// On-disk typo-check benchmarks: each one runs `bench_check_file` against a
// single fixture from the `data` module.
#[bench]
fn check_file_empty(b: &mut test::Bencher) {
bench_check_file(data::EMPTY, b);
}
#[bench]
fn check_file_no_tokens(b: &mut test::Bencher) {
bench_check_file(data::NO_TOKENS, b);
}
#[bench]
fn check_file_single_token(b: &mut test::Bencher) {
bench_check_file(data::SINGLE_TOKEN, b);
}
#[bench]
fn check_file_sherlock(b: &mut test::Bencher) {
bench_check_file(data::SHERLOCK, b);
}
#[bench]
fn check_file_code(b: &mut test::Bencher) {
bench_check_file(data::CODE, b);
}
#[bench]
fn check_file_corpus(b: &mut test::Bencher) {
bench_check_file(data::CORPUS, b);
}

View file

@ -1,231 +0,0 @@
#![feature(test)]
extern crate test;
mod data;
use assert_fs::prelude::*;
use bstr::ByteSlice;
/// Benchmarks reading a fixture back from disk with `std::fs::read`.
///
/// `data` is written to a `sample` file in a temporary directory before the
/// timed closure runs; the directory is closed afterwards.
fn bench_read(data: &str, b: &mut test::Bencher) {
    let workspace = assert_fs::TempDir::new().unwrap();
    let sample = workspace.child("sample");
    sample.write_str(data).unwrap();

    b.iter(|| std::fs::read(sample.path()));

    workspace.close().unwrap();
}
// File-read benchmarks: each one runs `bench_read` against a single fixture
// from the `data` module.
#[bench]
fn read_empty(b: &mut test::Bencher) {
bench_read(data::EMPTY, b);
}
#[bench]
fn read_no_tokens(b: &mut test::Bencher) {
bench_read(data::NO_TOKENS, b);
}
#[bench]
fn read_single_token(b: &mut test::Bencher) {
bench_read(data::SINGLE_TOKEN, b);
}
#[bench]
fn read_sherlock(b: &mut test::Bencher) {
bench_read(data::SHERLOCK, b);
}
#[bench]
fn read_code(b: &mut test::Bencher) {
bench_read(data::CODE, b);
}
#[bench]
fn read_corpus(b: &mut test::Bencher) {
bench_read(data::CORPUS, b);
}
/// Benchmarks splitting the bytes of `data` into lines.
///
/// The line iterator is enumerated and driven to its final element so every
/// line is actually produced inside the timed closure.
fn bench_split_lines(data: &str, b: &mut test::Bencher) {
    let bytes = data.as_bytes();
    b.iter(|| bytes.lines().enumerate().last());
}
// Line-splitting benchmarks: each one runs `bench_split_lines` against a
// single fixture from the `data` module.
#[bench]
fn parse_lines_empty(b: &mut test::Bencher) {
bench_split_lines(data::EMPTY, b);
}
#[bench]
fn parse_lines_no_tokens(b: &mut test::Bencher) {
bench_split_lines(data::NO_TOKENS, b);
}
#[bench]
fn parse_lines_single_token(b: &mut test::Bencher) {
bench_split_lines(data::SINGLE_TOKEN, b);
}
#[bench]
fn parse_lines_sherlock(b: &mut test::Bencher) {
bench_split_lines(data::SHERLOCK, b);
}
#[bench]
fn parse_lines_code(b: &mut test::Bencher) {
bench_split_lines(data::CODE, b);
}
#[bench]
fn parse_lines_corpus(b: &mut test::Bencher) {
bench_split_lines(data::CORPUS, b);
}
/// Benchmarks the identifier-parsing check against a file on disk.
///
/// `data` is written to a `sample` file in a temporary directory; the timed
/// closure runs `check_file` on it with `explicit` set to `true`.
fn bench_parse_ident(data: &str, b: &mut test::Bencher) {
    let workspace = assert_fs::TempDir::new().unwrap();
    let sample = workspace.child("sample");
    sample.write_str(data).unwrap();

    let tokenizer = typos::tokens::Parser::new();
    let identifier_check = typos::checks::TyposSettings::new().build_identifier_parser();
    let silent = typos::report::PrintSilent;

    b.iter(|| identifier_check.check_file(sample.path(), true, &tokenizer, &silent));

    workspace.close().unwrap();
}
// Identifier-parsing benchmarks on files: each one runs `bench_parse_ident`
// against a single fixture from the `data` module.
#[bench]
fn parse_idents_empty(b: &mut test::Bencher) {
bench_parse_ident(data::EMPTY, b);
}
#[bench]
fn parse_idents_no_tokens(b: &mut test::Bencher) {
bench_parse_ident(data::NO_TOKENS, b);
}
#[bench]
fn parse_idents_single_token(b: &mut test::Bencher) {
bench_parse_ident(data::SINGLE_TOKEN, b);
}
#[bench]
fn parse_idents_sherlock(b: &mut test::Bencher) {
bench_parse_ident(data::SHERLOCK, b);
}
#[bench]
fn parse_idents_code(b: &mut test::Bencher) {
bench_parse_ident(data::CODE, b);
}
#[bench]
fn parse_idents_corpus(b: &mut test::Bencher) {
bench_parse_ident(data::CORPUS, b);
}
/// Benchmarks the word-parsing check against a file on disk.
///
/// `data` is written to a `sample` file in a temporary directory; the timed
/// closure runs `check_file` on it with `explicit` set to `true`.
fn bench_parse_word(data: &str, b: &mut test::Bencher) {
    let workspace = assert_fs::TempDir::new().unwrap();
    let sample = workspace.child("sample");
    sample.write_str(data).unwrap();

    let tokenizer = typos::tokens::Parser::new();
    let word_check = typos::checks::TyposSettings::new().build_word_parser();
    let silent = typos::report::PrintSilent;

    b.iter(|| word_check.check_file(sample.path(), true, &tokenizer, &silent));

    workspace.close().unwrap();
}
// Word-parsing benchmarks on files: each one runs `bench_parse_word` against
// a single fixture from the `data` module.
#[bench]
fn parse_words_empty(b: &mut test::Bencher) {
bench_parse_word(data::EMPTY, b);
}
#[bench]
fn parse_words_no_tokens(b: &mut test::Bencher) {
bench_parse_word(data::NO_TOKENS, b);
}
#[bench]
fn parse_words_single_token(b: &mut test::Bencher) {
bench_parse_word(data::SINGLE_TOKEN, b);
}
#[bench]
fn parse_words_sherlock(b: &mut test::Bencher) {
bench_parse_word(data::SHERLOCK, b);
}
#[bench]
fn parse_words_code(b: &mut test::Bencher) {
bench_parse_word(data::CODE, b);
}
#[bench]
fn parse_words_corpus(b: &mut test::Bencher) {
bench_parse_word(data::CORPUS, b);
}
/// Benchmarks the full check (built via `build_checks`) against a file on disk.
///
/// `data` is written to a `sample` file in a temporary directory; the timed
/// closure runs `check_file` on it with `explicit` set to `true` and the
/// built-in dictionary.
fn bench_check_file(data: &str, b: &mut test::Bencher) {
    let workspace = assert_fs::TempDir::new().unwrap();
    let sample = workspace.child("sample");
    sample.write_str(data).unwrap();

    let dictionary = typos_cli::dict::BuiltIn::new(Default::default());
    let tokenizer = typos::tokens::Parser::new();
    let full_check = typos::checks::TyposSettings::new().build_checks();
    let silent = typos::report::PrintSilent;

    b.iter(|| full_check.check_file(sample.path(), true, &tokenizer, &dictionary, &silent));

    workspace.close().unwrap();
}
// On-disk check benchmarks: each one runs `bench_check_file` against a single
// fixture from the `data` module.
#[bench]
fn check_file_empty(b: &mut test::Bencher) {
bench_check_file(data::EMPTY, b);
}
#[bench]
fn check_file_no_tokens(b: &mut test::Bencher) {
bench_check_file(data::NO_TOKENS, b);
}
#[bench]
fn check_file_single_token(b: &mut test::Bencher) {
bench_check_file(data::SINGLE_TOKEN, b);
}
#[bench]
fn check_file_sherlock(b: &mut test::Bencher) {
bench_check_file(data::SHERLOCK, b);
}
#[bench]
fn check_file_code(b: &mut test::Bencher) {
bench_check_file(data::CODE, b);
}
#[bench]
fn check_file_corpus(b: &mut test::Bencher) {
bench_check_file(data::CORPUS, b);
}

View file

@ -18,7 +18,7 @@ codecov = { repository = "crate-ci/typos" }
anyhow = "1.0"
thiserror = "1.0"
regex = "1.3"
lazy_static = "1.2.0"
once_cell = "1.2.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
itertools = "0.9"

View file

@ -5,6 +5,100 @@ use crate::tokens;
use crate::Dictionary;
use crate::Status;
/// A check that can be applied to strings, byte buffers, file names and file
/// contents.
///
/// Implementors supply the two buffer-level checks plus three policy flags;
/// the path-level entry points (`check_filename`, `check_file`) are provided
/// on top of those and attach path/line context to every reported message.
pub trait Check: Send + Sync {
    /// Runs the check over a UTF-8 buffer, sending messages to `reporter`.
    fn check_str(
        &self,
        buffer: &str,
        parser: &tokens::Parser,
        dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<bool, crate::Error>;

    /// Runs the check over a raw byte buffer, sending messages to `reporter`.
    fn check_bytes(
        &self,
        buffer: &[u8],
        parser: &tokens::Parser,
        dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<bool, crate::Error>;

    /// Whether `check_filename` should do any work.
    fn check_filenames(&self) -> bool;

    /// Whether `check_file` should do any work.
    fn check_files(&self) -> bool;

    /// Whether content detected as binary is still checked when the file was
    /// not passed explicitly.
    fn binary(&self) -> bool;

    /// Checks the final component of `path` via `check_str`.
    ///
    /// Returns `Ok(false)` without checking when file-name checks are
    /// disabled or the file name is missing or not valid UTF-8.
    fn check_filename(
        &self,
        path: &std::path::Path,
        parser: &tokens::Parser,
        dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<bool, crate::Error> {
        if !self.check_filenames() {
            return Ok(false);
        }

        match path.file_name().and_then(|name| name.to_str()) {
            Some(file_name) => {
                // Tag every message from this call with the path it came from.
                let path_reporter = ReportContext {
                    reporter,
                    context: report::PathContext { path }.into(),
                };
                self.check_str(file_name, parser, dictionary, &path_reporter)
            }
            None => Ok(false),
        }
    }

    /// Reads `path` and runs `check_bytes` on each of its lines.
    ///
    /// Content detected as binary is reported as a `BinaryFile` message and
    /// skipped, unless the file was named explicitly or `binary()` is set.
    /// Line numbers attached to messages start at 1.
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        parser: &tokens::Parser,
        dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<bool, crate::Error> {
        if !self.check_files() {
            return Ok(false);
        }

        let raw = read_file(path)?;
        let (content, content_type) = massage_data(raw)?;
        if !explicit && !self.binary() && content_type.is_binary() {
            reporter.report(report::BinaryFile { path }.into());
            return Ok(false);
        }

        let mut typos_found = false;
        for (line_idx, line) in content.lines().enumerate() {
            // Tag every message from this line with its file and line number.
            let line_reporter = ReportContext {
                reporter,
                context: report::FileContext {
                    path,
                    line_num: line_idx + 1,
                }
                .into(),
            };
            if self.check_bytes(line, parser, dictionary, &line_reporter)? {
                typos_found = true;
            }
        }
        Ok(typos_found)
    }
}
/// Reporter adapter that stamps a fixed context onto every message before
/// forwarding it to the wrapped reporter.
struct ReportContext<'m, 'r> {
    /// The reporter that ultimately receives the messages.
    reporter: &'r dyn report::Report,
    /// Context cloned into each forwarded message.
    context: report::Context<'m>,
}

impl<'m, 'r> report::Report for ReportContext<'m, 'r> {
    fn report(&self, msg: report::Message) -> bool {
        self.reporter.report(msg.context(self.context.clone()))
    }
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TyposSettings {
check_filenames: bool,
@ -32,8 +126,8 @@ impl TyposSettings {
self
}
pub fn build_checks(&self) -> Checks {
Checks {
pub fn build_typos(&self) -> Typos {
Typos {
check_filenames: self.check_filenames,
check_files: self.check_files,
binary: self.binary,
@ -55,6 +149,10 @@ impl TyposSettings {
binary: self.binary,
}
}
pub fn build_files(&self) -> Files {
Files {}
}
}
impl Default for TyposSettings {
@ -67,6 +165,123 @@ impl Default for TyposSettings {
}
}
/// Check that looks identifiers and words up in a dictionary and reports
/// those the dictionary does not consider valid.
#[derive(Debug, Clone)]
pub struct Typos {
// Returned by `Check::check_filenames`; when false, `check_filename` does nothing.
check_filenames: bool,
// Returned by `Check::check_files`; when false, `check_file` does nothing.
check_files: bool,
// Returned by `Check::binary`; when false, non-explicit binary content is
// reported as a binary file and skipped instead of being checked.
binary: bool,
}
impl Check for Typos {
    /// Checks every identifier parsed from `buffer` against `dictionary`.
    ///
    /// An identifier the dictionary has an answer for is reported unless that
    /// answer is `Status::Valid`. An identifier it has no answer for is split
    /// into words, and each word is looked up the same way. The result is
    /// `true` if any `reporter.report` call returned `true`.
    fn check_str(
        &self,
        buffer: &str,
        parser: &tokens::Parser,
        dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<bool, crate::Error> {
        let raw = buffer.as_bytes();
        let mut typos_found = false;

        for ident in parser.parse_str(buffer) {
            match dictionary.correct_ident(ident) {
                Some(Status::Valid) => {}
                Some(corrections) => {
                    let msg = report::Typo {
                        context: report::Context::None,
                        buffer: std::borrow::Cow::Borrowed(raw),
                        byte_offset: ident.offset(),
                        typo: ident.token(),
                        corrections,
                    };
                    if reporter.report(msg.into()) {
                        typos_found = true;
                    }
                }
                None => {
                    // No verdict on the identifier as a whole: fall back to its words.
                    for word in ident.split() {
                        let corrections = match dictionary.correct_word(word) {
                            Some(Status::Valid) | None => continue,
                            Some(corrections) => corrections,
                        };
                        let msg = report::Typo {
                            context: report::Context::None,
                            buffer: std::borrow::Cow::Borrowed(raw),
                            byte_offset: word.offset(),
                            typo: word.token(),
                            corrections,
                        };
                        if reporter.report(msg.into()) {
                            typos_found = true;
                        }
                    }
                }
            }
        }

        Ok(typos_found)
    }

    /// Byte-buffer counterpart of `check_str`: same lookup and reporting
    /// rules, with identifiers produced by `parse_bytes`.
    fn check_bytes(
        &self,
        buffer: &[u8],
        parser: &tokens::Parser,
        dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<bool, crate::Error> {
        let mut typos_found = false;

        for ident in parser.parse_bytes(buffer) {
            match dictionary.correct_ident(ident) {
                Some(Status::Valid) => {}
                Some(corrections) => {
                    let msg = report::Typo {
                        context: report::Context::None,
                        buffer: std::borrow::Cow::Borrowed(buffer),
                        byte_offset: ident.offset(),
                        typo: ident.token(),
                        corrections,
                    };
                    if reporter.report(msg.into()) {
                        typos_found = true;
                    }
                }
                None => {
                    // No verdict on the identifier as a whole: fall back to its words.
                    for word in ident.split() {
                        let corrections = match dictionary.correct_word(word) {
                            Some(Status::Valid) | None => continue,
                            Some(corrections) => corrections,
                        };
                        let msg = report::Typo {
                            context: report::Context::None,
                            buffer: std::borrow::Cow::Borrowed(buffer),
                            byte_offset: word.offset(),
                            typo: word.token(),
                            corrections,
                        };
                        if reporter.report(msg.into()) {
                            typos_found = true;
                        }
                    }
                }
            }
        }

        Ok(typos_found)
    }

    fn check_filenames(&self) -> bool {
        self.check_filenames
    }

    fn check_files(&self) -> bool {
        self.check_files
    }

    fn binary(&self) -> bool {
        self.binary
    }
}
#[derive(Debug, Clone)]
pub struct ParseIdentifiers {
check_filenames: bool,
@ -74,63 +289,60 @@ pub struct ParseIdentifiers {
binary: bool,
}
impl ParseIdentifiers {
pub fn check_filename(
impl Check for ParseIdentifiers {
fn check_str(
&self,
path: &std::path::Path,
buffer: &str,
parser: &tokens::Parser,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<bool, crate::Error> {
let typos_found = false;
if !self.check_filenames {
return Ok(typos_found);
}
for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
let msg = report::Parse {
path,
context: report::Context::None,
kind: report::ParseKind::Identifier,
data: parser.parse(part).map(|i| i.token()).collect(),
data: parser.parse_str(buffer).map(|i| i.token()).collect(),
};
if !msg.data.is_empty() {
reporter.report(msg.into());
}
Ok(typos_found)
}
pub fn check_file(
fn check_bytes(
&self,
path: &std::path::Path,
explicit: bool,
buffer: &[u8],
parser: &tokens::Parser,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<bool, crate::Error> {
let typos_found = false;
if !self.check_files {
return Ok(typos_found);
}
let buffer = read_file(path)?;
let (buffer, content_type) = massage_data(buffer)?;
if !explicit && !self.binary && content_type.is_binary() {
let msg = report::BinaryFile { path };
reporter.report(msg.into());
return Ok(typos_found);
}
for line in buffer.lines() {
let msg = report::Parse {
path,
context: report::Context::None,
kind: report::ParseKind::Identifier,
data: parser.parse_bytes(line).map(|i| i.token()).collect(),
data: parser.parse_bytes(buffer).map(|i| i.token()).collect(),
};
if !msg.data.is_empty() {
reporter.report(msg.into());
}
Ok(typos_found)
}
fn check_filenames(&self) -> bool {
self.check_filenames
}
fn check_files(&self) -> bool {
self.check_files
}
fn binary(&self) -> bool {
self.binary
}
}
#[derive(Debug, Clone)]
@ -140,195 +352,129 @@ pub struct ParseWords {
binary: bool,
}
impl ParseWords {
pub fn check_filename(
impl Check for ParseWords {
fn check_str(
&self,
path: &std::path::Path,
buffer: &str,
parser: &tokens::Parser,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<bool, crate::Error> {
let typos_found = false;
if !self.check_filenames {
return Ok(typos_found);
}
for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
let msg = report::Parse {
path,
context: report::Context::None,
kind: report::ParseKind::Word,
data: parser
.parse(part)
.parse_str(buffer)
.flat_map(|ident| ident.split().map(|i| i.token()))
.collect(),
};
if !msg.data.is_empty() {
reporter.report(msg.into());
}
Ok(typos_found)
}
pub fn check_file(
fn check_bytes(
&self,
path: &std::path::Path,
explicit: bool,
buffer: &[u8],
parser: &tokens::Parser,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<bool, crate::Error> {
let typos_found = false;
if !self.check_files {
return Ok(typos_found);
}
let buffer = read_file(path)?;
let (buffer, content_type) = massage_data(buffer)?;
if !explicit && !self.binary && content_type.is_binary() {
let msg = report::BinaryFile { path };
reporter.report(msg.into());
return Ok(typos_found);
}
for line in buffer.lines() {
let msg = report::Parse {
path,
context: report::Context::None,
kind: report::ParseKind::Word,
data: parser
.parse_bytes(line)
.parse_bytes(buffer)
.flat_map(|ident| ident.split().map(|i| i.token()))
.collect(),
};
if !msg.data.is_empty() {
reporter.report(msg.into());
}
Ok(typos_found)
}
fn check_filenames(&self) -> bool {
self.check_filenames
}
fn check_files(&self) -> bool {
self.check_files
}
fn binary(&self) -> bool {
self.binary
}
}
#[derive(Debug, Clone)]
pub struct Checks {
check_filenames: bool,
check_files: bool,
binary: bool,
}
pub struct Files {}
impl Checks {
pub fn check_filename(
impl Check for Files {
fn check_str(
&self,
path: &std::path::Path,
parser: &tokens::Parser,
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
_buffer: &str,
_parser: &tokens::Parser,
_dictionary: &dyn Dictionary,
_reporter: &dyn report::Report,
) -> Result<bool, crate::Error> {
if !self.check_filenames {
return Ok(false);
}
let mut typos_found = false;
for ident in path
.file_name()
.and_then(|s| s.to_str())
.iter()
.flat_map(|part| parser.parse(part))
{
match dictionary.correct_ident(ident) {
Some(Status::Valid) => {}
Some(corrections) => {
let byte_offset = ident.offset();
let msg = report::PathTypo {
path,
byte_offset,
typo: ident.token(),
corrections,
};
typos_found |= reporter.report(msg.into());
}
None => {
for word in ident.split() {
match dictionary.correct_word(word) {
Some(Status::Valid) => {}
Some(corrections) => {
let byte_offset = word.offset();
let msg = report::PathTypo {
path,
byte_offset,
typo: word.token(),
corrections,
};
typos_found |= reporter.report(msg.into());
}
None => {}
}
}
}
}
}
let typos_found = false;
Ok(typos_found)
}
pub fn check_file(
fn check_bytes(
&self,
_buffer: &[u8],
_parser: &tokens::Parser,
_dictionary: &dyn Dictionary,
_reporter: &dyn report::Report,
) -> Result<bool, crate::Error> {
let typos_found = false;
Ok(typos_found)
}
fn check_filenames(&self) -> bool {
true
}
fn check_files(&self) -> bool {
true
}
fn binary(&self) -> bool {
true
}
fn check_filename(
&self,
_path: &std::path::Path,
_parser: &tokens::Parser,
_dictionary: &dyn Dictionary,
_reporter: &dyn report::Report,
) -> Result<bool, crate::Error> {
let typos_found = false;
Ok(typos_found)
}
fn check_file(
&self,
path: &std::path::Path,
explicit: bool,
parser: &tokens::Parser,
dictionary: &dyn Dictionary,
_explicit: bool,
_parser: &tokens::Parser,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<bool, crate::Error> {
let mut typos_found = false;
let typos_found = false;
if !self.check_files {
return Ok(typos_found);
}
let buffer = read_file(path)?;
let (buffer, content_type) = massage_data(buffer)?;
if !explicit && !self.binary && content_type.is_binary() {
let msg = report::BinaryFile { path };
let msg = report::File::new(path);
reporter.report(msg.into());
return Ok(typos_found);
}
for (line_idx, line) in buffer.lines().enumerate() {
let line_num = line_idx + 1;
for ident in parser.parse_bytes(line) {
match dictionary.correct_ident(ident) {
Some(Status::Valid) => {}
Some(corrections) => {
let byte_offset = ident.offset();
let msg = report::FileTypo {
path,
line,
line_num,
byte_offset,
typo: ident.token(),
corrections,
};
typos_found |= reporter.report(msg.into());
}
None => {
for word in ident.split() {
match dictionary.correct_word(word) {
Some(Status::Valid) => {}
Some(corrections) => {
let byte_offset = word.offset();
let msg = report::FileTypo {
path,
line,
line_num,
byte_offset,
typo: word.token(),
corrections,
};
typos_found |= reporter.report(msg.into());
}
None => {}
}
}
}
}
}
}
Ok(typos_found)
}

View file

@ -1,5 +1,6 @@
#![allow(clippy::needless_update)]
use std::borrow::Cow;
use std::io::{self, Write};
#[derive(Clone, Debug, serde::Serialize, derive_more::From)]
@ -8,8 +9,7 @@ use std::io::{self, Write};
#[non_exhaustive]
pub enum Message<'m> {
BinaryFile(BinaryFile<'m>),
FileTypo(FileTypo<'m>),
PathTypo(PathTypo<'m>),
Typo(Typo<'m>),
File(File<'m>),
Parse(Parse<'m>),
PathError(PathError<'m>),
@ -20,8 +20,7 @@ impl<'m> Message<'m> {
pub fn is_correction(&self) -> bool {
match self {
Message::BinaryFile(_) => false,
Message::FileTypo(c) => c.corrections.is_correction(),
Message::PathTypo(c) => c.corrections.is_correction(),
Message::Typo(c) => c.corrections.is_correction(),
Message::File(_) => false,
Message::Parse(_) => false,
Message::PathError(_) => false,
@ -32,14 +31,27 @@ impl<'m> Message<'m> {
pub fn is_error(&self) -> bool {
match self {
Message::BinaryFile(_) => false,
Message::FileTypo(_) => false,
Message::PathTypo(_) => false,
Message::Typo(_) => false,
Message::File(_) => false,
Message::Parse(_) => false,
Message::PathError(_) => true,
Message::Error(_) => true,
}
}
/// Attaches `context` to the message.
///
/// Only `Typo` and `Parse` messages carry a context; every other variant
/// is returned unchanged.
pub fn context(self, context: Context<'m>) -> Self {
    match self {
        Message::Typo(typo) => Message::Typo(typo.context(context)),
        Message::Parse(parse) => Message::Parse(parse.context(context)),
        other => other,
    }
}
}
#[derive(Clone, Debug, serde::Serialize, derive_more::Display, derive_setters::Setters)]
@ -51,22 +63,21 @@ pub struct BinaryFile<'m> {
#[derive(Clone, Debug, serde::Serialize, derive_setters::Setters)]
#[non_exhaustive]
pub struct FileTypo<'m> {
pub path: &'m std::path::Path,
pub struct Typo<'m> {
#[serde(flatten)]
pub context: Context<'m>,
#[serde(skip)]
pub line: &'m [u8],
pub line_num: usize,
pub buffer: Cow<'m, [u8]>,
pub byte_offset: usize,
pub typo: &'m str,
pub corrections: crate::Status<'m>,
}
impl<'m> Default for FileTypo<'m> {
impl<'m> Default for Typo<'m> {
fn default() -> Self {
Self {
path: std::path::Path::new("-"),
line: b"",
line_num: 0,
context: Context::None,
buffer: Cow::Borrowed(&[]),
byte_offset: 0,
typo: "",
corrections: crate::Status::Invalid,
@ -74,27 +85,63 @@ impl<'m> Default for FileTypo<'m> {
}
}
/// Where a message originated: a line in a file, a path, or nowhere in
/// particular.
#[derive(Clone, Debug, serde::Serialize, derive_more::From)]
#[serde(untagged)]
#[non_exhaustive]
pub enum Context<'m> {
    /// A specific line of a file.
    File(FileContext<'m>),
    /// A path on its own, with no line information.
    Path(PathContext<'m>),
    /// No location attached.
    None,
}

impl<'m> Default for Context<'m> {
    /// Defaults to having no context.
    fn default() -> Self {
        Self::None
    }
}

impl<'m> std::fmt::Display for Context<'m> {
    /// Renders `path:line` for a file context, the bare path for a path
    /// context, and nothing at all when there is no context.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::File(file) => write!(f, "{}:{}", file.path.display(), file.line_num),
            Self::Path(location) => write!(f, "{}", location.path.display()),
            Self::None => Ok(()),
        }
    }
}
#[derive(Clone, Debug, serde::Serialize, derive_setters::Setters)]
#[non_exhaustive]
pub struct PathTypo<'m> {
pub struct FileContext<'m> {
pub path: &'m std::path::Path,
pub byte_offset: usize,
pub typo: &'m str,
pub corrections: crate::Status<'m>,
pub line_num: usize,
}
impl<'m> Default for PathTypo<'m> {
impl<'m> Default for FileContext<'m> {
fn default() -> Self {
Self {
path: std::path::Path::new("-"),
line_num: 0,
}
}
}
#[derive(Clone, Debug, serde::Serialize, derive_setters::Setters)]
#[non_exhaustive]
pub struct PathContext<'m> {
pub path: &'m std::path::Path,
}
impl<'m> Default for PathContext<'m> {
fn default() -> Self {
Self {
path: std::path::Path::new("-"),
byte_offset: 0,
typo: "",
corrections: crate::Status::Invalid,
}
}
}
#[derive(Copy, Clone, Debug, serde::Serialize)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum ParseKind {
Identifier,
@ -124,7 +171,8 @@ impl<'m> Default for File<'m> {
#[derive(Clone, Debug, serde::Serialize, derive_setters::Setters)]
#[non_exhaustive]
pub struct Parse<'m> {
pub path: &'m std::path::Path,
#[serde(flatten)]
pub context: Context<'m>,
pub kind: ParseKind,
pub data: Vec<&'m str>,
}
@ -132,7 +180,7 @@ pub struct Parse<'m> {
impl<'m> Default for Parse<'m> {
fn default() -> Self {
Self {
path: std::path::Path::new("-"),
context: Context::None,
kind: ParseKind::Identifier,
data: vec![],
}
@ -195,42 +243,7 @@ impl Report for PrintBrief {
Message::BinaryFile(msg) => {
log::info!("{}", msg);
}
Message::FileTypo(msg) => match &msg.corrections {
crate::Status::Valid => {}
crate::Status::Invalid => {
println!(
"{}:{}:{}: {} is disallowed",
msg.path.display(),
msg.line_num,
msg.byte_offset,
msg.typo,
);
}
crate::Status::Corrections(corrections) => {
println!(
"{}:{}:{}: {} -> {}",
msg.path.display(),
msg.line_num,
msg.byte_offset,
msg.typo,
itertools::join(corrections.iter(), ", ")
);
}
},
Message::PathTypo(msg) => match &msg.corrections {
crate::Status::Valid => {}
crate::Status::Invalid => {
println!("{}: {} is disallowed", msg.path.display(), msg.typo,);
}
crate::Status::Corrections(corrections) => {
println!(
"{}: {} -> {}",
msg.path.display(),
msg.typo,
itertools::join(corrections.iter(), ", ")
);
}
},
Message::Typo(msg) => print_brief_correction(msg),
Message::File(msg) => {
println!("{}", msg.path.display());
}
@ -257,25 +270,7 @@ impl Report for PrintLong {
Message::BinaryFile(msg) => {
log::info!("{}", msg);
}
Message::FileTypo(msg) => print_long_correction(msg),
Message::PathTypo(msg) => match &msg.corrections {
crate::Status::Valid => {}
crate::Status::Invalid => {
println!(
"{}: error: `{}` is disallowed",
msg.path.display(),
msg.typo,
);
}
crate::Status::Corrections(corrections) => {
println!(
"{}: error: `{}` should be {}",
msg.path.display(),
msg.typo,
itertools::join(corrections.iter().map(|c| format!("`{}`", c)), ", ")
);
}
},
Message::Typo(msg) => print_long_correction(msg),
Message::File(msg) => {
println!("{}", msg.path.display());
}
@ -293,45 +288,66 @@ impl Report for PrintLong {
}
}
fn print_long_correction(msg: &FileTypo) {
let line_num = msg.line_num.to_string();
let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect();
let hl_indent: String = itertools::repeat_n(" ", msg.byte_offset).collect();
let hl: String = itertools::repeat_n("^", msg.typo.len()).collect();
let line = String::from_utf8_lossy(msg.line);
let line = line.replace("\t", " ");
/// Prints a one-line summary of a typo to stdout.
///
/// The line starts with the message context and byte offset, followed by
/// either the comma-separated corrections or a "disallowed" notice. Nothing
/// is printed when the status is `Valid`.
fn print_brief_correction(msg: &Typo) {
    let location = &msg.context;
    let offset = msg.byte_offset;

    match &msg.corrections {
        crate::Status::Corrections(corrections) => {
            let suggestions = itertools::join(corrections.iter(), ", ");
            println!("{}:{}: {} -> {}", location, offset, msg.typo, suggestions);
        }
        crate::Status::Invalid => {
            println!("{}:{}: {} is disallowed", location, offset, msg.typo,);
        }
        crate::Status::Valid => {}
    }
}
fn print_long_correction(msg: &Typo) {
let stdout = io::stdout();
let mut handle = stdout.lock();
match &msg.corrections {
crate::Status::Valid => {}
crate::Status::Invalid => {
writeln!(handle, "error: `{}` is disallowed", msg.typo,).unwrap();
writeln!(
handle,
"{}:{}: {} is disallowed",
msg.context, msg.byte_offset, msg.typo,
)
.unwrap();
}
crate::Status::Corrections(corrections) => {
writeln!(
handle,
"error: `{}` should be {}",
msg.typo,
itertools::join(corrections.iter().map(|c| format!("`{}`", c)), ", ")
itertools::join(corrections.iter(), ", ")
)
.unwrap();
}
}
writeln!(
handle,
" --> {}:{}:{}",
msg.path.display(),
msg.line_num,
msg.byte_offset
)
.unwrap();
writeln!(handle, " --> {}:{}", msg.context, msg.byte_offset).unwrap();
if let Context::File(context) = &msg.context {
let line_num = context.line_num.to_string();
let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect();
let hl_indent: String = itertools::repeat_n(" ", msg.byte_offset).collect();
let hl: String = itertools::repeat_n("^", msg.typo.len()).collect();
let line = String::from_utf8_lossy(msg.buffer.as_ref());
let line = line.replace("\t", " ");
writeln!(handle, "{} |", line_indent).unwrap();
writeln!(handle, "{} | {}", msg.line_num, line.trim_end()).unwrap();
writeln!(handle, "{} | {}", line_num, line.trim_end()).unwrap();
writeln!(handle, "{} | {}{}", line_indent, hl_indent, hl).unwrap();
writeln!(handle, "{} |", line_indent).unwrap();
}
}
#[derive(Copy, Clone, Debug)]

View file

@ -102,7 +102,7 @@ impl Parser {
ParserBuilder::default().build()
}
pub fn parse<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
pub fn parse_str<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
self.words_str
.find_iter(content)
.filter(move |m| self.accept(m.as_str().as_bytes()))
@ -138,21 +138,21 @@ impl Default for Parser {
}
}
// `_`: number literal separator in Rust and other languages
// `'`: number literal separator in C++
static DIGITS: once_cell::sync::Lazy<regex::bytes::Regex> =
once_cell::sync::Lazy::new(|| regex::bytes::Regex::new(r#"^[0-9_']+$"#).unwrap());
fn is_number(ident: &[u8]) -> bool {
lazy_static::lazy_static! {
// `_`: number literal separator in Rust and other languages
// `'`: number literal separator in C++
static ref DIGITS: regex::bytes::Regex = regex::bytes::Regex::new(r#"^[0-9_']+$"#).unwrap();
}
DIGITS.is_match(ident)
}
// `_`: number literal separator in Rust and other languages
// `'`: number literal separator in C++
static HEX: once_cell::sync::Lazy<regex::bytes::Regex> =
once_cell::sync::Lazy::new(|| regex::bytes::Regex::new(r#"^0[xX][0-9a-fA-F_']+$"#).unwrap());
fn is_hex(ident: &[u8]) -> bool {
lazy_static::lazy_static! {
// `_`: number literal separator in Rust and other languages
// `'`: number literal separator in C++
static ref HEX: regex::bytes::Regex = regex::bytes::Regex::new(r#"^0[xX][0-9a-fA-F_']+$"#).unwrap();
}
HEX.is_match(ident)
}
@ -390,7 +390,7 @@ mod test {
let expected: Vec<Identifier> = vec![];
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
assert_eq!(expected, actual);
let actual: Vec<_> = parser.parse(input).collect();
let actual: Vec<_> = parser.parse_str(input).collect();
assert_eq!(expected, actual);
}
@ -402,7 +402,7 @@ mod test {
let expected: Vec<Identifier> = vec![Identifier::new_unchecked("word", 0)];
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
assert_eq!(expected, actual);
let actual: Vec<_> = parser.parse(input).collect();
let actual: Vec<_> = parser.parse_str(input).collect();
assert_eq!(expected, actual);
}
@ -417,7 +417,7 @@ mod test {
];
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
assert_eq!(expected, actual);
let actual: Vec<_> = parser.parse(input).collect();
let actual: Vec<_> = parser.parse_str(input).collect();
assert_eq!(expected, actual);
}
@ -432,7 +432,7 @@ mod test {
];
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
assert_eq!(expected, actual);
let actual: Vec<_> = parser.parse(input).collect();
let actual: Vec<_> = parser.parse_str(input).collect();
assert_eq!(expected, actual);
}
@ -447,7 +447,7 @@ mod test {
];
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
assert_eq!(expected, actual);
let actual: Vec<_> = parser.parse(input).collect();
let actual: Vec<_> = parser.parse_str(input).collect();
assert_eq!(expected, actual);
}
@ -459,7 +459,7 @@ mod test {
let expected: Vec<Identifier> = vec![Identifier::new_unchecked("A_B", 0)];
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
assert_eq!(expected, actual);
let actual: Vec<_> = parser.parse(input).collect();
let actual: Vec<_> = parser.parse_str(input).collect();
assert_eq!(expected, actual);
}
@ -474,7 +474,7 @@ mod test {
];
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
assert_eq!(expected, actual);
let actual: Vec<_> = parser.parse(input).collect();
let actual: Vec<_> = parser.parse_str(input).collect();
assert_eq!(expected, actual);
}
@ -493,7 +493,7 @@ mod test {
];
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
assert_eq!(expected, actual);
let actual: Vec<_> = parser.parse(input).collect();
let actual: Vec<_> = parser.parse_str(input).collect();
assert_eq!(expected, actual);
}

View file

@ -1,96 +1,8 @@
use std::sync::atomic;
pub(crate) trait Checks: Send + Sync {
fn check_filename(
&self,
path: &std::path::Path,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
report: &dyn typos::report::Report,
) -> Result<bool, typos::Error>;
fn check_file(
&self,
path: &std::path::Path,
explicit: bool,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
report: &dyn typos::report::Report,
) -> Result<bool, typos::Error>;
}
impl<'p> Checks for typos::checks::ParseIdentifiers {
fn check_filename(
&self,
path: &std::path::Path,
parser: &typos::tokens::Parser,
_dictionary: &dyn typos::Dictionary,
report: &dyn typos::report::Report,
) -> Result<bool, typos::Error> {
self.check_filename(path, parser, report)
}
fn check_file(
&self,
path: &std::path::Path,
explicit: bool,
parser: &typos::tokens::Parser,
_dictionary: &dyn typos::Dictionary,
report: &dyn typos::report::Report,
) -> Result<bool, typos::Error> {
self.check_file(path, explicit, parser, report)
}
}
impl<'p> Checks for typos::checks::ParseWords {
fn check_filename(
&self,
path: &std::path::Path,
parser: &typos::tokens::Parser,
_dictionary: &dyn typos::Dictionary,
report: &dyn typos::report::Report,
) -> Result<bool, typos::Error> {
self.check_filename(path, parser, report)
}
fn check_file(
&self,
path: &std::path::Path,
explicit: bool,
parser: &typos::tokens::Parser,
_dictionary: &dyn typos::Dictionary,
report: &dyn typos::report::Report,
) -> Result<bool, typos::Error> {
self.check_file(path, explicit, parser, report)
}
}
impl<'d, 'p> Checks for typos::checks::Checks {
fn check_filename(
&self,
path: &std::path::Path,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
report: &dyn typos::report::Report,
) -> Result<bool, typos::Error> {
self.check_filename(path, parser, dictionary, report)
}
fn check_file(
&self,
path: &std::path::Path,
explicit: bool,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
report: &dyn typos::report::Report,
) -> Result<bool, typos::Error> {
self.check_file(path, explicit, parser, dictionary, report)
}
}
pub(crate) fn check_path(
walk: ignore::Walk,
checks: &dyn Checks,
checks: &dyn typos::checks::Check,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
reporter: &dyn typos::report::Report,
@ -115,7 +27,7 @@ pub(crate) fn check_path(
pub(crate) fn check_path_parallel(
walk: ignore::WalkParallel,
checks: &dyn Checks,
checks: &dyn typos::checks::Check,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
reporter: &dyn typos::report::Report,
@ -143,7 +55,7 @@ pub(crate) fn check_path_parallel(
fn check_entry(
entry: Result<ignore::DirEntry, ignore::Error>,
checks: &dyn Checks,
checks: &dyn typos::checks::Check,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
reporter: &dyn typos::report::Report,

View file

@ -3,7 +3,6 @@
extern crate clap;
use std::io::Write;
use std::sync::atomic;
use structopt::StructOpt;
@ -91,51 +90,18 @@ fn run() -> Result<i32, anyhow::Error> {
reporter = &replace_reporter;
}
if args.files {
if single_threaded {
for entry in walk.build() {
match entry {
Ok(entry) => {
let msg = typos::report::File::new(entry.path());
reporter.report(msg.into());
}
Err(err) => {
let msg = typos::report::Error::new(err.to_string());
reporter.report(msg.into());
errors_found = true
}
}
}
} else {
let atomic_errors = atomic::AtomicBool::new(errors_found);
walk.build_parallel().run(|| {
Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
match entry {
Ok(entry) => {
let msg = typos::report::File::new(entry.path());
reporter.report(msg.into());
}
Err(err) => {
let msg = typos::report::Error::new(err.to_string());
reporter.report(msg.into());
atomic_errors.store(true, atomic::Ordering::Relaxed);
}
}
ignore::WalkState::Continue
})
});
errors_found = atomic_errors.into_inner();
}
} else {
let (identifier_parser, word_parser, checks);
let selected_checks: &dyn checks::Checks = if args.identifiers {
let (files, identifier_parser, word_parser, checks);
let selected_checks: &dyn typos::checks::Check = if args.files {
files = settings.build_files();
&files
} else if args.identifiers {
identifier_parser = settings.build_identifier_parser();
&identifier_parser
} else if args.words {
word_parser = settings.build_word_parser();
&word_parser
} else {
checks = settings.build_checks();
checks = settings.build_typos();
&checks
};
@ -162,7 +128,6 @@ fn run() -> Result<i32, anyhow::Error> {
if cur_errors {
errors_found = true;
}
}
if args.write_changes {
replace_reporter.write()?;

View file

@ -56,13 +56,22 @@ impl<'r> Replace<'r> {
impl<'r> typos::report::Report for Replace<'r> {
fn report(&self, msg: typos::report::Message<'_>) -> bool {
match msg {
typos::report::Message::FileTypo(msg) => match msg.corrections {
typos::Status::Corrections(corrections) if corrections.len() == 1 => {
let path = msg.path.to_owned();
let line_num = msg.line_num;
let typo = match &msg {
typos::report::Message::Typo(typo) => typo,
_ => return self.reporter.report(msg),
};
let corrections = match &typo.corrections {
typos::Status::Corrections(corrections) if corrections.len() == 1 => corrections,
_ => return self.reporter.report(msg),
};
match &typo.context {
typos::report::Context::File(file) => {
let path = file.path.to_owned();
let line_num = file.line_num;
let correction =
Correction::new(msg.byte_offset, msg.typo, corrections[0].as_ref());
Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref());
let mut deferred = self.deferred.lock().unwrap();
let content = deferred
.content
@ -73,20 +82,15 @@ impl<'r> typos::report::Report for Replace<'r> {
content.push(correction);
false
}
_ => self.reporter.report(typos::report::Message::FileTypo(msg)),
},
typos::report::Message::PathTypo(msg) => match msg.corrections {
typos::Status::Corrections(corrections) if corrections.len() == 1 => {
let path = msg.path.to_owned();
typos::report::Context::Path(path) => {
let path = path.path.to_owned();
let correction =
Correction::new(msg.byte_offset, msg.typo, corrections[0].as_ref());
Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref());
let mut deferred = self.deferred.lock().unwrap();
let content = deferred.paths.entry(path).or_insert_with(Vec::new);
content.push(correction);
false
}
_ => self.reporter.report(typos::report::Message::PathTypo(msg)),
},
_ => self.reporter.report(msg),
}
}
@ -204,10 +208,14 @@ mod test {
let primary = typos::report::PrintSilent;
let replace = Replace::new(&primary);
replace.report(
typos::report::FileTypo::default()
typos::report::Typo::default()
.context(
typos::report::FileContext::default()
.path(input_file.path())
.line(b"1 foo 2\n3 4 5")
.line_num(1)
.into(),
)
.buffer(std::borrow::Cow::Borrowed(b"1 foo 2\n3 4 5"))
.byte_offset(2)
.typo("foo")
.corrections(typos::Status::Corrections(vec![
@ -229,8 +237,13 @@ mod test {
let primary = typos::report::PrintSilent;
let replace = Replace::new(&primary);
replace.report(
typos::report::PathTypo::default()
typos::report::Typo::default()
.context(
typos::report::PathContext::default()
.path(input_file.path())
.into(),
)
.buffer(std::borrow::Cow::Borrowed(b"foo.txt"))
.byte_offset(0)
.typo("foo")
.corrections(typos::Status::Corrections(vec![