refactor(cli): Break out config->policy

This is preparation for supporting multiple policies at the same time.
This commit is contained in:
Ed Page 2021-02-12 18:43:12 -06:00
parent b17f9c3a12
commit 8bcacf3ca6
7 changed files with 187 additions and 160 deletions

8
Cargo.lock generated
View file

@ -1474,6 +1474,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
[[package]]
name = "typed-arena"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0685c84d5d54d1c26f7d3eb96cd41550adb97baed141a761cf335d3d33bcd0ae"
[[package]]
name = "typos"
version = "0.3.0"
@ -1510,6 +1516,7 @@ dependencies = [
"itertools 0.10.0",
"kstring",
"log",
"once_cell",
"phf",
"predicates",
"proc-exit",
@ -1517,6 +1524,7 @@ dependencies = [
"serde_json",
"structopt",
"toml",
"typed-arena",
"typos",
"typos-dict",
"typos-vars",

View file

@ -46,6 +46,7 @@ toml = "0.5"
log = "0.4"
env_logger = "0.8"
bstr = "0.2"
once_cell = "1.2.0"
ahash = "0.7"
difflib = "0.4"
proc-exit = "1.0"
@ -58,6 +59,7 @@ itertools = "0.10"
serde_json = "1.0"
encoding = "0.2"
kstring = "1.0"
typed-arena = "2.0.1"
[dev-dependencies]
assert_fs = "1.0"

View file

@ -5,6 +5,12 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use typos_cli::file::FileChecker;
fn bench_checks(c: &mut Criterion) {
let dictionary = typos_cli::dict::BuiltIn::new(Default::default());
let tokenizer = typos::tokens::Tokenizer::new();
let policy = typos_cli::policy::Policy::new()
.dictionary(&dictionary)
.tokenizer(&tokenizer);
let mut group = c.benchmark_group("checks");
for (name, sample) in data::DATA {
let len = sample.len();
@ -13,16 +19,11 @@ fn bench_checks(c: &mut Criterion) {
let sample_path = temp.child("sample");
sample_path.write_str(sample).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Tokenizer::new();
let settings = typos_cli::file::CheckSettings::new();
b.iter(|| {
typos_cli::file::FoundFiles.check_file(
sample_path.path(),
true,
&settings,
&parser,
&corrections,
&policy,
&typos_cli::report::PrintSilent,
)
});
@ -34,16 +35,11 @@ fn bench_checks(c: &mut Criterion) {
let sample_path = temp.child("sample");
sample_path.write_str(sample).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Tokenizer::new();
let settings = typos_cli::file::CheckSettings::new();
b.iter(|| {
typos_cli::file::Identifiers.check_file(
sample_path.path(),
true,
&settings,
&parser,
&corrections,
&policy,
&typos_cli::report::PrintSilent,
)
});
@ -55,16 +51,11 @@ fn bench_checks(c: &mut Criterion) {
let sample_path = temp.child("sample");
sample_path.write_str(sample).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Tokenizer::new();
let settings = typos_cli::file::CheckSettings::new();
b.iter(|| {
typos_cli::file::Words.check_file(
sample_path.path(),
true,
&settings,
&parser,
&corrections,
&policy,
&typos_cli::report::PrintSilent,
)
});
@ -76,16 +67,11 @@ fn bench_checks(c: &mut Criterion) {
let sample_path = temp.child("sample");
sample_path.write_str(sample).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Tokenizer::new();
let settings = typos_cli::file::CheckSettings::new();
b.iter(|| {
typos_cli::file::Typos.check_file(
sample_path.path(),
true,
&settings,
&parser,
&corrections,
&policy,
&typos_cli::report::PrintSilent,
)
});

View file

@ -4,59 +4,17 @@ use std::io::Read;
use std::io::Write;
use crate::report;
use typos::tokens;
use typos::Dictionary;
pub trait FileChecker: Send + Sync {
fn check_file(
&self,
path: &std::path::Path,
explicit: bool,
settings: &CheckSettings,
tokenizer: &tokens::Tokenizer,
dictionary: &dyn Dictionary,
policy: &crate::policy::Policy,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error>;
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckSettings {
check_filenames: bool,
check_files: bool,
binary: bool,
}
impl CheckSettings {
pub fn new() -> Self {
Default::default()
}
pub fn check_filenames(&mut self, yes: bool) -> &mut Self {
self.check_filenames = yes;
self
}
pub fn check_files(&mut self, yes: bool) -> &mut Self {
self.check_files = yes;
self
}
pub fn binary(&mut self, yes: bool) -> &mut Self {
self.binary = yes;
self
}
}
impl Default for CheckSettings {
fn default() -> Self {
Self {
check_filenames: true,
check_files: true,
binary: false,
}
}
}
#[derive(Debug, Clone, Copy)]
pub struct Typos;
@ -65,14 +23,12 @@ impl FileChecker for Typos {
&self,
path: &std::path::Path,
explicit: bool,
settings: &CheckSettings,
tokenizer: &tokens::Tokenizer,
dictionary: &dyn Dictionary,
policy: &crate::policy::Policy,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
if settings.check_filenames {
if policy.check_filenames {
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
for typo in typos::check_str(file_name, tokenizer, dictionary) {
for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) {
let msg = report::Typo {
context: Some(report::PathContext { path }.into()),
buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
@ -85,14 +41,14 @@ impl FileChecker for Typos {
}
}
if settings.check_files {
if policy.check_files {
let (buffer, content_type) = read_file(path, reporter)?;
if !explicit && !settings.binary && content_type.is_binary() {
if !explicit && !policy.binary && content_type.is_binary() {
let msg = report::BinaryFile { path };
reporter.report(msg.into())?;
} else {
let mut accum_line_num = AccumulateLineNum::new();
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) {
let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
let msg = report::Typo {
@ -119,20 +75,18 @@ impl FileChecker for FixTypos {
&self,
path: &std::path::Path,
explicit: bool,
settings: &CheckSettings,
tokenizer: &tokens::Tokenizer,
dictionary: &dyn Dictionary,
policy: &crate::policy::Policy,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
if settings.check_files {
if policy.check_files {
let (buffer, content_type) = read_file(path, reporter)?;
if !explicit && !settings.binary && content_type.is_binary() {
if !explicit && !policy.binary && content_type.is_binary() {
let msg = report::BinaryFile { path };
reporter.report(msg.into())?;
} else {
let mut fixes = Vec::new();
let mut accum_line_num = AccumulateLineNum::new();
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) {
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {
@ -156,10 +110,10 @@ impl FileChecker for FixTypos {
}
// Ensure the above write can happen before renaming the file.
if settings.check_filenames {
if policy.check_filenames {
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
let mut fixes = Vec::new();
for typo in typos::check_str(file_name, tokenizer, dictionary) {
for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) {
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {
@ -196,22 +150,20 @@ impl FileChecker for DiffTypos {
&self,
path: &std::path::Path,
explicit: bool,
settings: &CheckSettings,
tokenizer: &tokens::Tokenizer,
dictionary: &dyn Dictionary,
policy: &crate::policy::Policy,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
let mut content = Vec::new();
let mut new_content = Vec::new();
if settings.check_files {
if policy.check_files {
let (buffer, content_type) = read_file(path, reporter)?;
if !explicit && !settings.binary && content_type.is_binary() {
if !explicit && !policy.binary && content_type.is_binary() {
let msg = report::BinaryFile { path };
reporter.report(msg.into())?;
} else {
let mut fixes = Vec::new();
let mut accum_line_num = AccumulateLineNum::new();
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) {
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {
@ -236,10 +188,10 @@ impl FileChecker for DiffTypos {
// Match FixTypos ordering for easy diffing.
let mut new_path = None;
if settings.check_filenames {
if policy.check_filenames {
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
let mut fixes = Vec::new();
for typo in typos::check_str(file_name, tokenizer, dictionary) {
for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) {
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {
@ -300,14 +252,12 @@ impl FileChecker for Identifiers {
&self,
path: &std::path::Path,
explicit: bool,
settings: &CheckSettings,
tokenizer: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
policy: &crate::policy::Policy,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
if settings.check_filenames {
if policy.check_filenames {
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
for word in tokenizer.parse_str(file_name) {
for word in policy.tokenizer.parse_str(file_name) {
let msg = report::Parse {
context: Some(report::PathContext { path }.into()),
kind: report::ParseKind::Identifier,
@ -318,13 +268,13 @@ impl FileChecker for Identifiers {
}
}
if settings.check_files {
if policy.check_files {
let (buffer, content_type) = read_file(path, reporter)?;
if !explicit && !settings.binary && content_type.is_binary() {
if !explicit && !policy.binary && content_type.is_binary() {
let msg = report::BinaryFile { path };
reporter.report(msg.into())?;
} else {
for word in tokenizer.parse_bytes(&buffer) {
for word in policy.tokenizer.parse_bytes(&buffer) {
// HACK: Don't look up the line_num per entry to better match the performance
// of Typos for comparison purposes. We don't really get much out of it
// anyway.
@ -351,14 +301,16 @@ impl FileChecker for Words {
&self,
path: &std::path::Path,
explicit: bool,
settings: &CheckSettings,
tokenizer: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
policy: &crate::policy::Policy,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
if settings.check_filenames {
if policy.check_filenames {
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
for word in tokenizer.parse_str(file_name).flat_map(|i| i.split()) {
for word in policy
.tokenizer
.parse_str(file_name)
.flat_map(|i| i.split())
{
let msg = report::Parse {
context: Some(report::PathContext { path }.into()),
kind: report::ParseKind::Word,
@ -369,13 +321,17 @@ impl FileChecker for Words {
}
}
if settings.check_files {
if policy.check_files {
let (buffer, content_type) = read_file(path, reporter)?;
if !explicit && !settings.binary && content_type.is_binary() {
if !explicit && !policy.binary && content_type.is_binary() {
let msg = report::BinaryFile { path };
reporter.report(msg.into())?;
} else {
for word in tokenizer.parse_bytes(&buffer).flat_map(|i| i.split()) {
for word in policy
.tokenizer
.parse_bytes(&buffer)
.flat_map(|i| i.split())
{
// HACK: Don't look up the line_num per entry to better match the performance
// of Typos for comparison purposes. We don't really get much out of it
// anyway.
@ -402,13 +358,11 @@ impl FileChecker for FoundFiles {
&self,
path: &std::path::Path,
explicit: bool,
settings: &CheckSettings,
_parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
policy: &crate::policy::Policy,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
// Check `settings.binary` first so we can easily check performance of walking vs reading
if settings.binary {
// Check `policy.binary` first so we can easily check performance of walking vs reading
if policy.binary {
let msg = report::File::new(path);
reporter.report(msg.into())?;
} else {
@ -598,13 +552,11 @@ fn fix_buffer(mut buffer: Vec<u8>, typos: impl Iterator<Item = typos::Typo<'stat
pub fn walk_path(
walk: ignore::Walk,
checks: &dyn FileChecker,
settings: &CheckSettings,
tokenizer: &typos::tokens::Tokenizer,
dictionary: &dyn typos::Dictionary,
policy: &crate::policy::Policy,
reporter: &dyn report::Report,
) -> Result<(), ignore::Error> {
for entry in walk {
walk_entry(entry, checks, settings, tokenizer, dictionary, reporter)?;
walk_entry(entry, checks, policy, reporter)?;
}
Ok(())
}
@ -612,15 +564,13 @@ pub fn walk_path(
pub fn walk_path_parallel(
walk: ignore::WalkParallel,
checks: &dyn FileChecker,
settings: &CheckSettings,
tokenizer: &typos::tokens::Tokenizer,
dictionary: &dyn typos::Dictionary,
policy: &crate::policy::Policy,
reporter: &dyn report::Report,
) -> Result<(), ignore::Error> {
let error: std::sync::Mutex<Result<(), ignore::Error>> = std::sync::Mutex::new(Ok(()));
walk.run(|| {
Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
match walk_entry(entry, checks, settings, tokenizer, dictionary, reporter) {
match walk_entry(entry, checks, policy, reporter) {
Ok(()) => ignore::WalkState::Continue,
Err(err) => {
*error.lock().unwrap() = Err(err);
@ -636,9 +586,7 @@ pub fn walk_path_parallel(
fn walk_entry(
entry: Result<ignore::DirEntry, ignore::Error>,
checks: &dyn FileChecker,
settings: &CheckSettings,
tokenizer: &typos::tokens::Tokenizer,
dictionary: &dyn typos::Dictionary,
policy: &crate::policy::Policy,
reporter: &dyn report::Report,
) -> Result<(), ignore::Error> {
let entry = match entry {
@ -655,7 +603,7 @@ fn walk_entry(
} else {
entry.path()
};
checks.check_file(path, explicit, settings, tokenizer, dictionary, reporter)?;
checks.check_file(path, explicit, policy, reporter)?;
}
Ok(())

View file

@ -1,4 +1,5 @@
pub mod config;
pub mod dict;
pub mod file;
pub mod policy;
pub mod report;

View file

@ -8,7 +8,6 @@ use structopt::StructOpt;
mod args;
use typos_cli::config;
use typos_cli::dict;
use typos_cli::report;
use proc_exit::WithCodeResultExt;
@ -91,36 +90,22 @@ fn run_checks(args: &args::Args) -> proc_exit::ExitResult {
};
let config = load_config(cwd, &args).with_code(proc_exit::Code::CONFIG_ERR)?;
let tokenizer = typos::tokens::TokenizerBuilder::new()
.ignore_hex(config.default.ignore_hex())
.leading_digits(config.default.identifier_leading_digits())
.leading_chars(config.default.identifier_leading_chars().to_owned())
.include_digits(config.default.identifier_include_digits())
.include_chars(config.default.identifier_include_chars().to_owned())
.build();
let dictionary = crate::dict::BuiltIn::new(config.default.locale());
let mut dictionary = crate::dict::Override::new(dictionary);
dictionary.identifiers(config.default.extend_identifiers());
dictionary.words(config.default.extend_words());
let mut settings = typos_cli::file::CheckSettings::new();
settings
.check_filenames(config.default.check_filename())
.check_files(config.default.check_file())
.binary(config.default.binary());
let storage = typos_cli::policy::ConfigStorage::new();
let engine = typos_cli::policy::ConfigEngine::new(config, &storage);
let files = engine.files();
let policy = engine.policy();
let threads = if path.is_file() { 1 } else { args.threads };
let single_threaded = threads == 1;
let mut walk = ignore::WalkBuilder::new(path);
walk.threads(args.threads)
.hidden(config.files.ignore_hidden())
.ignore(config.files.ignore_dot())
.git_global(config.files.ignore_global())
.git_ignore(config.files.ignore_vcs())
.git_exclude(config.files.ignore_vcs())
.parents(config.files.ignore_parent());
.hidden(files.ignore_hidden())
.ignore(files.ignore_dot())
.git_global(files.ignore_global())
.git_ignore(files.ignore_vcs())
.git_exclude(files.ignore_vcs())
.parents(files.ignore_parent());
// HACK: Diff doesn't handle mixing content
let output_reporter = if args.diff {
@ -146,21 +131,12 @@ fn run_checks(args: &args::Args) -> proc_exit::ExitResult {
};
if single_threaded {
typos_cli::file::walk_path(
walk.build(),
selected_checks,
&settings,
&tokenizer,
&dictionary,
reporter,
)
typos_cli::file::walk_path(walk.build(), selected_checks, &policy, reporter)
} else {
typos_cli::file::walk_path_parallel(
walk.build_parallel(),
selected_checks,
&settings,
&tokenizer,
&dictionary,
&policy,
reporter,
)
}

106
src/policy.rs Normal file
View file

@ -0,0 +1,106 @@
/// Arena-backed storage for strings copied out of a configuration.
///
/// Strings are copied into an internal arena, so the `&str` handed back by the
/// storage stays valid for as long as the `ConfigStorage` itself is borrowed.
pub struct ConfigStorage {
    arena: typed_arena::Arena<kstring::KString>,
}

impl ConfigStorage {
    /// Creates an empty storage.
    pub fn new() -> Self {
        Self {
            arena: typed_arena::Arena::new(),
        }
    }

    /// Copies `other` into the arena and returns a borrow tied to `self`.
    ///
    /// Every call allocates a new arena entry; identical strings are not
    /// deduplicated.
    fn get<'s>(&'s self, other: &str) -> &'s str {
        self.arena.alloc(kstring::KString::from_ref(other))
    }
}

// A public zero-argument `new()` should be mirrored by `Default` so the type
// works with `Default::default()` and `#[derive(Default)]` on containing types.
impl Default for ConfigStorage {
    fn default() -> Self {
        Self::new()
    }
}
/// Settings resolved from a [`crate::config::Config`].
///
/// Owns the tokenizer and dictionary built from the configuration and hands
/// out borrowed [`Policy`] views of them. The dictionary's override entries
/// borrow from the [`ConfigStorage`] given to [`ConfigEngine::new`].
pub struct ConfigEngine<'s> {
    files: crate::config::Walk,
    check_filenames: bool,
    check_files: bool,
    binary: bool,
    tokenizer: typos::tokens::Tokenizer,
    dictionary: crate::dict::Override<'s, 's, crate::dict::BuiltIn>,
}

impl<'s> ConfigEngine<'s> {
    /// Builds the engine from `config`, copying override strings into `storage`.
    pub fn new(config: crate::config::Config, storage: &'s ConfigStorage) -> Self {
        let crate::config::Config { files, default } = config;

        // Tokenizer configured from the identifier-related settings.
        let tokenizer = typos::tokens::TokenizerBuilder::new()
            .ignore_hex(default.ignore_hex())
            .leading_digits(default.identifier_leading_digits())
            .leading_chars(default.identifier_leading_chars().to_owned())
            .include_digits(default.identifier_include_digits())
            .include_chars(default.identifier_include_chars().to_owned())
            .build();

        // Built-in dictionary for the configured locale, wrapped so the
        // configured identifier/word overrides can be layered on top.
        let mut dictionary =
            crate::dict::Override::new(crate::dict::BuiltIn::new(default.locale()));

        // The override pairs borrow from `default`; copy them into `storage`
        // so the dictionary can hold them for `'s`.
        let identifier_overrides = default
            .extend_identifiers()
            .map(|(key, value)| (storage.get(key), storage.get(value)));
        dictionary.identifiers(identifier_overrides);

        let word_overrides = default
            .extend_words()
            .map(|(key, value)| (storage.get(key), storage.get(value)));
        dictionary.words(word_overrides);

        let check_filenames = default.check_filename();
        let check_files = default.check_file();
        let binary = default.binary();

        Self {
            files,
            check_filenames,
            check_files,
            binary,
            tokenizer,
            dictionary,
        }
    }

    /// Returns the file-walking settings taken from the configuration.
    pub fn files(&self) -> &crate::config::Walk {
        &self.files
    }

    /// Returns a [`Policy`] borrowing this engine's tokenizer and dictionary.
    pub fn policy(&self) -> Policy<'_, '_> {
        let Self {
            check_filenames,
            check_files,
            binary,
            tokenizer,
            dictionary,
            ..
        } = self;

        Policy {
            check_filenames: *check_filenames,
            check_files: *check_files,
            binary: *binary,
            tokenizer,
            dictionary,
        }
    }
}
/// The set of knobs a file checker consults while processing one path.
#[non_exhaustive]
#[derive(derive_setters::Setters)]
pub struct Policy<'t, 'd> {
    /// Whether the file name itself is checked.
    pub check_filenames: bool,
    /// Whether the file's contents are read and checked.
    pub check_files: bool,
    /// When `false`, a non-explicit file detected as binary is reported as
    /// such instead of having its contents checked.
    pub binary: bool,
    /// Tokenizer used to split text into identifiers and words.
    pub tokenizer: &'t typos::tokens::Tokenizer,
    /// Dictionary consulted for corrections.
    pub dictionary: &'d dyn typos::Dictionary,
}

impl<'t, 'd> Policy<'t, 'd> {
    /// Creates a policy with the default settings; equivalent to
    /// `Policy::default()`.
    pub fn new() -> Self {
        Self::default()
    }
}
/// Tokenizer with default settings, built on first use and shared by every
/// default [`Policy`].
static DEFAULT_TOKENIZER: once_cell::sync::Lazy<typos::tokens::Tokenizer> =
    once_cell::sync::Lazy::new(typos::tokens::Tokenizer::new);

/// Built-in English dictionary shared by every default [`Policy`].
static DEFAULT_DICT: crate::dict::BuiltIn = crate::dict::BuiltIn::new(crate::config::Locale::En);

impl<'t, 'd> Default for Policy<'t, 'd> {
    /// Checks both file names and file contents, does not treat binary files
    /// as text, and uses the shared default tokenizer and dictionary.
    fn default() -> Self {
        // Borrow the process-wide defaults; the `'static` borrows shorten to
        // whatever `'t` / `'d` the caller asks for.
        let tokenizer: &'static typos::tokens::Tokenizer = &DEFAULT_TOKENIZER;
        let dictionary: &'static crate::dict::BuiltIn = &DEFAULT_DICT;

        Self {
            check_filenames: true,
            check_files: true,
            binary: false,
            tokenizer,
            dictionary,
        }
    }
}