From b5827004a247ed09822dcc3cc60cb90a5a5f1c7a Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 1 Mar 2021 12:19:56 -0600 Subject: [PATCH 1/9] refactor(config): Simplify --- src/config.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/config.rs b/src/config.rs index 685ee91..064f23f 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,5 +1,4 @@ use std::collections::HashMap; -use std::io::Read; pub trait ConfigSource { fn walk(&self) -> Option<&dyn WalkSource> { @@ -107,9 +106,7 @@ pub struct Config { impl Config { pub fn from_file(path: &std::path::Path) -> Result { - let mut file = std::fs::File::open(path)?; - let mut s = String::new(); - file.read_to_string(&mut s)?; + let s = std::fs::read_to_string(path)?; Self::from_toml(&s) } From 75ba4ac535c24d5831c27a71b4bd1f75b8dd5408 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 1 Mar 2021 12:25:51 -0600 Subject: [PATCH 2/9] perf(config): Get small-string optimization --- Cargo.lock | 10 ++++++++++ Cargo.toml | 1 + src/config.rs | 24 ++++++++++++++---------- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 150d6e7..16f0dcc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -724,6 +724,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "kstring" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1167388385b43067bd74f967def6c93b969284f14f41e2ab6035b715d9343215" +dependencies = [ + "serde", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -1499,6 +1508,7 @@ dependencies = [ "human-panic", "ignore", "itertools 0.10.0", + "kstring", "log", "phf", "predicates", diff --git a/Cargo.toml b/Cargo.toml index 07d574d..44e8895 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,6 +57,7 @@ derive_setters = "0.1" itertools = "0.10" serde_json = "1.0" encoding = "0.2" +kstring = "1.0" [dev-dependencies] assert_fs = "1.0" diff --git a/src/config.rs b/src/config.rs index 064f23f..bc7a7e6 100644 --- a/src/config.rs +++ b/src/config.rs @@ -260,12 +260,12 @@ pub struct FileConfig { pub check_file: Option, pub ignore_hex: Option, pub identifier_leading_digits: Option, - pub identifier_leading_chars: Option, + pub identifier_leading_chars: Option, pub identifier_include_digits: Option, - pub identifier_include_chars: Option, + pub identifier_include_chars: Option, pub locale: Option, - pub extend_identifiers: HashMap, - pub extend_words: HashMap, + pub extend_identifiers: HashMap, + pub extend_words: HashMap, } impl FileConfig { @@ -277,9 +277,13 @@ impl FileConfig { check_file: Some(empty.check_file()), ignore_hex: Some(empty.ignore_hex()), identifier_leading_digits: Some(empty.identifier_leading_digits()), - identifier_leading_chars: Some(empty.identifier_leading_chars().to_owned()), + identifier_leading_chars: Some(kstring::KString::from_ref( + empty.identifier_leading_chars(), + )), identifier_include_digits: Some(empty.identifier_include_digits()), - identifier_include_chars: Some(empty.identifier_include_chars().to_owned()), + identifier_include_chars: Some(kstring::KString::from_ref( + empty.identifier_include_chars(), + )), locale: Some(empty.locale()), extend_identifiers: Default::default(), extend_words: Default::default(), @@ -303,13 +307,13 @@ impl FileConfig { self.identifier_leading_digits = Some(source); } if let Some(source) = source.identifier_leading_chars() { - self.identifier_leading_chars = Some(source.to_owned()); + self.identifier_leading_chars = Some(kstring::KString::from_ref(source)); } if let Some(source) = source.identifier_include_digits() { self.identifier_include_digits = Some(source); } if let Some(source) = source.identifier_include_chars() { - self.identifier_include_chars = Some(source.to_owned()); + self.identifier_include_chars = Some(kstring::KString::from_ref(source)); } if let Some(source) = source.locale() { self.locale = Some(source); @@ -317,12 +321,12 @@ impl FileConfig { self.extend_identifiers.extend( source .extend_identifiers() - .map(|(k, v)| (k.to_owned(), v.to_owned())), + .map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))), ); self.extend_words.extend( source .extend_words() - .map(|(k, v)| (k.to_owned(), v.to_owned())), + .map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))), ); } From b17f9c3a12f6aac4d93cdfdd8e76dea7a1acdf53 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 1 Mar 2021 12:27:07 -0600 Subject: [PATCH 3/9] feat: Const some fns --- src/config.rs | 4 ++-- src/dict.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/config.rs b/src/config.rs index bc7a7e6..2fcbc63 100644 --- a/src/config.rs +++ b/src/config.rs @@ -461,7 +461,7 @@ pub enum Locale { } impl Locale { - pub fn category(self) -> Option { + pub const fn category(self) -> Option { match self { Locale::En => None, Locale::EnUs => Some(typos_vars::Category::American), @@ -471,7 +471,7 @@ impl Locale { } } - pub fn variants() -> [&'static str; 5] { + pub const fn variants() -> [&'static str; 5] { ["en", "en-us", "en-gb", "en-ca", "en-au"] } } diff --git a/src/dict.rs b/src/dict.rs index ab602d6..a6413e4 100644 --- a/src/dict.rs +++ b/src/dict.rs @@ -12,7 +12,7 @@ pub struct BuiltIn { } impl BuiltIn { - pub fn new(locale: crate::config::Locale) -> Self { + pub const fn new(locale: crate::config::Locale) -> Self { Self { locale: locale.category(), } From 8bcacf3ca611b2f6250e2214c2a417dcf24ebfdf Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 12 Feb 2021 18:43:12 -0600 Subject: [PATCH 4/9] refactor(cli): Break out config->policy This is prep for having many policies floating around --- Cargo.lock | 8 +++ Cargo.toml | 2 + benches/checks.rs | 34 ++++------- src/file.rs | 148 +++++++++++++++------------------------------- src/lib.rs | 1 + src/main.rs | 48 ++++----------- src/policy.rs | 106 +++++++++++++++++++++++++++++++++ 7 files changed, 187 insertions(+), 160 deletions(-) create mode 100644 src/policy.rs diff --git a/Cargo.lock b/Cargo.lock index 16f0dcc..a8db752 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1474,6 +1474,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41" +[[package]] +name = "typed-arena" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0685c84d5d54d1c26f7d3eb96cd41550adb97baed141a761cf335d3d33bcd0ae" + [[package]] name = "typos" version = "0.3.0" @@ -1510,6 +1516,7 @@ dependencies = [ "itertools 0.10.0", "kstring", "log", + "once_cell", "phf", "predicates", "proc-exit", @@ -1517,6 +1524,7 @@ dependencies = [ "serde_json", "structopt", "toml", + "typed-arena", "typos", "typos-dict", "typos-vars", diff --git a/Cargo.toml b/Cargo.toml index 44e8895..01fd4da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ toml = "0.5" log = "0.4" env_logger = "0.8" bstr = "0.2" +once_cell = "1.2.0" ahash = "0.7" difflib = "0.4" proc-exit = "1.0" @@ -58,6 +59,7 @@ itertools = "0.10" serde_json = "1.0" encoding = "0.2" kstring = "1.0" +typed-arena = "2.0.1" [dev-dependencies] assert_fs = "1.0" diff --git a/benches/checks.rs b/benches/checks.rs index 9c6a540..77b4743 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -5,6 +5,12 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use typos_cli::file::FileChecker; fn bench_checks(c: &mut Criterion) { + let dictionary = typos_cli::dict::BuiltIn::new(Default::default()); + let tokenizer = typos::tokens::Tokenizer::new(); + let policy = typos_cli::policy::Policy::new() + .dictionary(&dictionary) + .tokenizer(&tokenizer); + let mut group = c.benchmark_group("checks"); for (name, sample) in data::DATA { let len = sample.len(); @@ -13,16 +19,11 @@ fn bench_checks(c: &mut Criterion) { let sample_path = temp.child("sample"); sample_path.write_str(sample).unwrap(); - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Tokenizer::new(); - let settings = typos_cli::file::CheckSettings::new(); b.iter(|| { typos_cli::file::FoundFiles.check_file( sample_path.path(), true, - &settings, - &parser, - &corrections, + &policy, &typos_cli::report::PrintSilent, ) }); @@ -34,16 +35,11 @@ fn bench_checks(c: &mut Criterion) { let sample_path = temp.child("sample"); sample_path.write_str(sample).unwrap(); - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Tokenizer::new(); - let settings = typos_cli::file::CheckSettings::new(); b.iter(|| { typos_cli::file::Identifiers.check_file( sample_path.path(), true, - &settings, - &parser, - &corrections, + &policy, &typos_cli::report::PrintSilent, ) }); @@ -55,16 +51,11 @@ fn bench_checks(c: &mut Criterion) { let sample_path = temp.child("sample"); sample_path.write_str(sample).unwrap(); - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Tokenizer::new(); - let settings = typos_cli::file::CheckSettings::new(); b.iter(|| { typos_cli::file::Words.check_file( sample_path.path(), true, - &settings, - &parser, - &corrections, + &policy, &typos_cli::report::PrintSilent, ) }); @@ -76,16 +67,11 @@ fn bench_checks(c: &mut Criterion) { let sample_path = temp.child("sample"); sample_path.write_str(sample).unwrap(); - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Tokenizer::new(); - let settings = typos_cli::file::CheckSettings::new(); b.iter(|| { typos_cli::file::Typos.check_file( sample_path.path(), true, - &settings, - &parser, - &corrections, + &policy, &typos_cli::report::PrintSilent, ) }); diff --git a/src/file.rs b/src/file.rs index 87dbbef..240763f 100644 --- a/src/file.rs +++ b/src/file.rs @@ -4,59 +4,17 @@ use std::io::Read; use std::io::Write; use crate::report; -use typos::tokens; -use typos::Dictionary; pub trait FileChecker: Send + Sync { fn check_file( &self, path: &std::path::Path, explicit: bool, - settings: &CheckSettings, - tokenizer: &tokens::Tokenizer, - dictionary: &dyn Dictionary, + policy: &crate::policy::Policy, reporter: &dyn report::Report, ) -> Result<(), std::io::Error>; } -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct CheckSettings { - check_filenames: bool, - check_files: bool, - binary: bool, -} - -impl CheckSettings { - pub fn new() -> Self { - Default::default() - } - - pub fn check_filenames(&mut self, yes: bool) -> &mut Self { - self.check_filenames = yes; - self - } - - pub fn check_files(&mut self, yes: bool) -> &mut Self { - self.check_files = yes; - self - } - - pub fn binary(&mut self, yes: bool) -> &mut Self { - self.binary = yes; - self - } -} - -impl Default for CheckSettings { - fn default() -> Self { - Self { - check_filenames: true, - check_files: true, - binary: false, - } - } -} - #[derive(Debug, Clone, Copy)] pub struct Typos; @@ -65,14 +23,12 @@ impl FileChecker for Typos { &self, path: &std::path::Path, explicit: bool, - settings: &CheckSettings, - tokenizer: &tokens::Tokenizer, - dictionary: &dyn Dictionary, + policy: &crate::policy::Policy, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - if settings.check_filenames { + if policy.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - for typo in typos::check_str(file_name, tokenizer, dictionary) { + for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) { let msg = report::Typo { context: Some(report::PathContext { path }.into()), buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()), @@ -85,14 +41,14 @@ impl FileChecker for Typos { } } - if settings.check_files { + if policy.check_files { let (buffer, content_type) = read_file(path, reporter)?; - if !explicit && !settings.binary && content_type.is_binary() { + if !explicit && !policy.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; } else { let mut accum_line_num = AccumulateLineNum::new(); - for typo in typos::check_bytes(&buffer, tokenizer, dictionary) { + for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) { let line_num = accum_line_num.line_num(&buffer, typo.byte_offset); let (line, line_offset) = extract_line(&buffer, typo.byte_offset); let msg = report::Typo { @@ -119,20 +75,18 @@ impl FileChecker for FixTypos { &self, path: &std::path::Path, explicit: bool, - settings: &CheckSettings, - tokenizer: &tokens::Tokenizer, - dictionary: &dyn Dictionary, + policy: &crate::policy::Policy, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - if settings.check_files { + if policy.check_files { let (buffer, content_type) = read_file(path, reporter)?; - if !explicit && !settings.binary && content_type.is_binary() { + if !explicit && !policy.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; } else { let mut fixes = Vec::new(); let mut accum_line_num = AccumulateLineNum::new(); - for typo in typos::check_bytes(&buffer, tokenizer, dictionary) { + for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { @@ -156,10 +110,10 @@ impl FileChecker for FixTypos { } // Ensure the above write can happen before renaming the file. - if settings.check_filenames { + if policy.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { let mut fixes = Vec::new(); - for typo in typos::check_str(file_name, tokenizer, dictionary) { + for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { @@ -196,22 +150,20 @@ impl FileChecker for DiffTypos { &self, path: &std::path::Path, explicit: bool, - settings: &CheckSettings, - tokenizer: &tokens::Tokenizer, - dictionary: &dyn Dictionary, + policy: &crate::policy::Policy, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { let mut content = Vec::new(); let mut new_content = Vec::new(); - if settings.check_files { + if policy.check_files { let (buffer, content_type) = read_file(path, reporter)?; - if !explicit && !settings.binary && content_type.is_binary() { + if !explicit && !policy.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; } else { let mut fixes = Vec::new(); let mut accum_line_num = AccumulateLineNum::new(); - for typo in typos::check_bytes(&buffer, tokenizer, dictionary) { + for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { @@ -236,10 +188,10 @@ impl FileChecker for DiffTypos { // Match FixTypos ordering for easy diffing. let mut new_path = None; - if settings.check_filenames { + if policy.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { let mut fixes = Vec::new(); - for typo in typos::check_str(file_name, tokenizer, dictionary) { + for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { @@ -300,14 +252,12 @@ impl FileChecker for Identifiers { &self, path: &std::path::Path, explicit: bool, - settings: &CheckSettings, - tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, + policy: &crate::policy::Policy, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - if settings.check_filenames { + if policy.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - for word in tokenizer.parse_str(file_name) { + for word in policy.tokenizer.parse_str(file_name) { let msg = report::Parse { context: Some(report::PathContext { path }.into()), kind: report::ParseKind::Identifier, @@ -318,13 +268,13 @@ impl FileChecker for Identifiers { } } - if settings.check_files { + if policy.check_files { let (buffer, content_type) = read_file(path, reporter)?; - if !explicit && !settings.binary && content_type.is_binary() { + if !explicit && !policy.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; } else { - for word in tokenizer.parse_bytes(&buffer) { + for word in policy.tokenizer.parse_bytes(&buffer) { // HACK: Don't look up the line_num per entry to better match the performance // of Typos for comparison purposes. We don't really get much out of it // anyway. @@ -351,14 +301,16 @@ impl FileChecker for Words { &self, path: &std::path::Path, explicit: bool, - settings: &CheckSettings, - tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, + policy: &crate::policy::Policy, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - if settings.check_filenames { + if policy.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - for word in tokenizer.parse_str(file_name).flat_map(|i| i.split()) { + for word in policy + .tokenizer + .parse_str(file_name) + .flat_map(|i| i.split()) + { let msg = report::Parse { context: Some(report::PathContext { path }.into()), kind: report::ParseKind::Word, @@ -369,13 +321,17 @@ impl FileChecker for Words { } } - if settings.check_files { + if policy.check_files { let (buffer, content_type) = read_file(path, reporter)?; - if !explicit && !settings.binary && content_type.is_binary() { + if !explicit && !policy.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; } else { - for word in tokenizer.parse_bytes(&buffer).flat_map(|i| i.split()) { + for word in policy + .tokenizer + .parse_bytes(&buffer) + .flat_map(|i| i.split()) + { // HACK: Don't look up the line_num per entry to better match the performance // of Typos for comparison purposes. We don't really get much out of it // anyway. @@ -402,13 +358,11 @@ impl FileChecker for FoundFiles { &self, path: &std::path::Path, explicit: bool, - settings: &CheckSettings, - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, + policy: &crate::policy::Policy, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - // Check `settings.binary` first so we can easily check performance of walking vs reading - if settings.binary { + // Check `policy.binary` first so we can easily check performance of walking vs reading + if policy.binary { let msg = report::File::new(path); reporter.report(msg.into())?; } else { @@ -598,13 +552,11 @@ fn fix_buffer(mut buffer: Vec, typos: impl Iterator Result<(), ignore::Error> { for entry in walk { - walk_entry(entry, checks, settings, tokenizer, dictionary, reporter)?; + walk_entry(entry, checks, policy, reporter)?; } Ok(()) } @@ -612,15 +564,13 @@ pub fn walk_path( pub fn walk_path_parallel( walk: ignore::WalkParallel, checks: &dyn FileChecker, - settings: &CheckSettings, - tokenizer: &typos::tokens::Tokenizer, - dictionary: &dyn typos::Dictionary, + policy: &crate::policy::Policy, reporter: &dyn report::Report, ) -> Result<(), ignore::Error> { let error: std::sync::Mutex> = std::sync::Mutex::new(Ok(())); walk.run(|| { Box::new(|entry: Result| { - match walk_entry(entry, checks, settings, tokenizer, dictionary, reporter) { + match walk_entry(entry, checks, policy, reporter) { Ok(()) => ignore::WalkState::Continue, Err(err) => { *error.lock().unwrap() = Err(err); @@ -636,9 +586,7 @@ pub fn walk_path_parallel( fn walk_entry( entry: Result, checks: &dyn FileChecker, - settings: &CheckSettings, - tokenizer: &typos::tokens::Tokenizer, - dictionary: &dyn typos::Dictionary, + policy: &crate::policy::Policy, reporter: &dyn report::Report, ) -> Result<(), ignore::Error> { let entry = match entry { @@ -655,7 +603,7 @@ fn walk_entry( } else { entry.path() }; - checks.check_file(path, explicit, settings, tokenizer, dictionary, reporter)?; + checks.check_file(path, explicit, policy, reporter)?; } Ok(()) diff --git a/src/lib.rs b/src/lib.rs index bc10a10..ab43959 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ pub mod config; pub mod dict; pub mod file; +pub mod policy; pub mod report; diff --git a/src/main.rs b/src/main.rs index cfd035e..3e8a605 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,7 +8,6 @@ use structopt::StructOpt; mod args; use typos_cli::config; -use typos_cli::dict; use typos_cli::report; use proc_exit::WithCodeResultExt; @@ -91,36 +90,22 @@ fn run_checks(args: &args::Args) -> proc_exit::ExitResult { }; let config = load_config(cwd, &args).with_code(proc_exit::Code::CONFIG_ERR)?; - let tokenizer = typos::tokens::TokenizerBuilder::new() - .ignore_hex(config.default.ignore_hex()) - .leading_digits(config.default.identifier_leading_digits()) - .leading_chars(config.default.identifier_leading_chars().to_owned()) - .include_digits(config.default.identifier_include_digits()) - .include_chars(config.default.identifier_include_chars().to_owned()) - .build(); - - let dictionary = crate::dict::BuiltIn::new(config.default.locale()); - let mut dictionary = crate::dict::Override::new(dictionary); - dictionary.identifiers(config.default.extend_identifiers()); - dictionary.words(config.default.extend_words()); - - let mut settings = typos_cli::file::CheckSettings::new(); - settings - .check_filenames(config.default.check_filename()) - .check_files(config.default.check_file()) - .binary(config.default.binary()); + let storage = typos_cli::policy::ConfigStorage::new(); + let engine = typos_cli::policy::ConfigEngine::new(config, &storage); + let files = engine.files(); + let policy = engine.policy(); let threads = if path.is_file() { 1 } else { args.threads }; let single_threaded = threads == 1; let mut walk = ignore::WalkBuilder::new(path); walk.threads(args.threads) - .hidden(config.files.ignore_hidden()) - .ignore(config.files.ignore_dot()) - .git_global(config.files.ignore_global()) - .git_ignore(config.files.ignore_vcs()) - .git_exclude(config.files.ignore_vcs()) - .parents(config.files.ignore_parent()); + .hidden(files.ignore_hidden()) + .ignore(files.ignore_dot()) + .git_global(files.ignore_global()) + .git_ignore(files.ignore_vcs()) + .git_exclude(files.ignore_vcs()) + .parents(files.ignore_parent()); // HACK: Diff doesn't handle mixing content let output_reporter = if args.diff { @@ -146,21 +131,12 @@ fn run_checks(args: &args::Args) -> proc_exit::ExitResult { }; if single_threaded { - typos_cli::file::walk_path( - walk.build(), - selected_checks, - &settings, - &tokenizer, - &dictionary, - reporter, - ) + typos_cli::file::walk_path(walk.build(), selected_checks, &policy, reporter) } else { typos_cli::file::walk_path_parallel( walk.build_parallel(), selected_checks, - &settings, - &tokenizer, - &dictionary, + &policy, reporter, ) } diff --git a/src/policy.rs b/src/policy.rs new file mode 100644 index 0000000..f5bc6ac --- /dev/null +++ b/src/policy.rs @@ -0,0 +1,106 @@ +pub struct ConfigStorage { + arena: typed_arena::Arena, +} + +impl ConfigStorage { + pub fn new() -> Self { + Self { + arena: typed_arena::Arena::new(), + } + } + + fn get<'s>(&'s self, other: &str) -> &'s str { + self.arena.alloc(kstring::KString::from_ref(other)) + } +} + +pub struct ConfigEngine<'s> { + files: crate::config::Walk, + check_filenames: bool, + check_files: bool, + binary: bool, + tokenizer: typos::tokens::Tokenizer, + dictionary: crate::dict::Override<'s, 's, crate::dict::BuiltIn>, +} + +impl<'s> ConfigEngine<'s> { + pub fn new(config: crate::config::Config, storage: &'s ConfigStorage) -> Self { + let crate::config::Config { files, default } = config; + + let tokenizer = typos::tokens::TokenizerBuilder::new() + .ignore_hex(default.ignore_hex()) + .leading_digits(default.identifier_leading_digits()) + .leading_chars(default.identifier_leading_chars().to_owned()) + .include_digits(default.identifier_include_digits()) + .include_chars(default.identifier_include_chars().to_owned()) + .build(); + + let dictionary = crate::dict::BuiltIn::new(default.locale()); + let mut dictionary = crate::dict::Override::new(dictionary); + dictionary.identifiers( + default + .extend_identifiers() + .map(|(k, v)| (storage.get(k), storage.get(v))), + ); + dictionary.words( + default + .extend_words() + .map(|(k, v)| (storage.get(k), storage.get(v))), + ); + + Self { + files, + check_filenames: default.check_filename(), + check_files: default.check_file(), + binary: default.binary(), + tokenizer, + dictionary, + } + } + + pub fn files(&self) -> &crate::config::Walk { + &self.files + } + + pub fn policy(&self) -> Policy<'_, '_> { + Policy { + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + tokenizer: &self.tokenizer, + dictionary: &self.dictionary, + } + } +} + +#[non_exhaustive] +#[derive(derive_setters::Setters)] +pub struct Policy<'t, 'd> { + pub check_filenames: bool, + pub check_files: bool, + pub binary: bool, + pub tokenizer: &'t typos::tokens::Tokenizer, + pub dictionary: &'d dyn typos::Dictionary, +} + +impl<'t, 'd> Policy<'t, 'd> { + pub fn new() -> Self { + Default::default() + } +} + +static DEFAULT_TOKENIZER: once_cell::sync::Lazy = + once_cell::sync::Lazy::new(typos::tokens::Tokenizer::new); +static DEFAULT_DICT: crate::dict::BuiltIn = crate::dict::BuiltIn::new(crate::config::Locale::En); + +impl<'t, 'd> Default for Policy<'t, 'd> { + fn default() -> Self { + Self { + check_filenames: true, + check_files: true, + binary: false, + tokenizer: &DEFAULT_TOKENIZER, + dictionary: &DEFAULT_DICT, + } + } +} From 4bbc59facf1b19eb1dd70eeba5783f20469eaaa6 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 1 Mar 2021 20:37:05 -0600 Subject: [PATCH 5/9] refactor(config)!: Detect when no dict config In preparing for smarter handling of config, we need to be able to tell what is present and what isn't. BREAKING CHANGE: `--hex` was removed, the value didn't seem high enough. --- benches/checks.rs | 4 +- docs/reference.md | 2 +- src/args.rs | 17 +--- src/config.rs | 240 +++++++++++++++++++++++++++++++--------------- src/file.rs | 12 +-- src/policy.rs | 47 +++++---- 6 files changed, 203 insertions(+), 119 deletions(-) diff --git a/benches/checks.rs b/benches/checks.rs index 77b4743..9b7f97d 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -5,10 +5,10 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use typos_cli::file::FileChecker; fn bench_checks(c: &mut Criterion) { - let dictionary = typos_cli::dict::BuiltIn::new(Default::default()); + let dict = typos_cli::dict::BuiltIn::new(Default::default()); let tokenizer = typos::tokens::Tokenizer::new(); let policy = typos_cli::policy::Policy::new() - .dictionary(&dictionary) + .dict(&dict) .tokenizer(&tokenizer); let mut group = c.benchmark_group("checks"); diff --git a/docs/reference.md b/docs/reference.md index 7720c4c..32039da 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -28,6 +28,6 @@ Configuration is read from the following (in precedence order) | default.identifier-include-digits | \- | bool | Allow identifiers to include digits, in addition to letters. | | default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. | | default.identifier-include-chars | \- | string | Allow identifiers to include these characters. | -| default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. | +| default.locale | --locale | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. | | default.extend-identifiers | \- | table of strings | Corrections for identifiers. When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. | | default.extend-words | \- | table of strings | Corrections for identifiers. When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. | diff --git a/src/args.rs b/src/args.rs index 53ae795..2c9c1db 100644 --- a/src/args.rs +++ b/src/args.rs @@ -122,12 +122,6 @@ pub(crate) struct FileArgs { #[structopt(long, overrides_with("no-check-files"), hidden(true))] check_files: bool, - #[structopt(long, overrides_with("hex"))] - /// Don't try to detect that an identifier looks like hex - no_hex: bool, - #[structopt(long, overrides_with("no-hex"), hidden(true))] - hex: bool, - #[structopt( long, possible_values(&config::Locale::variants()), @@ -163,15 +157,12 @@ impl config::FileSource for FileArgs { } } - fn ignore_hex(&self) -> Option { - match (self.hex, self.no_hex) { - (true, false) => Some(true), - (false, true) => Some(false), - (false, false) => None, - (_, _) => unreachable!("StructOpt should make this impossible"), - } + fn dict(&self) -> Option<&dyn config::DictSource> { + Some(self) } +} +impl config::DictSource for FileArgs { fn locale(&self) -> Option { self.locale } diff --git a/src/config.rs b/src/config.rs index 2fcbc63..1423ae9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -58,6 +58,16 @@ pub trait FileSource { None } + fn tokenizer(&self) -> Option<&dyn TokenizerSource> { + None + } + + fn dict(&self) -> Option<&dyn DictSource> { + None + } +} + +pub trait TokenizerSource { /// Do not check identifiers that appear to be hexadecimal values. fn ignore_hex(&self) -> Option { None @@ -82,7 +92,9 @@ pub trait FileSource { fn identifier_include_chars(&self) -> Option<&str> { None } +} +pub trait DictSource { fn locale(&self) -> Option { None } @@ -258,14 +270,10 @@ pub struct FileConfig { pub binary: Option, pub check_filename: Option, pub check_file: Option, - pub ignore_hex: Option, - pub identifier_leading_digits: Option, - pub identifier_leading_chars: Option, - pub identifier_include_digits: Option, - pub identifier_include_chars: Option, - pub locale: Option, - pub extend_identifiers: HashMap, - pub extend_words: HashMap, + #[serde(flatten)] + pub tokenizer: Option, + #[serde(flatten)] + pub dict: Option, } impl FileConfig { @@ -275,18 +283,12 @@ impl FileConfig { binary: Some(empty.binary()), check_filename: Some(empty.check_filename()), check_file: Some(empty.check_file()), - ignore_hex: Some(empty.ignore_hex()), - identifier_leading_digits: Some(empty.identifier_leading_digits()), - identifier_leading_chars: Some(kstring::KString::from_ref( - empty.identifier_leading_chars(), - )), - identifier_include_digits: Some(empty.identifier_include_digits()), - identifier_include_chars: Some(kstring::KString::from_ref( - empty.identifier_include_chars(), - )), - locale: Some(empty.locale()), - extend_identifiers: Default::default(), - extend_words: Default::default(), + tokenizer: Some( + empty + .tokenizer + .unwrap_or_else(|| TokenizerConfig::from_defaults()), + ), + dict: Some(empty.dict.unwrap_or_else(|| DictConfig::from_defaults())), } } @@ -300,6 +302,87 @@ impl FileConfig { if let Some(source) = source.check_file() { self.check_file = Some(source); } + if let Some(source) = source.tokenizer() { + let mut tokenizer = None; + std::mem::swap(&mut tokenizer, &mut self.tokenizer); + let mut tokenizer = tokenizer.unwrap_or_default(); + tokenizer.update(source); + let mut tokenizer = Some(tokenizer); + std::mem::swap(&mut tokenizer, &mut self.tokenizer); + } + if let Some(source) = source.dict() { + let mut dict = None; + std::mem::swap(&mut dict, &mut self.dict); + let mut dict = dict.unwrap_or_default(); + dict.update(source); + let mut dict = Some(dict); + std::mem::swap(&mut dict, &mut self.dict); + } + } + + pub fn binary(&self) -> bool { + self.binary.unwrap_or(false) + } + + pub fn check_filename(&self) -> bool { + self.check_filename.unwrap_or(true) + } + + pub fn check_file(&self) -> bool { + self.check_file.unwrap_or(true) + } +} + +impl FileSource for FileConfig { + fn binary(&self) -> Option { + self.binary + } + + fn check_filename(&self) -> Option { + self.check_filename + } + + fn check_file(&self) -> Option { + self.check_file + } + + fn tokenizer(&self) -> Option<&dyn TokenizerSource> { + self.tokenizer.as_ref().map(|t| t as &dyn TokenizerSource) + } + + fn dict(&self) -> Option<&dyn DictSource> { + self.dict.as_ref().map(|d| d as &dyn DictSource) + } +} + +#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] +#[serde(deny_unknown_fields, default)] +#[serde(rename_all = "kebab-case")] +pub struct TokenizerConfig { + pub ignore_hex: Option, + pub identifier_leading_digits: Option, + pub identifier_leading_chars: Option, + pub identifier_include_digits: Option, + pub identifier_include_chars: Option, +} + +impl TokenizerConfig { + pub fn from_defaults() -> Self { + let empty = Self::default(); + Self { + ignore_hex: Some(empty.ignore_hex()), + identifier_leading_digits: Some(empty.identifier_leading_digits()), + identifier_leading_chars: Some(kstring::KString::from_ref( + empty.identifier_leading_chars(), + )), + identifier_include_digits: Some(empty.identifier_include_digits()), + identifier_include_chars: Some(kstring::KString::from_ref( + empty.identifier_include_chars(), + )), + } + } + + pub fn update(&mut self, source: &dyn TokenizerSource) { if let Some(source) = source.ignore_hex() { self.ignore_hex = Some(source); } @@ -315,31 +398,6 @@ impl FileConfig { if let Some(source) = source.identifier_include_chars() { self.identifier_include_chars = Some(kstring::KString::from_ref(source)); } - if let Some(source) = source.locale() { - self.locale = Some(source); - } - self.extend_identifiers.extend( - source - .extend_identifiers() - .map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))), - ); - self.extend_words.extend( - source - .extend_words() - .map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))), - ); - } - - pub fn binary(&self) -> bool { - self.binary.unwrap_or(false) - } - - pub fn check_filename(&self) -> bool { - self.check_filename.unwrap_or(true) - } - - pub fn check_file(&self) -> bool { - self.check_file.unwrap_or(true) } pub fn ignore_hex(&self) -> bool { @@ -361,6 +419,64 @@ impl FileConfig { pub fn identifier_include_chars(&self) -> &str { self.identifier_include_chars.as_deref().unwrap_or("_'") } +} + +impl TokenizerSource for TokenizerConfig { + fn ignore_hex(&self) -> Option { + self.ignore_hex + } + + fn identifier_leading_digits(&self) -> Option { + self.identifier_leading_digits + } + + fn identifier_leading_chars(&self) -> Option<&str> { + self.identifier_leading_chars.as_deref() + } + + fn identifier_include_digits(&self) -> Option { + self.identifier_include_digits + } + + fn identifier_include_chars(&self) -> Option<&str> { + self.identifier_include_chars.as_deref() + } +} + +#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] +#[serde(deny_unknown_fields, default)] +#[serde(rename_all = "kebab-case")] +pub struct DictConfig { + pub locale: Option, + pub extend_identifiers: HashMap, + pub extend_words: HashMap, +} + +impl DictConfig { + pub fn from_defaults() -> Self { + let empty = Self::default(); + Self { + locale: Some(empty.locale()), + extend_identifiers: Default::default(), + extend_words: Default::default(), + } + } + + pub fn update(&mut self, source: &dyn DictSource) { + if let Some(source) = source.locale() { + self.locale = Some(source); + } + self.extend_identifiers.extend( + source + .extend_identifiers() + .map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))), + ); + self.extend_words.extend( + source + .extend_words() + .map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))), + ); + } pub fn locale(&self) -> Locale { self.locale.unwrap_or_default() @@ -383,39 +499,7 @@ impl FileConfig { } } -impl FileSource for FileConfig { - fn binary(&self) -> Option { - self.binary - } - - fn check_filename(&self) -> Option { - self.check_filename - } - - fn check_file(&self) -> Option { - self.check_file - } - - fn ignore_hex(&self) -> Option { - self.ignore_hex - } - - fn identifier_leading_digits(&self) -> Option { - self.identifier_leading_digits - } - - fn identifier_leading_chars(&self) -> Option<&str> { - self.identifier_leading_chars.as_deref() - } - - fn identifier_include_digits(&self) -> Option { - self.identifier_include_digits - } - - fn identifier_include_chars(&self) -> Option<&str> { - self.identifier_include_chars.as_deref() - } - +impl DictSource for DictConfig { fn locale(&self) -> Option { self.locale } diff --git a/src/file.rs b/src/file.rs index 240763f..bc2015e 100644 --- a/src/file.rs +++ b/src/file.rs @@ -28,7 +28,7 @@ impl FileChecker for Typos { ) -> Result<(), std::io::Error> { if policy.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) { + for typo in typos::check_str(file_name, policy.tokenizer, policy.dict) { let msg = report::Typo { context: Some(report::PathContext { path }.into()), buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()), @@ -48,7 +48,7 @@ impl FileChecker for Typos { reporter.report(msg.into())?; } else { let mut accum_line_num = AccumulateLineNum::new(); - for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) { + for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) { let line_num = accum_line_num.line_num(&buffer, typo.byte_offset); let (line, line_offset) = extract_line(&buffer, typo.byte_offset); let msg = report::Typo { @@ -86,7 +86,7 @@ impl FileChecker for FixTypos { } else { let mut fixes = Vec::new(); let mut accum_line_num = AccumulateLineNum::new(); - for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) { + for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { @@ -113,7 +113,7 @@ impl FileChecker for FixTypos { if policy.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { let mut fixes = Vec::new(); - for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) { + for typo in typos::check_str(file_name, policy.tokenizer, policy.dict) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { @@ -163,7 +163,7 @@ impl FileChecker for DiffTypos { } else { let mut fixes = Vec::new(); let mut accum_line_num = AccumulateLineNum::new(); - for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) { + for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { @@ -191,7 +191,7 @@ impl FileChecker for DiffTypos { if policy.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { let mut fixes = Vec::new(); - for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) { + for typo in typos::check_str(file_name, policy.tokenizer, policy.dict) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { diff --git a/src/policy.rs b/src/policy.rs index f5bc6ac..fb032f9 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -20,41 +20,50 @@ pub struct ConfigEngine<'s> { check_files: bool, binary: bool, tokenizer: typos::tokens::Tokenizer, - dictionary: crate::dict::Override<'s, 's, crate::dict::BuiltIn>, + dict: crate::dict::Override<'s, 's, crate::dict::BuiltIn>, } impl<'s> ConfigEngine<'s> { pub fn new(config: crate::config::Config, storage: &'s ConfigStorage) -> Self { let crate::config::Config { files, default } = config; + let binary = default.binary(); + let check_filename = default.check_filename(); + let check_file = default.check_file(); + let crate::config::EngineConfig { + tokenizer, dict, .. + } = default; + let tokenizer_config = + tokenizer.unwrap_or_else(|| crate::config::TokenizerConfig::from_defaults()); + let dict_config = dict.unwrap_or_else(|| crate::config::DictConfig::from_defaults()); let tokenizer = typos::tokens::TokenizerBuilder::new() - .ignore_hex(default.ignore_hex()) - .leading_digits(default.identifier_leading_digits()) - .leading_chars(default.identifier_leading_chars().to_owned()) - .include_digits(default.identifier_include_digits()) - .include_chars(default.identifier_include_chars().to_owned()) + .ignore_hex(tokenizer_config.ignore_hex()) + .leading_digits(tokenizer_config.identifier_leading_digits()) + .leading_chars(tokenizer_config.identifier_leading_chars().to_owned()) + .include_digits(tokenizer_config.identifier_include_digits()) + .include_chars(tokenizer_config.identifier_include_chars().to_owned()) .build(); - let dictionary = crate::dict::BuiltIn::new(default.locale()); - let mut dictionary = crate::dict::Override::new(dictionary); - dictionary.identifiers( - default + let dict = crate::dict::BuiltIn::new(dict_config.locale()); + let mut dict = crate::dict::Override::new(dict); + dict.identifiers( + dict_config .extend_identifiers() .map(|(k, v)| (storage.get(k), storage.get(v))), ); - dictionary.words( - default + dict.words( + dict_config .extend_words() .map(|(k, v)| (storage.get(k), storage.get(v))), ); Self { files, - check_filenames: default.check_filename(), - check_files: default.check_file(), - binary: default.binary(), + check_filenames: check_filename, + check_files: check_file, + binary: binary, tokenizer, - dictionary, + dict, } } @@ -68,7 +77,7 @@ impl<'s> ConfigEngine<'s> { check_files: self.check_files, binary: self.binary, tokenizer: &self.tokenizer, - dictionary: &self.dictionary, + dict: &self.dict, } } } @@ -80,7 +89,7 @@ pub struct Policy<'t, 'd> { pub check_files: bool, pub binary: bool, pub tokenizer: &'t typos::tokens::Tokenizer, - pub dictionary: &'d dyn typos::Dictionary, + pub dict: &'d dyn typos::Dictionary, } impl<'t, 'd> Policy<'t, 'd> { @@ -100,7 +109,7 @@ impl<'t, 'd> Default for Policy<'t, 'd> { check_files: true, binary: false, tokenizer: &DEFAULT_TOKENIZER, - dictionary: &DEFAULT_DICT, + dict: &DEFAULT_DICT, } } } From f402d3ee773a400f15818c6b3a07586ecb4288c4 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 1 Mar 2021 20:40:21 -0600 Subject: [PATCH 6/9] refactor(config): Clarify config is not file-specific This is prep for the config being reused in other contexts, like commit messages. --- src/args.rs | 2 +- src/config.rs | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/args.rs b/src/args.rs index 2c9c1db..38d6b1d 100644 --- a/src/args.rs +++ b/src/args.rs @@ -129,7 +129,7 @@ pub(crate) struct FileArgs { pub(crate) locale: Option, } -impl config::FileSource for FileArgs { +impl config::EngineSource for FileArgs { fn binary(&self) -> Option { match (self.binary, self.no_binary) { (true, false) => Some(true), diff --git a/src/config.rs b/src/config.rs index 1423ae9..fa10e79 100644 --- a/src/config.rs +++ b/src/config.rs @@ -5,7 +5,7 @@ pub trait ConfigSource { None } - fn default(&self) -> Option<&dyn FileSource> { + fn default(&self) -> Option<&dyn EngineSource> { None } } @@ -42,7 +42,7 @@ pub trait WalkSource { } } -pub trait FileSource { +pub trait EngineSource { /// Check binary files. fn binary(&self) -> Option { None @@ -113,7 +113,7 @@ pub trait DictSource { #[serde(rename_all = "kebab-case")] pub struct Config { pub files: Walk, - pub default: FileConfig, + pub default: EngineConfig, } impl Config { @@ -130,7 +130,7 @@ impl Config { pub fn from_defaults() -> Self { Self { files: Walk::from_defaults(), - default: FileConfig::from_defaults(), + default: EngineConfig::from_defaults(), } } @@ -157,7 +157,7 @@ impl ConfigSource for Config { Some(&self.files) } - fn default(&self) -> Option<&dyn FileSource> { + fn default(&self) -> Option<&dyn EngineSource> { Some(&self.default) } } @@ -266,7 +266,7 @@ impl WalkSource for Walk { #[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] #[serde(deny_unknown_fields, default)] #[serde(rename_all = "kebab-case")] -pub struct FileConfig { +pub struct EngineConfig { pub binary: Option, pub check_filename: Option, pub check_file: Option, @@ -276,10 +276,10 @@ pub struct FileConfig { pub dict: Option, } -impl FileConfig { +impl EngineConfig { pub fn from_defaults() -> Self { let empty = Self::default(); - FileConfig { + EngineConfig { binary: Some(empty.binary()), check_filename: Some(empty.check_filename()), check_file: Some(empty.check_file()), @@ -292,7 +292,7 @@ impl FileConfig { } } - pub fn update(&mut self, source: &dyn FileSource) { + pub fn update(&mut self, source: &dyn EngineSource) { if let Some(source) = source.binary() { self.binary = Some(source); } @@ -333,7 +333,7 @@ impl FileConfig { } } -impl FileSource for FileConfig { +impl EngineSource for EngineConfig { fn binary(&self) -> Option { self.binary } From a76ddd42cef75dbd72773302027feb54b4306b1e Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 29 Mar 2021 13:39:48 -0500 Subject: [PATCH 7/9] refactor(cli): Pull out policy creation --- src/args.rs | 2 +- src/config.rs | 31 +++++---- src/file.rs | 13 ++-- src/main.rs | 52 +++++++------- src/policy.rs | 188 ++++++++++++++++++++++++++++++++++++++++++++------ 5 files changed, 217 insertions(+), 69 deletions(-) diff --git a/src/args.rs b/src/args.rs index 38d6b1d..16e2db7 100644 --- a/src/args.rs +++ b/src/args.rs @@ -101,7 +101,7 @@ pub(crate) struct Args { pub(crate) verbose: clap_verbosity_flag::Verbosity, } -#[derive(Debug, StructOpt)] +#[derive(Debug, Clone, StructOpt)] #[structopt(rename_all = "kebab-case")] pub(crate) struct FileArgs { #[structopt(long, overrides_with("no-binary"))] diff --git a/src/config.rs b/src/config.rs index fa10e79..6f9ca10 100644 --- a/src/config.rs +++ b/src/config.rs @@ -117,6 +117,17 @@ pub struct Config { } impl Config { + pub fn from_dir(cwd: &std::path::Path) -> Result, anyhow::Error> { + let config = if let Some(path) = + find_project_file(cwd, &["typos.toml", "_typos.toml", ".typos.toml"]) + { + Some(Self::from_file(&path)?) + } else { + None + }; + Ok(config) + } + pub fn from_file(path: &std::path::Path) -> Result { let s = std::fs::read_to_string(path)?; Self::from_toml(&s) @@ -134,14 +145,6 @@ impl Config { } } - pub fn derive(cwd: &std::path::Path) -> Result { - if let Some(path) = find_project_file(cwd, &["typos.toml", "_typos.toml", ".typos.toml"]) { - Self::from_file(&path) - } else { - Ok(Default::default()) - } - } - pub fn update(&mut self, source: &dyn ConfigSource) { if let Some(walk) = source.walk() { self.files.update(walk); @@ -522,13 +525,11 @@ impl DictSource for DictConfig { } fn find_project_file(dir: &std::path::Path, names: &[&str]) -> Option { - for ancestor in dir.ancestors() { - let mut file_path = ancestor.join("placeholder"); - for name in names { - file_path.set_file_name(name); - if file_path.exists() { - return Some(file_path); - } + let mut file_path = dir.join("placeholder"); + for name in names { + file_path.set_file_name(name); + if file_path.exists() { + return Some(file_path); } } None diff --git a/src/file.rs b/src/file.rs index bc2015e..549109c 100644 --- a/src/file.rs +++ b/src/file.rs @@ -552,11 +552,11 @@ fn fix_buffer(mut buffer: Vec, typos: impl Iterator Result<(), ignore::Error> { for entry in walk { - walk_entry(entry, checks, policy, reporter)?; + walk_entry(entry, checks, engine, reporter)?; } Ok(()) } @@ -564,13 +564,13 @@ pub fn walk_path( pub fn walk_path_parallel( walk: ignore::WalkParallel, checks: &dyn FileChecker, - policy: &crate::policy::Policy, + engine: &crate::policy::ConfigEngine, reporter: &dyn report::Report, ) -> Result<(), ignore::Error> { let error: std::sync::Mutex> = std::sync::Mutex::new(Ok(())); walk.run(|| { Box::new(|entry: Result| { - match walk_entry(entry, checks, policy, reporter) { + match walk_entry(entry, checks, engine, reporter) { Ok(()) => ignore::WalkState::Continue, Err(err) => { *error.lock().unwrap() = Err(err); @@ -586,7 +586,7 @@ pub fn walk_path_parallel( fn walk_entry( entry: Result, checks: &dyn FileChecker, - policy: &crate::policy::Policy, + engine: &crate::policy::ConfigEngine, reporter: &dyn report::Report, ) -> Result<(), ignore::Error> { let entry = match entry { @@ -603,7 +603,8 @@ fn walk_entry( } else { entry.path() }; - checks.check_file(path, explicit, policy, reporter)?; + let policy = engine.policy(path); + checks.check_file(path, explicit, &policy, reporter)?; } Ok(()) diff --git a/src/main.rs b/src/main.rs index 3e8a605..183d20d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -57,7 +57,19 @@ fn run_dump_config(args: &args::Args, output_path: &std::path::Path) -> proc_exi path.as_path() }; - let config = load_config(cwd, &args).with_code(proc_exit::Code::CONFIG_ERR)?; + let storage = typos_cli::policy::ConfigStorage::new(); + let mut overrides = config::EngineConfig::default(); + overrides.update(&args.overrides); + let mut engine = typos_cli::policy::ConfigEngine::new(&storage); + engine.set_isolated(args.isolated).set_overrides(overrides); + if let Some(path) = args.custom_config.as_ref() { + let custom = config::Config::from_file(path).with_code(proc_exit::Code::CONFIG_ERR)?; + engine.set_custom_config(custom); + } + let config = engine + .load_config(cwd) + .with_code(proc_exit::Code::CONFIG_ERR)?; + let mut defaulted_config = config::Config::from_defaults(); defaulted_config.update(&config); let output = toml::to_string_pretty(&defaulted_config).with_code(proc_exit::Code::FAILURE)?; @@ -88,12 +100,21 @@ fn run_checks(args: &args::Args) -> proc_exit::ExitResult { } else { path.as_path() }; - let config = load_config(cwd, &args).with_code(proc_exit::Code::CONFIG_ERR)?; let storage = typos_cli::policy::ConfigStorage::new(); - let engine = typos_cli::policy::ConfigEngine::new(config, &storage); - let files = engine.files(); - let policy = engine.policy(); + let mut overrides = config::EngineConfig::default(); + overrides.update(&args.overrides); + let mut engine = typos_cli::policy::ConfigEngine::new(&storage); + engine.set_isolated(args.isolated).set_overrides(overrides); + if let Some(path) = args.custom_config.as_ref() { + let custom = config::Config::from_file(path).with_code(proc_exit::Code::CONFIG_ERR)?; + engine.set_custom_config(custom); + } + + engine + .init_dir(cwd) + .with_code(proc_exit::Code::CONFIG_ERR)?; + let files = engine.files(cwd); let threads = if path.is_file() { 1 } else { args.threads }; let single_threaded = threads == 1; @@ -131,12 +152,12 @@ fn run_checks(args: &args::Args) -> proc_exit::ExitResult { }; if single_threaded { - typos_cli::file::walk_path(walk.build(), selected_checks, &policy, reporter) + typos_cli::file::walk_path(walk.build(), selected_checks, &engine, reporter) } else { typos_cli::file::walk_path_parallel( walk.build_parallel(), selected_checks, - &policy, + &engine, reporter, ) } @@ -189,20 +210,3 @@ fn init_logging(level: Option) { builder.init(); } } - -fn load_config(cwd: &std::path::Path, args: &args::Args) -> Result { - let mut config = config::Config::default(); - - if !args.isolated { - let derived = config::Config::derive(cwd)?; - config.update(&derived); - } - if let Some(path) = args.custom_config.as_ref() { - config.update(&config::Config::from_file(path)?); - } - - config.update(&args.config); - config.default.update(&args.overrides); - - Ok(config) -} diff --git a/src/policy.rs b/src/policy.rs index fb032f9..f5be135 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -1,30 +1,142 @@ pub struct ConfigStorage { - arena: typed_arena::Arena, + arena: std::sync::Mutex>, } impl ConfigStorage { pub fn new() -> Self { Self { - arena: typed_arena::Arena::new(), + arena: std::sync::Mutex::new(typed_arena::Arena::new()), } } fn get<'s>(&'s self, other: &str) -> &'s str { - self.arena.alloc(kstring::KString::from_ref(other)) + // Safe because we the references are stable once created. + // + // Trying to get this handled inside of `typed_arena` directly, see + // https://github.com/SimonSapin/rust-typed-arena/issues/49#issuecomment-809517312 + unsafe { + std::mem::transmute::<&str, &str>( + self.arena + .lock() + .unwrap() + .alloc(kstring::KString::from_ref(other)), + ) + } } } pub struct ConfigEngine<'s> { - files: crate::config::Walk, - check_filenames: bool, - check_files: bool, - binary: bool, - tokenizer: typos::tokens::Tokenizer, - dict: crate::dict::Override<'s, 's, crate::dict::BuiltIn>, + storage: &'s ConfigStorage, + + overrides: Option, + custom: Option, + isolated: bool, + + configs: std::collections::HashMap, + files: Intern, + tokenizer: Intern, + dict: Intern>, } impl<'s> ConfigEngine<'s> { - pub fn new(config: crate::config::Config, storage: &'s ConfigStorage) -> Self { + pub fn new(storage: &'s ConfigStorage) -> Self { + Self { + storage, + overrides: Default::default(), + custom: Default::default(), + configs: Default::default(), + isolated: false, + files: Default::default(), + tokenizer: Default::default(), + dict: Default::default(), + } + } + + pub fn set_overrides(&mut self, overrides: crate::config::EngineConfig) -> &mut Self { + self.overrides = Some(overrides); + self + } + + pub fn set_custom_config(&mut self, custom: crate::config::Config) -> &mut Self { + self.custom = Some(custom); + self + } + + pub fn set_isolated(&mut self, isolated: bool) -> &mut Self { + self.isolated = isolated; + self + } + + pub fn files(&mut self, cwd: &std::path::Path) -> &crate::config::Walk { + let dir = self + .configs + .get(cwd) + .expect("`init_dir` must be called first"); + self.get_files(dir) + } + + pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_> { + let dir = self + .get_dir(path) + .expect("`files()` should be called first"); + Policy { + check_filenames: dir.check_filenames, + check_files: dir.check_files, + binary: dir.binary, + tokenizer: self.get_tokenizer(dir), + dict: self.get_dict(dir), + } + } + + fn get_files(&self, dir: &DirConfig) -> &crate::config::Walk { + self.files.get(dir.files) + } + + fn get_tokenizer(&self, dir: &DirConfig) -> &typos::tokens::Tokenizer { + self.tokenizer.get(dir.tokenizer) + } + + fn get_dict(&self, dir: &DirConfig) -> &dyn typos::Dictionary { + self.dict.get(dir.dict) + } + + fn get_dir(&self, path: &std::path::Path) -> Option<&DirConfig> { + for path in path.ancestors() { + if let Some(dir) = self.configs.get(path) { + return Some(dir); + } + } + None + } + + pub fn load_config( + &self, + cwd: &std::path::Path, + ) -> Result { + let mut config = crate::config::Config::default(); + + if !self.isolated { + if let Some(derived) = crate::config::Config::from_dir(cwd)? { + config.update(&derived); + } + } + if let Some(custom) = self.custom.as_ref() { + config.update(custom); + } + if let Some(overrides) = self.overrides.as_ref() { + config.default.update(overrides); + } + + Ok(config) + } + + pub fn init_dir(&mut self, cwd: &std::path::Path) -> Result<(), anyhow::Error> { + if self.configs.contains_key(cwd) { + return Ok(()); + } + + let config = self.load_config(cwd)?; + let crate::config::Config { files, default } = config; let binary = default.binary(); let check_filename = default.check_filename(); @@ -49,39 +161,69 @@ impl<'s> ConfigEngine<'s> { dict.identifiers( dict_config .extend_identifiers() - .map(|(k, v)| (storage.get(k), storage.get(v))), + .map(|(k, v)| (self.storage.get(k), self.storage.get(v))), ); dict.words( dict_config .extend_words() - .map(|(k, v)| (storage.get(k), storage.get(v))), + .map(|(k, v)| (self.storage.get(k), self.storage.get(v))), ); - Self { + let dict = self.dict.intern(dict); + let files = self.files.intern(files); + let tokenizer = self.tokenizer.intern(tokenizer); + + let dir = DirConfig { files, check_filenames: check_filename, check_files: check_file, binary: binary, tokenizer, dict, + }; + + self.configs.insert(cwd.to_owned(), dir); + Ok(()) + } +} + +struct Intern { + data: Vec, +} + +impl Intern { + pub fn new() -> Self { + Self { + data: Default::default(), } } - pub fn files(&self) -> &crate::config::Walk { - &self.files + pub fn intern(&mut self, value: T) -> usize { + let symbol = self.data.len(); + self.data.push(value); + symbol } - pub fn policy(&self) -> Policy<'_, '_> { - Policy { - check_filenames: self.check_filenames, - check_files: self.check_files, - binary: self.binary, - tokenizer: &self.tokenizer, - dict: &self.dict, - } + pub fn get(&self, symbol: usize) -> &T { + &self.data[symbol] } } +impl Default for Intern { + fn default() -> Self { + Self::new() + } +} + +struct DirConfig { + files: usize, + tokenizer: usize, + dict: usize, + check_filenames: bool, + check_files: bool, + binary: bool, +} + #[non_exhaustive] #[derive(derive_setters::Setters)] pub struct Policy<'t, 'd> { From 8365351dba3e72460a9a5252fd6304d058943166 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 29 Mar 2021 13:40:46 -0500 Subject: [PATCH 8/9] perf(cli): Reuse configs across runs --- src/main.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main.rs b/src/main.rs index 183d20d..5cddf7c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -85,6 +85,16 @@ fn run_dump_config(args: &args::Args, output_path: &std::path::Path) -> proc_exi fn run_checks(args: &args::Args) -> proc_exit::ExitResult { let global_cwd = std::env::current_dir()?; + let storage = typos_cli::policy::ConfigStorage::new(); + let mut overrides = config::EngineConfig::default(); + overrides.update(&args.overrides); + let mut engine = typos_cli::policy::ConfigEngine::new(&storage); + engine.set_isolated(args.isolated).set_overrides(overrides); + if let Some(path) = args.custom_config.as_ref() { + let custom = config::Config::from_file(path).with_code(proc_exit::Code::CONFIG_ERR)?; + engine.set_custom_config(custom); + } + let mut typos_found = false; let mut errors_found = false; for path in args.path.iter() { @@ -101,16 +111,6 @@ fn run_checks(args: &args::Args) -> proc_exit::ExitResult { path.as_path() }; - let storage = typos_cli::policy::ConfigStorage::new(); - let mut overrides = config::EngineConfig::default(); - overrides.update(&args.overrides); - let mut engine = typos_cli::policy::ConfigEngine::new(&storage); - engine.set_isolated(args.isolated).set_overrides(overrides); - if let Some(path) = args.custom_config.as_ref() { - let custom = config::Config::from_file(path).with_code(proc_exit::Code::CONFIG_ERR)?; - engine.set_custom_config(custom); - } - engine .init_dir(cwd) .with_code(proc_exit::Code::CONFIG_ERR)?; From d51725b2a48e14fc8c63d5da5df4711f4450a3ab Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 29 Mar 2021 20:28:01 -0500 Subject: [PATCH 9/9] style: Address clippy --- src/config.rs | 4 ++-- src/policy.rs | 15 +++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/config.rs b/src/config.rs index 6f9ca10..97cb9d5 100644 --- a/src/config.rs +++ b/src/config.rs @@ -289,9 +289,9 @@ impl EngineConfig { tokenizer: Some( empty .tokenizer - .unwrap_or_else(|| TokenizerConfig::from_defaults()), + .unwrap_or_else(TokenizerConfig::from_defaults), ), - dict: Some(empty.dict.unwrap_or_else(|| DictConfig::from_defaults())), + dict: Some(empty.dict.unwrap_or_else(DictConfig::from_defaults)), } } diff --git a/src/policy.rs b/src/policy.rs index f5be135..92c8aa6 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -19,12 +19,19 @@ impl ConfigStorage { self.arena .lock() .unwrap() - .alloc(kstring::KString::from_ref(other)), + .alloc(kstring::KString::from_ref(other)) + .as_str(), ) } } } +impl Default for ConfigStorage { + fn default() -> Self { + Self::new() + } +} + pub struct ConfigEngine<'s> { storage: &'s ConfigStorage, @@ -145,8 +152,8 @@ impl<'s> ConfigEngine<'s> { tokenizer, dict, .. } = default; let tokenizer_config = - tokenizer.unwrap_or_else(|| crate::config::TokenizerConfig::from_defaults()); - let dict_config = dict.unwrap_or_else(|| crate::config::DictConfig::from_defaults()); + tokenizer.unwrap_or_else(crate::config::TokenizerConfig::from_defaults); + let dict_config = dict.unwrap_or_else(crate::config::DictConfig::from_defaults); let tokenizer = typos::tokens::TokenizerBuilder::new() .ignore_hex(tokenizer_config.ignore_hex()) @@ -177,7 +184,7 @@ impl<'s> ConfigEngine<'s> { files, check_filenames: check_filename, check_files: check_file, - binary: binary, + binary, tokenizer, dict, };