diff --git a/Cargo.lock b/Cargo.lock
index 16f0dcc..a8db752 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1474,6 +1474,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
 
+[[package]]
+name = "typed-arena"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0685c84d5d54d1c26f7d3eb96cd41550adb97baed141a761cf335d3d33bcd0ae"
+
 [[package]]
 name = "typos"
 version = "0.3.0"
@@ -1510,6 +1516,7 @@ dependencies = [
  "itertools 0.10.0",
  "kstring",
  "log",
+ "once_cell",
  "phf",
  "predicates",
  "proc-exit",
@@ -1517,6 +1524,7 @@ dependencies = [
  "serde_json",
  "structopt",
  "toml",
+ "typed-arena",
  "typos",
  "typos-dict",
  "typos-vars",
diff --git a/Cargo.toml b/Cargo.toml
index 44e8895..01fd4da 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -46,6 +46,7 @@ toml = "0.5"
 log = "0.4"
 env_logger = "0.8"
 bstr = "0.2"
+once_cell = "1.2.0"
 ahash = "0.7"
 difflib = "0.4"
 proc-exit = "1.0"
@@ -58,6 +59,7 @@ itertools = "0.10"
 serde_json = "1.0"
 encoding = "0.2"
 kstring = "1.0"
+typed-arena = "2.0.1"
 
 [dev-dependencies]
 assert_fs = "1.0"
diff --git a/benches/checks.rs b/benches/checks.rs
index 9c6a540..77b4743 100644
--- a/benches/checks.rs
+++ b/benches/checks.rs
@@ -5,6 +5,12 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
 use typos_cli::file::FileChecker;
 
 fn bench_checks(c: &mut Criterion) {
+    let dictionary = typos_cli::dict::BuiltIn::new(Default::default());
+    let tokenizer = typos::tokens::Tokenizer::new();
+    let policy = typos_cli::policy::Policy::new()
+        .dictionary(&dictionary)
+        .tokenizer(&tokenizer);
+
     let mut group = c.benchmark_group("checks");
     for (name, sample) in data::DATA {
         let len = sample.len();
@@ -13,16 +19,11 @@ fn bench_checks(c: &mut Criterion) {
             let sample_path = temp.child("sample");
             sample_path.write_str(sample).unwrap();
 
-            let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-            let parser = typos::tokens::Tokenizer::new();
-            let settings = typos_cli::file::CheckSettings::new();
             b.iter(|| {
                 typos_cli::file::FoundFiles.check_file(
                     sample_path.path(),
                     true,
-                    &settings,
-                    &parser,
-                    &corrections,
+                    &policy,
                     &typos_cli::report::PrintSilent,
                 )
             });
@@ -34,16 +35,11 @@ fn bench_checks(c: &mut Criterion) {
             let sample_path = temp.child("sample");
             sample_path.write_str(sample).unwrap();
 
-            let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-            let parser = typos::tokens::Tokenizer::new();
-            let settings = typos_cli::file::CheckSettings::new();
             b.iter(|| {
                 typos_cli::file::Identifiers.check_file(
                     sample_path.path(),
                     true,
-                    &settings,
-                    &parser,
-                    &corrections,
+                    &policy,
                     &typos_cli::report::PrintSilent,
                 )
             });
@@ -55,16 +51,11 @@ fn bench_checks(c: &mut Criterion) {
             let sample_path = temp.child("sample");
             sample_path.write_str(sample).unwrap();
 
-            let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-            let parser = typos::tokens::Tokenizer::new();
-            let settings = typos_cli::file::CheckSettings::new();
             b.iter(|| {
                 typos_cli::file::Words.check_file(
                     sample_path.path(),
                     true,
-                    &settings,
-                    &parser,
-                    &corrections,
+                    &policy,
                     &typos_cli::report::PrintSilent,
                 )
             });
@@ -76,16 +67,11 @@ fn bench_checks(c: &mut Criterion) {
             let sample_path = temp.child("sample");
             sample_path.write_str(sample).unwrap();
 
-            let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-            let parser = typos::tokens::Tokenizer::new();
-            let settings = typos_cli::file::CheckSettings::new();
             b.iter(|| {
                 typos_cli::file::Typos.check_file(
                     sample_path.path(),
                     true,
-                    &settings,
-                    &parser,
-                    &corrections,
+                    &policy,
                     &typos_cli::report::PrintSilent,
                 )
             });
diff --git a/src/file.rs b/src/file.rs
index 87dbbef..240763f 100644
--- a/src/file.rs
+++ b/src/file.rs
@@ -4,59 +4,17 @@ use std::io::Read;
 use std::io::Write;
 
 use crate::report;
-use typos::tokens;
-use typos::Dictionary;
 
 pub trait FileChecker: Send + Sync {
     fn check_file(
         &self,
         path: &std::path::Path,
         explicit: bool,
-        settings: &CheckSettings,
-        tokenizer: &tokens::Tokenizer,
-        dictionary: &dyn Dictionary,
+        policy: &crate::policy::Policy,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error>;
 }
 
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct CheckSettings {
-    check_filenames: bool,
-    check_files: bool,
-    binary: bool,
-}
-
-impl CheckSettings {
-    pub fn new() -> Self {
-        Default::default()
-    }
-
-    pub fn check_filenames(&mut self, yes: bool) -> &mut Self {
-        self.check_filenames = yes;
-        self
-    }
-
-    pub fn check_files(&mut self, yes: bool) -> &mut Self {
-        self.check_files = yes;
-        self
-    }
-
-    pub fn binary(&mut self, yes: bool) -> &mut Self {
-        self.binary = yes;
-        self
-    }
-}
-
-impl Default for CheckSettings {
-    fn default() -> Self {
-        Self {
-            check_filenames: true,
-            check_files: true,
-            binary: false,
-        }
-    }
-}
-
 #[derive(Debug, Clone, Copy)]
 pub struct Typos;
 
@@ -65,14 +23,12 @@ impl FileChecker for Typos {
         &self,
         path: &std::path::Path,
         explicit: bool,
-        settings: &CheckSettings,
-        tokenizer: &tokens::Tokenizer,
-        dictionary: &dyn Dictionary,
+        policy: &crate::policy::Policy,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
-        if settings.check_filenames {
+        if policy.check_filenames {
             if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
-                for typo in typos::check_str(file_name, tokenizer, dictionary) {
+                for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) {
                     let msg = report::Typo {
                         context: Some(report::PathContext { path }.into()),
                         buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
@@ -85,14 +41,14 @@ impl FileChecker for Typos {
             }
         }
 
-        if settings.check_files {
+        if policy.check_files {
             let (buffer, content_type) = read_file(path, reporter)?;
-            if !explicit && !settings.binary && content_type.is_binary() {
+            if !explicit && !policy.binary && content_type.is_binary() {
                 let msg = report::BinaryFile { path };
                 reporter.report(msg.into())?;
             } else {
                 let mut accum_line_num = AccumulateLineNum::new();
-                for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
+                for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) {
                     let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
                     let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
                     let msg = report::Typo {
@@ -119,20 +75,18 @@ impl FileChecker for FixTypos {
         &self,
         path: &std::path::Path,
         explicit: bool,
-        settings: &CheckSettings,
-        tokenizer: &tokens::Tokenizer,
-        dictionary: &dyn Dictionary,
+        policy: &crate::policy::Policy,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
-        if settings.check_files {
+        if policy.check_files {
             let (buffer, content_type) = read_file(path, reporter)?;
-            if !explicit && !settings.binary && content_type.is_binary() {
+            if !explicit && !policy.binary && content_type.is_binary() {
                 let msg = report::BinaryFile { path };
                 reporter.report(msg.into())?;
             } else {
                 let mut fixes = Vec::new();
                 let mut accum_line_num = AccumulateLineNum::new();
-                for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
+                for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) {
                     if is_fixable(&typo) {
                         fixes.push(typo.into_owned());
                     } else {
@@ -156,10 +110,10 @@ impl FileChecker for FixTypos {
         }
 
         // Ensure the above write can happen before renaming the file.
-        if settings.check_filenames {
+        if policy.check_filenames {
             if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
                 let mut fixes = Vec::new();
-                for typo in typos::check_str(file_name, tokenizer, dictionary) {
+                for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) {
                     if is_fixable(&typo) {
                         fixes.push(typo.into_owned());
                     } else {
@@ -196,22 +150,20 @@ impl FileChecker for DiffTypos {
         &self,
         path: &std::path::Path,
         explicit: bool,
-        settings: &CheckSettings,
-        tokenizer: &tokens::Tokenizer,
-        dictionary: &dyn Dictionary,
+        policy: &crate::policy::Policy,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
         let mut content = Vec::new();
         let mut new_content = Vec::new();
-        if settings.check_files {
+        if policy.check_files {
             let (buffer, content_type) = read_file(path, reporter)?;
-            if !explicit && !settings.binary && content_type.is_binary() {
+            if !explicit && !policy.binary && content_type.is_binary() {
                 let msg = report::BinaryFile { path };
                 reporter.report(msg.into())?;
             } else {
                 let mut fixes = Vec::new();
                 let mut accum_line_num = AccumulateLineNum::new();
-                for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
+                for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) {
                     if is_fixable(&typo) {
                         fixes.push(typo.into_owned());
                     } else {
@@ -236,10 +188,10 @@ impl FileChecker for DiffTypos {
 
         // Match FixTypos ordering for easy diffing.
         let mut new_path = None;
-        if settings.check_filenames {
+        if policy.check_filenames {
             if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
                 let mut fixes = Vec::new();
-                for typo in typos::check_str(file_name, tokenizer, dictionary) {
+                for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) {
                     if is_fixable(&typo) {
                         fixes.push(typo.into_owned());
                     } else {
@@ -300,14 +252,12 @@ impl FileChecker for Identifiers {
         &self,
         path: &std::path::Path,
         explicit: bool,
-        settings: &CheckSettings,
-        tokenizer: &tokens::Tokenizer,
-        _dictionary: &dyn Dictionary,
+        policy: &crate::policy::Policy,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
-        if settings.check_filenames {
+        if policy.check_filenames {
             if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
-                for word in tokenizer.parse_str(file_name) {
+                for word in policy.tokenizer.parse_str(file_name) {
                     let msg = report::Parse {
                         context: Some(report::PathContext { path }.into()),
                         kind: report::ParseKind::Identifier,
@@ -318,13 +268,13 @@ impl FileChecker for Identifiers {
             }
         }
 
-        if settings.check_files {
+        if policy.check_files {
             let (buffer, content_type) = read_file(path, reporter)?;
-            if !explicit && !settings.binary && content_type.is_binary() {
+            if !explicit && !policy.binary && content_type.is_binary() {
                 let msg = report::BinaryFile { path };
                 reporter.report(msg.into())?;
             } else {
-                for word in tokenizer.parse_bytes(&buffer) {
+                for word in policy.tokenizer.parse_bytes(&buffer) {
                     // HACK: Don't look up the line_num per entry to better match the performance
                     // of Typos for comparison purposes. We don't really get much out of it
                     // anyway.
@@ -351,14 +301,16 @@ impl FileChecker for Words {
         &self,
         path: &std::path::Path,
         explicit: bool,
-        settings: &CheckSettings,
-        tokenizer: &tokens::Tokenizer,
-        _dictionary: &dyn Dictionary,
+        policy: &crate::policy::Policy,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
-        if settings.check_filenames {
+        if policy.check_filenames {
             if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
-                for word in tokenizer.parse_str(file_name).flat_map(|i| i.split()) {
+                for word in policy
+                    .tokenizer
+                    .parse_str(file_name)
+                    .flat_map(|i| i.split())
+                {
                     let msg = report::Parse {
                         context: Some(report::PathContext { path }.into()),
                         kind: report::ParseKind::Word,
@@ -369,13 +321,17 @@ impl FileChecker for Words {
             }
         }
 
-        if settings.check_files {
+        if policy.check_files {
             let (buffer, content_type) = read_file(path, reporter)?;
-            if !explicit && !settings.binary && content_type.is_binary() {
+            if !explicit && !policy.binary && content_type.is_binary() {
                 let msg = report::BinaryFile { path };
                 reporter.report(msg.into())?;
             } else {
-                for word in tokenizer.parse_bytes(&buffer).flat_map(|i| i.split()) {
+                for word in policy
+                    .tokenizer
+                    .parse_bytes(&buffer)
+                    .flat_map(|i| i.split())
+                {
                     // HACK: Don't look up the line_num per entry to better match the performance
                     // of Typos for comparison purposes. We don't really get much out of it
                     // anyway.
@@ -402,13 +358,11 @@ impl FileChecker for FoundFiles {
         &self,
         path: &std::path::Path,
         explicit: bool,
-        settings: &CheckSettings,
-        _parser: &tokens::Tokenizer,
-        _dictionary: &dyn Dictionary,
+        policy: &crate::policy::Policy,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
-        // Check `settings.binary` first so we can easily check performance of walking vs reading
-        if settings.binary {
+        // Check `policy.binary` first so we can easily check performance of walking vs reading
+        if policy.binary {
            let msg = report::File::new(path);
            reporter.report(msg.into())?;
         } else {
@@ -598,13 +552,11 @@ fn fix_buffer(mut buffer: Vec<u8>, typos: impl Iterator<Item = typos::Typo<'static>>) -> Vec<u8> {
 pub fn walk_path(
     walk: ignore::Walk,
     checks: &dyn FileChecker,
-    settings: &CheckSettings,
-    tokenizer: &typos::tokens::Tokenizer,
-    dictionary: &dyn typos::Dictionary,
+    policy: &crate::policy::Policy,
     reporter: &dyn report::Report,
 ) -> Result<(), ignore::Error> {
     for entry in walk {
-        walk_entry(entry, checks, settings, tokenizer, dictionary, reporter)?;
+        walk_entry(entry, checks, policy, reporter)?;
     }
     Ok(())
 }
@@ -612,15 +564,13 @@ pub fn walk_path(
 pub fn walk_path_parallel(
     walk: ignore::WalkParallel,
     checks: &dyn FileChecker,
-    settings: &CheckSettings,
-    tokenizer: &typos::tokens::Tokenizer,
-    dictionary: &dyn typos::Dictionary,
+    policy: &crate::policy::Policy,
     reporter: &dyn report::Report,
 ) -> Result<(), ignore::Error> {
     let error: std::sync::Mutex<Result<(), ignore::Error>> = std::sync::Mutex::new(Ok(()));
     walk.run(|| {
         Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
-            match walk_entry(entry, checks, settings, tokenizer, dictionary, reporter) {
+            match walk_entry(entry, checks, policy, reporter) {
                 Ok(()) => ignore::WalkState::Continue,
                 Err(err) => {
                     *error.lock().unwrap() = Err(err);
@@ -636,9 +586,7 @@ pub fn walk_path_parallel(
 fn walk_entry(
     entry: Result<ignore::DirEntry, ignore::Error>,
     checks: &dyn FileChecker,
-    settings: &CheckSettings,
-    tokenizer: &typos::tokens::Tokenizer,
-    dictionary: &dyn typos::Dictionary,
+    policy: &crate::policy::Policy,
     reporter: &dyn report::Report,
 ) -> Result<(), ignore::Error> {
     let entry = match entry {
@@ -655,7 +603,7 @@ fn walk_entry(
         } else {
             entry.path()
         };
-        checks.check_file(path, explicit, settings, tokenizer, dictionary, reporter)?;
+        checks.check_file(path, explicit, policy, reporter)?;
     }
 
     Ok(())
diff --git a/src/lib.rs b/src/lib.rs
index bc10a10..ab43959 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,5 @@
 pub mod config;
 pub mod dict;
 pub mod file;
+pub mod policy;
 pub mod report;
diff --git a/src/main.rs b/src/main.rs
index cfd035e..3e8a605 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -8,7 +8,6 @@ use structopt::StructOpt;
 mod args;
 
 use typos_cli::config;
-use typos_cli::dict;
 use typos_cli::report;
 
 use proc_exit::WithCodeResultExt;
@@ -91,36 +90,22 @@ fn run_checks(args: &args::Args) -> proc_exit::ExitResult {
     };
     let config = load_config(cwd, &args).with_code(proc_exit::Code::CONFIG_ERR)?;
 
-    let tokenizer = typos::tokens::TokenizerBuilder::new()
-        .ignore_hex(config.default.ignore_hex())
-        .leading_digits(config.default.identifier_leading_digits())
-        .leading_chars(config.default.identifier_leading_chars().to_owned())
-        .include_digits(config.default.identifier_include_digits())
-        .include_chars(config.default.identifier_include_chars().to_owned())
-        .build();
-
-    let dictionary = crate::dict::BuiltIn::new(config.default.locale());
-    let mut dictionary = crate::dict::Override::new(dictionary);
-    dictionary.identifiers(config.default.extend_identifiers());
-    dictionary.words(config.default.extend_words());
-
-    let mut settings = typos_cli::file::CheckSettings::new();
-    settings
-        .check_filenames(config.default.check_filename())
-        .check_files(config.default.check_file())
-        .binary(config.default.binary());
+    let storage = typos_cli::policy::ConfigStorage::new();
+    let engine = typos_cli::policy::ConfigEngine::new(config, &storage);
+    let files = engine.files();
+    let policy = engine.policy();
 
     let threads = if path.is_file() { 1 } else { args.threads };
     let single_threaded = threads == 1;
 
     let mut walk = ignore::WalkBuilder::new(path);
     walk.threads(args.threads)
-        .hidden(config.files.ignore_hidden())
-        .ignore(config.files.ignore_dot())
-        .git_global(config.files.ignore_global())
-        .git_ignore(config.files.ignore_vcs())
-        .git_exclude(config.files.ignore_vcs())
-        .parents(config.files.ignore_parent());
+        .hidden(files.ignore_hidden())
+        .ignore(files.ignore_dot())
+        .git_global(files.ignore_global())
+        .git_ignore(files.ignore_vcs())
+        .git_exclude(files.ignore_vcs())
+        .parents(files.ignore_parent());
 
     // HACK: Diff doesn't handle mixing content
     let output_reporter = if args.diff {
@@ -146,21 +131,12 @@ fn run_checks(args: &args::Args) -> proc_exit::ExitResult {
     };
 
     if single_threaded {
-        typos_cli::file::walk_path(
-            walk.build(),
-            selected_checks,
-            &settings,
-            &tokenizer,
-            &dictionary,
-            reporter,
-        )
+        typos_cli::file::walk_path(walk.build(), selected_checks, &policy, reporter)
     } else {
         typos_cli::file::walk_path_parallel(
             walk.build_parallel(),
             selected_checks,
-            &settings,
-            &tokenizer,
-            &dictionary,
+            &policy,
             reporter,
         )
     }
diff --git a/src/policy.rs b/src/policy.rs
new file mode 100644
index 0000000..f5bc6ac
--- /dev/null
+++ b/src/policy.rs
@@ -0,0 +1,106 @@
+pub struct ConfigStorage {
+    arena: typed_arena::Arena<kstring::KString>,
+}
+
+impl ConfigStorage {
+    pub fn new() -> Self {
+        Self {
+            arena: typed_arena::Arena::new(),
+        }
+    }
+
+    fn get<'s>(&'s self, other: &str) -> &'s str {
+        self.arena.alloc(kstring::KString::from_ref(other))
+    }
+}
+
+pub struct ConfigEngine<'s> {
+    files: crate::config::Walk,
+    check_filenames: bool,
+    check_files: bool,
+    binary: bool,
+    tokenizer: typos::tokens::Tokenizer,
+    dictionary: crate::dict::Override<'s, 's, crate::dict::BuiltIn>,
+}
+
+impl<'s> ConfigEngine<'s> {
+    pub fn new(config: crate::config::Config, storage: &'s ConfigStorage) -> Self {
+        let crate::config::Config { files, default } = config;
+
+        let tokenizer = typos::tokens::TokenizerBuilder::new()
+            .ignore_hex(default.ignore_hex())
+            .leading_digits(default.identifier_leading_digits())
+            .leading_chars(default.identifier_leading_chars().to_owned())
+            .include_digits(default.identifier_include_digits())
+            .include_chars(default.identifier_include_chars().to_owned())
+            .build();
+
+        let dictionary = crate::dict::BuiltIn::new(default.locale());
+        let mut dictionary = crate::dict::Override::new(dictionary);
+        dictionary.identifiers(
+            default
+                .extend_identifiers()
+                .map(|(k, v)| (storage.get(k), storage.get(v))),
+        );
+        dictionary.words(
+            default
+                .extend_words()
+                .map(|(k, v)| (storage.get(k), storage.get(v))),
+        );
+
+        Self {
+            files,
+            check_filenames: default.check_filename(),
+            check_files: default.check_file(),
+            binary: default.binary(),
+            tokenizer,
+            dictionary,
+        }
+    }
+
+    pub fn files(&self) -> &crate::config::Walk {
+        &self.files
+    }
+
+    pub fn policy(&self) -> Policy<'_, '_> {
+        Policy {
+            check_filenames: self.check_filenames,
+            check_files: self.check_files,
+            binary: self.binary,
+            tokenizer: &self.tokenizer,
+            dictionary: &self.dictionary,
+        }
+    }
+}
+
+#[non_exhaustive]
+#[derive(derive_setters::Setters)]
+pub struct Policy<'t, 'd> {
+    pub check_filenames: bool,
+    pub check_files: bool,
+    pub binary: bool,
+    pub tokenizer: &'t typos::tokens::Tokenizer,
+    pub dictionary: &'d dyn typos::Dictionary,
+}
+
+impl<'t, 'd> Policy<'t, 'd> {
+    pub fn new() -> Self {
+        Default::default()
+    }
+}
+
+static DEFAULT_TOKENIZER: once_cell::sync::Lazy<typos::tokens::Tokenizer> =
+    once_cell::sync::Lazy::new(typos::tokens::Tokenizer::new);
+static DEFAULT_DICT: crate::dict::BuiltIn = crate::dict::BuiltIn::new(crate::config::Locale::En);
+
+impl<'t, 'd> Default for Policy<'t, 'd> {
+    fn default() -> Self {
+        Self {
+            check_filenames: true,
+            check_files: true,
+            binary: false,
+            tokenizer: &DEFAULT_TOKENIZER,
+            dictionary: &DEFAULT_DICT,
+        }
+    }
+}
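
Usage note (not part of the patch): a rough sketch of how the new policy API is meant to be consumed, mirroring the updated benchmark above. It assumes only the crate-internal items shown in this diff (typos_cli::policy::Policy, the FileChecker impls, and the PrintSilent reporter); the .dictionary()/.tokenizer() calls are the setters generated by the derive_setters::Setters derive on Policy, and the file path is purely illustrative.

    // Start from the Policy defaults, then override the dictionary and tokenizer.
    let dictionary = typos_cli::dict::BuiltIn::new(Default::default());
    let tokenizer = typos::tokens::Tokenizer::new();
    let policy = typos_cli::policy::Policy::new()
        .dictionary(&dictionary)
        .tokenizer(&tokenizer);

    // Every checker now takes a single `&Policy` instead of separate
    // settings/tokenizer/dictionary arguments (inside a fn returning std::io::Result<()>).
    typos_cli::file::Typos.check_file(
        std::path::Path::new("sample.txt"), // hypothetical path
        true,                               // explicit
        &policy,
        &typos_cli::report::PrintSilent,
    )?;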