From 4bbc59facf1b19eb1dd70eeba5783f20469eaaa6 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 1 Mar 2021 20:37:05 -0600 Subject: [PATCH] refactor(config)!: Detect when no dict config In preparing for smarter handling of config, we need to be able to tell what is present and what isn't. BREAKING CHANGE: `--hex` was removed, the value didn't seem high enough. --- benches/checks.rs | 4 +- docs/reference.md | 2 +- src/args.rs | 17 +--- src/config.rs | 240 +++++++++++++++++++++++++++++++--------------- src/file.rs | 12 +-- src/policy.rs | 47 +++++---- 6 files changed, 203 insertions(+), 119 deletions(-) diff --git a/benches/checks.rs b/benches/checks.rs index 77b4743..9b7f97d 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -5,10 +5,10 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use typos_cli::file::FileChecker; fn bench_checks(c: &mut Criterion) { - let dictionary = typos_cli::dict::BuiltIn::new(Default::default()); + let dict = typos_cli::dict::BuiltIn::new(Default::default()); let tokenizer = typos::tokens::Tokenizer::new(); let policy = typos_cli::policy::Policy::new() - .dictionary(&dictionary) + .dict(&dict) .tokenizer(&tokenizer); let mut group = c.benchmark_group("checks"); diff --git a/docs/reference.md b/docs/reference.md index 7720c4c..32039da 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -28,6 +28,6 @@ Configuration is read from the following (in precedence order) | default.identifier-include-digits | \- | bool | Allow identifiers to include digits, in addition to letters. | | default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. | | default.identifier-include-chars | \- | string | Allow identifiers to include these characters. | -| default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. | +| default.locale | --locale | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. | | default.extend-identifiers | \- | table of strings | Corrections for identifiers. When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. | | default.extend-words | \- | table of strings | Corrections for identifiers. When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. | diff --git a/src/args.rs b/src/args.rs index 53ae795..2c9c1db 100644 --- a/src/args.rs +++ b/src/args.rs @@ -122,12 +122,6 @@ pub(crate) struct FileArgs { #[structopt(long, overrides_with("no-check-files"), hidden(true))] check_files: bool, - #[structopt(long, overrides_with("hex"))] - /// Don't try to detect that an identifier looks like hex - no_hex: bool, - #[structopt(long, overrides_with("no-hex"), hidden(true))] - hex: bool, - #[structopt( long, possible_values(&config::Locale::variants()), @@ -163,15 +157,12 @@ impl config::FileSource for FileArgs { } } - fn ignore_hex(&self) -> Option { - match (self.hex, self.no_hex) { - (true, false) => Some(true), - (false, true) => Some(false), - (false, false) => None, - (_, _) => unreachable!("StructOpt should make this impossible"), - } + fn dict(&self) -> Option<&dyn config::DictSource> { + Some(self) } +} +impl config::DictSource for FileArgs { fn locale(&self) -> Option { self.locale } diff --git a/src/config.rs b/src/config.rs index 2fcbc63..1423ae9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -58,6 +58,16 @@ pub trait FileSource { None } + fn tokenizer(&self) -> Option<&dyn TokenizerSource> { + None + } + + fn dict(&self) -> Option<&dyn DictSource> { + None + } +} + +pub trait TokenizerSource { /// Do not check identifiers that appear to be hexadecimal values. fn ignore_hex(&self) -> Option { None @@ -82,7 +92,9 @@ pub trait FileSource { fn identifier_include_chars(&self) -> Option<&str> { None } +} +pub trait DictSource { fn locale(&self) -> Option { None } @@ -258,14 +270,10 @@ pub struct FileConfig { pub binary: Option, pub check_filename: Option, pub check_file: Option, - pub ignore_hex: Option, - pub identifier_leading_digits: Option, - pub identifier_leading_chars: Option, - pub identifier_include_digits: Option, - pub identifier_include_chars: Option, - pub locale: Option, - pub extend_identifiers: HashMap, - pub extend_words: HashMap, + #[serde(flatten)] + pub tokenizer: Option, + #[serde(flatten)] + pub dict: Option, } impl FileConfig { @@ -275,18 +283,12 @@ impl FileConfig { binary: Some(empty.binary()), check_filename: Some(empty.check_filename()), check_file: Some(empty.check_file()), - ignore_hex: Some(empty.ignore_hex()), - identifier_leading_digits: Some(empty.identifier_leading_digits()), - identifier_leading_chars: Some(kstring::KString::from_ref( - empty.identifier_leading_chars(), - )), - identifier_include_digits: Some(empty.identifier_include_digits()), - identifier_include_chars: Some(kstring::KString::from_ref( - empty.identifier_include_chars(), - )), - locale: Some(empty.locale()), - extend_identifiers: Default::default(), - extend_words: Default::default(), + tokenizer: Some( + empty + .tokenizer + .unwrap_or_else(|| TokenizerConfig::from_defaults()), + ), + dict: Some(empty.dict.unwrap_or_else(|| DictConfig::from_defaults())), } } @@ -300,6 +302,87 @@ impl FileConfig { if let Some(source) = source.check_file() { self.check_file = Some(source); } + if let Some(source) = source.tokenizer() { + let mut tokenizer = None; + std::mem::swap(&mut tokenizer, &mut self.tokenizer); + let mut tokenizer = tokenizer.unwrap_or_default(); + tokenizer.update(source); + let mut tokenizer = Some(tokenizer); + std::mem::swap(&mut tokenizer, &mut self.tokenizer); + } + if let Some(source) = source.dict() { + let mut dict = None; + std::mem::swap(&mut dict, &mut self.dict); + let mut dict = dict.unwrap_or_default(); + dict.update(source); + let mut dict = Some(dict); + std::mem::swap(&mut dict, &mut self.dict); + } + } + + pub fn binary(&self) -> bool { + self.binary.unwrap_or(false) + } + + pub fn check_filename(&self) -> bool { + self.check_filename.unwrap_or(true) + } + + pub fn check_file(&self) -> bool { + self.check_file.unwrap_or(true) + } +} + +impl FileSource for FileConfig { + fn binary(&self) -> Option { + self.binary + } + + fn check_filename(&self) -> Option { + self.check_filename + } + + fn check_file(&self) -> Option { + self.check_file + } + + fn tokenizer(&self) -> Option<&dyn TokenizerSource> { + self.tokenizer.as_ref().map(|t| t as &dyn TokenizerSource) + } + + fn dict(&self) -> Option<&dyn DictSource> { + self.dict.as_ref().map(|d| d as &dyn DictSource) + } +} + +#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] +#[serde(deny_unknown_fields, default)] +#[serde(rename_all = "kebab-case")] +pub struct TokenizerConfig { + pub ignore_hex: Option, + pub identifier_leading_digits: Option, + pub identifier_leading_chars: Option, + pub identifier_include_digits: Option, + pub identifier_include_chars: Option, +} + +impl TokenizerConfig { + pub fn from_defaults() -> Self { + let empty = Self::default(); + Self { + ignore_hex: Some(empty.ignore_hex()), + identifier_leading_digits: Some(empty.identifier_leading_digits()), + identifier_leading_chars: Some(kstring::KString::from_ref( + empty.identifier_leading_chars(), + )), + identifier_include_digits: Some(empty.identifier_include_digits()), + identifier_include_chars: Some(kstring::KString::from_ref( + empty.identifier_include_chars(), + )), + } + } + + pub fn update(&mut self, source: &dyn TokenizerSource) { if let Some(source) = source.ignore_hex() { self.ignore_hex = Some(source); } @@ -315,31 +398,6 @@ impl FileConfig { if let Some(source) = source.identifier_include_chars() { self.identifier_include_chars = Some(kstring::KString::from_ref(source)); } - if let Some(source) = source.locale() { - self.locale = Some(source); - } - self.extend_identifiers.extend( - source - .extend_identifiers() - .map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))), - ); - self.extend_words.extend( - source - .extend_words() - .map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))), - ); - } - - pub fn binary(&self) -> bool { - self.binary.unwrap_or(false) - } - - pub fn check_filename(&self) -> bool { - self.check_filename.unwrap_or(true) - } - - pub fn check_file(&self) -> bool { - self.check_file.unwrap_or(true) } pub fn ignore_hex(&self) -> bool { @@ -361,6 +419,64 @@ impl FileConfig { pub fn identifier_include_chars(&self) -> &str { self.identifier_include_chars.as_deref().unwrap_or("_'") } +} + +impl TokenizerSource for TokenizerConfig { + fn ignore_hex(&self) -> Option { + self.ignore_hex + } + + fn identifier_leading_digits(&self) -> Option { + self.identifier_leading_digits + } + + fn identifier_leading_chars(&self) -> Option<&str> { + self.identifier_leading_chars.as_deref() + } + + fn identifier_include_digits(&self) -> Option { + self.identifier_include_digits + } + + fn identifier_include_chars(&self) -> Option<&str> { + self.identifier_include_chars.as_deref() + } +} + +#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] +#[serde(deny_unknown_fields, default)] +#[serde(rename_all = "kebab-case")] +pub struct DictConfig { + pub locale: Option, + pub extend_identifiers: HashMap, + pub extend_words: HashMap, +} + +impl DictConfig { + pub fn from_defaults() -> Self { + let empty = Self::default(); + Self { + locale: Some(empty.locale()), + extend_identifiers: Default::default(), + extend_words: Default::default(), + } + } + + pub fn update(&mut self, source: &dyn DictSource) { + if let Some(source) = source.locale() { + self.locale = Some(source); + } + self.extend_identifiers.extend( + source + .extend_identifiers() + .map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))), + ); + self.extend_words.extend( + source + .extend_words() + .map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))), + ); + } pub fn locale(&self) -> Locale { self.locale.unwrap_or_default() @@ -383,39 +499,7 @@ impl FileConfig { } } -impl FileSource for FileConfig { - fn binary(&self) -> Option { - self.binary - } - - fn check_filename(&self) -> Option { - self.check_filename - } - - fn check_file(&self) -> Option { - self.check_file - } - - fn ignore_hex(&self) -> Option { - self.ignore_hex - } - - fn identifier_leading_digits(&self) -> Option { - self.identifier_leading_digits - } - - fn identifier_leading_chars(&self) -> Option<&str> { - self.identifier_leading_chars.as_deref() - } - - fn identifier_include_digits(&self) -> Option { - self.identifier_include_digits - } - - fn identifier_include_chars(&self) -> Option<&str> { - self.identifier_include_chars.as_deref() - } - +impl DictSource for DictConfig { fn locale(&self) -> Option { self.locale } diff --git a/src/file.rs b/src/file.rs index 240763f..bc2015e 100644 --- a/src/file.rs +++ b/src/file.rs @@ -28,7 +28,7 @@ impl FileChecker for Typos { ) -> Result<(), std::io::Error> { if policy.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) { + for typo in typos::check_str(file_name, policy.tokenizer, policy.dict) { let msg = report::Typo { context: Some(report::PathContext { path }.into()), buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()), @@ -48,7 +48,7 @@ impl FileChecker for Typos { reporter.report(msg.into())?; } else { let mut accum_line_num = AccumulateLineNum::new(); - for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) { + for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) { let line_num = accum_line_num.line_num(&buffer, typo.byte_offset); let (line, line_offset) = extract_line(&buffer, typo.byte_offset); let msg = report::Typo { @@ -86,7 +86,7 @@ impl FileChecker for FixTypos { } else { let mut fixes = Vec::new(); let mut accum_line_num = AccumulateLineNum::new(); - for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) { + for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { @@ -113,7 +113,7 @@ impl FileChecker for FixTypos { if policy.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { let mut fixes = Vec::new(); - for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) { + for typo in typos::check_str(file_name, policy.tokenizer, policy.dict) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { @@ -163,7 +163,7 @@ impl FileChecker for DiffTypos { } else { let mut fixes = Vec::new(); let mut accum_line_num = AccumulateLineNum::new(); - for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dictionary) { + for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { @@ -191,7 +191,7 @@ impl FileChecker for DiffTypos { if policy.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { let mut fixes = Vec::new(); - for typo in typos::check_str(file_name, policy.tokenizer, policy.dictionary) { + for typo in typos::check_str(file_name, policy.tokenizer, policy.dict) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { diff --git a/src/policy.rs b/src/policy.rs index f5bc6ac..fb032f9 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -20,41 +20,50 @@ pub struct ConfigEngine<'s> { check_files: bool, binary: bool, tokenizer: typos::tokens::Tokenizer, - dictionary: crate::dict::Override<'s, 's, crate::dict::BuiltIn>, + dict: crate::dict::Override<'s, 's, crate::dict::BuiltIn>, } impl<'s> ConfigEngine<'s> { pub fn new(config: crate::config::Config, storage: &'s ConfigStorage) -> Self { let crate::config::Config { files, default } = config; + let binary = default.binary(); + let check_filename = default.check_filename(); + let check_file = default.check_file(); + let crate::config::EngineConfig { + tokenizer, dict, .. + } = default; + let tokenizer_config = + tokenizer.unwrap_or_else(|| crate::config::TokenizerConfig::from_defaults()); + let dict_config = dict.unwrap_or_else(|| crate::config::DictConfig::from_defaults()); let tokenizer = typos::tokens::TokenizerBuilder::new() - .ignore_hex(default.ignore_hex()) - .leading_digits(default.identifier_leading_digits()) - .leading_chars(default.identifier_leading_chars().to_owned()) - .include_digits(default.identifier_include_digits()) - .include_chars(default.identifier_include_chars().to_owned()) + .ignore_hex(tokenizer_config.ignore_hex()) + .leading_digits(tokenizer_config.identifier_leading_digits()) + .leading_chars(tokenizer_config.identifier_leading_chars().to_owned()) + .include_digits(tokenizer_config.identifier_include_digits()) + .include_chars(tokenizer_config.identifier_include_chars().to_owned()) .build(); - let dictionary = crate::dict::BuiltIn::new(default.locale()); - let mut dictionary = crate::dict::Override::new(dictionary); - dictionary.identifiers( - default + let dict = crate::dict::BuiltIn::new(dict_config.locale()); + let mut dict = crate::dict::Override::new(dict); + dict.identifiers( + dict_config .extend_identifiers() .map(|(k, v)| (storage.get(k), storage.get(v))), ); - dictionary.words( - default + dict.words( + dict_config .extend_words() .map(|(k, v)| (storage.get(k), storage.get(v))), ); Self { files, - check_filenames: default.check_filename(), - check_files: default.check_file(), - binary: default.binary(), + check_filenames: check_filename, + check_files: check_file, + binary: binary, tokenizer, - dictionary, + dict, } } @@ -68,7 +77,7 @@ impl<'s> ConfigEngine<'s> { check_files: self.check_files, binary: self.binary, tokenizer: &self.tokenizer, - dictionary: &self.dictionary, + dict: &self.dict, } } } @@ -80,7 +89,7 @@ pub struct Policy<'t, 'd> { pub check_files: bool, pub binary: bool, pub tokenizer: &'t typos::tokens::Tokenizer, - pub dictionary: &'d dyn typos::Dictionary, + pub dict: &'d dyn typos::Dictionary, } impl<'t, 'd> Policy<'t, 'd> { @@ -100,7 +109,7 @@ impl<'t, 'd> Default for Policy<'t, 'd> { check_files: true, binary: false, tokenizer: &DEFAULT_TOKENIZER, - dictionary: &DEFAULT_DICT, + dict: &DEFAULT_DICT, } } }