diff --git a/Cargo.lock b/Cargo.lock index 8856495..daefb43 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,7 +75,7 @@ name = "bstr" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "regex-automata 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -164,6 +164,19 @@ dependencies = [ "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "derive_more" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 0.4.26 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.26 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "difference" version = "2.0.0" @@ -271,7 +284,7 @@ name = "heck" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -289,7 +302,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "crossbeam-channel 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.3.0 
(registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -314,7 +327,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "lazy_static" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -651,6 +664,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" name = "serde" version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde_derive 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "serde_derive" @@ -780,7 +796,15 @@ name = "thread_local" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "toml" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -797,20 +821,22 @@ dependencies = [ "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap-verbosity-flag 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "csv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "derive_more 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", "ignore 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", - 
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", "phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)", "structopt 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", + "toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", "unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -828,7 +854,7 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.2.1" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -935,6 +961,7 @@ dependencies = [ "checksum crossbeam-utils 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "41ee4864f4797060e52044376f7d107429ce1fb43460021b126424b7180ee21a" "checksum csv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "9fd1c44c58078cfbeaf11fbb3eac9ae5534c23004ed770cc4bfb48e658ae4f04" "checksum csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa5cdef62f37e6ffe7d1f07a381bc0db32b7a3ff1cac0de56cb0d81e71f53d65" +"checksum derive_more 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a141330240c921ec6d074a3e188a7c7ef95668bb95e7d44fa0e5778ec2a7afe" "checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" 
"checksum either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3be565ca5c557d7f59e7cfcf1844f9e3033650c929c6566f511e8005f205c1d0" "checksum env_logger 0.5.13 (registry+https://github.com/rust-lang/crates.io-index)" = "15b0a4d2e39f8420210be8b27eeda28029729e2fd4291019455016c348240c38" @@ -952,7 +979,7 @@ dependencies = [ "checksum ignore 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ad03ca67dc12474ecd91fdb94d758cbd20cb4e7a78ebe831df26a9b7511e1162" "checksum itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5b8467d9c1cebe26feb08c640139247fac215782d35371ade9a2136ed6085358" "checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" -"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" +"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" "checksum libc 0.2.47 (registry+https://github.com/rust-lang/crates.io-index)" = "48450664a984b25d5b479554c29cc04e3150c97aa4c01da5604a2d4ed9151476" "checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c" "checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" @@ -1010,10 +1037,11 @@ dependencies = [ "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" "checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = 
"c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" +"checksum toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "758664fc71a3a69038656bee8b6be6477d2a6c315a6b81f7081f591bffa4111f" "checksum treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41" "checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" "checksum unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7f4765f83163b74f957c797ad9253caf97f103fb064d3999aea9568d09fc8a33" -"checksum unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1" +"checksum unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1967f4cdfc355b37fd76d2a954fb2ed3871034eb4f26d60537d88795cfc332a9" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" diff --git a/Cargo.toml b/Cargo.toml index daef3c6..4bb7a8a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,14 +28,16 @@ ignore = "0.4" phf = { version = "0.7", features = ["unicase"] } regex = "1.0" lazy_static = "1.2.0" -serde = "1.0" -serde_derive = "1.0" +serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +toml = "0.4" itertools = "0.8" unicase = "1.1" bstr = "0.2" log = "0.4" env_logger = "0.6" +unicode-segmentation = "1.3.0" +derive_more = "0.15.0" [dev-dependencies] assert_fs = "0.10" diff --git 
a/src/checks.rs b/src/checks.rs index 4c8b475..3941887 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -7,6 +7,7 @@ use crate::report; use crate::tokens; use crate::Dictionary; +#[derive(Debug, Clone, PartialEq, Eq)] pub struct CheckSettings { check_filenames: bool, check_files: bool, @@ -58,6 +59,7 @@ impl Default for CheckSettings { } } +#[derive(Clone)] pub struct Checks<'d, 'p> { dictionary: &'d Dictionary, parser: &'p tokens::Parser, @@ -170,3 +172,14 @@ impl<'d, 'p> Checks<'d, 'p> { Ok(typos_found) } } + +impl std::fmt::Debug for Checks<'_, '_> { + fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { + fmt.debug_struct("Checks") + .field("parser", self.parser) + .field("check_filenames", &self.check_filenames) + .field("check_files", &self.check_files) + .field("binary", &self.binary) + .finish() + } +} diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..80903f6 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,317 @@ +use std::io::Read; + +pub trait ConfigSource { + fn walk(&self) -> Option<&dyn WalkSource> { + None + } + + fn default(&self) -> Option<&dyn FileSource> { + None + } +} + +pub trait WalkSource { + /// Search binary files. + fn binary(&self) -> Option { + None + } + + /// Skip hidden files and directories. + fn ignore_hidden(&self) -> Option { + None + } + + /// Respect ignore files. + fn ignore_files(&self) -> Option { + None + } + + /// Respect .ignore files. + fn ignore_dot(&self) -> Option { + None + } + + /// Respect ignore files in vcs directories. + fn ignore_vcs(&self) -> Option { + None + } + + /// Respect global ignore files. + fn ignore_global(&self) -> Option { + None + } + + /// Respect ignore files in parent directories. + fn ignore_parent(&self) -> Option { + None + } +} + +pub trait FileSource { + /// Verifying spelling in file names. + fn check_filename(&self) -> Option { + None + } + + /// Verifying spelling in files. 
+ fn check_file(&self) -> Option { + None + } + + /// Do not check identifiers that appear to be hexadecimal values + fn ignore_hex(&self) -> Option { + None + } + + /// Allow identifiers to include digits, in addition to letters + fn identifier_include_digits(&self) -> Option { + None + } + + /// Specify additional characters to be included in identifiers + fn identifier_include_chars(&self) -> Option<&str> { + None + } +} + +#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] +#[serde(deny_unknown_fields, default)] +#[serde(rename_all = "kebab-case")] +pub struct Config { + pub files: Walk, + pub default: FileConfig, +} + +impl Config { + pub fn from_file(path: &std::path::Path) -> Result { + let mut file = std::fs::File::open(path)?; + let mut s = String::new(); + file.read_to_string(&mut s)?; + Self::from_toml(&s) + } + + pub fn from_toml(data: &str) -> Result { + let content = toml::from_str(data)?; + Ok(content) + } + + pub fn derive(cwd: &std::path::Path) -> Result { + if let Some(path) = find_project_file(cwd.to_owned(), "typos.toml") { + Self::from_file(&path) + } else { + Ok(Default::default()) + } + } + + pub fn update(&mut self, source: &dyn ConfigSource) { + if let Some(walk) = source.walk() { + self.files.update(walk); + } + if let Some(default) = source.default() { + self.default.update(default); + } + } +} + +impl ConfigSource for Config { + fn walk(&self) -> Option<&dyn WalkSource> { + Some(&self.files) + } +} + +#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] +#[serde(deny_unknown_fields, default)] +#[serde(rename_all = "kebab-case")] +pub struct Walk { + pub binary: Option, + pub ignore_hidden: Option, + pub ignore_files: Option, + pub ignore_dot: Option, + pub ignore_vcs: Option, + pub ignore_global: Option, + pub ignore_parent: Option, +} + +impl Walk { + pub fn update(&mut self, source: &dyn WalkSource) { + if let Some(source) = source.binary() { + self.binary = Some(source); + } + if let Some(source) 
= source.ignore_hidden() { + self.ignore_hidden = Some(source); + } + if let Some(source) = source.ignore_files() { + self.ignore_files = Some(source); + self.ignore_dot = None; + self.ignore_vcs = None; + self.ignore_global = None; + self.ignore_parent = None; + } + if let Some(source) = source.ignore_dot() { + self.ignore_dot = Some(source); + } + if let Some(source) = source.ignore_vcs() { + self.ignore_vcs = Some(source); + self.ignore_global = None; + } + if let Some(source) = source.ignore_global() { + self.ignore_global = Some(source); + } + if let Some(source) = source.ignore_parent() { + self.ignore_parent = Some(source); + } + } + + pub fn binary(&self) -> bool { + self.binary.unwrap_or(false) + } + + pub fn ignore_hidden(&self) -> bool { + self.ignore_hidden.unwrap_or(true) + } + + pub fn ignore_dot(&self) -> bool { + self.ignore_dot + .or_else(|| self.ignore_files) + .unwrap_or(true) + } + + pub fn ignore_vcs(&self) -> bool { + self.ignore_vcs + .or_else(|| self.ignore_files) + .unwrap_or(true) + } + + pub fn ignore_global(&self) -> bool { + self.ignore_global + .or_else(|| self.ignore_vcs) + .or_else(|| self.ignore_files) + .unwrap_or(true) + } + + pub fn ignore_parent(&self) -> bool { + self.ignore_parent + .or_else(|| self.ignore_files) + .unwrap_or(true) + } +} + +impl WalkSource for Walk { + fn binary(&self) -> Option { + self.binary + } + + fn ignore_hidden(&self) -> Option { + self.ignore_hidden + } + + fn ignore_files(&self) -> Option { + self.ignore_files + } + + fn ignore_dot(&self) -> Option { + self.ignore_dot + } + + fn ignore_vcs(&self) -> Option { + self.ignore_vcs + } + + fn ignore_global(&self) -> Option { + self.ignore_global + } + + fn ignore_parent(&self) -> Option { + self.ignore_parent + } +} + +#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] +#[serde(deny_unknown_fields, default)] +#[serde(rename_all = "kebab-case")] +pub struct FileConfig { + pub check_filename: Option, + pub check_file: Option, + pub 
ignore_hex: Option, + pub identifier_include_digits: Option, + pub identifier_include_chars: Option, +} + +impl FileConfig { + pub fn update(&mut self, source: &dyn FileSource) { + if let Some(source) = source.check_filename() { + self.check_filename = Some(source); + } + if let Some(source) = source.check_file() { + self.check_file = Some(source); + } + if let Some(source) = source.ignore_hex() { + self.ignore_hex = Some(source); + } + if let Some(source) = source.identifier_include_digits() { + self.identifier_include_digits = Some(source); + } + if let Some(source) = source.identifier_include_chars() { + self.identifier_include_chars = Some(source.to_owned()); + } + } + + pub fn check_filename(&self) -> bool { + self.check_filename.unwrap_or(true) + } + + pub fn check_file(&self) -> bool { + self.check_file.unwrap_or(true) + } + + pub fn ignore_hex(&self) -> bool { + self.ignore_hex.unwrap_or(true) + } + + pub fn identifier_include_digits(&self) -> bool { + self.identifier_include_digits.unwrap_or(true) + } + + pub fn identifier_include_chars(&self) -> &str { + self.identifier_include_chars + .as_ref() + .map(|s| s.as_str()) + .unwrap_or("_'") + } +} + +impl FileSource for FileConfig { + fn check_filename(&self) -> Option { + self.check_filename + } + + fn check_file(&self) -> Option { + self.check_file + } + + fn ignore_hex(&self) -> Option { + self.ignore_hex + } + + fn identifier_include_digits(&self) -> Option { + self.identifier_include_digits + } + + fn identifier_include_chars(&self) -> Option<&str> { + self.identifier_include_chars.as_ref().map(|s| s.as_str()) + } +} + +fn find_project_file(dir: std::path::PathBuf, name: &str) -> Option { + let mut file_path = dir; + file_path.push(name); + while !file_path.exists() { + file_path.pop(); // filename + let hit_bottom = !file_path.pop(); + if hit_bottom { + return None; + } + file_path.push(name); + } + Some(file_path) +} diff --git a/src/lib.rs b/src/lib.rs index b5201df..2c3fd94 100644 --- a/src/lib.rs 
+++ b/src/lib.rs @@ -1,6 +1,3 @@ -#[macro_use] -extern crate serde_derive; - mod dict; mod dict_codegen; diff --git a/src/main.rs b/src/main.rs index 456a054..319858c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,6 +6,8 @@ use std::io::Write; use structopt::StructOpt; +mod config; + arg_enum! { #[derive(Debug, Copy, Clone, PartialEq, Eq)] enum Format { @@ -35,11 +37,39 @@ impl Default for Format { #[derive(Debug, StructOpt)] #[structopt(rename_all = "kebab-case")] -struct Options { +struct Args { #[structopt(parse(from_os_str), default_value = ".")] /// Paths to check path: Vec, + #[structopt(short = "c", long = "config")] + /// Custom config file + custom_config: Option, + + #[structopt(long = "isolated")] + /// Ignore implicit configuration files. + isolated: bool, + + #[structopt(flatten)] + overrides: FileArgs, + + #[structopt( + long = "format", + raw(possible_values = "&Format::variants()", case_insensitive = "true"), + default_value = "long" + )] + pub format: Format, + + #[structopt(flatten)] + config: ConfigArgs, + + #[structopt(flatten)] + verbose: clap_verbosity_flag::Verbosity, +} + +#[derive(Debug, StructOpt)] +#[structopt(rename_all = "kebab-case")] +pub struct FileArgs { #[structopt(long, raw(overrides_with = r#""check-filenames""#))] /// Skip verifying spelling in file names. 
no_check_filenames: bool, @@ -65,14 +95,53 @@ struct Options { no_hex: bool, #[structopt(long, raw(overrides_with = r#""no-hex""#), raw(hidden = "true"))] hex: bool, +} - #[structopt( - long = "format", - raw(possible_values = "&Format::variants()", case_insensitive = "true"), - default_value = "long" - )] - pub format: Format, +impl config::FileSource for FileArgs { + fn check_filename(&self) -> Option { + match (self.check_filenames, self.no_check_filenames) { + (true, false) => Some(true), + (false, true) => Some(false), + (false, false) => None, + (_, _) => unreachable!("StructOpt should make this impossible"), + } + } + fn check_file(&self) -> Option { + match (self.check_files, self.no_check_files) { + (true, false) => Some(true), + (false, true) => Some(false), + (false, false) => None, + (_, _) => unreachable!("StructOpt should make this impossible"), + } + } + + fn ignore_hex(&self) -> Option { + match (self.hex, self.no_hex) { + (true, false) => Some(true), + (false, true) => Some(false), + (false, false) => None, + (_, _) => unreachable!("StructOpt should make this impossible"), + } + } +} + +#[derive(Debug, StructOpt)] +#[structopt(rename_all = "kebab-case")] +struct ConfigArgs { + #[structopt(flatten)] + walk: WalkArgs, +} + +impl config::ConfigSource for ConfigArgs { + fn walk(&self) -> Option<&dyn config::WalkSource> { + Some(&self.walk) + } +} + +#[derive(Debug, StructOpt)] +#[structopt(rename_all = "kebab-case")] +struct WalkArgs { #[structopt(long, raw(overrides_with = r#""no-binary""#))] /// Search binary files. 
binary: bool, @@ -122,44 +191,10 @@ struct Options { no_ignore_vcs: bool, #[structopt(long, raw(overrides_with = r#""no-ignore-vcs""#), raw(hidden = "true"))] ignore_vcs: bool, - - #[structopt(flatten)] - verbose: clap_verbosity_flag::Verbosity, } -impl Options { - pub fn infer(self) -> Self { - self - } - - pub fn check_files(&self) -> Option { - match (self.check_files, self.no_check_files) { - (true, false) => Some(true), - (false, true) => Some(false), - (false, false) => None, - (_, _) => unreachable!("StructOpt should make this impossible"), - } - } - - pub fn check_filenames(&self) -> Option { - match (self.check_filenames, self.no_check_filenames) { - (true, false) => Some(true), - (false, true) => Some(false), - (false, false) => None, - (_, _) => unreachable!("StructOpt should make this impossible"), - } - } - - pub fn ignore_hex(&self) -> Option { - match (self.no_hex, self.hex) { - (true, false) => Some(false), - (false, true) => Some(true), - (false, false) => None, - (_, _) => unreachable!("StructOpt should make this impossible"), - } - } - - pub fn binary(&self) -> Option { +impl config::WalkSource for WalkArgs { + fn binary(&self) -> Option { match (self.binary, self.no_binary) { (true, false) => Some(true), (false, true) => Some(false), @@ -168,7 +203,7 @@ impl Options { } } - pub fn ignore_hidden(&self) -> Option { + fn ignore_hidden(&self) -> Option { match (self.hidden, self.no_hidden) { (true, false) => Some(false), (false, true) => Some(true), @@ -177,49 +212,44 @@ impl Options { } } - pub fn ignore_dot(&self) -> Option { + fn ignore_files(&self) -> Option { + match (self.no_ignore, self.ignore) { + (true, false) => Some(false), + (false, true) => Some(true), + (false, false) => None, + (_, _) => unreachable!("StructOpt should make this impossible"), + } + } + + fn ignore_dot(&self) -> Option { match (self.no_ignore_dot, self.ignore_dot) { (true, false) => Some(false), (false, true) => Some(true), (false, false) => None, (_, _) => 
unreachable!("StructOpt should make this impossible"), } - .or_else(|| self.ignore_files()) } - pub fn ignore_global(&self) -> Option { - match (self.no_ignore_global, self.ignore_global) { - (true, false) => Some(false), - (false, true) => Some(true), - (false, false) => None, - (_, _) => unreachable!("StructOpt should make this impossible"), - } - .or_else(|| self.ignore_vcs()) - .or_else(|| self.ignore_files()) - } - - pub fn ignore_parent(&self) -> Option { - match (self.no_ignore_parent, self.ignore_parent) { - (true, false) => Some(false), - (false, true) => Some(true), - (false, false) => None, - (_, _) => unreachable!("StructOpt should make this impossible"), - } - .or_else(|| self.ignore_files()) - } - - pub fn ignore_vcs(&self) -> Option { + fn ignore_vcs(&self) -> Option { match (self.no_ignore_vcs, self.ignore_vcs) { (true, false) => Some(false), (false, true) => Some(true), (false, false) => None, (_, _) => unreachable!("StructOpt should make this impossible"), } - .or_else(|| self.ignore_files()) } - fn ignore_files(&self) -> Option { - match (self.no_ignore, self.ignore) { + fn ignore_global(&self) -> Option { + match (self.no_ignore_global, self.ignore_global) { + (true, false) => Some(false), + (false, true) => Some(true), + (false, false) => None, + (_, _) => unreachable!("StructOpt should make this impossible"), + } + } + + fn ignore_parent(&self) -> Option { + match (self.no_ignore_parent, self.ignore_parent) { (true, false) => Some(false), (false, true) => Some(true), (false, false) => None, @@ -250,52 +280,67 @@ pub fn get_logging(level: log::Level) -> env_logger::Builder { } fn run() -> Result { - let options = Options::from_args().infer(); + let args = Args::from_args(); - let mut builder = get_logging(options.verbose.log_level()); + let mut builder = get_logging(args.verbose.log_level()); builder.init(); - let check_filenames = options.check_filenames().unwrap_or(true); - let check_files = options.check_files().unwrap_or(true); - let 
ignore_hex = options.ignore_hex().unwrap_or(true); - let binary = options.binary().unwrap_or(false); - - let dictionary = typos::BuiltIn::new(); - - let parser = typos::tokens::ParserBuilder::new() - .ignore_hex(ignore_hex) - .build(); - - let checks = typos::checks::CheckSettings::new() - .check_filenames(check_filenames) - .check_files(check_files) - .binary(binary) - .build(&dictionary, &parser); - - let first_path = &options - .path - .get(0) - .expect("arg parsing enforces at least one"); - let mut walk = ignore::WalkBuilder::new(first_path); - for path in &options.path[1..] { - walk.add(path); + let mut config = config::Config::default(); + if let Some(path) = args.custom_config.as_ref() { + let custom = config::Config::from_file(path)?; + config.update(&custom); } - walk.hidden(options.ignore_hidden().unwrap_or(true)) - .ignore(options.ignore_dot().unwrap_or(true)) - .git_global(options.ignore_global().unwrap_or(true)) - .git_ignore(options.ignore_vcs().unwrap_or(true)) - .git_exclude(options.ignore_vcs().unwrap_or(true)) - .parents(options.ignore_parent().unwrap_or(true)); + let config = config; + let mut typos_found = false; - for entry in walk.build() { - let entry = entry?; - if entry.file_type().map(|t| t.is_file()).unwrap_or(true) { - let explicit = entry.depth() == 0; - if checks.check_filename(entry.path(), options.format.report())? { - typos_found = true; - } - if checks.check_file(entry.path(), explicit, options.format.report())? 
{ - typos_found = true; + for path in args.path.iter() { + let path = path.canonicalize()?; + let cwd = if path.is_file() { + path.parent().unwrap() + } else { + path.as_path() + }; + + let mut config = config.clone(); + if !args.isolated { + let derived = config::Config::derive(cwd)?; + config.update(&derived); + } + config.update(&args.config); + config.default.update(&args.overrides); + let config = config; + + let dictionary = typos::BuiltIn::new(); + + let parser = typos::tokens::ParserBuilder::new() + .ignore_hex(config.default.ignore_hex()) + .include_digits(config.default.identifier_include_digits()) + .include_chars(config.default.identifier_include_chars().to_owned()) + .build(); + + let checks = typos::checks::CheckSettings::new() + .check_filenames(config.default.check_filename()) + .check_files(config.default.check_file()) + .binary(config.files.binary()) + .build(&dictionary, &parser); + + let mut walk = ignore::WalkBuilder::new(path); + walk.hidden(config.files.ignore_hidden()) + .ignore(config.files.ignore_dot()) + .git_global(config.files.ignore_global()) + .git_ignore(config.files.ignore_vcs()) + .git_exclude(config.files.ignore_vcs()) + .parents(config.files.ignore_parent()); + for entry in walk.build() { + let entry = entry?; + if entry.file_type().map(|t| t.is_file()).unwrap_or(true) { + let explicit = entry.depth() == 0; + if checks.check_filename(entry.path(), args.format.report())? { + typos_found = true; + } + if checks.check_file(entry.path(), explicit, args.format.report())? 
{ + typos_found = true; + } } } } diff --git a/src/report.rs b/src/report.rs index 23b5c47..0c6adc3 100644 --- a/src/report.rs +++ b/src/report.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use std::io::{self, Write}; -#[derive(Clone, Debug, Serialize)] +#[derive(Clone, Debug, serde::Serialize, derive_more::From)] #[serde(rename_all = "snake_case")] #[serde(tag = "type")] pub enum Message<'m> { @@ -10,32 +10,15 @@ pub enum Message<'m> { FilenameCorrection(FilenameCorrection<'m>), } -impl<'m> From> for Message<'m> { - fn from(msg: BinaryFile<'m>) -> Self { - Message::BinaryFile(msg) - } -} - -impl<'m> From> for Message<'m> { - fn from(msg: Correction<'m>) -> Self { - Message::Correction(msg) - } -} - -impl<'m> From> for Message<'m> { - fn from(msg: FilenameCorrection<'m>) -> Self { - Message::FilenameCorrection(msg) - } -} - -#[derive(Clone, Debug, Serialize)] +#[derive(Clone, Debug, serde::Serialize, derive_more::Display)] +#[display(fmt = "Skipping binary file {}", "path.display()")] pub struct BinaryFile<'m> { pub path: &'m std::path::Path, #[serde(skip)] pub(crate) non_exhaustive: (), } -#[derive(Clone, Debug, Serialize)] +#[derive(Clone, Debug, serde::Serialize)] pub struct Correction<'m> { pub path: &'m std::path::Path, #[serde(skip)] @@ -48,7 +31,7 @@ pub struct Correction<'m> { pub(crate) non_exhaustive: (), } -#[derive(Clone, Debug, Serialize)] +#[derive(Clone, Debug, serde::Serialize)] pub struct FilenameCorrection<'m> { pub path: &'m std::path::Path, pub typo: &'m str, @@ -64,7 +47,7 @@ pub fn print_silent(_: Message) {} pub fn print_brief(msg: Message) { match msg { Message::BinaryFile(msg) => { - println!("Skipping binary file {}", msg.path.display(),); + println!("{}", msg); } Message::Correction(msg) => { println!( @@ -85,7 +68,7 @@ pub fn print_brief(msg: Message) { pub fn print_long(msg: Message) { match msg { Message::BinaryFile(msg) => { - println!("Skipping binary file {}", msg.path.display(),); + println!("{}", msg); } Message::Correction(msg) => 
print_long_correction(msg), Message::FilenameCorrection(msg) => { diff --git a/src/tokens.rs b/src/tokens.rs index e91824a..421c59c 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -6,9 +6,11 @@ pub enum Case { None, } -#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ParserBuilder { ignore_hex: bool, + include_digits: bool, + include_chars: String, } impl ParserBuilder { @@ -21,10 +23,30 @@ impl ParserBuilder { self } + pub fn include_digits(&mut self, yes: bool) -> &mut Self { + self.include_digits = yes; + self + } + + pub fn include_chars(&mut self, chars: String) -> &mut Self { + self.include_chars = chars; + self + } + pub fn build(&self) -> Parser { - let pattern = r#"\b(\p{Alphabetic}|\d|_|')+\b"#; - let words_str = regex::Regex::new(pattern).unwrap(); - let words_bytes = regex::bytes::Regex::new(pattern).unwrap(); + let mut pattern = r#"\b(\p{Alphabetic}"#.to_owned(); + if self.include_digits { + pattern.push_str(r#"|\d"#); + } + for grapheme in + unicode_segmentation::UnicodeSegmentation::graphemes(self.include_chars.as_str(), true) + { + let escaped = regex::escape(&grapheme); + pattern.push_str(&format!("|{}", escaped)); + } + pattern.push_str(r#")+\b"#); + let words_str = regex::Regex::new(&pattern).unwrap(); + let words_bytes = regex::bytes::Regex::new(&pattern).unwrap(); Parser { words_str, words_bytes, @@ -33,6 +55,16 @@ impl ParserBuilder { } } +impl Default for ParserBuilder { + fn default() -> Self { + Self { + ignore_hex: true, + include_digits: true, + include_chars: "_'".to_owned(), + } + } +} + #[derive(Debug, Clone)] pub struct Parser { words_str: regex::Regex,