From af90817e502bdb4780b71a31f8a0c2d9a575587a Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 17 Mar 2023 22:40:55 -0500 Subject: [PATCH] feat(dict): extend-ignore-identifiers-re support This opens the door for users to provide patterns for identifiers that are always valid. The key limitation is "identifiers". Run `typos --identifiers` to verify what you are trying to write the regex for. Fixes #651 --- Cargo.lock | 20 ++++++++++++++++---- README.md | 6 ++++++ crates/typos-cli/Cargo.toml | 2 ++ crates/typos-cli/src/config.rs | 26 +++++++++++++++++++++++++- crates/typos-cli/src/dict.rs | 12 ++++++++++++ docs/reference.md | 1 + 6 files changed, 62 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ede6dfb..9dfbd9d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1299,9 +1299,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.6.0" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +checksum = "cce168fea28d3e05f158bda4576cf0c844d5045bc2cc3620fa0292ed5bb5814c" dependencies = [ "aho-corasick", "memchr", @@ -1316,9 +1316,9 @@ checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-syntax" -version = "0.6.27" +version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "rustc-demangle" @@ -1407,6 +1407,16 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_regex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8136f1a4ea815d7eac4101cfd0b16dc0cb5e1fe1b8609dfd728058656b7badf" +dependencies = [ + "regex", + "serde", +] + [[package]] name = "serde_spanned" version = "0.6.1" @@ -1674,8 +1684,10 @@ dependencies = [ "maplit", 
"once_cell", "proc-exit", + "regex", "serde", "serde_json", + "serde_regex", "thread_local", "toml", "trycmd", diff --git a/README.md b/README.md index e209f6a..c9eac69 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,12 @@ Sometimes, what looks like a typo is intentional, like with people's names, acro To mark a word or an identifier (grouping of words) as valid, add it your [`_typos.toml`](docs/reference.md) by declaring itself as the valid spelling: ```toml +[default] +extend-ignore-identifiers-re = [ + # *sigh* this just isn't worth the cost of fixing + "AttributeID.*Supress.*", +] + [default.extend-identifiers] # *sigh* this just isn't worth the cost of fixing AttributeIDSupressMenu = "AttributeIDSupressMenu" diff --git a/crates/typos-cli/Cargo.toml b/crates/typos-cli/Cargo.toml index f2a548c..0ce6e33 100644 --- a/crates/typos-cli/Cargo.toml +++ b/crates/typos-cli/Cargo.toml @@ -80,6 +80,8 @@ thread_local = "1.1.7" globset = "0.4.10" anstyle = "0.3.1" anstream = "0.2.0" +serde_regex = "1.1.0" +regex = "1.7.2" [dev-dependencies] assert_fs = "1.0" diff --git a/crates/typos-cli/src/config.rs b/crates/typos-cli/src/config.rs index f150bed..4e366b0 100644 --- a/crates/typos-cli/src/config.rs +++ b/crates/typos-cli/src/config.rs @@ -390,12 +390,14 @@ impl TokenizerConfig { } } -#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] #[serde(deny_unknown_fields)] #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct DictConfig { pub locale: Option<Locale>, + #[serde(with = "serde_regex")] + pub extend_ignore_identifiers_re: Vec<regex::Regex>, pub extend_identifiers: HashMap, pub extend_words: HashMap, } @@ -405,6 +407,7 @@ impl DictConfig { let empty = Self::default(); Self { locale: Some(empty.locale()), + extend_ignore_identifiers_re: Default::default(), extend_identifiers: Default::default(), extend_words: Default::default(), } @@ -414,6 +417,8 @@ impl DictConfig { if let 
Some(source) = source.locale { self.locale = Some(source); } + self.extend_ignore_identifiers_re + .extend(source.extend_ignore_identifiers_re.iter().cloned()); self.extend_identifiers.extend( source .extend_identifiers @@ -432,6 +437,10 @@ impl DictConfig { self.locale.unwrap_or_default() } + pub fn extend_ignore_identifiers_re(&self) -> Box<dyn Iterator<Item = &regex::Regex> + '_> { + Box::new(self.extend_ignore_identifiers_re.iter()) + } + pub fn extend_identifiers(&self) -> Box + '_> { Box::new( self.extend_identifiers @@ -460,6 +469,21 @@ fn find_project_file(dir: &std::path::Path, names: &[&str]) -> Option [...] +impl PartialEq for DictConfig { + fn eq(&self, rhs: &Self) -> bool { + self.locale == rhs.locale + && self + .extend_ignore_identifiers_re + .iter() + .map(|r| r.as_str()) + .eq(rhs.extend_ignore_identifiers_re.iter().map(|r| r.as_str())) + && self.extend_identifiers == rhs.extend_identifiers + && self.extend_words == rhs.extend_words + } +} + +impl Eq for DictConfig {} + #[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[serde(rename_all = "kebab-case")] pub enum Locale { diff --git a/crates/typos-cli/src/dict.rs b/crates/typos-cli/src/dict.rs index f3a28f9..612f07e 100644 --- a/crates/typos-cli/src/dict.rs +++ b/crates/typos-cli/src/dict.rs @@ -199,6 +199,7 @@ fn case_correct(correction: &mut Cow<'_, str>, case: Case) { } pub struct Override<'i, 'w, D> { + ignored_identifiers: Vec<regex::Regex>, identifiers: HashMap<&'i str, Status<'i>, ahash::RandomState>, words: HashMap, Status<'w>, ahash::RandomState>, inner: D, @@ -207,12 +208,17 @@ pub struct Override<'i, 'w, D> { impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> { pub fn new(inner: D) -> Self { Self { + ignored_identifiers: Default::default(), identifiers: Default::default(), words: Default::default(), inner, } } + pub fn ignored_identifiers<'r>(&mut self, ignored: impl Iterator<Item = &'r regex::Regex>) { + self.ignored_identifiers.extend(ignored.cloned()); + } + pub fn identifiers>(&mut self, identifiers: I) { self.identifiers = Self::interpret(identifiers).collect(); } @@ -241,6 +247,12 @@ 
impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> { impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> { fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Option<Status<'s>> { + for ignored in &self.ignored_identifiers { + if ignored.is_match(ident.token()) { + return None; + } + } + // Skip hashing if we can if !self.identifiers.is_empty() { self.identifiers diff --git a/docs/reference.md b/docs/reference.md index 24e422a..4ea0251 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -28,6 +28,7 @@ Configuration is read from the following (in precedence order) | default.unicode | --unicode | bool | Allow unicode characters in identifiers (and not just ASCII) | | default.locale | --locale | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. | | default.extend-identifiers | \- | table of strings | Corrections for [identifiers](./design.md#identifiers-and-words). When the correction is blank, the identifier is never valid. When the correction is the key, the identifier is always valid. | +| default.extend-ignore-identifiers-re | \- | list of [regexes](https://docs.rs/regex/latest/regex/index.html#syntax) | Pattern-match always-valid identifiers | | default.extend-words | \- | table of strings | Corrections for [words](./design.md#identifiers-and-words). When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. | | type.\.\ | \ | \ | See `default.` for child keys. Run with `--type-list` to see available ``s | | type.\.extend_globs | \- | list of strings | File globs for matching `` |