From be8628fcb3cc6d8248685da6a8880cd30a3d9418 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 16 Oct 2023 12:37:00 -0500 Subject: [PATCH] feat(config): Allow ignoring words by regex Fixes #852 --- crates/typos-cli/src/config.rs | 14 +++++++++++ crates/typos-cli/src/dict.rs | 25 ++++++++++++++----- crates/typos-cli/src/policy.rs | 1 + .../cmd/extend-ignore-words-re.in/_typos.toml | 9 +++++++ .../cmd/extend-ignore-words-re.in/file.fail | 1 + .../cmd/extend-ignore-words-re.in/file.ignore | 1 + .../tests/cmd/extend-ignore-words-re.toml | 12 +++++++++ docs/reference.md | 1 + 8 files changed, 58 insertions(+), 6 deletions(-) create mode 100644 crates/typos-cli/tests/cmd/extend-ignore-words-re.in/_typos.toml create mode 100644 crates/typos-cli/tests/cmd/extend-ignore-words-re.in/file.fail create mode 100644 crates/typos-cli/tests/cmd/extend-ignore-words-re.in/file.ignore create mode 100644 crates/typos-cli/tests/cmd/extend-ignore-words-re.toml diff --git a/crates/typos-cli/src/config.rs b/crates/typos-cli/src/config.rs index 9451aeb..b75298e 100644 --- a/crates/typos-cli/src/config.rs +++ b/crates/typos-cli/src/config.rs @@ -425,6 +425,8 @@ pub struct DictConfig { #[serde(with = "serde_regex")] pub extend_ignore_identifiers_re: Vec, pub extend_identifiers: HashMap, + #[serde(with = "serde_regex")] + pub extend_ignore_words_re: Vec, pub extend_words: HashMap, } @@ -435,6 +437,7 @@ impl DictConfig { locale: Some(empty.locale()), extend_ignore_identifiers_re: Default::default(), extend_identifiers: Default::default(), + extend_ignore_words_re: Default::default(), extend_words: Default::default(), } } @@ -451,6 +454,8 @@ impl DictConfig { .iter() .map(|(key, value)| (key.clone(), value.clone())), ); + self.extend_ignore_words_re + .extend(source.extend_ignore_words_re.iter().cloned()); self.extend_words.extend( source .extend_words @@ -475,6 +480,10 @@ impl DictConfig { ) } + pub fn extend_ignore_words_re(&self) -> Box + '_> { + Box::new(self.extend_ignore_words_re.iter()) + } + pub fn extend_words(&self) -> Box + '_> { Box::new( self.extend_words @@ -503,6 +512,11 @@ impl PartialEq for DictConfig { .map(|r| r.as_str()) .eq(rhs.extend_ignore_identifiers_re.iter().map(|r| r.as_str())) && self.extend_identifiers == rhs.extend_identifiers + && self + .extend_ignore_words_re + .iter() + .map(|r| r.as_str()) + .eq(rhs.extend_ignore_words_re.iter().map(|r| r.as_str())) && self.extend_words == rhs.extend_words } } diff --git a/crates/typos-cli/src/dict.rs b/crates/typos-cli/src/dict.rs index 57cb362..5adf584 100644 --- a/crates/typos-cli/src/dict.rs +++ b/crates/typos-cli/src/dict.rs @@ -216,6 +216,7 @@ fn case_correct(correction: &mut Cow<'_, str>, case: Case) { pub struct Override<'i, 'w, D> { ignored_identifiers: Vec, identifiers: HashMap<&'i str, Status<'i>, ahash::RandomState>, + ignored_words: Vec, words: HashMap, Status<'w>, ahash::RandomState>, inner: D, } @@ -225,6 +226,7 @@ impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> { Self { ignored_identifiers: Default::default(), identifiers: Default::default(), + ignored_words: Default::default(), words: Default::default(), inner, } @@ -238,6 +240,10 @@ impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> { self.identifiers = Self::interpret(identifiers).collect(); } + pub fn ignored_words<'r>(&mut self, ignored: impl Iterator) { + self.ignored_words.extend(ignored.cloned()); + } + pub fn words>(&mut self, words: I) { self.words = Self::interpret(words) .map(|(k, v)| (UniCase::new(k), v)) @@ -283,15 +289,22 @@ impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> { return None; } + for ignored in &self.ignored_words { + if ignored.is_match(word.token()) { + return Some(Status::Valid); + } + } + // Skip hashing if we can - let custom = if !self.words.is_empty() { + if !self.words.is_empty() { let w = UniCase::new(word.token()); // HACK: couldn't figure out the lifetime issue with replacing `cloned` with `borrow` - self.words.get(&w).cloned() - } else { - None - }; - custom.or_else(|| self.inner.correct_word(word)) + if let Some(status) = self.words.get(&w).cloned() { + return Some(status); + } + } + + self.inner.correct_word(word) } } diff --git a/crates/typos-cli/src/policy.rs b/crates/typos-cli/src/policy.rs index 8870341..b7e116c 100644 --- a/crates/typos-cli/src/policy.rs +++ b/crates/typos-cli/src/policy.rs @@ -255,6 +255,7 @@ impl<'s> ConfigEngine<'s> { .extend_identifiers() .map(|(k, v)| (self.storage.get(k), self.storage.get(v))), ); + dict.ignored_words(dict_config.extend_ignore_words_re()); dict.words( dict_config .extend_words() diff --git a/crates/typos-cli/tests/cmd/extend-ignore-words-re.in/_typos.toml b/crates/typos-cli/tests/cmd/extend-ignore-words-re.in/_typos.toml new file mode 100644 index 0000000..0f56c80 --- /dev/null +++ b/crates/typos-cli/tests/cmd/extend-ignore-words-re.in/_typos.toml @@ -0,0 +1,9 @@ +[default.extend-words] +hello = "goodbye" + +[type.fail] +extend-glob = ["*.fail"] + +[type.ignore] +extend-glob = ["*.ignore"] +extend-ignore-words-re = ["he.*"] diff --git a/crates/typos-cli/tests/cmd/extend-ignore-words-re.in/file.fail b/crates/typos-cli/tests/cmd/extend-ignore-words-re.in/file.fail new file mode 100644 index 0000000..ce01362 --- /dev/null +++ b/crates/typos-cli/tests/cmd/extend-ignore-words-re.in/file.fail @@ -0,0 +1 @@ +hello diff --git a/crates/typos-cli/tests/cmd/extend-ignore-words-re.in/file.ignore b/crates/typos-cli/tests/cmd/extend-ignore-words-re.in/file.ignore new file mode 100644 index 0000000..ce01362 --- /dev/null +++ b/crates/typos-cli/tests/cmd/extend-ignore-words-re.in/file.ignore @@ -0,0 +1 @@ +hello diff --git a/crates/typos-cli/tests/cmd/extend-ignore-words-re.toml b/crates/typos-cli/tests/cmd/extend-ignore-words-re.toml new file mode 100644 index 0000000..60502eb --- /dev/null +++ b/crates/typos-cli/tests/cmd/extend-ignore-words-re.toml @@ -0,0 +1,12 @@ +bin.name = "typos" +status.code = 2 +stdin = "" +stdout = """ +error: `hello` should be `goodbye` + --> ./file.fail:1:1 + | +1 | hello + | ^^^^^ + | +""" +stderr = "" diff --git a/docs/reference.md b/docs/reference.md index 7ff18cf..cd02095 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -34,6 +34,7 @@ Configuration is read from the following (in precedence order) | default.extend-identifiers | \- | table of strings | Corrections for [identifiers](./design.md#identifiers-and-words). When the correction is blank, the identifier is never valid. When the correction is the key, the identifier is always valid. | | default.extend-ignore-identifiers-re | \- | list of [regexes](https://docs.rs/regex/latest/regex/index.html#syntax) | Pattern-match always-valid identifiers | | default.extend-words | \- | table of strings | Corrections for [words](./design.md#identifiers-and-words). When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. | +| default.extend-ignore-words-re | \- | list of [regexes](https://docs.rs/regex/latest/regex/index.html#syntax) | Pattern-match always-valid words. Note: you must handle case insensitivity yourself | | type.\.\ | \ | \ | See `default.` for child keys. Run with `--type-list` to see available ``s | | type.\.extend-glob | \- | list of strings | File globs for matching `` |