mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-25 10:31:02 -05:00
feat(dict): Override builtin dictionary
Sometimes you just have to live with a typo or it's done intentionally (like weird company names). With this commit, a user can now identify blessed identifiers and words. This is mostly what is needed for #9 but sometimes people will have common typos that they'll want to provide corrections for.
This commit is contained in:
parent
977606d398
commit
043692afe0
4 changed files with 92 additions and 1 deletions
|
@ -29,4 +29,6 @@ Configuration is read from the following (in precedence order)
|
|||
| default.identifier-include-digits | \- | bool | Allow identifiers to include digits, in addition to letters. |
|
||||
| default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. |
|
||||
| default.identifier-include-chars | \- | string | Allow identifiers to include these characters. |
|
||||
| default.locale | \- | en, en-us, en-gb, en-ca, en-au | |
|
||||
| default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
|
||||
| default.extend-valid-identifiers | \- | list of strings | Identifiers to presume as correct, skipping spell checking. This extends the list when layering configuration, rather than replacing it. |
|
||||
| default.extend-valid-words | \- | list of strings | Words to presume as correct, skipping spell checking. This extends the list when layering configuration, rather than replacing it. |
|
||||
|
|
|
@ -96,6 +96,14 @@ pub trait FileSource {
|
|||
fn locale(&self) -> Option<Locale> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Identifiers this source asks to have presumed correct.
///
/// The default implementation contributes nothing (an empty slice).
fn extend_valid_identifiers(&self) -> &[String] {
|
||||
&[]
|
||||
}
|
||||
|
||||
/// Words this source asks to have presumed correct.
///
/// The default implementation contributes nothing (an empty slice).
fn extend_valid_words(&self) -> &[String] {
|
||||
&[]
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
|
||||
|
@ -287,6 +295,8 @@ pub struct FileConfig {
|
|||
pub identifier_include_digits: Option<bool>,
|
||||
pub identifier_include_chars: Option<String>,
|
||||
pub locale: Option<Locale>,
|
||||
pub extend_valid_identifiers: Vec<String>,
|
||||
pub extend_valid_words: Vec<String>,
|
||||
}
|
||||
|
||||
impl FileConfig {
|
||||
|
@ -315,6 +325,10 @@ impl FileConfig {
|
|||
if let Some(source) = source.locale() {
|
||||
self.locale = Some(source);
|
||||
}
|
||||
self.extend_valid_identifiers
|
||||
.extend(source.extend_valid_identifiers().iter().cloned());
|
||||
self.extend_valid_words
|
||||
.extend(source.extend_valid_words().iter().cloned());
|
||||
}
|
||||
|
||||
pub fn check_filename(&self) -> bool {
|
||||
|
@ -348,6 +362,14 @@ impl FileConfig {
|
|||
pub fn locale(&self) -> Locale {
|
||||
self.locale.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Returns the stored `extend_valid_identifiers` entries as a borrowed slice.
pub fn extend_valid_identifiers(&self) -> &[String] {
|
||||
self.extend_valid_identifiers.as_slice()
|
||||
}
|
||||
|
||||
/// Returns the stored `extend_valid_words` entries as a borrowed slice.
pub fn extend_valid_words(&self) -> &[String] {
|
||||
self.extend_valid_words.as_slice()
|
||||
}
|
||||
}
|
||||
|
||||
impl FileSource for FileConfig {
|
||||
|
@ -382,6 +404,14 @@ impl FileSource for FileConfig {
|
|||
fn locale(&self) -> Option<Locale> {
|
||||
self.locale
|
||||
}
|
||||
|
||||
/// Exposes this config's `extend_valid_identifiers` list through the trait,
/// replacing the trait's empty default.
fn extend_valid_identifiers(&self) -> &[String] {
|
||||
self.extend_valid_identifiers.as_slice()
|
||||
}
|
||||
|
||||
/// Exposes this config's `extend_valid_words` list through the trait,
/// replacing the trait's empty default.
fn extend_valid_words(&self) -> &[String] {
|
||||
self.extend_valid_words.as_slice()
|
||||
}
|
||||
}
|
||||
|
||||
fn find_project_file(dir: std::path::PathBuf, name: &str) -> Option<std::path::PathBuf> {
|
||||
|
|
44
src/dict.rs
44
src/dict.rs
|
@ -1,4 +1,5 @@
|
|||
use std::borrow::Cow;
|
||||
use std::collections::HashSet;
|
||||
|
||||
use unicase::UniCase;
|
||||
|
||||
|
@ -130,6 +131,49 @@ fn case_correct(correction: &str, case: Case) -> Cow<'_, str> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Wraps an inner value `D` together with sets of identifiers and words that
/// are accepted without consulting it.
///
/// The sets borrow their strings (`'i` for identifiers, `'w` for words), so
/// the backing strings must outlive this value.
pub struct Override<'i, 'w, D> {
|
||||
// Identifiers accepted as-is; membership is an exact string match.
valid_identifiers: HashSet<&'i str>,
|
||||
// Words accepted as-is; keys are wrapped in `UniCase`
// (presumably so matching ignores case — confirm against the unicase crate).
valid_words: HashSet<unicase::UniCase<&'w str>>,
|
||||
// Wrapped value that handles everything not found in the sets above.
inner: D,
|
||||
}
|
||||
|
||||
impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> {
|
||||
/// Wraps `inner` with empty override sets.
pub fn new(inner: D) -> Self {
|
||||
Self {
|
||||
valid_identifiers: Default::default(),
|
||||
valid_words: Default::default(),
|
||||
inner,
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the accepted identifiers to the items yielded by `valid_identifiers`.
///
/// This replaces any previously stored set rather than adding to it.
pub fn valid_identifiers<I: Iterator<Item = &'i str>>(&mut self, valid_identifiers: I) {
|
||||
self.valid_identifiers = valid_identifiers.collect();
|
||||
}
|
||||
|
||||
/// Sets the accepted words to the items yielded by `valid_words`, each
/// wrapped in `UniCase`.
///
/// This replaces any previously stored set rather than adding to it.
pub fn valid_words<I: Iterator<Item = &'w str>>(&mut self, valid_words: I) {
|
||||
self.valid_words = valid_words.map(UniCase::new).collect();
|
||||
}
|
||||
}
|
||||
|
||||
impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> {
|
||||
/// Returns no corrections when `ident` is in the accepted-identifier set;
/// otherwise defers to the inner dictionary.
fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Vec<Cow<'s, str>> {
|
||||
if self.valid_identifiers.contains(ident.token()) {
|
||||
// Accepted identifier: report nothing to correct.
Vec::new()
|
||||
} else {
|
||||
self.inner.correct_ident(ident)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns no corrections when `word` is in the accepted-word set;
/// otherwise defers to the inner dictionary.
fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Vec<Cow<'s, str>> {
|
||||
// Wrap the token the same way the stored keys were wrapped so the lookup types match.
let w = UniCase::new(word.token());
|
||||
if self.valid_words.contains(&w) {
|
||||
// Accepted word: report nothing to correct.
Vec::new()
|
||||
} else {
|
||||
self.inner.correct_word(word)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
|
15
src/main.rs
15
src/main.rs
|
@ -57,6 +57,21 @@ fn run() -> Result<i32, anyhow::Error> {
|
|||
.build();
|
||||
|
||||
let dictionary = crate::dict::BuiltIn::new(config.default.locale());
|
||||
let mut dictionary = crate::dict::Override::new(dictionary);
|
||||
dictionary.valid_identifiers(
|
||||
config
|
||||
.default
|
||||
.extend_valid_identifiers()
|
||||
.iter()
|
||||
.map(|s| s.as_str()),
|
||||
);
|
||||
dictionary.valid_words(
|
||||
config
|
||||
.default
|
||||
.extend_valid_words()
|
||||
.iter()
|
||||
.map(|s| s.as_str()),
|
||||
);
|
||||
|
||||
let mut settings = typos::checks::TyposSettings::new();
|
||||
settings
|
||||
|
|
Loading…
Reference in a new issue