feat(dict): Override builtin dictionary

Sometimes you just have to live with a typo, or it's done intentionally
(like weird company names).  With this commit, a user can now identify
blessed identifiers and words.

This is mostly what is needed for #9 but sometimes people will have
common typos that they'll want to provide corrections for.
This commit is contained in:
Ed Page 2020-09-02 20:12:49 -05:00
parent 977606d398
commit 043692afe0
4 changed files with 92 additions and 1 deletions

View file

@ -29,4 +29,6 @@ Configuration is read from the following (in precedence order)
| default.identifier-include-digits | \- | bool | Allow identifiers to include digits, in addition to letters. | | default.identifier-include-digits | \- | bool | Allow identifiers to include digits, in addition to letters. |
| default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. | | default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. |
| default.identifier-include-chars | \- | string | Allow identifiers to include these characters. | | default.identifier-include-chars | \- | string | Allow identifiers to include these characters. |
| default.locale | \- | en, en-us, en-gb, en-ca, en-au | | | default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
| default.extend-valid-identifiers | \- | list of strings | Identifiers to presume as correct, skipping spell checking. This extends the list when layering configuration, rather than replacing it. |
| default.extend-valid-words | \- | list of strings | Words to presume as correct, skipping spell checking. This extends the list when layering configuration, rather than replacing it. |

View file

@ -96,6 +96,14 @@ pub trait FileSource {
fn locale(&self) -> Option<Locale> { fn locale(&self) -> Option<Locale> {
None None
} }
/// Identifiers this source wants presumed correct, skipping spell checking.
///
/// The default implementation contributes none.
fn extend_valid_identifiers(&self) -> &[String] {
    const NO_IDENTIFIERS: &[String] = &[];
    NO_IDENTIFIERS
}
/// Words this source wants presumed correct, skipping spell checking.
///
/// The default implementation contributes none.
fn extend_valid_words(&self) -> &[String] {
    const NO_WORDS: &[String] = &[];
    NO_WORDS
}
} }
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] #[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
@ -287,6 +295,8 @@ pub struct FileConfig {
pub identifier_include_digits: Option<bool>, pub identifier_include_digits: Option<bool>,
pub identifier_include_chars: Option<String>, pub identifier_include_chars: Option<String>,
pub locale: Option<Locale>, pub locale: Option<Locale>,
pub extend_valid_identifiers: Vec<String>,
pub extend_valid_words: Vec<String>,
} }
impl FileConfig { impl FileConfig {
@ -315,6 +325,10 @@ impl FileConfig {
if let Some(source) = source.locale() { if let Some(source) = source.locale() {
self.locale = Some(source); self.locale = Some(source);
} }
self.extend_valid_identifiers
.extend(source.extend_valid_identifiers().iter().cloned());
self.extend_valid_words
.extend(source.extend_valid_words().iter().cloned());
} }
pub fn check_filename(&self) -> bool { pub fn check_filename(&self) -> bool {
@ -348,6 +362,14 @@ impl FileConfig {
pub fn locale(&self) -> Locale { pub fn locale(&self) -> Locale {
self.locale.unwrap_or_default() self.locale.unwrap_or_default()
} }
/// Borrows the identifiers accumulated in `extend_valid_identifiers`.
///
/// The slice is empty when nothing has been added to the field.
pub fn extend_valid_identifiers(&self) -> &[String] {
    // `&Vec<String>` deref-coerces to `&[String]`.
    &self.extend_valid_identifiers
}
/// Borrows the words accumulated in `extend_valid_words`.
///
/// The slice is empty when nothing has been added to the field.
pub fn extend_valid_words(&self) -> &[String] {
    // `&Vec<String>` deref-coerces to `&[String]`.
    &self.extend_valid_words
}
} }
impl FileSource for FileConfig { impl FileSource for FileConfig {
@ -382,6 +404,14 @@ impl FileSource for FileConfig {
fn locale(&self) -> Option<Locale> { fn locale(&self) -> Option<Locale> {
self.locale self.locale
} }
/// Exposes this config's own `extend_valid_identifiers` list through the
/// `FileSource` interface.
fn extend_valid_identifiers(&self) -> &[String] {
    // `&Vec<String>` deref-coerces to `&[String]`.
    &self.extend_valid_identifiers
}
/// Exposes this config's own `extend_valid_words` list through the
/// `FileSource` interface.
fn extend_valid_words(&self) -> &[String] {
    // `&Vec<String>` deref-coerces to `&[String]`.
    &self.extend_valid_words
}
} }
fn find_project_file(dir: std::path::PathBuf, name: &str) -> Option<std::path::PathBuf> { fn find_project_file(dir: std::path::PathBuf, name: &str) -> Option<std::path::PathBuf> {

View file

@ -1,4 +1,5 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashSet;
use unicase::UniCase; use unicase::UniCase;
@ -130,6 +131,49 @@ fn case_correct(correction: &str, case: Case) -> Cow<'_, str> {
} }
} }
pub struct Override<'i, 'w, D> {
valid_identifiers: HashSet<&'i str>,
valid_words: HashSet<unicase::UniCase<&'w str>>,
inner: D,
}
impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> {
pub fn new(inner: D) -> Self {
Self {
valid_identifiers: Default::default(),
valid_words: Default::default(),
inner,
}
}
pub fn valid_identifiers<I: Iterator<Item = &'i str>>(&mut self, valid_identifiers: I) {
self.valid_identifiers = valid_identifiers.collect();
}
pub fn valid_words<I: Iterator<Item = &'w str>>(&mut self, valid_words: I) {
self.valid_words = valid_words.map(UniCase::new).collect();
}
}
impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> {
    /// Returns an empty correction list for a blessed identifier; otherwise
    /// returns whatever the wrapped dictionary suggests.
    fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Vec<Cow<'s, str>> {
        // Blessed identifiers never reach the wrapped dictionary.
        if self.valid_identifiers.contains(ident.token()) {
            return Vec::new();
        }
        self.inner.correct_ident(ident)
    }

    /// Returns an empty correction list for a blessed word; otherwise returns
    /// whatever the wrapped dictionary suggests.
    ///
    /// The word is wrapped in `UniCase` for the lookup, matching how the set
    /// was built.
    fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Vec<Cow<'s, str>> {
        // Blessed words never reach the wrapped dictionary.
        if self.valid_words.contains(&UniCase::new(word.token())) {
            return Vec::new();
        }
        self.inner.correct_word(word)
    }
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;

View file

@ -57,6 +57,21 @@ fn run() -> Result<i32, anyhow::Error> {
.build(); .build();
let dictionary = crate::dict::BuiltIn::new(config.default.locale()); let dictionary = crate::dict::BuiltIn::new(config.default.locale());
let mut dictionary = crate::dict::Override::new(dictionary);
dictionary.valid_identifiers(
config
.default
.extend_valid_identifiers()
.iter()
.map(|s| s.as_str()),
);
dictionary.valid_words(
config
.default
.extend_valid_words()
.iter()
.map(|s| s.as_str()),
);
let mut settings = typos::checks::TyposSettings::new(); let mut settings = typos::checks::TyposSettings::new();
settings settings