feat: Custom dictionary support

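Switch the `extend-valid-*` settings to `extend-*` tables that map each typo
to its correction, with support for always-valid entries (the correction is
the key itself) and never-valid entries (the correction is blank).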

Fixes #9
This commit is contained in:
Ed Page 2020-10-24 21:17:16 -05:00
parent 79d10d6d24
commit 527b9837b4
11 changed files with 354 additions and 227 deletions

View file

@ -10,7 +10,7 @@ schedules:
include: include:
- master - master
variables: variables:
minrust: 1.40.0 minrust: 1.42.0
codecov_token: $(CODECOV_TOKEN_SECRET) codecov_token: $(CODECOV_TOKEN_SECRET)
windows_vm: vs2017-win2016 windows_vm: vs2017-win2016
mac_vm: macos-10.14 mac_vm: macos-10.14

View file

@ -13,7 +13,9 @@ fn correct_word_hit(b: &mut test::Bencher) {
let input = typos::tokens::Word::new("successs", 0).unwrap(); let input = typos::tokens::Word::new("successs", 0).unwrap();
assert_eq!( assert_eq!(
corrections.correct_word(input), corrections.correct_word(input),
vec![std::borrow::Cow::Borrowed("successes")] Some(typos::Status::Corrections(vec![
std::borrow::Cow::Borrowed("successes")
]))
); );
b.iter(|| corrections.correct_word(input)); b.iter(|| corrections.correct_word(input));
} }
@ -22,6 +24,6 @@ fn correct_word_hit(b: &mut test::Bencher) {
fn correct_word_miss(b: &mut test::Bencher) { fn correct_word_miss(b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let input = typos::tokens::Word::new("success", 0).unwrap(); let input = typos::tokens::Word::new("success", 0).unwrap();
assert!(corrections.correct_word(input).is_empty()); assert!(corrections.correct_word(input).is_none());
b.iter(|| corrections.correct_word(input)); b.iter(|| corrections.correct_word(input));
} }

View file

@ -3,6 +3,7 @@ use bstr::ByteSlice;
use crate::report; use crate::report;
use crate::tokens; use crate::tokens;
use crate::Dictionary; use crate::Dictionary;
use crate::Status;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct TyposSettings { pub struct TyposSettings {
@ -233,16 +234,20 @@ impl Checks {
dictionary: &dyn Dictionary, dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<bool, crate::Error> { ) -> Result<bool, crate::Error> {
let mut typos_found = false;
if !self.check_filenames { if !self.check_filenames {
return Ok(typos_found); return Ok(false);
} }
if let Some(part) = path.file_name().and_then(|s| s.to_str()) { let mut typos_found = false;
for ident in parser.parse(part) { for ident in path
let corrections = dictionary.correct_ident(ident); .file_name()
if !corrections.is_empty() { .and_then(|s| s.to_str())
.iter()
.flat_map(|part| parser.parse(part))
{
match dictionary.correct_ident(ident) {
Some(Status::Valid) => {}
Some(corrections) => {
let byte_offset = ident.offset(); let byte_offset = ident.offset();
let msg = report::PathCorrection { let msg = report::PathCorrection {
path, path,
@ -251,18 +256,22 @@ impl Checks {
corrections, corrections,
}; };
typos_found |= reporter.report(msg.into()); typos_found |= reporter.report(msg.into());
} else { }
None => {
for word in ident.split() { for word in ident.split() {
let corrections = dictionary.correct_word(word); match dictionary.correct_word(word) {
if !corrections.is_empty() { Some(Status::Valid) => {}
let byte_offset = word.offset(); Some(corrections) => {
let msg = report::PathCorrection { let byte_offset = word.offset();
path, let msg = report::PathCorrection {
byte_offset, path,
typo: word.token(), byte_offset,
corrections, typo: word.token(),
}; corrections,
typos_found |= reporter.report(msg.into()); };
typos_found |= reporter.report(msg.into());
}
None => {}
} }
} }
} }
@ -305,32 +314,38 @@ impl Checks {
for (line_idx, line) in buffer.lines().enumerate() { for (line_idx, line) in buffer.lines().enumerate() {
let line_num = line_idx + 1; let line_num = line_idx + 1;
for ident in parser.parse_bytes(line) { for ident in parser.parse_bytes(line) {
let corrections = dictionary.correct_ident(ident); match dictionary.correct_ident(ident) {
if !corrections.is_empty() { Some(Status::Valid) => {}
let byte_offset = ident.offset(); Some(corrections) => {
let msg = report::Correction { let byte_offset = ident.offset();
path, let msg = report::Correction {
line, path,
line_num, line,
byte_offset, line_num,
typo: ident.token(), byte_offset,
corrections, typo: ident.token(),
}; corrections,
typos_found |= reporter.report(msg.into()); };
} else { typos_found |= reporter.report(msg.into());
for word in ident.split() { }
let corrections = dictionary.correct_word(word); None => {
if !corrections.is_empty() { for word in ident.split() {
let byte_offset = word.offset(); match dictionary.correct_word(word) {
let msg = report::Correction { Some(Status::Valid) => {}
path, Some(corrections) => {
line, let byte_offset = word.offset();
line_num, let msg = report::Correction {
byte_offset, path,
typo: word.token(), line,
corrections, line_num,
}; byte_offset,
typos_found |= reporter.report(msg.into()); typo: word.token(),
corrections,
};
typos_found |= reporter.report(msg.into());
}
None => {}
}
} }
} }
} }

View file

@ -1,7 +1,49 @@
use std::borrow::Cow; use std::borrow::Cow;
pub trait Dictionary: Send + Sync { #[derive(Clone, PartialEq, Eq, Debug, serde::Serialize, derive_more::From)]
fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>) -> Vec<Cow<'s, str>>; #[serde(rename_all = "snake_case")]
#[serde(untagged)]
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Vec<Cow<'s, str>>; pub enum Status<'c> {
Valid,
Invalid,
Corrections(Vec<Cow<'c, str>>),
}
impl<'c> Status<'c> {
    /// Returns `true` only for the [`Status::Invalid`] variant.
    pub fn is_invalid(&self) -> bool {
        match self {
            Status::Invalid => true,
            Status::Valid | Status::Corrections(_) => false,
        }
    }

    /// Returns `true` only for the [`Status::Valid`] variant.
    pub fn is_valid(&self) -> bool {
        match self {
            Status::Valid => true,
            Status::Invalid | Status::Corrections(_) => false,
        }
    }

    /// Returns `true` only for the [`Status::Corrections`] variant,
    /// regardless of how many corrections it carries.
    pub fn is_correction(&self) -> bool {
        match self {
            Status::Corrections(_) => true,
            Status::Valid | Status::Invalid => false,
        }
    }

    /// Iterates mutably over the suggested corrections.
    ///
    /// Yields every entry of a [`Status::Corrections`] list in order; for
    /// the other variants the iterator is empty.
    pub fn corrections_mut(&mut self) -> impl Iterator<Item = &mut Cow<'c, str>> {
        // Both arms are unified as a slice so a single iterator type is returned.
        let entries: &mut [Cow<'c, str>] = match self {
            Status::Corrections(list) => list.as_mut_slice(),
            Status::Valid | Status::Invalid => &mut [],
        };
        entries.iter_mut()
    }

    /// Produces a `Status` whose corrections borrow from `self` instead of
    /// cloning the underlying strings.
    ///
    /// A new `Vec` is still allocated for [`Status::Corrections`]; the other
    /// variants carry no data and are reproduced as-is.
    pub fn borrow(&self) -> Status<'_> {
        match self {
            Status::Valid => Status::Valid,
            Status::Invalid => Status::Invalid,
            Status::Corrections(owned) => {
                Status::Corrections(owned.iter().map(|c| Cow::Borrowed(c.as_ref())).collect())
            }
        }
    }
}
pub trait Dictionary: Send + Sync {
fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>)
-> Option<Status<'s>>;
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
} }

View file

@ -1,6 +1,5 @@
#![allow(clippy::needless_update)] #![allow(clippy::needless_update)]
use std::borrow::Cow;
use std::io::{self, Write}; use std::io::{self, Write};
#[derive(Clone, Debug, serde::Serialize, derive_more::From)] #[derive(Clone, Debug, serde::Serialize, derive_more::From)]
@ -21,8 +20,8 @@ impl<'m> Message<'m> {
pub fn is_correction(&self) -> bool { pub fn is_correction(&self) -> bool {
match self { match self {
Message::BinaryFile(_) => false, Message::BinaryFile(_) => false,
Message::Correction(_) => true, Message::Correction(c) => c.corrections.is_correction(),
Message::PathCorrection(_) => true, Message::PathCorrection(c) => c.corrections.is_correction(),
Message::File(_) => false, Message::File(_) => false,
Message::Parse(_) => false, Message::Parse(_) => false,
Message::PathError(_) => false, Message::PathError(_) => false,
@ -59,7 +58,7 @@ pub struct Correction<'m> {
pub line_num: usize, pub line_num: usize,
pub byte_offset: usize, pub byte_offset: usize,
pub typo: &'m str, pub typo: &'m str,
pub corrections: Vec<Cow<'m, str>>, pub corrections: crate::Status<'m>,
} }
impl<'m> Default for Correction<'m> { impl<'m> Default for Correction<'m> {
@ -70,7 +69,7 @@ impl<'m> Default for Correction<'m> {
line_num: 0, line_num: 0,
byte_offset: 0, byte_offset: 0,
typo: "", typo: "",
corrections: Vec::new(), corrections: crate::Status::Invalid,
} }
} }
} }
@ -81,7 +80,7 @@ pub struct PathCorrection<'m> {
pub path: &'m std::path::Path, pub path: &'m std::path::Path,
pub byte_offset: usize, pub byte_offset: usize,
pub typo: &'m str, pub typo: &'m str,
pub corrections: Vec<Cow<'m, str>>, pub corrections: crate::Status<'m>,
} }
impl<'m> Default for PathCorrection<'m> { impl<'m> Default for PathCorrection<'m> {
@ -90,7 +89,7 @@ impl<'m> Default for PathCorrection<'m> {
path: std::path::Path::new("-"), path: std::path::Path::new("-"),
byte_offset: 0, byte_offset: 0,
typo: "", typo: "",
corrections: Vec::new(), corrections: crate::Status::Invalid,
} }
} }
} }
@ -196,24 +195,42 @@ impl Report for PrintBrief {
Message::BinaryFile(msg) => { Message::BinaryFile(msg) => {
log::info!("{}", msg); log::info!("{}", msg);
} }
Message::Correction(msg) => { Message::Correction(msg) => match &msg.corrections {
println!( crate::Status::Valid => {}
"{}:{}:{}: {} -> {}", crate::Status::Invalid => {
msg.path.display(), println!(
msg.line_num, "{}:{}:{}: {} is disallowed",
msg.byte_offset, msg.path.display(),
msg.typo, msg.line_num,
itertools::join(msg.corrections.iter(), ", ") msg.byte_offset,
); msg.typo,
} );
Message::PathCorrection(msg) => { }
println!( crate::Status::Corrections(corrections) => {
"{}: {} -> {}", println!(
msg.path.display(), "{}:{}:{}: {} -> {}",
msg.typo, msg.path.display(),
itertools::join(msg.corrections.iter(), ", ") msg.line_num,
); msg.byte_offset,
} msg.typo,
itertools::join(corrections.iter(), ", ")
);
}
},
Message::PathCorrection(msg) => match &msg.corrections {
crate::Status::Valid => {}
crate::Status::Invalid => {
println!("{}: {} is disallowed", msg.path.display(), msg.typo,);
}
crate::Status::Corrections(corrections) => {
println!(
"{}: {} -> {}",
msg.path.display(),
msg.typo,
itertools::join(corrections.iter(), ", ")
);
}
},
Message::File(msg) => { Message::File(msg) => {
println!("{}", msg.path.display()); println!("{}", msg.path.display());
} }
@ -241,14 +258,24 @@ impl Report for PrintLong {
log::info!("{}", msg); log::info!("{}", msg);
} }
Message::Correction(msg) => print_long_correction(msg), Message::Correction(msg) => print_long_correction(msg),
Message::PathCorrection(msg) => { Message::PathCorrection(msg) => match &msg.corrections {
println!( crate::Status::Valid => {}
"{}: error: `{}` should be {}", crate::Status::Invalid => {
msg.path.display(), println!(
msg.typo, "{}: error: `{}` is disallowed",
itertools::join(msg.corrections.iter().map(|c| format!("`{}`", c)), ", ") msg.path.display(),
); msg.typo,
} );
}
crate::Status::Corrections(corrections) => {
println!(
"{}: error: `{}` should be {}",
msg.path.display(),
msg.typo,
itertools::join(corrections.iter().map(|c| format!("`{}`", c)), ", ")
);
}
},
Message::File(msg) => { Message::File(msg) => {
println!("{}", msg.path.display()); println!("{}", msg.path.display());
} }
@ -278,14 +305,21 @@ fn print_long_correction(msg: &Correction) {
let stdout = io::stdout(); let stdout = io::stdout();
let mut handle = stdout.lock(); let mut handle = stdout.lock();
match &msg.corrections {
writeln!( crate::Status::Valid => {}
handle, crate::Status::Invalid => {
"error: `{}` should be {}", writeln!(handle, "error: `{}` is disallowed", msg.typo,).unwrap();
msg.typo, }
itertools::join(msg.corrections.iter().map(|c| format!("`{}`", c)), ", ") crate::Status::Corrections(corrections) => {
) writeln!(
.unwrap(); handle,
"error: `{}` should be {}",
msg.typo,
itertools::join(corrections.iter().map(|c| format!("`{}`", c)), ", ")
)
.unwrap();
}
}
writeln!( writeln!(
handle, handle,
" --> {}:{}:{}", " --> {}:{}:{}",

View file

@ -73,7 +73,7 @@ impl ParserBuilder {
let escaped = regex::escape(&grapheme); let escaped = regex::escape(&grapheme);
pattern.push_str(&format!("|{}", escaped)); pattern.push_str(&format!("|{}", escaped));
} }
pattern.push_str(r#")"#); pattern.push(')');
} }
} }

View file

@ -30,5 +30,5 @@ Configuration is read from the following (in precedence order)
| default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. | | default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. |
| default.identifier-include-chars | \- | string | Allow identifiers to include these characters. | | default.identifier-include-chars | \- | string | Allow identifiers to include these characters. |
| default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. | | default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
| default.extend-valid-identifiers | \- | list of strings | Identifiers to presume as correct, skipping spell checking. This extends the list when layering configuration, rather than replacing it. | | default.extend-identifiers | \- | table of strings | Corrections for identifiers. When the correction is blank, the identifier is never valid. When the correction is the key, the identifier is always valid. |
| default.extend-valid-words | \- | list of strings | Words to presume as correct, skipping spell checking. This extends the list when layering configuration, rather than replacing it. | | default.extend-words | \- | table of strings | Corrections for words. When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. |

View file

@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::io::Read; use std::io::Read;
pub trait ConfigSource { pub trait ConfigSource {
@ -97,12 +98,12 @@ pub trait FileSource {
None None
} }
fn extend_valid_identifiers(&self) -> &[String] { fn extend_identifiers(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
&[] Box::new(None.into_iter())
} }
fn extend_valid_words(&self) -> &[String] { fn extend_words(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
&[] Box::new(None.into_iter())
} }
} }
@ -220,28 +221,22 @@ impl Walk {
} }
pub fn ignore_dot(&self) -> bool { pub fn ignore_dot(&self) -> bool {
self.ignore_dot self.ignore_dot.or(self.ignore_files).unwrap_or(true)
.or_else(|| self.ignore_files)
.unwrap_or(true)
} }
pub fn ignore_vcs(&self) -> bool { pub fn ignore_vcs(&self) -> bool {
self.ignore_vcs self.ignore_vcs.or(self.ignore_files).unwrap_or(true)
.or_else(|| self.ignore_files)
.unwrap_or(true)
} }
pub fn ignore_global(&self) -> bool { pub fn ignore_global(&self) -> bool {
self.ignore_global self.ignore_global
.or_else(|| self.ignore_vcs) .or(self.ignore_vcs)
.or_else(|| self.ignore_files) .or(self.ignore_files)
.unwrap_or(true) .unwrap_or(true)
} }
pub fn ignore_parent(&self) -> bool { pub fn ignore_parent(&self) -> bool {
self.ignore_parent self.ignore_parent.or(self.ignore_files).unwrap_or(true)
.or_else(|| self.ignore_files)
.unwrap_or(true)
} }
} }
@ -295,8 +290,8 @@ pub struct FileConfig {
pub identifier_include_digits: Option<bool>, pub identifier_include_digits: Option<bool>,
pub identifier_include_chars: Option<String>, pub identifier_include_chars: Option<String>,
pub locale: Option<Locale>, pub locale: Option<Locale>,
pub extend_valid_identifiers: Vec<String>, pub extend_identifiers: HashMap<String, String>,
pub extend_valid_words: Vec<String>, pub extend_words: HashMap<String, String>,
} }
impl FileConfig { impl FileConfig {
@ -325,10 +320,16 @@ impl FileConfig {
if let Some(source) = source.locale() { if let Some(source) = source.locale() {
self.locale = Some(source); self.locale = Some(source);
} }
self.extend_valid_identifiers self.extend_identifiers.extend(
.extend(source.extend_valid_identifiers().iter().cloned()); source
self.extend_valid_words .extend_identifiers()
.extend(source.extend_valid_words().iter().cloned()); .map(|(k, v)| (k.to_owned(), v.to_owned())),
);
self.extend_words.extend(
source
.extend_words()
.map(|(k, v)| (k.to_owned(), v.to_owned())),
);
} }
pub fn check_filename(&self) -> bool { pub fn check_filename(&self) -> bool {
@ -363,12 +364,20 @@ impl FileConfig {
self.locale.unwrap_or_default() self.locale.unwrap_or_default()
} }
pub fn extend_valid_identifiers(&self) -> &[String] { pub fn extend_identifiers(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
self.extend_valid_identifiers.as_slice() Box::new(
self.extend_identifiers
.iter()
.map(|(k, v)| (k.as_str(), v.as_str())),
)
} }
pub fn extend_valid_words(&self) -> &[String] { pub fn extend_words(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
self.extend_valid_words.as_slice() Box::new(
self.extend_words
.iter()
.map(|(k, v)| (k.as_str(), v.as_str())),
)
} }
} }
@ -405,12 +414,20 @@ impl FileSource for FileConfig {
self.locale self.locale
} }
fn extend_valid_identifiers(&self) -> &[String] { fn extend_identifiers(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
self.extend_valid_identifiers.as_slice() Box::new(
self.extend_identifiers
.iter()
.map(|(k, v)| (k.as_str(), v.as_str())),
)
} }
fn extend_valid_words(&self) -> &[String] { fn extend_words(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
self.extend_valid_words.as_slice() Box::new(
self.extend_words
.iter()
.map(|(k, v)| (k.as_str(), v.as_str())),
)
} }
} }

View file

@ -1,9 +1,10 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashSet; use std::collections::HashMap;
use unicase::UniCase; use unicase::UniCase;
use typos::tokens::Case; use typos::tokens::Case;
use typos::Status;
#[derive(Default)] #[derive(Default)]
pub struct BuiltIn { pub struct BuiltIn {
@ -20,40 +21,40 @@ impl BuiltIn {
pub fn correct_ident<'s, 'w>( pub fn correct_ident<'s, 'w>(
&'s self, &'s self,
_ident: typos::tokens::Identifier<'w>, _ident: typos::tokens::Identifier<'w>,
) -> Vec<Cow<'s, str>> { ) -> Option<Status<'s>> {
Vec::new() None
} }
pub fn correct_word<'s, 'w>( pub fn correct_word<'s, 'w>(
&'s self, &'s self,
word_token: typos::tokens::Word<'w>, word_token: typos::tokens::Word<'w>,
) -> Vec<Cow<'s, str>> { ) -> Option<Status<'s>> {
let word = word_token.token(); let word = word_token.token();
let corrections = if let Some(correction) = self.correct_with_dict(word) { let mut corrections = if let Some(correction) = self.correct_with_dict(word) {
self.correct_with_vars(word) self.correct_with_vars(word)
.unwrap_or_else(|| vec![correction]) .unwrap_or_else(|| Status::Corrections(vec![Cow::Borrowed(correction)]))
} else { } else {
self.correct_with_vars(word).unwrap_or_else(Vec::new) self.correct_with_vars(word)?
}; };
corrections corrections
.into_iter() .corrections_mut()
.map(|s| case_correct(s, word_token.case())) .for_each(|mut s| case_correct(&mut s, word_token.case()));
.collect() Some(corrections)
} }
// Not using `Status` to avoid the allocations
fn correct_with_dict(&self, word: &str) -> Option<&'static str> { fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
map_lookup(&typos_dict::WORD_DICTIONARY, word) map_lookup(&typos_dict::WORD_DICTIONARY, word)
} }
fn correct_with_vars(&self, word: &str) -> Option<Vec<&'static str>> { fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
let variants = map_lookup(&typos_vars::VARS_DICTIONARY, word)?; map_lookup(&typos_vars::VARS_DICTIONARY, word).map(|variants| self.select_variant(variants))
self.select_variant(variants)
} }
fn select_variant( fn select_variant(
&self, &self,
vars: &'static [(u8, &'static typos_vars::VariantsMap)], vars: &'static [(u8, &'static typos_vars::VariantsMap)],
) -> Option<Vec<&'static str>> { ) -> Status<'static> {
let var = vars[0]; let var = vars[0];
let var_categories = unsafe { let var_categories = unsafe {
// Code-genned from a checked category-set, so known to be safe // Code-genned from a checked category-set, so known to be safe
@ -62,12 +63,13 @@ impl BuiltIn {
if let Some(locale) = self.locale { if let Some(locale) = self.locale {
if var_categories.contains(locale) { if var_categories.contains(locale) {
// Already valid for the current locale. // Already valid for the current locale.
None Status::Valid
} else { } else {
Some( Status::Corrections(
typos_vars::corrections(locale, *var.1) typos_vars::corrections(locale, *var.1)
.iter() .iter()
.copied() .copied()
.map(Cow::Borrowed)
.collect(), .collect(),
) )
} }
@ -75,23 +77,29 @@ impl BuiltIn {
// All locales are valid // All locales are valid
if var_categories.is_empty() { if var_categories.is_empty() {
// But the word is never valid. // But the word is never valid.
let mut unique: Vec<_> = var.1.iter().flat_map(|v| v.iter()).copied().collect(); let mut unique: Vec<_> = var
.1
.iter()
.flat_map(|v| v.iter())
.copied()
.map(Cow::Borrowed)
.collect();
unique.sort_unstable(); unique.sort_unstable();
unique.dedup(); unique.dedup();
Some(unique) Status::Corrections(unique)
} else { } else {
None Status::Valid
} }
} }
} }
} }
impl typos::Dictionary for BuiltIn { impl typos::Dictionary for BuiltIn {
fn correct_ident<'s, 'w>(&'s self, ident: typos::tokens::Identifier<'w>) -> Vec<Cow<'s, str>> { fn correct_ident<'s, 'w>(&'s self, ident: typos::tokens::Identifier<'w>) -> Option<Status<'s>> {
BuiltIn::correct_ident(self, ident) BuiltIn::correct_ident(self, ident)
} }
fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Vec<Cow<'s, str>> { fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Status<'s>> {
BuiltIn::correct_word(self, word) BuiltIn::correct_word(self, word)
} }
} }
@ -109,68 +117,88 @@ fn map_lookup<V: Clone>(map: &'static phf::Map<UniCase<&'static str>, V>, key: &
} }
} }
fn case_correct(correction: &str, case: Case) -> Cow<'_, str> { fn case_correct(correction: &mut Cow<'_, str>, case: Case) {
match case { match case {
Case::Lower | Case::None => correction.into(), Case::Lower | Case::None => (),
Case::Title => { Case::Title => match correction {
let mut title = String::with_capacity(correction.as_bytes().len()); Cow::Borrowed(s) => {
let mut char_indices = correction.char_indices(); let mut s = String::from(*s);
if let Some((_, c)) = char_indices.next() { s[0..1].make_ascii_uppercase();
title.extend(c.to_uppercase()); *correction = s.into();
if let Some((i, _)) = char_indices.next() {
title.push_str(&correction[i..]);
}
} }
title.into() Cow::Owned(s) => {
} s[0..1].make_ascii_uppercase();
Case::Scream => correction }
.chars() },
.flat_map(|c| c.to_uppercase()) Case::Scream => match correction {
.collect::<String>() Cow::Borrowed(s) => {
.into(), let mut s = String::from(*s);
s.make_ascii_uppercase();
*correction = s.into();
}
Cow::Owned(s) => {
s.make_ascii_uppercase();
}
},
} }
} }
pub struct Override<'i, 'w, D> { pub struct Override<'i, 'w, D> {
valid_identifiers: HashSet<&'i str>, identifiers: HashMap<&'i str, Status<'i>>,
valid_words: HashSet<unicase::UniCase<&'w str>>, words: HashMap<unicase::UniCase<&'w str>, Status<'w>>,
inner: D, inner: D,
} }
impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> { impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> {
pub fn new(inner: D) -> Self { pub fn new(inner: D) -> Self {
Self { Self {
valid_identifiers: Default::default(), identifiers: Default::default(),
valid_words: Default::default(), words: Default::default(),
inner, inner,
} }
} }
pub fn valid_identifiers<I: Iterator<Item = &'i str>>(&mut self, valid_identifiers: I) { pub fn identifiers<I: Iterator<Item = (&'i str, &'i str)>>(&mut self, identifiers: I) {
self.valid_identifiers = valid_identifiers.collect(); self.identifiers = Self::interpret(identifiers).collect();
} }
pub fn valid_words<I: Iterator<Item = &'w str>>(&mut self, valid_words: I) { pub fn words<I: Iterator<Item = (&'w str, &'w str)>>(&mut self, words: I) {
self.valid_words = valid_words.map(UniCase::new).collect(); self.words = Self::interpret(words)
.map(|(k, v)| (UniCase::new(k), v))
.collect();
}
pub fn interpret<'z, I: Iterator<Item = (&'z str, &'z str)>>(
cases: I,
) -> impl Iterator<Item = (&'z str, Status<'z>)> {
cases.map(|(typo, correction)| {
let correction = if typo == correction {
Status::Valid
} else if correction.is_empty() {
Status::Invalid
} else {
Status::Corrections(vec![Cow::Borrowed(correction)])
};
(typo, correction)
})
} }
} }
impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> { impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> {
fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Vec<Cow<'s, str>> { fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Option<Status<'s>> {
if self.valid_identifiers.contains(ident.token()) { self.identifiers
Vec::new() .get(ident.token())
} else { .map(|c| c.borrow())
self.inner.correct_ident(ident) .or_else(|| self.inner.correct_ident(ident))
}
} }
fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Vec<Cow<'s, str>> { fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Option<Status<'s>> {
let w = UniCase::new(word.token()); let w = UniCase::new(word.token());
if self.valid_words.contains(&w) { // HACK: couldn't figure out the lifetime issue with replacing `cloned` with `borrow`
Vec::new() self.words
} else { .get(&w)
self.inner.correct_word(word) .cloned()
} .or_else(|| self.inner.correct_word(word))
} }
} }
@ -188,7 +216,12 @@ mod test {
("fOo", Case::None, "fOo"), ("fOo", Case::None, "fOo"),
]; ];
for (correction, case, expected) in cases.iter() { for (correction, case, expected) in cases.iter() {
let actual = case_correct(correction, *case); let mut actual = Cow::Borrowed(*correction);
case_correct(&mut actual, *case);
assert_eq!(*expected, actual);
let mut actual = Cow::Owned(String::from(*correction));
case_correct(&mut actual, *case);
assert_eq!(*expected, actual); assert_eq!(*expected, actual);
} }
} }

View file

@ -58,20 +58,8 @@ fn run() -> Result<i32, anyhow::Error> {
let dictionary = crate::dict::BuiltIn::new(config.default.locale()); let dictionary = crate::dict::BuiltIn::new(config.default.locale());
let mut dictionary = crate::dict::Override::new(dictionary); let mut dictionary = crate::dict::Override::new(dictionary);
dictionary.valid_identifiers( dictionary.identifiers(config.default.extend_identifiers());
config dictionary.words(config.default.extend_words());
.default
.extend_valid_identifiers()
.iter()
.map(|s| s.as_str()),
);
dictionary.valid_words(
config
.default
.extend_valid_words()
.iter()
.map(|s| s.as_str()),
);
let mut settings = typos::checks::TyposSettings::new(); let mut settings = typos::checks::TyposSettings::new();
settings settings

View file

@ -57,11 +57,12 @@ impl<'r> Replace<'r> {
impl<'r> typos::report::Report for Replace<'r> { impl<'r> typos::report::Report for Replace<'r> {
fn report(&self, msg: typos::report::Message<'_>) -> bool { fn report(&self, msg: typos::report::Message<'_>) -> bool {
match msg { match msg {
typos::report::Message::Correction(msg) => { typos::report::Message::Correction(msg) => match msg.corrections {
if msg.corrections.len() == 1 { typos::Status::Corrections(corrections) if corrections.len() == 1 => {
let path = msg.path.to_owned(); let path = msg.path.to_owned();
let line_num = msg.line_num; let line_num = msg.line_num;
let correction = Correction::from_content(msg); let correction =
Correction::new(msg.byte_offset, msg.typo, corrections[0].as_ref());
let mut deferred = self.deferred.lock().unwrap(); let mut deferred = self.deferred.lock().unwrap();
let content = deferred let content = deferred
.content .content
@ -71,24 +72,25 @@ impl<'r> typos::report::Report for Replace<'r> {
.or_insert_with(Vec::new); .or_insert_with(Vec::new);
content.push(correction); content.push(correction);
false false
} else {
self.reporter
.report(typos::report::Message::Correction(msg))
} }
} _ => self
typos::report::Message::PathCorrection(msg) => { .reporter
if msg.corrections.len() == 1 { .report(typos::report::Message::Correction(msg)),
},
typos::report::Message::PathCorrection(msg) => match msg.corrections {
typos::Status::Corrections(corrections) if corrections.len() == 1 => {
let path = msg.path.to_owned(); let path = msg.path.to_owned();
let correction = Correction::from_path(msg); let correction =
Correction::new(msg.byte_offset, msg.typo, corrections[0].as_ref());
let mut deferred = self.deferred.lock().unwrap(); let mut deferred = self.deferred.lock().unwrap();
let content = deferred.paths.entry(path).or_insert_with(Vec::new); let content = deferred.paths.entry(path).or_insert_with(Vec::new);
content.push(correction); content.push(correction);
false false
} else {
self.reporter
.report(typos::report::Message::PathCorrection(msg))
} }
} _ => self
.reporter
.report(typos::report::Message::PathCorrection(msg)),
},
_ => self.reporter.report(msg), _ => self.reporter.report(msg),
} }
} }
@ -108,21 +110,11 @@ struct Correction {
} }
impl Correction { impl Correction {
fn from_content(other: typos::report::Correction<'_>) -> Self { fn new(byte_offset: usize, typo: &str, correction: &str) -> Self {
assert_eq!(other.corrections.len(), 1);
Self { Self {
byte_offset: other.byte_offset, byte_offset,
typo: other.typo.as_bytes().to_vec(), typo: typo.as_bytes().to_vec(),
correction: other.corrections[0].as_bytes().to_vec(), correction: correction.as_bytes().to_vec(),
}
}
fn from_path(other: typos::report::PathCorrection<'_>) -> Self {
assert_eq!(other.corrections.len(), 1);
Self {
byte_offset: other.byte_offset,
typo: other.typo.as_bytes().to_vec(),
correction: other.corrections[0].as_bytes().to_vec(),
} }
} }
} }
@ -222,7 +214,9 @@ mod test {
.line_num(1) .line_num(1)
.byte_offset(2) .byte_offset(2)
.typo("foo") .typo("foo")
.corrections(vec![std::borrow::Cow::Borrowed("bar")]) .corrections(typos::Status::Corrections(vec![
std::borrow::Cow::Borrowed("bar"),
]))
.into(), .into(),
); );
replace.write().unwrap(); replace.write().unwrap();
@ -243,7 +237,9 @@ mod test {
.path(input_file.path()) .path(input_file.path())
.byte_offset(0) .byte_offset(0)
.typo("foo") .typo("foo")
.corrections(vec![std::borrow::Cow::Borrowed("bar")]) .corrections(typos::Status::Corrections(vec![
std::borrow::Cow::Borrowed("bar"),
]))
.into(), .into(),
); );
replace.write().unwrap(); replace.write().unwrap();