mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-22 15:42:23 -05:00
feat: Custom dictionary support
Switching `valid-*` to just `*` where you map typo to correction, with support for always-valid and never-valid. Fixes #9
This commit is contained in:
parent
79d10d6d24
commit
527b9837b4
11 changed files with 354 additions and 227 deletions
|
@ -10,7 +10,7 @@ schedules:
|
|||
include:
|
||||
- master
|
||||
variables:
|
||||
minrust: 1.40.0
|
||||
minrust: 1.42.0
|
||||
codecov_token: $(CODECOV_TOKEN_SECRET)
|
||||
windows_vm: vs2017-win2016
|
||||
mac_vm: macos-10.14
|
||||
|
|
|
@ -13,7 +13,9 @@ fn correct_word_hit(b: &mut test::Bencher) {
|
|||
let input = typos::tokens::Word::new("successs", 0).unwrap();
|
||||
assert_eq!(
|
||||
corrections.correct_word(input),
|
||||
vec![std::borrow::Cow::Borrowed("successes")]
|
||||
Some(typos::Status::Corrections(vec![
|
||||
std::borrow::Cow::Borrowed("successes")
|
||||
]))
|
||||
);
|
||||
b.iter(|| corrections.correct_word(input));
|
||||
}
|
||||
|
@ -22,6 +24,6 @@ fn correct_word_hit(b: &mut test::Bencher) {
|
|||
fn correct_word_miss(b: &mut test::Bencher) {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let input = typos::tokens::Word::new("success", 0).unwrap();
|
||||
assert!(corrections.correct_word(input).is_empty());
|
||||
assert!(corrections.correct_word(input).is_none());
|
||||
b.iter(|| corrections.correct_word(input));
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ use bstr::ByteSlice;
|
|||
use crate::report;
|
||||
use crate::tokens;
|
||||
use crate::Dictionary;
|
||||
use crate::Status;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct TyposSettings {
|
||||
|
@ -233,16 +234,20 @@ impl Checks {
|
|||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<bool, crate::Error> {
|
||||
let mut typos_found = false;
|
||||
|
||||
if !self.check_filenames {
|
||||
return Ok(typos_found);
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
if let Some(part) = path.file_name().and_then(|s| s.to_str()) {
|
||||
for ident in parser.parse(part) {
|
||||
let corrections = dictionary.correct_ident(ident);
|
||||
if !corrections.is_empty() {
|
||||
let mut typos_found = false;
|
||||
for ident in path
|
||||
.file_name()
|
||||
.and_then(|s| s.to_str())
|
||||
.iter()
|
||||
.flat_map(|part| parser.parse(part))
|
||||
{
|
||||
match dictionary.correct_ident(ident) {
|
||||
Some(Status::Valid) => {}
|
||||
Some(corrections) => {
|
||||
let byte_offset = ident.offset();
|
||||
let msg = report::PathCorrection {
|
||||
path,
|
||||
|
@ -251,18 +256,22 @@ impl Checks {
|
|||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
} else {
|
||||
}
|
||||
None => {
|
||||
for word in ident.split() {
|
||||
let corrections = dictionary.correct_word(word);
|
||||
if !corrections.is_empty() {
|
||||
let byte_offset = word.offset();
|
||||
let msg = report::PathCorrection {
|
||||
path,
|
||||
byte_offset,
|
||||
typo: word.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
match dictionary.correct_word(word) {
|
||||
Some(Status::Valid) => {}
|
||||
Some(corrections) => {
|
||||
let byte_offset = word.offset();
|
||||
let msg = report::PathCorrection {
|
||||
path,
|
||||
byte_offset,
|
||||
typo: word.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -305,32 +314,38 @@ impl Checks {
|
|||
for (line_idx, line) in buffer.lines().enumerate() {
|
||||
let line_num = line_idx + 1;
|
||||
for ident in parser.parse_bytes(line) {
|
||||
let corrections = dictionary.correct_ident(ident);
|
||||
if !corrections.is_empty() {
|
||||
let byte_offset = ident.offset();
|
||||
let msg = report::Correction {
|
||||
path,
|
||||
line,
|
||||
line_num,
|
||||
byte_offset,
|
||||
typo: ident.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
} else {
|
||||
for word in ident.split() {
|
||||
let corrections = dictionary.correct_word(word);
|
||||
if !corrections.is_empty() {
|
||||
let byte_offset = word.offset();
|
||||
let msg = report::Correction {
|
||||
path,
|
||||
line,
|
||||
line_num,
|
||||
byte_offset,
|
||||
typo: word.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
match dictionary.correct_ident(ident) {
|
||||
Some(Status::Valid) => {}
|
||||
Some(corrections) => {
|
||||
let byte_offset = ident.offset();
|
||||
let msg = report::Correction {
|
||||
path,
|
||||
line,
|
||||
line_num,
|
||||
byte_offset,
|
||||
typo: ident.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
}
|
||||
None => {
|
||||
for word in ident.split() {
|
||||
match dictionary.correct_word(word) {
|
||||
Some(Status::Valid) => {}
|
||||
Some(corrections) => {
|
||||
let byte_offset = word.offset();
|
||||
let msg = report::Correction {
|
||||
path,
|
||||
line,
|
||||
line_num,
|
||||
byte_offset,
|
||||
typo: word.token(),
|
||||
corrections,
|
||||
};
|
||||
typos_found |= reporter.report(msg.into());
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,49 @@
|
|||
use std::borrow::Cow;
|
||||
|
||||
pub trait Dictionary: Send + Sync {
|
||||
fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>) -> Vec<Cow<'s, str>>;
|
||||
|
||||
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Vec<Cow<'s, str>>;
|
||||
/// Verdict a dictionary gives for a single token.
///
/// `'c` is the lifetime of any correction strings that are borrowed rather
/// than owned.
#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize, derive_more::From)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
// NOTE(review): `untagged` drops the variant tag on serialization, so the two
// unit variants below may serialize identically -- confirm that consumers of
// the serialized output can still tell `Valid` from `Invalid`.
#[serde(untagged)]
|
||||
pub enum Status<'c> {
|
||||
/// The token is accepted as written.
Valid,
|
||||
/// The token is rejected, and no replacement is offered.
Invalid,
|
||||
/// The token is a typo; the vector holds the suggested replacements.
Corrections(Vec<Cow<'c, str>>),
|
||||
}
|
||||
|
||||
/// Variant predicates and helpers for reading or rewriting the suggested
/// corrections.
impl<'c> Status<'c> {
|
||||
/// Returns `true` when this is `Status::Invalid`.
pub fn is_invalid(&self) -> bool {
|
||||
matches!(self, Status::Invalid)
|
||||
}
|
||||
/// Returns `true` when this is `Status::Valid`.
pub fn is_valid(&self) -> bool {
|
||||
matches!(self, Status::Valid)
|
||||
}
|
||||
/// Returns `true` when this is `Status::Corrections`, regardless of how
/// many corrections the vector holds.
pub fn is_correction(&self) -> bool {
|
||||
matches!(self, Status::Corrections(_))
|
||||
}
|
||||
|
||||
/// Iterates mutably over the suggested corrections.
///
/// Yields nothing for `Valid` and `Invalid`.
pub fn corrections_mut(&mut self) -> impl Iterator<Item = &mut Cow<'c, str>> {
|
||||
// The two arms produce different iterator types; `Either` wraps them
// so the function can return a single `impl Iterator`.
match self {
|
||||
Status::Corrections(corrections) => itertools::Either::Left(corrections.iter_mut()),
|
||||
// No corrections to hand out: iterate over an empty array.
_ => itertools::Either::Right([].iter_mut()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a `Status` whose correction strings borrow from `self`.
///
/// The strings themselves are not copied, but a new `Vec` is collected
/// to hold the borrowed `Cow`s.
pub fn borrow(&self) -> Status<'_> {
|
||||
match self {
|
||||
Status::Corrections(corrections) => {
|
||||
let corrections = corrections
|
||||
.iter()
|
||||
// Re-borrow each entry, whether it was owned or borrowed.
.map(|c| Cow::Borrowed(c.as_ref()))
|
||||
.collect();
|
||||
Status::Corrections(corrections)
|
||||
}
|
||||
// `Valid` / `Invalid` carry no data, so cloning them copies no strings.
_ => self.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A source of spelling verdicts for identifiers and for individual words.
///
/// Both methods return `None` when the dictionary has nothing to say about
/// the token, and `Some(Status)` otherwise. The returned `Status` may borrow
/// from the dictionary (`'s`), not from the token being checked (`'w`).
pub trait Dictionary: Send + Sync {
|
||||
/// Looks up a whole identifier.
fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>)
|
||||
-> Option<Status<'s>>;
|
||||
|
||||
/// Looks up a single word.
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
#![allow(clippy::needless_update)]
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::io::{self, Write};
|
||||
|
||||
#[derive(Clone, Debug, serde::Serialize, derive_more::From)]
|
||||
|
@ -21,8 +20,8 @@ impl<'m> Message<'m> {
|
|||
pub fn is_correction(&self) -> bool {
|
||||
match self {
|
||||
Message::BinaryFile(_) => false,
|
||||
Message::Correction(_) => true,
|
||||
Message::PathCorrection(_) => true,
|
||||
Message::Correction(c) => c.corrections.is_correction(),
|
||||
Message::PathCorrection(c) => c.corrections.is_correction(),
|
||||
Message::File(_) => false,
|
||||
Message::Parse(_) => false,
|
||||
Message::PathError(_) => false,
|
||||
|
@ -59,7 +58,7 @@ pub struct Correction<'m> {
|
|||
pub line_num: usize,
|
||||
pub byte_offset: usize,
|
||||
pub typo: &'m str,
|
||||
pub corrections: Vec<Cow<'m, str>>,
|
||||
pub corrections: crate::Status<'m>,
|
||||
}
|
||||
|
||||
impl<'m> Default for Correction<'m> {
|
||||
|
@ -70,7 +69,7 @@ impl<'m> Default for Correction<'m> {
|
|||
line_num: 0,
|
||||
byte_offset: 0,
|
||||
typo: "",
|
||||
corrections: Vec::new(),
|
||||
corrections: crate::Status::Invalid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -81,7 +80,7 @@ pub struct PathCorrection<'m> {
|
|||
pub path: &'m std::path::Path,
|
||||
pub byte_offset: usize,
|
||||
pub typo: &'m str,
|
||||
pub corrections: Vec<Cow<'m, str>>,
|
||||
pub corrections: crate::Status<'m>,
|
||||
}
|
||||
|
||||
impl<'m> Default for PathCorrection<'m> {
|
||||
|
@ -90,7 +89,7 @@ impl<'m> Default for PathCorrection<'m> {
|
|||
path: std::path::Path::new("-"),
|
||||
byte_offset: 0,
|
||||
typo: "",
|
||||
corrections: Vec::new(),
|
||||
corrections: crate::Status::Invalid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -196,24 +195,42 @@ impl Report for PrintBrief {
|
|||
Message::BinaryFile(msg) => {
|
||||
log::info!("{}", msg);
|
||||
}
|
||||
Message::Correction(msg) => {
|
||||
println!(
|
||||
"{}:{}:{}: {} -> {}",
|
||||
msg.path.display(),
|
||||
msg.line_num,
|
||||
msg.byte_offset,
|
||||
msg.typo,
|
||||
itertools::join(msg.corrections.iter(), ", ")
|
||||
);
|
||||
}
|
||||
Message::PathCorrection(msg) => {
|
||||
println!(
|
||||
"{}: {} -> {}",
|
||||
msg.path.display(),
|
||||
msg.typo,
|
||||
itertools::join(msg.corrections.iter(), ", ")
|
||||
);
|
||||
}
|
||||
Message::Correction(msg) => match &msg.corrections {
|
||||
crate::Status::Valid => {}
|
||||
crate::Status::Invalid => {
|
||||
println!(
|
||||
"{}:{}:{}: {} is disallowed",
|
||||
msg.path.display(),
|
||||
msg.line_num,
|
||||
msg.byte_offset,
|
||||
msg.typo,
|
||||
);
|
||||
}
|
||||
crate::Status::Corrections(corrections) => {
|
||||
println!(
|
||||
"{}:{}:{}: {} -> {}",
|
||||
msg.path.display(),
|
||||
msg.line_num,
|
||||
msg.byte_offset,
|
||||
msg.typo,
|
||||
itertools::join(corrections.iter(), ", ")
|
||||
);
|
||||
}
|
||||
},
|
||||
Message::PathCorrection(msg) => match &msg.corrections {
|
||||
crate::Status::Valid => {}
|
||||
crate::Status::Invalid => {
|
||||
println!("{}: {} is disallowed", msg.path.display(), msg.typo,);
|
||||
}
|
||||
crate::Status::Corrections(corrections) => {
|
||||
println!(
|
||||
"{}: {} -> {}",
|
||||
msg.path.display(),
|
||||
msg.typo,
|
||||
itertools::join(corrections.iter(), ", ")
|
||||
);
|
||||
}
|
||||
},
|
||||
Message::File(msg) => {
|
||||
println!("{}", msg.path.display());
|
||||
}
|
||||
|
@ -241,14 +258,24 @@ impl Report for PrintLong {
|
|||
log::info!("{}", msg);
|
||||
}
|
||||
Message::Correction(msg) => print_long_correction(msg),
|
||||
Message::PathCorrection(msg) => {
|
||||
println!(
|
||||
"{}: error: `{}` should be {}",
|
||||
msg.path.display(),
|
||||
msg.typo,
|
||||
itertools::join(msg.corrections.iter().map(|c| format!("`{}`", c)), ", ")
|
||||
);
|
||||
}
|
||||
Message::PathCorrection(msg) => match &msg.corrections {
|
||||
crate::Status::Valid => {}
|
||||
crate::Status::Invalid => {
|
||||
println!(
|
||||
"{}: error: `{}` is disallowed",
|
||||
msg.path.display(),
|
||||
msg.typo,
|
||||
);
|
||||
}
|
||||
crate::Status::Corrections(corrections) => {
|
||||
println!(
|
||||
"{}: error: `{}` should be {}",
|
||||
msg.path.display(),
|
||||
msg.typo,
|
||||
itertools::join(corrections.iter().map(|c| format!("`{}`", c)), ", ")
|
||||
);
|
||||
}
|
||||
},
|
||||
Message::File(msg) => {
|
||||
println!("{}", msg.path.display());
|
||||
}
|
||||
|
@ -278,14 +305,21 @@ fn print_long_correction(msg: &Correction) {
|
|||
|
||||
let stdout = io::stdout();
|
||||
let mut handle = stdout.lock();
|
||||
|
||||
writeln!(
|
||||
handle,
|
||||
"error: `{}` should be {}",
|
||||
msg.typo,
|
||||
itertools::join(msg.corrections.iter().map(|c| format!("`{}`", c)), ", ")
|
||||
)
|
||||
.unwrap();
|
||||
match &msg.corrections {
|
||||
crate::Status::Valid => {}
|
||||
crate::Status::Invalid => {
|
||||
writeln!(handle, "error: `{}` is disallowed", msg.typo,).unwrap();
|
||||
}
|
||||
crate::Status::Corrections(corrections) => {
|
||||
writeln!(
|
||||
handle,
|
||||
"error: `{}` should be {}",
|
||||
msg.typo,
|
||||
itertools::join(corrections.iter().map(|c| format!("`{}`", c)), ", ")
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
writeln!(
|
||||
handle,
|
||||
" --> {}:{}:{}",
|
||||
|
|
|
@ -73,7 +73,7 @@ impl ParserBuilder {
|
|||
let escaped = regex::escape(&grapheme);
|
||||
pattern.push_str(&format!("|{}", escaped));
|
||||
}
|
||||
pattern.push_str(r#")"#);
|
||||
pattern.push(')');
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,5 +30,5 @@ Configuration is read from the following (in precedence order)
|
|||
| default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. |
|
||||
| default.identifier-include-chars | \- | string | Allow identifiers to include these characters. |
|
||||
| default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
|
||||
| default.extend-valid-identifiers | \- | list of strings | Identifiers to presume as correct, skipping spell checking. This extends the list when layering configuration, rather than replacing it. |
|
||||
| default.extend-valid-words | \- | list of strings | Words to presume as correct, skipping spell checking. This extends the list when layering configuration, rather than replacing it. |
|
||||
| default.extend-identifiers | \- | table of strings | Corrections for identifiers. When the correction is blank, the identifier is never valid. When the correction is the key, the identifier is always valid. |
|
||||
| default.extend-words | \- | table of strings | Corrections for words. When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. |
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use std::collections::HashMap;
|
||||
use std::io::Read;
|
||||
|
||||
pub trait ConfigSource {
|
||||
|
@ -97,12 +98,12 @@ pub trait FileSource {
|
|||
None
|
||||
}
|
||||
|
||||
fn extend_valid_identifiers(&self) -> &[String] {
|
||||
&[]
|
||||
fn extend_identifiers(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
|
||||
Box::new(None.into_iter())
|
||||
}
|
||||
|
||||
fn extend_valid_words(&self) -> &[String] {
|
||||
&[]
|
||||
fn extend_words(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
|
||||
Box::new(None.into_iter())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -220,28 +221,22 @@ impl Walk {
|
|||
}
|
||||
|
||||
pub fn ignore_dot(&self) -> bool {
|
||||
self.ignore_dot
|
||||
.or_else(|| self.ignore_files)
|
||||
.unwrap_or(true)
|
||||
self.ignore_dot.or(self.ignore_files).unwrap_or(true)
|
||||
}
|
||||
|
||||
pub fn ignore_vcs(&self) -> bool {
|
||||
self.ignore_vcs
|
||||
.or_else(|| self.ignore_files)
|
||||
.unwrap_or(true)
|
||||
self.ignore_vcs.or(self.ignore_files).unwrap_or(true)
|
||||
}
|
||||
|
||||
pub fn ignore_global(&self) -> bool {
|
||||
self.ignore_global
|
||||
.or_else(|| self.ignore_vcs)
|
||||
.or_else(|| self.ignore_files)
|
||||
.or(self.ignore_vcs)
|
||||
.or(self.ignore_files)
|
||||
.unwrap_or(true)
|
||||
}
|
||||
|
||||
pub fn ignore_parent(&self) -> bool {
|
||||
self.ignore_parent
|
||||
.or_else(|| self.ignore_files)
|
||||
.unwrap_or(true)
|
||||
self.ignore_parent.or(self.ignore_files).unwrap_or(true)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -295,8 +290,8 @@ pub struct FileConfig {
|
|||
pub identifier_include_digits: Option<bool>,
|
||||
pub identifier_include_chars: Option<String>,
|
||||
pub locale: Option<Locale>,
|
||||
pub extend_valid_identifiers: Vec<String>,
|
||||
pub extend_valid_words: Vec<String>,
|
||||
pub extend_identifiers: HashMap<String, String>,
|
||||
pub extend_words: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl FileConfig {
|
||||
|
@ -325,10 +320,16 @@ impl FileConfig {
|
|||
if let Some(source) = source.locale() {
|
||||
self.locale = Some(source);
|
||||
}
|
||||
self.extend_valid_identifiers
|
||||
.extend(source.extend_valid_identifiers().iter().cloned());
|
||||
self.extend_valid_words
|
||||
.extend(source.extend_valid_words().iter().cloned());
|
||||
self.extend_identifiers.extend(
|
||||
source
|
||||
.extend_identifiers()
|
||||
.map(|(k, v)| (k.to_owned(), v.to_owned())),
|
||||
);
|
||||
self.extend_words.extend(
|
||||
source
|
||||
.extend_words()
|
||||
.map(|(k, v)| (k.to_owned(), v.to_owned())),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn check_filename(&self) -> bool {
|
||||
|
@ -363,12 +364,20 @@ impl FileConfig {
|
|||
self.locale.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub fn extend_valid_identifiers(&self) -> &[String] {
|
||||
self.extend_valid_identifiers.as_slice()
|
||||
pub fn extend_identifiers(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
|
||||
Box::new(
|
||||
self.extend_identifiers
|
||||
.iter()
|
||||
.map(|(k, v)| (k.as_str(), v.as_str())),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn extend_valid_words(&self) -> &[String] {
|
||||
self.extend_valid_words.as_slice()
|
||||
pub fn extend_words(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
|
||||
Box::new(
|
||||
self.extend_words
|
||||
.iter()
|
||||
.map(|(k, v)| (k.as_str(), v.as_str())),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -405,12 +414,20 @@ impl FileSource for FileConfig {
|
|||
self.locale
|
||||
}
|
||||
|
||||
fn extend_valid_identifiers(&self) -> &[String] {
|
||||
self.extend_valid_identifiers.as_slice()
|
||||
fn extend_identifiers(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
|
||||
Box::new(
|
||||
self.extend_identifiers
|
||||
.iter()
|
||||
.map(|(k, v)| (k.as_str(), v.as_str())),
|
||||
)
|
||||
}
|
||||
|
||||
fn extend_valid_words(&self) -> &[String] {
|
||||
self.extend_valid_words.as_slice()
|
||||
fn extend_words(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
|
||||
Box::new(
|
||||
self.extend_words
|
||||
.iter()
|
||||
.map(|(k, v)| (k.as_str(), v.as_str())),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
151
src/dict.rs
151
src/dict.rs
|
@ -1,9 +1,10 @@
|
|||
use std::borrow::Cow;
|
||||
use std::collections::HashSet;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use unicase::UniCase;
|
||||
|
||||
use typos::tokens::Case;
|
||||
use typos::Status;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct BuiltIn {
|
||||
|
@ -20,40 +21,40 @@ impl BuiltIn {
|
|||
pub fn correct_ident<'s, 'w>(
|
||||
&'s self,
|
||||
_ident: typos::tokens::Identifier<'w>,
|
||||
) -> Vec<Cow<'s, str>> {
|
||||
Vec::new()
|
||||
) -> Option<Status<'s>> {
|
||||
None
|
||||
}
|
||||
|
||||
pub fn correct_word<'s, 'w>(
|
||||
&'s self,
|
||||
word_token: typos::tokens::Word<'w>,
|
||||
) -> Vec<Cow<'s, str>> {
|
||||
) -> Option<Status<'s>> {
|
||||
let word = word_token.token();
|
||||
let corrections = if let Some(correction) = self.correct_with_dict(word) {
|
||||
let mut corrections = if let Some(correction) = self.correct_with_dict(word) {
|
||||
self.correct_with_vars(word)
|
||||
.unwrap_or_else(|| vec![correction])
|
||||
.unwrap_or_else(|| Status::Corrections(vec![Cow::Borrowed(correction)]))
|
||||
} else {
|
||||
self.correct_with_vars(word).unwrap_or_else(Vec::new)
|
||||
self.correct_with_vars(word)?
|
||||
};
|
||||
corrections
|
||||
.into_iter()
|
||||
.map(|s| case_correct(s, word_token.case()))
|
||||
.collect()
|
||||
.corrections_mut()
|
||||
.for_each(|mut s| case_correct(&mut s, word_token.case()));
|
||||
Some(corrections)
|
||||
}
|
||||
|
||||
// Not using `Status` to avoid the allocations
|
||||
fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
|
||||
map_lookup(&typos_dict::WORD_DICTIONARY, word)
|
||||
}
|
||||
|
||||
fn correct_with_vars(&self, word: &str) -> Option<Vec<&'static str>> {
|
||||
let variants = map_lookup(&typos_vars::VARS_DICTIONARY, word)?;
|
||||
self.select_variant(variants)
|
||||
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
|
||||
map_lookup(&typos_vars::VARS_DICTIONARY, word).map(|variants| self.select_variant(variants))
|
||||
}
|
||||
|
||||
fn select_variant(
|
||||
&self,
|
||||
vars: &'static [(u8, &'static typos_vars::VariantsMap)],
|
||||
) -> Option<Vec<&'static str>> {
|
||||
) -> Status<'static> {
|
||||
let var = vars[0];
|
||||
let var_categories = unsafe {
|
||||
// Code-genned from a checked category-set, so known to be safe
|
||||
|
@ -62,12 +63,13 @@ impl BuiltIn {
|
|||
if let Some(locale) = self.locale {
|
||||
if var_categories.contains(locale) {
|
||||
// Already valid for the current locale.
|
||||
None
|
||||
Status::Valid
|
||||
} else {
|
||||
Some(
|
||||
Status::Corrections(
|
||||
typos_vars::corrections(locale, *var.1)
|
||||
.iter()
|
||||
.copied()
|
||||
.map(Cow::Borrowed)
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
|
@ -75,23 +77,29 @@ impl BuiltIn {
|
|||
// All locales are valid
|
||||
if var_categories.is_empty() {
|
||||
// But the word is never valid.
|
||||
let mut unique: Vec<_> = var.1.iter().flat_map(|v| v.iter()).copied().collect();
|
||||
let mut unique: Vec<_> = var
|
||||
.1
|
||||
.iter()
|
||||
.flat_map(|v| v.iter())
|
||||
.copied()
|
||||
.map(Cow::Borrowed)
|
||||
.collect();
|
||||
unique.sort_unstable();
|
||||
unique.dedup();
|
||||
Some(unique)
|
||||
Status::Corrections(unique)
|
||||
} else {
|
||||
None
|
||||
Status::Valid
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl typos::Dictionary for BuiltIn {
|
||||
fn correct_ident<'s, 'w>(&'s self, ident: typos::tokens::Identifier<'w>) -> Vec<Cow<'s, str>> {
|
||||
fn correct_ident<'s, 'w>(&'s self, ident: typos::tokens::Identifier<'w>) -> Option<Status<'s>> {
|
||||
BuiltIn::correct_ident(self, ident)
|
||||
}
|
||||
|
||||
fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Vec<Cow<'s, str>> {
|
||||
fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Status<'s>> {
|
||||
BuiltIn::correct_word(self, word)
|
||||
}
|
||||
}
|
||||
|
@ -109,68 +117,88 @@ fn map_lookup<V: Clone>(map: &'static phf::Map<UniCase<&'static str>, V>, key: &
|
|||
}
|
||||
}
|
||||
|
||||
fn case_correct(correction: &str, case: Case) -> Cow<'_, str> {
|
||||
fn case_correct(correction: &mut Cow<'_, str>, case: Case) {
|
||||
match case {
|
||||
Case::Lower | Case::None => correction.into(),
|
||||
Case::Title => {
|
||||
let mut title = String::with_capacity(correction.as_bytes().len());
|
||||
let mut char_indices = correction.char_indices();
|
||||
if let Some((_, c)) = char_indices.next() {
|
||||
title.extend(c.to_uppercase());
|
||||
if let Some((i, _)) = char_indices.next() {
|
||||
title.push_str(&correction[i..]);
|
||||
}
|
||||
Case::Lower | Case::None => (),
|
||||
Case::Title => match correction {
|
||||
Cow::Borrowed(s) => {
|
||||
let mut s = String::from(*s);
|
||||
s[0..1].make_ascii_uppercase();
|
||||
*correction = s.into();
|
||||
}
|
||||
title.into()
|
||||
}
|
||||
Case::Scream => correction
|
||||
.chars()
|
||||
.flat_map(|c| c.to_uppercase())
|
||||
.collect::<String>()
|
||||
.into(),
|
||||
Cow::Owned(s) => {
|
||||
s[0..1].make_ascii_uppercase();
|
||||
}
|
||||
},
|
||||
Case::Scream => match correction {
|
||||
Cow::Borrowed(s) => {
|
||||
let mut s = String::from(*s);
|
||||
s.make_ascii_uppercase();
|
||||
*correction = s.into();
|
||||
}
|
||||
Cow::Owned(s) => {
|
||||
s.make_ascii_uppercase();
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Override<'i, 'w, D> {
|
||||
valid_identifiers: HashSet<&'i str>,
|
||||
valid_words: HashSet<unicase::UniCase<&'w str>>,
|
||||
identifiers: HashMap<&'i str, Status<'i>>,
|
||||
words: HashMap<unicase::UniCase<&'w str>, Status<'w>>,
|
||||
inner: D,
|
||||
}
|
||||
|
||||
impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> {
|
||||
pub fn new(inner: D) -> Self {
|
||||
Self {
|
||||
valid_identifiers: Default::default(),
|
||||
valid_words: Default::default(),
|
||||
identifiers: Default::default(),
|
||||
words: Default::default(),
|
||||
inner,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn valid_identifiers<I: Iterator<Item = &'i str>>(&mut self, valid_identifiers: I) {
|
||||
self.valid_identifiers = valid_identifiers.collect();
|
||||
pub fn identifiers<I: Iterator<Item = (&'i str, &'i str)>>(&mut self, identifiers: I) {
|
||||
self.identifiers = Self::interpret(identifiers).collect();
|
||||
}
|
||||
|
||||
pub fn valid_words<I: Iterator<Item = &'w str>>(&mut self, valid_words: I) {
|
||||
self.valid_words = valid_words.map(UniCase::new).collect();
|
||||
pub fn words<I: Iterator<Item = (&'w str, &'w str)>>(&mut self, words: I) {
|
||||
self.words = Self::interpret(words)
|
||||
.map(|(k, v)| (UniCase::new(k), v))
|
||||
.collect();
|
||||
}
|
||||
|
||||
/// Converts raw `(typo, correction)` pairs into `(typo, Status)` pairs.
///
/// Each pair is classified as follows, checked in this order:
/// - the correction equals the typo (exact string comparison): `Status::Valid`
/// - the correction is empty: `Status::Invalid`
/// - anything else: `Status::Corrections` holding that single correction,
///   borrowed rather than copied
///
/// The result is a lazy iterator; nothing is evaluated until it is consumed.
pub fn interpret<'z, I: Iterator<Item = (&'z str, &'z str)>>(
|
||||
cases: I,
|
||||
) -> impl Iterator<Item = (&'z str, Status<'z>)> {
|
||||
cases.map(|(typo, correction)| {
|
||||
// Equality is tested first, so an empty typo mapped to an empty
// correction is classified as `Valid`, not `Invalid`.
let correction = if typo == correction {
|
||||
Status::Valid
|
||||
} else if correction.is_empty() {
|
||||
Status::Invalid
|
||||
} else {
|
||||
Status::Corrections(vec![Cow::Borrowed(correction)])
|
||||
};
|
||||
(typo, correction)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> {
|
||||
fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Vec<Cow<'s, str>> {
|
||||
if self.valid_identifiers.contains(ident.token()) {
|
||||
Vec::new()
|
||||
} else {
|
||||
self.inner.correct_ident(ident)
|
||||
}
|
||||
fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Option<Status<'s>> {
|
||||
self.identifiers
|
||||
.get(ident.token())
|
||||
.map(|c| c.borrow())
|
||||
.or_else(|| self.inner.correct_ident(ident))
|
||||
}
|
||||
|
||||
fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Vec<Cow<'s, str>> {
|
||||
fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Option<Status<'s>> {
|
||||
let w = UniCase::new(word.token());
|
||||
if self.valid_words.contains(&w) {
|
||||
Vec::new()
|
||||
} else {
|
||||
self.inner.correct_word(word)
|
||||
}
|
||||
// HACK: couldn't figure out the lifetime issue with replacing `cloned` with `borrow`
|
||||
self.words
|
||||
.get(&w)
|
||||
.cloned()
|
||||
.or_else(|| self.inner.correct_word(word))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -188,7 +216,12 @@ mod test {
|
|||
("fOo", Case::None, "fOo"),
|
||||
];
|
||||
for (correction, case, expected) in cases.iter() {
|
||||
let actual = case_correct(correction, *case);
|
||||
let mut actual = Cow::Borrowed(*correction);
|
||||
case_correct(&mut actual, *case);
|
||||
assert_eq!(*expected, actual);
|
||||
|
||||
let mut actual = Cow::Owned(String::from(*correction));
|
||||
case_correct(&mut actual, *case);
|
||||
assert_eq!(*expected, actual);
|
||||
}
|
||||
}
|
||||
|
|
16
src/main.rs
16
src/main.rs
|
@ -58,20 +58,8 @@ fn run() -> Result<i32, anyhow::Error> {
|
|||
|
||||
let dictionary = crate::dict::BuiltIn::new(config.default.locale());
|
||||
let mut dictionary = crate::dict::Override::new(dictionary);
|
||||
dictionary.valid_identifiers(
|
||||
config
|
||||
.default
|
||||
.extend_valid_identifiers()
|
||||
.iter()
|
||||
.map(|s| s.as_str()),
|
||||
);
|
||||
dictionary.valid_words(
|
||||
config
|
||||
.default
|
||||
.extend_valid_words()
|
||||
.iter()
|
||||
.map(|s| s.as_str()),
|
||||
);
|
||||
dictionary.identifiers(config.default.extend_identifiers());
|
||||
dictionary.words(config.default.extend_words());
|
||||
|
||||
let mut settings = typos::checks::TyposSettings::new();
|
||||
settings
|
||||
|
|
|
@ -57,11 +57,12 @@ impl<'r> Replace<'r> {
|
|||
impl<'r> typos::report::Report for Replace<'r> {
|
||||
fn report(&self, msg: typos::report::Message<'_>) -> bool {
|
||||
match msg {
|
||||
typos::report::Message::Correction(msg) => {
|
||||
if msg.corrections.len() == 1 {
|
||||
typos::report::Message::Correction(msg) => match msg.corrections {
|
||||
typos::Status::Corrections(corrections) if corrections.len() == 1 => {
|
||||
let path = msg.path.to_owned();
|
||||
let line_num = msg.line_num;
|
||||
let correction = Correction::from_content(msg);
|
||||
let correction =
|
||||
Correction::new(msg.byte_offset, msg.typo, corrections[0].as_ref());
|
||||
let mut deferred = self.deferred.lock().unwrap();
|
||||
let content = deferred
|
||||
.content
|
||||
|
@ -71,24 +72,25 @@ impl<'r> typos::report::Report for Replace<'r> {
|
|||
.or_insert_with(Vec::new);
|
||||
content.push(correction);
|
||||
false
|
||||
} else {
|
||||
self.reporter
|
||||
.report(typos::report::Message::Correction(msg))
|
||||
}
|
||||
}
|
||||
typos::report::Message::PathCorrection(msg) => {
|
||||
if msg.corrections.len() == 1 {
|
||||
_ => self
|
||||
.reporter
|
||||
.report(typos::report::Message::Correction(msg)),
|
||||
},
|
||||
typos::report::Message::PathCorrection(msg) => match msg.corrections {
|
||||
typos::Status::Corrections(corrections) if corrections.len() == 1 => {
|
||||
let path = msg.path.to_owned();
|
||||
let correction = Correction::from_path(msg);
|
||||
let correction =
|
||||
Correction::new(msg.byte_offset, msg.typo, corrections[0].as_ref());
|
||||
let mut deferred = self.deferred.lock().unwrap();
|
||||
let content = deferred.paths.entry(path).or_insert_with(Vec::new);
|
||||
content.push(correction);
|
||||
false
|
||||
} else {
|
||||
self.reporter
|
||||
.report(typos::report::Message::PathCorrection(msg))
|
||||
}
|
||||
}
|
||||
_ => self
|
||||
.reporter
|
||||
.report(typos::report::Message::PathCorrection(msg)),
|
||||
},
|
||||
_ => self.reporter.report(msg),
|
||||
}
|
||||
}
|
||||
|
@ -108,21 +110,11 @@ struct Correction {
|
|||
}
|
||||
|
||||
impl Correction {
|
||||
fn from_content(other: typos::report::Correction<'_>) -> Self {
|
||||
assert_eq!(other.corrections.len(), 1);
|
||||
fn new(byte_offset: usize, typo: &str, correction: &str) -> Self {
|
||||
Self {
|
||||
byte_offset: other.byte_offset,
|
||||
typo: other.typo.as_bytes().to_vec(),
|
||||
correction: other.corrections[0].as_bytes().to_vec(),
|
||||
}
|
||||
}
|
||||
|
||||
fn from_path(other: typos::report::PathCorrection<'_>) -> Self {
|
||||
assert_eq!(other.corrections.len(), 1);
|
||||
Self {
|
||||
byte_offset: other.byte_offset,
|
||||
typo: other.typo.as_bytes().to_vec(),
|
||||
correction: other.corrections[0].as_bytes().to_vec(),
|
||||
byte_offset,
|
||||
typo: typo.as_bytes().to_vec(),
|
||||
correction: correction.as_bytes().to_vec(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -222,7 +214,9 @@ mod test {
|
|||
.line_num(1)
|
||||
.byte_offset(2)
|
||||
.typo("foo")
|
||||
.corrections(vec![std::borrow::Cow::Borrowed("bar")])
|
||||
.corrections(typos::Status::Corrections(vec![
|
||||
std::borrow::Cow::Borrowed("bar"),
|
||||
]))
|
||||
.into(),
|
||||
);
|
||||
replace.write().unwrap();
|
||||
|
@ -243,7 +237,9 @@ mod test {
|
|||
.path(input_file.path())
|
||||
.byte_offset(0)
|
||||
.typo("foo")
|
||||
.corrections(vec![std::borrow::Cow::Borrowed("bar")])
|
||||
.corrections(typos::Status::Corrections(vec![
|
||||
std::borrow::Cow::Borrowed("bar"),
|
||||
]))
|
||||
.into(),
|
||||
);
|
||||
replace.write().unwrap();
|
||||
|
|
Loading…
Reference in a new issue