mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-22 17:11:07 -05:00
Merge pull request #144 from epage/custom
feat: Custom dictionary support
This commit is contained in:
commit
b8d35c30e5
11 changed files with 354 additions and 227 deletions
|
@ -10,7 +10,7 @@ schedules:
|
||||||
include:
|
include:
|
||||||
- master
|
- master
|
||||||
variables:
|
variables:
|
||||||
minrust: 1.40.0
|
minrust: 1.42.0
|
||||||
codecov_token: $(CODECOV_TOKEN_SECRET)
|
codecov_token: $(CODECOV_TOKEN_SECRET)
|
||||||
windows_vm: vs2017-win2016
|
windows_vm: vs2017-win2016
|
||||||
mac_vm: macos-10.14
|
mac_vm: macos-10.14
|
||||||
|
|
|
@ -13,7 +13,9 @@ fn correct_word_hit(b: &mut test::Bencher) {
|
||||||
let input = typos::tokens::Word::new("successs", 0).unwrap();
|
let input = typos::tokens::Word::new("successs", 0).unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
corrections.correct_word(input),
|
corrections.correct_word(input),
|
||||||
vec![std::borrow::Cow::Borrowed("successes")]
|
Some(typos::Status::Corrections(vec![
|
||||||
|
std::borrow::Cow::Borrowed("successes")
|
||||||
|
]))
|
||||||
);
|
);
|
||||||
b.iter(|| corrections.correct_word(input));
|
b.iter(|| corrections.correct_word(input));
|
||||||
}
|
}
|
||||||
|
@ -22,6 +24,6 @@ fn correct_word_hit(b: &mut test::Bencher) {
|
||||||
fn correct_word_miss(b: &mut test::Bencher) {
|
fn correct_word_miss(b: &mut test::Bencher) {
|
||||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||||
let input = typos::tokens::Word::new("success", 0).unwrap();
|
let input = typos::tokens::Word::new("success", 0).unwrap();
|
||||||
assert!(corrections.correct_word(input).is_empty());
|
assert!(corrections.correct_word(input).is_none());
|
||||||
b.iter(|| corrections.correct_word(input));
|
b.iter(|| corrections.correct_word(input));
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ use bstr::ByteSlice;
|
||||||
use crate::report;
|
use crate::report;
|
||||||
use crate::tokens;
|
use crate::tokens;
|
||||||
use crate::Dictionary;
|
use crate::Dictionary;
|
||||||
|
use crate::Status;
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub struct TyposSettings {
|
pub struct TyposSettings {
|
||||||
|
@ -233,16 +234,20 @@ impl Checks {
|
||||||
dictionary: &dyn Dictionary,
|
dictionary: &dyn Dictionary,
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<bool, crate::Error> {
|
) -> Result<bool, crate::Error> {
|
||||||
let mut typos_found = false;
|
|
||||||
|
|
||||||
if !self.check_filenames {
|
if !self.check_filenames {
|
||||||
return Ok(typos_found);
|
return Ok(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(part) = path.file_name().and_then(|s| s.to_str()) {
|
let mut typos_found = false;
|
||||||
for ident in parser.parse(part) {
|
for ident in path
|
||||||
let corrections = dictionary.correct_ident(ident);
|
.file_name()
|
||||||
if !corrections.is_empty() {
|
.and_then(|s| s.to_str())
|
||||||
|
.iter()
|
||||||
|
.flat_map(|part| parser.parse(part))
|
||||||
|
{
|
||||||
|
match dictionary.correct_ident(ident) {
|
||||||
|
Some(Status::Valid) => {}
|
||||||
|
Some(corrections) => {
|
||||||
let byte_offset = ident.offset();
|
let byte_offset = ident.offset();
|
||||||
let msg = report::PathCorrection {
|
let msg = report::PathCorrection {
|
||||||
path,
|
path,
|
||||||
|
@ -251,10 +256,12 @@ impl Checks {
|
||||||
corrections,
|
corrections,
|
||||||
};
|
};
|
||||||
typos_found |= reporter.report(msg.into());
|
typos_found |= reporter.report(msg.into());
|
||||||
} else {
|
}
|
||||||
|
None => {
|
||||||
for word in ident.split() {
|
for word in ident.split() {
|
||||||
let corrections = dictionary.correct_word(word);
|
match dictionary.correct_word(word) {
|
||||||
if !corrections.is_empty() {
|
Some(Status::Valid) => {}
|
||||||
|
Some(corrections) => {
|
||||||
let byte_offset = word.offset();
|
let byte_offset = word.offset();
|
||||||
let msg = report::PathCorrection {
|
let msg = report::PathCorrection {
|
||||||
path,
|
path,
|
||||||
|
@ -264,6 +271,8 @@ impl Checks {
|
||||||
};
|
};
|
||||||
typos_found |= reporter.report(msg.into());
|
typos_found |= reporter.report(msg.into());
|
||||||
}
|
}
|
||||||
|
None => {}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -305,8 +314,9 @@ impl Checks {
|
||||||
for (line_idx, line) in buffer.lines().enumerate() {
|
for (line_idx, line) in buffer.lines().enumerate() {
|
||||||
let line_num = line_idx + 1;
|
let line_num = line_idx + 1;
|
||||||
for ident in parser.parse_bytes(line) {
|
for ident in parser.parse_bytes(line) {
|
||||||
let corrections = dictionary.correct_ident(ident);
|
match dictionary.correct_ident(ident) {
|
||||||
if !corrections.is_empty() {
|
Some(Status::Valid) => {}
|
||||||
|
Some(corrections) => {
|
||||||
let byte_offset = ident.offset();
|
let byte_offset = ident.offset();
|
||||||
let msg = report::Correction {
|
let msg = report::Correction {
|
||||||
path,
|
path,
|
||||||
|
@ -317,10 +327,12 @@ impl Checks {
|
||||||
corrections,
|
corrections,
|
||||||
};
|
};
|
||||||
typos_found |= reporter.report(msg.into());
|
typos_found |= reporter.report(msg.into());
|
||||||
} else {
|
}
|
||||||
|
None => {
|
||||||
for word in ident.split() {
|
for word in ident.split() {
|
||||||
let corrections = dictionary.correct_word(word);
|
match dictionary.correct_word(word) {
|
||||||
if !corrections.is_empty() {
|
Some(Status::Valid) => {}
|
||||||
|
Some(corrections) => {
|
||||||
let byte_offset = word.offset();
|
let byte_offset = word.offset();
|
||||||
let msg = report::Correction {
|
let msg = report::Correction {
|
||||||
path,
|
path,
|
||||||
|
@ -332,6 +344,9 @@ impl Checks {
|
||||||
};
|
};
|
||||||
typos_found |= reporter.report(msg.into());
|
typos_found |= reporter.report(msg.into());
|
||||||
}
|
}
|
||||||
|
None => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,49 @@
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
pub trait Dictionary: Send + Sync {
|
#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize, derive_more::From)]
|
||||||
fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>) -> Vec<Cow<'s, str>>;
|
#[serde(rename_all = "snake_case")]
|
||||||
|
#[serde(untagged)]
|
||||||
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Vec<Cow<'s, str>>;
|
pub enum Status<'c> {
|
||||||
|
Valid,
|
||||||
|
Invalid,
|
||||||
|
Corrections(Vec<Cow<'c, str>>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'c> Status<'c> {
|
||||||
|
pub fn is_invalid(&self) -> bool {
|
||||||
|
matches!(self, Status::Invalid)
|
||||||
|
}
|
||||||
|
pub fn is_valid(&self) -> bool {
|
||||||
|
matches!(self, Status::Valid)
|
||||||
|
}
|
||||||
|
pub fn is_correction(&self) -> bool {
|
||||||
|
matches!(self, Status::Corrections(_))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn corrections_mut(&mut self) -> impl Iterator<Item = &mut Cow<'c, str>> {
|
||||||
|
match self {
|
||||||
|
Status::Corrections(corrections) => itertools::Either::Left(corrections.iter_mut()),
|
||||||
|
_ => itertools::Either::Right([].iter_mut()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn borrow(&self) -> Status<'_> {
|
||||||
|
match self {
|
||||||
|
Status::Corrections(corrections) => {
|
||||||
|
let corrections = corrections
|
||||||
|
.iter()
|
||||||
|
.map(|c| Cow::Borrowed(c.as_ref()))
|
||||||
|
.collect();
|
||||||
|
Status::Corrections(corrections)
|
||||||
|
}
|
||||||
|
_ => self.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait Dictionary: Send + Sync {
|
||||||
|
fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>)
|
||||||
|
-> Option<Status<'s>>;
|
||||||
|
|
||||||
|
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
#![allow(clippy::needless_update)]
|
#![allow(clippy::needless_update)]
|
||||||
|
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
|
|
||||||
#[derive(Clone, Debug, serde::Serialize, derive_more::From)]
|
#[derive(Clone, Debug, serde::Serialize, derive_more::From)]
|
||||||
|
@ -21,8 +20,8 @@ impl<'m> Message<'m> {
|
||||||
pub fn is_correction(&self) -> bool {
|
pub fn is_correction(&self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
Message::BinaryFile(_) => false,
|
Message::BinaryFile(_) => false,
|
||||||
Message::Correction(_) => true,
|
Message::Correction(c) => c.corrections.is_correction(),
|
||||||
Message::PathCorrection(_) => true,
|
Message::PathCorrection(c) => c.corrections.is_correction(),
|
||||||
Message::File(_) => false,
|
Message::File(_) => false,
|
||||||
Message::Parse(_) => false,
|
Message::Parse(_) => false,
|
||||||
Message::PathError(_) => false,
|
Message::PathError(_) => false,
|
||||||
|
@ -59,7 +58,7 @@ pub struct Correction<'m> {
|
||||||
pub line_num: usize,
|
pub line_num: usize,
|
||||||
pub byte_offset: usize,
|
pub byte_offset: usize,
|
||||||
pub typo: &'m str,
|
pub typo: &'m str,
|
||||||
pub corrections: Vec<Cow<'m, str>>,
|
pub corrections: crate::Status<'m>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'m> Default for Correction<'m> {
|
impl<'m> Default for Correction<'m> {
|
||||||
|
@ -70,7 +69,7 @@ impl<'m> Default for Correction<'m> {
|
||||||
line_num: 0,
|
line_num: 0,
|
||||||
byte_offset: 0,
|
byte_offset: 0,
|
||||||
typo: "",
|
typo: "",
|
||||||
corrections: Vec::new(),
|
corrections: crate::Status::Invalid,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -81,7 +80,7 @@ pub struct PathCorrection<'m> {
|
||||||
pub path: &'m std::path::Path,
|
pub path: &'m std::path::Path,
|
||||||
pub byte_offset: usize,
|
pub byte_offset: usize,
|
||||||
pub typo: &'m str,
|
pub typo: &'m str,
|
||||||
pub corrections: Vec<Cow<'m, str>>,
|
pub corrections: crate::Status<'m>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'m> Default for PathCorrection<'m> {
|
impl<'m> Default for PathCorrection<'m> {
|
||||||
|
@ -90,7 +89,7 @@ impl<'m> Default for PathCorrection<'m> {
|
||||||
path: std::path::Path::new("-"),
|
path: std::path::Path::new("-"),
|
||||||
byte_offset: 0,
|
byte_offset: 0,
|
||||||
typo: "",
|
typo: "",
|
||||||
corrections: Vec::new(),
|
corrections: crate::Status::Invalid,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -196,24 +195,42 @@ impl Report for PrintBrief {
|
||||||
Message::BinaryFile(msg) => {
|
Message::BinaryFile(msg) => {
|
||||||
log::info!("{}", msg);
|
log::info!("{}", msg);
|
||||||
}
|
}
|
||||||
Message::Correction(msg) => {
|
Message::Correction(msg) => match &msg.corrections {
|
||||||
|
crate::Status::Valid => {}
|
||||||
|
crate::Status::Invalid => {
|
||||||
|
println!(
|
||||||
|
"{}:{}:{}: {} is disallowed",
|
||||||
|
msg.path.display(),
|
||||||
|
msg.line_num,
|
||||||
|
msg.byte_offset,
|
||||||
|
msg.typo,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
crate::Status::Corrections(corrections) => {
|
||||||
println!(
|
println!(
|
||||||
"{}:{}:{}: {} -> {}",
|
"{}:{}:{}: {} -> {}",
|
||||||
msg.path.display(),
|
msg.path.display(),
|
||||||
msg.line_num,
|
msg.line_num,
|
||||||
msg.byte_offset,
|
msg.byte_offset,
|
||||||
msg.typo,
|
msg.typo,
|
||||||
itertools::join(msg.corrections.iter(), ", ")
|
itertools::join(corrections.iter(), ", ")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Message::PathCorrection(msg) => {
|
},
|
||||||
|
Message::PathCorrection(msg) => match &msg.corrections {
|
||||||
|
crate::Status::Valid => {}
|
||||||
|
crate::Status::Invalid => {
|
||||||
|
println!("{}: {} is disallowed", msg.path.display(), msg.typo,);
|
||||||
|
}
|
||||||
|
crate::Status::Corrections(corrections) => {
|
||||||
println!(
|
println!(
|
||||||
"{}: {} -> {}",
|
"{}: {} -> {}",
|
||||||
msg.path.display(),
|
msg.path.display(),
|
||||||
msg.typo,
|
msg.typo,
|
||||||
itertools::join(msg.corrections.iter(), ", ")
|
itertools::join(corrections.iter(), ", ")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
},
|
||||||
Message::File(msg) => {
|
Message::File(msg) => {
|
||||||
println!("{}", msg.path.display());
|
println!("{}", msg.path.display());
|
||||||
}
|
}
|
||||||
|
@ -241,14 +258,24 @@ impl Report for PrintLong {
|
||||||
log::info!("{}", msg);
|
log::info!("{}", msg);
|
||||||
}
|
}
|
||||||
Message::Correction(msg) => print_long_correction(msg),
|
Message::Correction(msg) => print_long_correction(msg),
|
||||||
Message::PathCorrection(msg) => {
|
Message::PathCorrection(msg) => match &msg.corrections {
|
||||||
|
crate::Status::Valid => {}
|
||||||
|
crate::Status::Invalid => {
|
||||||
|
println!(
|
||||||
|
"{}: error: `{}` is disallowed",
|
||||||
|
msg.path.display(),
|
||||||
|
msg.typo,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
crate::Status::Corrections(corrections) => {
|
||||||
println!(
|
println!(
|
||||||
"{}: error: `{}` should be {}",
|
"{}: error: `{}` should be {}",
|
||||||
msg.path.display(),
|
msg.path.display(),
|
||||||
msg.typo,
|
msg.typo,
|
||||||
itertools::join(msg.corrections.iter().map(|c| format!("`{}`", c)), ", ")
|
itertools::join(corrections.iter().map(|c| format!("`{}`", c)), ", ")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
},
|
||||||
Message::File(msg) => {
|
Message::File(msg) => {
|
||||||
println!("{}", msg.path.display());
|
println!("{}", msg.path.display());
|
||||||
}
|
}
|
||||||
|
@ -278,14 +305,21 @@ fn print_long_correction(msg: &Correction) {
|
||||||
|
|
||||||
let stdout = io::stdout();
|
let stdout = io::stdout();
|
||||||
let mut handle = stdout.lock();
|
let mut handle = stdout.lock();
|
||||||
|
match &msg.corrections {
|
||||||
|
crate::Status::Valid => {}
|
||||||
|
crate::Status::Invalid => {
|
||||||
|
writeln!(handle, "error: `{}` is disallowed", msg.typo,).unwrap();
|
||||||
|
}
|
||||||
|
crate::Status::Corrections(corrections) => {
|
||||||
writeln!(
|
writeln!(
|
||||||
handle,
|
handle,
|
||||||
"error: `{}` should be {}",
|
"error: `{}` should be {}",
|
||||||
msg.typo,
|
msg.typo,
|
||||||
itertools::join(msg.corrections.iter().map(|c| format!("`{}`", c)), ", ")
|
itertools::join(corrections.iter().map(|c| format!("`{}`", c)), ", ")
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
writeln!(
|
writeln!(
|
||||||
handle,
|
handle,
|
||||||
" --> {}:{}:{}",
|
" --> {}:{}:{}",
|
||||||
|
|
|
@ -73,7 +73,7 @@ impl ParserBuilder {
|
||||||
let escaped = regex::escape(&grapheme);
|
let escaped = regex::escape(&grapheme);
|
||||||
pattern.push_str(&format!("|{}", escaped));
|
pattern.push_str(&format!("|{}", escaped));
|
||||||
}
|
}
|
||||||
pattern.push_str(r#")"#);
|
pattern.push(')');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,5 +30,5 @@ Configuration is read from the following (in precedence order)
|
||||||
| default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. |
|
| default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. |
|
||||||
| default.identifier-include-chars | \- | string | Allow identifiers to include these characters. |
|
| default.identifier-include-chars | \- | string | Allow identifiers to include these characters. |
|
||||||
| default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
|
| default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
|
||||||
| default.extend-valid-identifiers | \- | list of strings | Identifiers to presume as correct, skipping spell checking. This extends the list when layering configuration, rather than replacing it. |
|
| default.extend-identifiers | \- | table of strings | Corrections for identifiers. When the correction is blank, the identifier is never valid. When the correction is the key, the identifier is always valid. |
|
||||||
| default.extend-valid-words | \- | list of strings | Words to presume as correct, skipping spell checking. This extends the list when layering configuration, rather than replacing it. |
|
| default.extend-words | \- | table of strings | Corrections for words. When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. |
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
|
|
||||||
pub trait ConfigSource {
|
pub trait ConfigSource {
|
||||||
|
@ -97,12 +98,12 @@ pub trait FileSource {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extend_valid_identifiers(&self) -> &[String] {
|
fn extend_identifiers(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
|
||||||
&[]
|
Box::new(None.into_iter())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extend_valid_words(&self) -> &[String] {
|
fn extend_words(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
|
||||||
&[]
|
Box::new(None.into_iter())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -220,28 +221,22 @@ impl Walk {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ignore_dot(&self) -> bool {
|
pub fn ignore_dot(&self) -> bool {
|
||||||
self.ignore_dot
|
self.ignore_dot.or(self.ignore_files).unwrap_or(true)
|
||||||
.or_else(|| self.ignore_files)
|
|
||||||
.unwrap_or(true)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ignore_vcs(&self) -> bool {
|
pub fn ignore_vcs(&self) -> bool {
|
||||||
self.ignore_vcs
|
self.ignore_vcs.or(self.ignore_files).unwrap_or(true)
|
||||||
.or_else(|| self.ignore_files)
|
|
||||||
.unwrap_or(true)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ignore_global(&self) -> bool {
|
pub fn ignore_global(&self) -> bool {
|
||||||
self.ignore_global
|
self.ignore_global
|
||||||
.or_else(|| self.ignore_vcs)
|
.or(self.ignore_vcs)
|
||||||
.or_else(|| self.ignore_files)
|
.or(self.ignore_files)
|
||||||
.unwrap_or(true)
|
.unwrap_or(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ignore_parent(&self) -> bool {
|
pub fn ignore_parent(&self) -> bool {
|
||||||
self.ignore_parent
|
self.ignore_parent.or(self.ignore_files).unwrap_or(true)
|
||||||
.or_else(|| self.ignore_files)
|
|
||||||
.unwrap_or(true)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -295,8 +290,8 @@ pub struct FileConfig {
|
||||||
pub identifier_include_digits: Option<bool>,
|
pub identifier_include_digits: Option<bool>,
|
||||||
pub identifier_include_chars: Option<String>,
|
pub identifier_include_chars: Option<String>,
|
||||||
pub locale: Option<Locale>,
|
pub locale: Option<Locale>,
|
||||||
pub extend_valid_identifiers: Vec<String>,
|
pub extend_identifiers: HashMap<String, String>,
|
||||||
pub extend_valid_words: Vec<String>,
|
pub extend_words: HashMap<String, String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FileConfig {
|
impl FileConfig {
|
||||||
|
@ -325,10 +320,16 @@ impl FileConfig {
|
||||||
if let Some(source) = source.locale() {
|
if let Some(source) = source.locale() {
|
||||||
self.locale = Some(source);
|
self.locale = Some(source);
|
||||||
}
|
}
|
||||||
self.extend_valid_identifiers
|
self.extend_identifiers.extend(
|
||||||
.extend(source.extend_valid_identifiers().iter().cloned());
|
source
|
||||||
self.extend_valid_words
|
.extend_identifiers()
|
||||||
.extend(source.extend_valid_words().iter().cloned());
|
.map(|(k, v)| (k.to_owned(), v.to_owned())),
|
||||||
|
);
|
||||||
|
self.extend_words.extend(
|
||||||
|
source
|
||||||
|
.extend_words()
|
||||||
|
.map(|(k, v)| (k.to_owned(), v.to_owned())),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn check_filename(&self) -> bool {
|
pub fn check_filename(&self) -> bool {
|
||||||
|
@ -363,12 +364,20 @@ impl FileConfig {
|
||||||
self.locale.unwrap_or_default()
|
self.locale.unwrap_or_default()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn extend_valid_identifiers(&self) -> &[String] {
|
pub fn extend_identifiers(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
|
||||||
self.extend_valid_identifiers.as_slice()
|
Box::new(
|
||||||
|
self.extend_identifiers
|
||||||
|
.iter()
|
||||||
|
.map(|(k, v)| (k.as_str(), v.as_str())),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn extend_valid_words(&self) -> &[String] {
|
pub fn extend_words(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
|
||||||
self.extend_valid_words.as_slice()
|
Box::new(
|
||||||
|
self.extend_words
|
||||||
|
.iter()
|
||||||
|
.map(|(k, v)| (k.as_str(), v.as_str())),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -405,12 +414,20 @@ impl FileSource for FileConfig {
|
||||||
self.locale
|
self.locale
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extend_valid_identifiers(&self) -> &[String] {
|
fn extend_identifiers(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
|
||||||
self.extend_valid_identifiers.as_slice()
|
Box::new(
|
||||||
|
self.extend_identifiers
|
||||||
|
.iter()
|
||||||
|
.map(|(k, v)| (k.as_str(), v.as_str())),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extend_valid_words(&self) -> &[String] {
|
fn extend_words(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
|
||||||
self.extend_valid_words.as_slice()
|
Box::new(
|
||||||
|
self.extend_words
|
||||||
|
.iter()
|
||||||
|
.map(|(k, v)| (k.as_str(), v.as_str())),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
147
src/dict.rs
147
src/dict.rs
|
@ -1,9 +1,10 @@
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use unicase::UniCase;
|
use unicase::UniCase;
|
||||||
|
|
||||||
use typos::tokens::Case;
|
use typos::tokens::Case;
|
||||||
|
use typos::Status;
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct BuiltIn {
|
pub struct BuiltIn {
|
||||||
|
@ -20,40 +21,40 @@ impl BuiltIn {
|
||||||
pub fn correct_ident<'s, 'w>(
|
pub fn correct_ident<'s, 'w>(
|
||||||
&'s self,
|
&'s self,
|
||||||
_ident: typos::tokens::Identifier<'w>,
|
_ident: typos::tokens::Identifier<'w>,
|
||||||
) -> Vec<Cow<'s, str>> {
|
) -> Option<Status<'s>> {
|
||||||
Vec::new()
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn correct_word<'s, 'w>(
|
pub fn correct_word<'s, 'w>(
|
||||||
&'s self,
|
&'s self,
|
||||||
word_token: typos::tokens::Word<'w>,
|
word_token: typos::tokens::Word<'w>,
|
||||||
) -> Vec<Cow<'s, str>> {
|
) -> Option<Status<'s>> {
|
||||||
let word = word_token.token();
|
let word = word_token.token();
|
||||||
let corrections = if let Some(correction) = self.correct_with_dict(word) {
|
let mut corrections = if let Some(correction) = self.correct_with_dict(word) {
|
||||||
self.correct_with_vars(word)
|
self.correct_with_vars(word)
|
||||||
.unwrap_or_else(|| vec![correction])
|
.unwrap_or_else(|| Status::Corrections(vec![Cow::Borrowed(correction)]))
|
||||||
} else {
|
} else {
|
||||||
self.correct_with_vars(word).unwrap_or_else(Vec::new)
|
self.correct_with_vars(word)?
|
||||||
};
|
};
|
||||||
corrections
|
corrections
|
||||||
.into_iter()
|
.corrections_mut()
|
||||||
.map(|s| case_correct(s, word_token.case()))
|
.for_each(|mut s| case_correct(&mut s, word_token.case()));
|
||||||
.collect()
|
Some(corrections)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Not using `Status` to avoid the allocations
|
||||||
fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
|
fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
|
||||||
map_lookup(&typos_dict::WORD_DICTIONARY, word)
|
map_lookup(&typos_dict::WORD_DICTIONARY, word)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn correct_with_vars(&self, word: &str) -> Option<Vec<&'static str>> {
|
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
|
||||||
let variants = map_lookup(&typos_vars::VARS_DICTIONARY, word)?;
|
map_lookup(&typos_vars::VARS_DICTIONARY, word).map(|variants| self.select_variant(variants))
|
||||||
self.select_variant(variants)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn select_variant(
|
fn select_variant(
|
||||||
&self,
|
&self,
|
||||||
vars: &'static [(u8, &'static typos_vars::VariantsMap)],
|
vars: &'static [(u8, &'static typos_vars::VariantsMap)],
|
||||||
) -> Option<Vec<&'static str>> {
|
) -> Status<'static> {
|
||||||
let var = vars[0];
|
let var = vars[0];
|
||||||
let var_categories = unsafe {
|
let var_categories = unsafe {
|
||||||
// Code-genned from a checked category-set, so known to be safe
|
// Code-genned from a checked category-set, so known to be safe
|
||||||
|
@ -62,12 +63,13 @@ impl BuiltIn {
|
||||||
if let Some(locale) = self.locale {
|
if let Some(locale) = self.locale {
|
||||||
if var_categories.contains(locale) {
|
if var_categories.contains(locale) {
|
||||||
// Already valid for the current locale.
|
// Already valid for the current locale.
|
||||||
None
|
Status::Valid
|
||||||
} else {
|
} else {
|
||||||
Some(
|
Status::Corrections(
|
||||||
typos_vars::corrections(locale, *var.1)
|
typos_vars::corrections(locale, *var.1)
|
||||||
.iter()
|
.iter()
|
||||||
.copied()
|
.copied()
|
||||||
|
.map(Cow::Borrowed)
|
||||||
.collect(),
|
.collect(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -75,23 +77,29 @@ impl BuiltIn {
|
||||||
// All locales are valid
|
// All locales are valid
|
||||||
if var_categories.is_empty() {
|
if var_categories.is_empty() {
|
||||||
// But the word is never valid.
|
// But the word is never valid.
|
||||||
let mut unique: Vec<_> = var.1.iter().flat_map(|v| v.iter()).copied().collect();
|
let mut unique: Vec<_> = var
|
||||||
|
.1
|
||||||
|
.iter()
|
||||||
|
.flat_map(|v| v.iter())
|
||||||
|
.copied()
|
||||||
|
.map(Cow::Borrowed)
|
||||||
|
.collect();
|
||||||
unique.sort_unstable();
|
unique.sort_unstable();
|
||||||
unique.dedup();
|
unique.dedup();
|
||||||
Some(unique)
|
Status::Corrections(unique)
|
||||||
} else {
|
} else {
|
||||||
None
|
Status::Valid
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl typos::Dictionary for BuiltIn {
|
impl typos::Dictionary for BuiltIn {
|
||||||
fn correct_ident<'s, 'w>(&'s self, ident: typos::tokens::Identifier<'w>) -> Vec<Cow<'s, str>> {
|
fn correct_ident<'s, 'w>(&'s self, ident: typos::tokens::Identifier<'w>) -> Option<Status<'s>> {
|
||||||
BuiltIn::correct_ident(self, ident)
|
BuiltIn::correct_ident(self, ident)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Vec<Cow<'s, str>> {
|
fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Status<'s>> {
|
||||||
BuiltIn::correct_word(self, word)
|
BuiltIn::correct_word(self, word)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -109,68 +117,88 @@ fn map_lookup<V: Clone>(map: &'static phf::Map<UniCase<&'static str>, V>, key: &
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn case_correct(correction: &str, case: Case) -> Cow<'_, str> {
|
fn case_correct(correction: &mut Cow<'_, str>, case: Case) {
|
||||||
match case {
|
match case {
|
||||||
Case::Lower | Case::None => correction.into(),
|
Case::Lower | Case::None => (),
|
||||||
Case::Title => {
|
Case::Title => match correction {
|
||||||
let mut title = String::with_capacity(correction.as_bytes().len());
|
Cow::Borrowed(s) => {
|
||||||
let mut char_indices = correction.char_indices();
|
let mut s = String::from(*s);
|
||||||
if let Some((_, c)) = char_indices.next() {
|
s[0..1].make_ascii_uppercase();
|
||||||
title.extend(c.to_uppercase());
|
*correction = s.into();
|
||||||
if let Some((i, _)) = char_indices.next() {
|
|
||||||
title.push_str(&correction[i..]);
|
|
||||||
}
|
}
|
||||||
|
Cow::Owned(s) => {
|
||||||
|
s[0..1].make_ascii_uppercase();
|
||||||
}
|
}
|
||||||
title.into()
|
},
|
||||||
|
Case::Scream => match correction {
|
||||||
|
Cow::Borrowed(s) => {
|
||||||
|
let mut s = String::from(*s);
|
||||||
|
s.make_ascii_uppercase();
|
||||||
|
*correction = s.into();
|
||||||
}
|
}
|
||||||
Case::Scream => correction
|
Cow::Owned(s) => {
|
||||||
.chars()
|
s.make_ascii_uppercase();
|
||||||
.flat_map(|c| c.to_uppercase())
|
}
|
||||||
.collect::<String>()
|
},
|
||||||
.into(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Override<'i, 'w, D> {
|
pub struct Override<'i, 'w, D> {
|
||||||
valid_identifiers: HashSet<&'i str>,
|
identifiers: HashMap<&'i str, Status<'i>>,
|
||||||
valid_words: HashSet<unicase::UniCase<&'w str>>,
|
words: HashMap<unicase::UniCase<&'w str>, Status<'w>>,
|
||||||
inner: D,
|
inner: D,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> {
|
impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> {
|
||||||
pub fn new(inner: D) -> Self {
|
pub fn new(inner: D) -> Self {
|
||||||
Self {
|
Self {
|
||||||
valid_identifiers: Default::default(),
|
identifiers: Default::default(),
|
||||||
valid_words: Default::default(),
|
words: Default::default(),
|
||||||
inner,
|
inner,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn valid_identifiers<I: Iterator<Item = &'i str>>(&mut self, valid_identifiers: I) {
|
pub fn identifiers<I: Iterator<Item = (&'i str, &'i str)>>(&mut self, identifiers: I) {
|
||||||
self.valid_identifiers = valid_identifiers.collect();
|
self.identifiers = Self::interpret(identifiers).collect();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn valid_words<I: Iterator<Item = &'w str>>(&mut self, valid_words: I) {
|
pub fn words<I: Iterator<Item = (&'w str, &'w str)>>(&mut self, words: I) {
|
||||||
self.valid_words = valid_words.map(UniCase::new).collect();
|
self.words = Self::interpret(words)
|
||||||
|
.map(|(k, v)| (UniCase::new(k), v))
|
||||||
|
.collect();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn interpret<'z, I: Iterator<Item = (&'z str, &'z str)>>(
|
||||||
|
cases: I,
|
||||||
|
) -> impl Iterator<Item = (&'z str, Status<'z>)> {
|
||||||
|
cases.map(|(typo, correction)| {
|
||||||
|
let correction = if typo == correction {
|
||||||
|
Status::Valid
|
||||||
|
} else if correction.is_empty() {
|
||||||
|
Status::Invalid
|
||||||
|
} else {
|
||||||
|
Status::Corrections(vec![Cow::Borrowed(correction)])
|
||||||
|
};
|
||||||
|
(typo, correction)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> {
|
impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> {
|
||||||
fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Vec<Cow<'s, str>> {
|
fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Option<Status<'s>> {
|
||||||
if self.valid_identifiers.contains(ident.token()) {
|
self.identifiers
|
||||||
Vec::new()
|
.get(ident.token())
|
||||||
} else {
|
.map(|c| c.borrow())
|
||||||
self.inner.correct_ident(ident)
|
.or_else(|| self.inner.correct_ident(ident))
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Vec<Cow<'s, str>> {
|
fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Option<Status<'s>> {
|
||||||
let w = UniCase::new(word.token());
|
let w = UniCase::new(word.token());
|
||||||
if self.valid_words.contains(&w) {
|
// HACK: couldn't figure out the lifetime issue with replacing `cloned` with `borrow`
|
||||||
Vec::new()
|
self.words
|
||||||
} else {
|
.get(&w)
|
||||||
self.inner.correct_word(word)
|
.cloned()
|
||||||
}
|
.or_else(|| self.inner.correct_word(word))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -188,7 +216,12 @@ mod test {
|
||||||
("fOo", Case::None, "fOo"),
|
("fOo", Case::None, "fOo"),
|
||||||
];
|
];
|
||||||
for (correction, case, expected) in cases.iter() {
|
for (correction, case, expected) in cases.iter() {
|
||||||
let actual = case_correct(correction, *case);
|
let mut actual = Cow::Borrowed(*correction);
|
||||||
|
case_correct(&mut actual, *case);
|
||||||
|
assert_eq!(*expected, actual);
|
||||||
|
|
||||||
|
let mut actual = Cow::Owned(String::from(*correction));
|
||||||
|
case_correct(&mut actual, *case);
|
||||||
assert_eq!(*expected, actual);
|
assert_eq!(*expected, actual);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
16
src/main.rs
16
src/main.rs
|
@ -58,20 +58,8 @@ fn run() -> Result<i32, anyhow::Error> {
|
||||||
|
|
||||||
let dictionary = crate::dict::BuiltIn::new(config.default.locale());
|
let dictionary = crate::dict::BuiltIn::new(config.default.locale());
|
||||||
let mut dictionary = crate::dict::Override::new(dictionary);
|
let mut dictionary = crate::dict::Override::new(dictionary);
|
||||||
dictionary.valid_identifiers(
|
dictionary.identifiers(config.default.extend_identifiers());
|
||||||
config
|
dictionary.words(config.default.extend_words());
|
||||||
.default
|
|
||||||
.extend_valid_identifiers()
|
|
||||||
.iter()
|
|
||||||
.map(|s| s.as_str()),
|
|
||||||
);
|
|
||||||
dictionary.valid_words(
|
|
||||||
config
|
|
||||||
.default
|
|
||||||
.extend_valid_words()
|
|
||||||
.iter()
|
|
||||||
.map(|s| s.as_str()),
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut settings = typos::checks::TyposSettings::new();
|
let mut settings = typos::checks::TyposSettings::new();
|
||||||
settings
|
settings
|
||||||
|
|
|
@ -57,11 +57,12 @@ impl<'r> Replace<'r> {
|
||||||
impl<'r> typos::report::Report for Replace<'r> {
|
impl<'r> typos::report::Report for Replace<'r> {
|
||||||
fn report(&self, msg: typos::report::Message<'_>) -> bool {
|
fn report(&self, msg: typos::report::Message<'_>) -> bool {
|
||||||
match msg {
|
match msg {
|
||||||
typos::report::Message::Correction(msg) => {
|
typos::report::Message::Correction(msg) => match msg.corrections {
|
||||||
if msg.corrections.len() == 1 {
|
typos::Status::Corrections(corrections) if corrections.len() == 1 => {
|
||||||
let path = msg.path.to_owned();
|
let path = msg.path.to_owned();
|
||||||
let line_num = msg.line_num;
|
let line_num = msg.line_num;
|
||||||
let correction = Correction::from_content(msg);
|
let correction =
|
||||||
|
Correction::new(msg.byte_offset, msg.typo, corrections[0].as_ref());
|
||||||
let mut deferred = self.deferred.lock().unwrap();
|
let mut deferred = self.deferred.lock().unwrap();
|
||||||
let content = deferred
|
let content = deferred
|
||||||
.content
|
.content
|
||||||
|
@ -71,24 +72,25 @@ impl<'r> typos::report::Report for Replace<'r> {
|
||||||
.or_insert_with(Vec::new);
|
.or_insert_with(Vec::new);
|
||||||
content.push(correction);
|
content.push(correction);
|
||||||
false
|
false
|
||||||
} else {
|
|
||||||
self.reporter
|
|
||||||
.report(typos::report::Message::Correction(msg))
|
|
||||||
}
|
}
|
||||||
}
|
_ => self
|
||||||
typos::report::Message::PathCorrection(msg) => {
|
.reporter
|
||||||
if msg.corrections.len() == 1 {
|
.report(typos::report::Message::Correction(msg)),
|
||||||
|
},
|
||||||
|
typos::report::Message::PathCorrection(msg) => match msg.corrections {
|
||||||
|
typos::Status::Corrections(corrections) if corrections.len() == 1 => {
|
||||||
let path = msg.path.to_owned();
|
let path = msg.path.to_owned();
|
||||||
let correction = Correction::from_path(msg);
|
let correction =
|
||||||
|
Correction::new(msg.byte_offset, msg.typo, corrections[0].as_ref());
|
||||||
let mut deferred = self.deferred.lock().unwrap();
|
let mut deferred = self.deferred.lock().unwrap();
|
||||||
let content = deferred.paths.entry(path).or_insert_with(Vec::new);
|
let content = deferred.paths.entry(path).or_insert_with(Vec::new);
|
||||||
content.push(correction);
|
content.push(correction);
|
||||||
false
|
false
|
||||||
} else {
|
|
||||||
self.reporter
|
|
||||||
.report(typos::report::Message::PathCorrection(msg))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
_ => self
|
||||||
|
.reporter
|
||||||
|
.report(typos::report::Message::PathCorrection(msg)),
|
||||||
|
},
|
||||||
_ => self.reporter.report(msg),
|
_ => self.reporter.report(msg),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -108,21 +110,11 @@ struct Correction {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Correction {
|
impl Correction {
|
||||||
fn from_content(other: typos::report::Correction<'_>) -> Self {
|
fn new(byte_offset: usize, typo: &str, correction: &str) -> Self {
|
||||||
assert_eq!(other.corrections.len(), 1);
|
|
||||||
Self {
|
Self {
|
||||||
byte_offset: other.byte_offset,
|
byte_offset,
|
||||||
typo: other.typo.as_bytes().to_vec(),
|
typo: typo.as_bytes().to_vec(),
|
||||||
correction: other.corrections[0].as_bytes().to_vec(),
|
correction: correction.as_bytes().to_vec(),
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn from_path(other: typos::report::PathCorrection<'_>) -> Self {
|
|
||||||
assert_eq!(other.corrections.len(), 1);
|
|
||||||
Self {
|
|
||||||
byte_offset: other.byte_offset,
|
|
||||||
typo: other.typo.as_bytes().to_vec(),
|
|
||||||
correction: other.corrections[0].as_bytes().to_vec(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -222,7 +214,9 @@ mod test {
|
||||||
.line_num(1)
|
.line_num(1)
|
||||||
.byte_offset(2)
|
.byte_offset(2)
|
||||||
.typo("foo")
|
.typo("foo")
|
||||||
.corrections(vec![std::borrow::Cow::Borrowed("bar")])
|
.corrections(typos::Status::Corrections(vec![
|
||||||
|
std::borrow::Cow::Borrowed("bar"),
|
||||||
|
]))
|
||||||
.into(),
|
.into(),
|
||||||
);
|
);
|
||||||
replace.write().unwrap();
|
replace.write().unwrap();
|
||||||
|
@ -243,7 +237,9 @@ mod test {
|
||||||
.path(input_file.path())
|
.path(input_file.path())
|
||||||
.byte_offset(0)
|
.byte_offset(0)
|
||||||
.typo("foo")
|
.typo("foo")
|
||||||
.corrections(vec![std::borrow::Cow::Borrowed("bar")])
|
.corrections(typos::Status::Corrections(vec![
|
||||||
|
std::borrow::Cow::Borrowed("bar"),
|
||||||
|
]))
|
||||||
.into(),
|
.into(),
|
||||||
);
|
);
|
||||||
replace.write().unwrap();
|
replace.write().unwrap();
|
||||||
|
|
Loading…
Reference in a new issue