feat: Support multiple, valid corrections

Some of the other spell checkers already offer more than one valid
correction for a single typo. While I haven't checked where our own
dictionary might need this, it will be important for supporting dialects.
This commit is contained in:
Ed Page 2020-06-30 20:47:40 -05:00
parent a5ed18ee46
commit bc1302f01b
6 changed files with 75 additions and 56 deletions

View file

@ -13,7 +13,7 @@ fn correct_word_hit(b: &mut test::Bencher) {
let input = typos::tokens::Word::new("successs", 0).unwrap();
assert_eq!(
corrections.correct_word(input),
Some(std::borrow::Cow::Borrowed("successes"))
vec![std::borrow::Cow::Borrowed("successes")]
);
b.iter(|| corrections.correct_word(input));
}
@ -22,6 +22,6 @@ fn correct_word_hit(b: &mut test::Bencher) {
fn correct_word_miss(b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new();
let input = typos::tokens::Word::new("success", 0).unwrap();
assert_eq!(corrections.correct_word(input), None);
assert!(corrections.correct_word(input).is_empty());
b.iter(|| corrections.correct_word(input));
}

View file

@ -227,24 +227,26 @@ impl Checks {
if let Some(part) = path.file_name().and_then(|s| s.to_str()) {
for ident in parser.parse(part) {
if let Some(correction) = dictionary.correct_ident(ident) {
let corrections = dictionary.correct_ident(ident);
if !corrections.is_empty() {
let byte_offset = ident.offset();
let msg = report::PathCorrection {
path,
byte_offset,
typo: ident.token(),
correction,
corrections,
};
typos_found |= reporter.report(msg.into());
} else {
for word in ident.split() {
if let Some(correction) = dictionary.correct_word(word) {
let corrections = dictionary.correct_word(word);
if !corrections.is_empty() {
let byte_offset = word.offset();
let msg = report::PathCorrection {
path,
byte_offset,
typo: word.token(),
correction,
corrections,
};
typos_found |= reporter.report(msg.into());
}
@ -281,7 +283,8 @@ impl Checks {
for (line_idx, line) in buffer.lines().enumerate() {
let line_num = line_idx + 1;
for ident in parser.parse_bytes(line) {
if let Some(correction) = dictionary.correct_ident(ident) {
let corrections = dictionary.correct_ident(ident);
if !corrections.is_empty() {
let byte_offset = ident.offset();
let msg = report::Correction {
path,
@ -289,12 +292,13 @@ impl Checks {
line_num,
byte_offset,
typo: ident.token(),
correction,
corrections,
};
typos_found |= reporter.report(msg.into());
} else {
for word in ident.split() {
if let Some(correction) = dictionary.correct_word(word) {
let corrections = dictionary.correct_word(word);
if !corrections.is_empty() {
let byte_offset = word.offset();
let msg = report::Correction {
path,
@ -302,7 +306,7 @@ impl Checks {
line_num,
byte_offset,
typo: word.token(),
correction,
corrections,
};
typos_found |= reporter.report(msg.into());
}

View file

@ -1,10 +1,7 @@
use std::borrow::Cow;
pub trait Dictionary: Send + Sync {
fn correct_ident<'s, 'w>(
&'s self,
_ident: crate::tokens::Identifier<'w>,
) -> Option<Cow<'s, str>>;
fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>) -> Vec<Cow<'s, str>>;
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Cow<'s, str>>;
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Vec<Cow<'s, str>>;
}

View file

@ -57,7 +57,7 @@ pub struct Correction<'m> {
pub line_num: usize,
pub byte_offset: usize,
pub typo: &'m str,
pub correction: Cow<'m, str>,
pub corrections: Vec<Cow<'m, str>>,
}
impl<'m> Default for Correction<'m> {
@ -68,7 +68,7 @@ impl<'m> Default for Correction<'m> {
line_num: 0,
byte_offset: 0,
typo: "",
correction: Cow::Borrowed(""),
corrections: Vec::new(),
}
}
}
@ -79,7 +79,7 @@ pub struct PathCorrection<'m> {
pub path: &'m std::path::Path,
pub byte_offset: usize,
pub typo: &'m str,
pub correction: Cow<'m, str>,
pub corrections: Vec<Cow<'m, str>>,
}
impl<'m> Default for PathCorrection<'m> {
@ -88,7 +88,7 @@ impl<'m> Default for PathCorrection<'m> {
path: std::path::Path::new("-"),
byte_offset: 0,
typo: "",
correction: Cow::Borrowed(""),
corrections: Vec::new(),
}
}
}
@ -201,11 +201,16 @@ impl Report for PrintBrief {
msg.line_num,
msg.byte_offset,
msg.typo,
msg.correction
itertools::join(msg.corrections.iter(), ", ")
);
}
Message::PathCorrection(msg) => {
println!("{}: {} -> {}", msg.path.display(), msg.typo, msg.correction);
println!(
"{}: {} -> {}",
msg.path.display(),
msg.typo,
itertools::join(msg.corrections.iter(), ", ")
);
}
Message::File(msg) => {
println!("{}", msg.path.display());
@ -236,10 +241,10 @@ impl Report for PrintLong {
Message::Correction(msg) => print_long_correction(msg),
Message::PathCorrection(msg) => {
println!(
"{}: error: `{}` should be `{}`",
"{}: error: `{}` should be {}",
msg.path.display(),
msg.typo,
msg.correction
itertools::join(msg.corrections.iter().map(|c| format!("`{}`", c)), ", ")
);
}
Message::File(msg) => {
@ -274,8 +279,9 @@ fn print_long_correction(msg: &Correction) {
writeln!(
handle,
"error: `{}` should be `{}`",
msg.typo, msg.correction
"error: `{}` should be {}",
msg.typo,
itertools::join(msg.corrections.iter().map(|c| format!("`{}`", c)), ", ")
)
.unwrap();
writeln!(

View file

@ -15,24 +15,24 @@ impl BuiltIn {
pub fn correct_ident<'s, 'w>(
&'s self,
_ident: typos::tokens::Identifier<'w>,
) -> Option<Cow<'s, str>> {
None
) -> Vec<Cow<'s, str>> {
Vec::new()
}
pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
map_lookup(&typos_dict::WORD_DICTIONARY, word.token()).map(|s| case_correct(s, word.case()))
pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Vec<Cow<'s, str>> {
map_lookup(&typos_dict::WORD_DICTIONARY, word.token())
.map(|s| case_correct(s, word.case()))
.into_iter()
.collect()
}
}
impl typos::Dictionary for BuiltIn {
fn correct_ident<'s, 'w>(
&'s self,
ident: typos::tokens::Identifier<'w>,
) -> Option<Cow<'s, str>> {
fn correct_ident<'s, 'w>(&'s self, ident: typos::tokens::Identifier<'w>) -> Vec<Cow<'s, str>> {
BuiltIn::correct_ident(self, ident)
}
fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Vec<Cow<'s, str>> {
BuiltIn::correct_word(self, word)
}
}

View file

@ -58,26 +58,36 @@ impl<'r> typos::report::Report for Replace<'r> {
fn report(&self, msg: typos::report::Message<'_>) -> bool {
match msg {
typos::report::Message::Correction(msg) => {
let path = msg.path.to_owned();
let line_num = msg.line_num;
let correction = Correction::from_content(msg);
let mut deferred = self.deferred.lock().unwrap();
let content = deferred
.content
.entry(path)
.or_insert_with(BTreeMap::new)
.entry(line_num)
.or_insert_with(Vec::new);
content.push(correction);
false
if msg.corrections.len() == 1 {
let path = msg.path.to_owned();
let line_num = msg.line_num;
let correction = Correction::from_content(msg);
let mut deferred = self.deferred.lock().unwrap();
let content = deferred
.content
.entry(path)
.or_insert_with(BTreeMap::new)
.entry(line_num)
.or_insert_with(Vec::new);
content.push(correction);
false
} else {
self.reporter
.report(typos::report::Message::Correction(msg))
}
}
typos::report::Message::PathCorrection(msg) => {
let path = msg.path.to_owned();
let correction = Correction::from_path(msg);
let mut deferred = self.deferred.lock().unwrap();
let content = deferred.paths.entry(path).or_insert_with(Vec::new);
content.push(correction);
false
if msg.corrections.len() == 1 {
let path = msg.path.to_owned();
let correction = Correction::from_path(msg);
let mut deferred = self.deferred.lock().unwrap();
let content = deferred.paths.entry(path).or_insert_with(Vec::new);
content.push(correction);
false
} else {
self.reporter
.report(typos::report::Message::PathCorrection(msg))
}
}
_ => self.reporter.report(msg),
}
@ -99,18 +109,20 @@ struct Correction {
impl Correction {
fn from_content(other: typos::report::Correction<'_>) -> Self {
assert_eq!(other.corrections.len(), 1);
Self {
byte_offset: other.byte_offset,
typo: other.typo.as_bytes().to_vec(),
correction: other.correction.as_bytes().to_vec(),
correction: other.corrections[0].as_bytes().to_vec(),
}
}
fn from_path(other: typos::report::PathCorrection<'_>) -> Self {
assert_eq!(other.corrections.len(), 1);
Self {
byte_offset: other.byte_offset,
typo: other.typo.as_bytes().to_vec(),
correction: other.correction.as_bytes().to_vec(),
correction: other.corrections[0].as_bytes().to_vec(),
}
}
}
@ -210,7 +222,7 @@ mod test {
.line_num(1)
.byte_offset(2)
.typo("foo")
.correction(std::borrow::Cow::Borrowed("bar"))
.corrections(vec![std::borrow::Cow::Borrowed("bar")])
.into(),
);
replace.write().unwrap();
@ -231,7 +243,7 @@ mod test {
.path(input_file.path())
.byte_offset(0)
.typo("foo")
.correction(std::borrow::Cow::Borrowed("bar"))
.corrections(vec![std::borrow::Cow::Borrowed("bar")])
.into(),
);
replace.write().unwrap();