refactor(typos): Simplify the top-level API

This commit is contained in:
Ed Page 2021-02-11 21:25:04 -06:00
parent e1e4ce8b61
commit b5f606f201
5 changed files with 108 additions and 185 deletions

100
crates/typos/src/check.rs Normal file
View file

@ -0,0 +1,100 @@
use crate::tokens;
use crate::Dictionary;
use std::borrow::Cow;
/// Scan a UTF-8 string for typos with the given tokenizer and dictionary.
///
/// Yields one [`Typo`] per invalid identifier or word found in `buffer`.
pub fn check_str<'b, 's: 'b>(
    buffer: &'b str,
    tokenizer: &'s tokens::Tokenizer,
    dictionary: &'s dyn Dictionary,
) -> impl Iterator<Item = Typo<'b>> {
    let idents = tokenizer.parse_str(buffer);
    idents.flat_map(move |candidate| process_ident(candidate, dictionary))
}
/// Scan a byte buffer for typos with the given tokenizer and dictionary.
///
/// Yields one [`Typo`] per invalid identifier or word found in `buffer`.
pub fn check_bytes<'b, 's: 'b>(
    buffer: &'b [u8],
    tokenizer: &'s tokens::Tokenizer,
    dictionary: &'s dyn Dictionary,
) -> impl Iterator<Item = Typo<'b>> {
    let idents = tokenizer.parse_bytes(buffer);
    idents.flat_map(move |candidate| process_ident(candidate, dictionary))
}
fn process_ident<'i, 's: 'i>(
ident: tokens::Identifier<'i>,
dictionary: &'s dyn Dictionary,
) -> impl Iterator<Item = Typo<'i>> {
match dictionary.correct_ident(ident) {
Some(crate::Status::Valid) => itertools::Either::Left(None.into_iter()),
Some(corrections) => {
let typo = Typo {
byte_offset: ident.offset(),
typo: ident.token().into(),
corrections,
};
itertools::Either::Left(Some(typo).into_iter())
}
None => itertools::Either::Right(
ident
.split()
.filter_map(move |word| process_word(word, dictionary)),
),
}
}
/// Check a single word against the dictionary.
///
/// Returns `None` when the word is valid or the dictionary has no verdict.
fn process_word<'w, 's: 'w>(
    word: tokens::Word<'w>,
    dictionary: &'s dyn Dictionary,
) -> Option<Typo<'w>> {
    // `?` short-circuits when the dictionary has no opinion on the word.
    match dictionary.correct_word(word)? {
        crate::Status::Valid => None,
        corrections => Some(Typo {
            byte_offset: word.offset(),
            typo: word.token().into(),
            corrections,
        }),
    }
}
/// An invalid term found in the buffer.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Typo<'m> {
/// Byte offset of the term within the checked buffer.
pub byte_offset: usize,
/// The invalid term as it appeared in the buffer.
pub typo: Cow<'m, str>,
/// The dictionary's verdict, carrying any suggested corrections.
pub corrections: crate::Status<'m>,
}
impl<'m> Typo<'m> {
    /// Convert into a `Typo` that owns all of its data.
    pub fn into_owned(self) -> Typo<'static> {
        let Typo {
            byte_offset,
            typo,
            corrections,
        } = self;
        Typo {
            byte_offset,
            typo: Cow::Owned(typo.into_owned()),
            corrections: corrections.into_owned(),
        }
    }

    /// Produce a `Typo` that borrows this one's data.
    pub fn borrow(&self) -> Typo<'_> {
        Typo {
            byte_offset: self.byte_offset,
            typo: Cow::Borrowed(self.typo.as_ref()),
            corrections: self.corrections.borrow(),
        }
    }
}
impl<'m> Default for Typo<'m> {
fn default() -> Self {
Self {
byte_offset: 0,
typo: "".into(),
corrections: crate::Status::Invalid,
}
}
}

View file

@ -13,21 +13,6 @@ pub trait Dictionary: Send + Sync {
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
}
/// A dictionary with no entries: it never has a verdict on any term.
pub(crate) struct NullDictionary;
impl Dictionary for NullDictionary {
fn correct_ident<'s, 'w>(
&'s self,
_ident: crate::tokens::Identifier<'w>,
) -> Option<Status<'s>> {
// No opinion on any identifier.
None
}
fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option<Status<'s>> {
// No opinion on any word.
None
}
}
/// Validity of a term in a Dictionary.
#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)]
#[serde(rename_all = "snake_case")]

View file

@ -1,7 +1,7 @@
// Crate-internal modules; their public items are re-exported below.
mod check;
mod dict;
mod parser;
// Tokenization is a public sub-API in its own right.
pub mod tokens;
// Flatten the top-level API: callers use `typos::…` directly.
pub use check::*;
pub use dict::*;
pub use parser::*;

View file

@ -1,147 +0,0 @@
use crate::tokens;
use crate::Dictionary;
use std::borrow::Cow;
/// Extract typos from the buffer.
#[derive(Clone)]
pub struct ParserBuilder<'p, 'd> {
/// Tokenizer to use; `None` falls back to `DEFAULT_TOKENIZER` at build time.
tokenizer: Option<&'p tokens::Tokenizer>,
/// Dictionary consulted for identifiers and words.
dictionary: &'d dyn Dictionary,
}
impl<'p> ParserBuilder<'p, 'static> {
    /// Create a builder with no tokenizer set and a no-op dictionary.
    pub fn new() -> Self {
        Self::default()
    }
}
impl<'p, 'd> ParserBuilder<'p, 'd> {
    /// Set the Tokenizer used when parsing.
    pub fn tokenizer(mut self, tokenizer: &'p tokens::Tokenizer) -> Self {
        self.tokenizer = Some(tokenizer);
        self
    }

    /// Set the dictionary used when parsing.
    ///
    /// Rebuilds the builder so the dictionary lifetime `'d1` can differ from `'d`.
    pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> {
        let ParserBuilder { tokenizer, .. } = self;
        ParserBuilder {
            tokenizer,
            dictionary,
        }
    }

    /// Extract typos from the buffer.
    pub fn build(&self) -> TyposParser<'p, 'd> {
        // Fall back to the shared default tokenizer when none was provided.
        let tokenizer = match self.tokenizer {
            Some(tokenizer) => tokenizer,
            None => &DEFAULT_TOKENIZER,
        };
        TyposParser {
            tokenizer,
            dictionary: self.dictionary,
        }
    }
}
impl<'p> Default for ParserBuilder<'p, 'static> {
fn default() -> Self {
Self {
tokenizer: None,
dictionary: &crate::NullDictionary,
}
}
}
/// Shared fallback tokenizer, built lazily on first use by `ParserBuilder::build`.
static DEFAULT_TOKENIZER: once_cell::sync::Lazy<tokens::Tokenizer> =
once_cell::sync::Lazy::new(tokens::Tokenizer::new);
/// Extract typos from the buffer.
#[derive(Clone)]
pub struct TyposParser<'p, 'd> {
/// Splits buffers into candidate identifiers.
tokenizer: &'p tokens::Tokenizer,
/// Judges identifiers and words, supplying corrections.
dictionary: &'d dyn Dictionary,
}
impl<'p, 'd> TyposParser<'p, 'd> {
pub fn parse_str<'b, 's: 'b>(&'s self, buffer: &'b str) -> impl Iterator<Item = Typo<'b>> {
self.tokenizer
.parse_str(buffer)
.flat_map(move |ident| self.process_ident(ident))
}
pub fn parse_bytes<'b, 's: 'b>(&'s self, buffer: &'b [u8]) -> impl Iterator<Item = Typo<'b>> {
self.tokenizer
.parse_bytes(buffer)
.flat_map(move |ident| self.process_ident(ident))
}
fn process_ident<'i, 's: 'i>(
&'s self,
ident: tokens::Identifier<'i>,
) -> impl Iterator<Item = Typo<'i>> {
match self.dictionary.correct_ident(ident) {
Some(crate::Status::Valid) => itertools::Either::Left(None.into_iter()),
Some(corrections) => {
let typo = Typo {
byte_offset: ident.offset(),
typo: ident.token().into(),
corrections,
};
itertools::Either::Left(Some(typo).into_iter())
}
None => itertools::Either::Right(
ident
.split()
.filter_map(move |word| self.process_word(word)),
),
}
}
fn process_word<'w, 's: 'w>(&'s self, word: tokens::Word<'w>) -> Option<Typo<'w>> {
match self.dictionary.correct_word(word) {
Some(crate::Status::Valid) => None,
Some(corrections) => {
let typo = Typo {
byte_offset: word.offset(),
typo: word.token().into(),
corrections,
};
Some(typo)
}
None => None,
}
}
}
/// An invalid term found in the buffer.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Typo<'m> {
/// Byte offset of the term within the checked buffer.
pub byte_offset: usize,
/// The invalid term as it appeared in the buffer.
pub typo: Cow<'m, str>,
/// The dictionary's verdict, carrying any suggested corrections.
pub corrections: crate::Status<'m>,
}
impl<'m> Typo<'m> {
    /// Convert into a `Typo` that owns all of its data.
    pub fn into_owned(self) -> Typo<'static> {
        let Typo {
            byte_offset,
            typo,
            corrections,
        } = self;
        Typo {
            byte_offset,
            typo: Cow::Owned(typo.into_owned()),
            corrections: corrections.into_owned(),
        }
    }

    /// Produce a `Typo` that borrows this one's data.
    pub fn borrow(&self) -> Typo<'_> {
        Typo {
            byte_offset: self.byte_offset,
            typo: Cow::Borrowed(self.typo.as_ref()),
            corrections: self.corrections.borrow(),
        }
    }
}
impl<'m> Default for Typo<'m> {
fn default() -> Self {
Self {
byte_offset: 0,
typo: "".into(),
corrections: crate::Status::Invalid,
}
}
}

View file

@ -70,14 +70,9 @@ impl FileChecker for Typos {
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
let parser = typos::ParserBuilder::new()
.tokenizer(tokenizer)
.dictionary(dictionary)
.build();
if settings.check_filenames {
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
for typo in parser.parse_str(file_name) {
for typo in typos::check_str(file_name, tokenizer, dictionary) {
let msg = report::Typo {
context: Some(report::PathContext { path }.into()),
buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
@ -97,7 +92,7 @@ impl FileChecker for Typos {
reporter.report(msg.into())?;
} else {
let mut accum_line_num = AccumulateLineNum::new();
for typo in parser.parse_bytes(&buffer) {
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
let msg = report::Typo {
@ -129,11 +124,6 @@ impl FileChecker for FixTypos {
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
let parser = typos::ParserBuilder::new()
.tokenizer(tokenizer)
.dictionary(dictionary)
.build();
if settings.check_files {
let (buffer, content_type) = read_file(path, reporter)?;
if !explicit && !settings.binary && content_type.is_binary() {
@ -142,7 +132,7 @@ impl FileChecker for FixTypos {
} else {
let mut fixes = Vec::new();
let mut accum_line_num = AccumulateLineNum::new();
for typo in parser.parse_bytes(&buffer) {
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {
@ -169,7 +159,7 @@ impl FileChecker for FixTypos {
if settings.check_filenames {
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
let mut fixes = Vec::new();
for typo in parser.parse_str(file_name) {
for typo in typos::check_str(file_name, tokenizer, dictionary) {
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {
@ -211,11 +201,6 @@ impl FileChecker for DiffTypos {
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
let parser = typos::ParserBuilder::new()
.tokenizer(tokenizer)
.dictionary(dictionary)
.build();
let mut content = Vec::new();
let mut new_content = Vec::new();
if settings.check_files {
@ -226,7 +211,7 @@ impl FileChecker for DiffTypos {
} else {
let mut fixes = Vec::new();
let mut accum_line_num = AccumulateLineNum::new();
for typo in parser.parse_bytes(&buffer) {
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {
@ -254,7 +239,7 @@ impl FileChecker for DiffTypos {
if settings.check_filenames {
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
let mut fixes = Vec::new();
for typo in parser.parse_str(file_name) {
for typo in typos::check_str(file_name, tokenizer, dictionary) {
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {