mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-22 23:52:12 -05:00
refactor(typos): Simplify the top-level API
This commit is contained in:
parent
e1e4ce8b61
commit
b5f606f201
5 changed files with 108 additions and 185 deletions
100
crates/typos/src/check.rs
Normal file
100
crates/typos/src/check.rs
Normal file
|
@@ -0,0 +1,100 @@
|
|||
use crate::tokens;
|
||||
use crate::Dictionary;
|
||||
use std::borrow::Cow;
|
||||
|
||||
pub fn check_str<'b, 's: 'b>(
|
||||
buffer: &'b str,
|
||||
tokenizer: &'s tokens::Tokenizer,
|
||||
dictionary: &'s dyn Dictionary,
|
||||
) -> impl Iterator<Item = Typo<'b>> {
|
||||
tokenizer
|
||||
.parse_str(buffer)
|
||||
.flat_map(move |ident| process_ident(ident, dictionary))
|
||||
}
|
||||
|
||||
pub fn check_bytes<'b, 's: 'b>(
|
||||
buffer: &'b [u8],
|
||||
tokenizer: &'s tokens::Tokenizer,
|
||||
dictionary: &'s dyn Dictionary,
|
||||
) -> impl Iterator<Item = Typo<'b>> {
|
||||
tokenizer
|
||||
.parse_bytes(buffer)
|
||||
.flat_map(move |ident| process_ident(ident, dictionary))
|
||||
}
|
||||
|
||||
fn process_ident<'i, 's: 'i>(
|
||||
ident: tokens::Identifier<'i>,
|
||||
dictionary: &'s dyn Dictionary,
|
||||
) -> impl Iterator<Item = Typo<'i>> {
|
||||
match dictionary.correct_ident(ident) {
|
||||
Some(crate::Status::Valid) => itertools::Either::Left(None.into_iter()),
|
||||
Some(corrections) => {
|
||||
let typo = Typo {
|
||||
byte_offset: ident.offset(),
|
||||
typo: ident.token().into(),
|
||||
corrections,
|
||||
};
|
||||
itertools::Either::Left(Some(typo).into_iter())
|
||||
}
|
||||
None => itertools::Either::Right(
|
||||
ident
|
||||
.split()
|
||||
.filter_map(move |word| process_word(word, dictionary)),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
fn process_word<'w, 's: 'w>(
|
||||
word: tokens::Word<'w>,
|
||||
dictionary: &'s dyn Dictionary,
|
||||
) -> Option<Typo<'w>> {
|
||||
match dictionary.correct_word(word) {
|
||||
Some(crate::Status::Valid) => None,
|
||||
Some(corrections) => {
|
||||
let typo = Typo {
|
||||
byte_offset: word.offset(),
|
||||
typo: word.token().into(),
|
||||
corrections,
|
||||
};
|
||||
Some(typo)
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// An invalid term found in the buffer.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Typo<'m> {
    /// Byte index of the typo within the checked buffer.
    pub byte_offset: usize,
    /// The misspelled text itself; borrows from the buffer when possible.
    pub typo: Cow<'m, str>,
    /// Dictionary verdict for the term, including any suggested corrections.
    pub corrections: crate::Status<'m>,
}
|
||||
|
||||
impl<'m> Typo<'m> {
    /// Convert into a `Typo` that owns all of its data, detaching it from
    /// the lifetime of the originally checked buffer.
    pub fn into_owned(self) -> Typo<'static> {
        Typo {
            byte_offset: self.byte_offset,
            typo: Cow::Owned(self.typo.into_owned()),
            corrections: self.corrections.into_owned(),
        }
    }

    /// Cheaply re-borrow this typo's data without cloning.
    pub fn borrow(&self) -> Typo<'_> {
        Typo {
            byte_offset: self.byte_offset,
            typo: Cow::Borrowed(self.typo.as_ref()),
            corrections: self.corrections.borrow(),
        }
    }
}
|
||||
|
||||
impl<'m> Default for Typo<'m> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
byte_offset: 0,
|
||||
typo: "".into(),
|
||||
corrections: crate::Status::Invalid,
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -13,21 +13,6 @@ pub trait Dictionary: Send + Sync {
|
|||
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
|
||||
}
|
||||
|
||||
/// A dictionary with no opinion about any term: every lookup answers `None`.
pub(crate) struct NullDictionary;
|
||||
|
||||
impl Dictionary for NullDictionary {
    /// Always `None`: no verdict for any identifier.
    fn correct_ident<'s, 'w>(
        &'s self,
        _ident: crate::tokens::Identifier<'w>,
    ) -> Option<Status<'s>> {
        None
    }

    /// Always `None`: no verdict for any word.
    fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option<Status<'s>> {
        None
    }
}
|
||||
|
||||
/// Validity of a term in a Dictionary.
|
||||
#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
mod check;
|
||||
mod dict;
|
||||
mod parser;
|
||||
|
||||
pub mod tokens;
|
||||
|
||||
pub use check::*;
|
||||
pub use dict::*;
|
||||
pub use parser::*;
|
||||
|
|
|
@@ -1,147 +0,0 @@
|
|||
use crate::tokens;
|
||||
use crate::Dictionary;
|
||||
use std::borrow::Cow;
|
||||
|
||||
/// Extract typos from the buffer.
#[derive(Clone)]
pub struct ParserBuilder<'p, 'd> {
    /// Tokenizer to use; when unset, `build` substitutes the shared default.
    tokenizer: Option<&'p tokens::Tokenizer>,
    /// Dictionary consulted for every identifier and word.
    dictionary: &'d dyn Dictionary,
}
|
||||
|
||||
impl<'p> ParserBuilder<'p, 'static> {
    /// Create a builder with no tokenizer set and the null dictionary.
    pub fn new() -> Self {
        Default::default()
    }
}
|
||||
|
||||
impl<'p, 'd> ParserBuilder<'p, 'd> {
    /// Set the Tokenizer used when parsing.
    pub fn tokenizer(mut self, tokenizer: &'p tokens::Tokenizer) -> Self {
        self.tokenizer = Some(tokenizer);
        self
    }

    /// Set the dictionary used when parsing.
    ///
    /// Rebuilds the builder so the dictionary lifetime `'d1` replaces `'d`.
    pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> {
        ParserBuilder {
            tokenizer: self.tokenizer,
            dictionary,
        }
    }

    /// Extract typos from the buffer.
    pub fn build(&self) -> TyposParser<'p, 'd> {
        TyposParser {
            // Fall back to the process-wide default tokenizer when none was set.
            tokenizer: self.tokenizer.unwrap_or(&DEFAULT_TOKENIZER),
            dictionary: self.dictionary,
        }
    }
}
|
||||
|
||||
impl<'p> Default for ParserBuilder<'p, 'static> {
    /// No tokenizer set; the null dictionary (which reports no verdicts).
    fn default() -> Self {
        Self {
            tokenizer: None,
            dictionary: &crate::NullDictionary,
        }
    }
}
|
||||
|
||||
/// Lazily constructed default tokenizer, shared by all parsers built without
/// an explicit one.
static DEFAULT_TOKENIZER: once_cell::sync::Lazy<tokens::Tokenizer> =
    once_cell::sync::Lazy::new(tokens::Tokenizer::new);
|
||||
|
||||
/// Extract typos from the buffer.
#[derive(Clone)]
pub struct TyposParser<'p, 'd> {
    /// Splits buffers into identifiers.
    tokenizer: &'p tokens::Tokenizer,
    /// Decides which identifiers/words are typos.
    dictionary: &'d dyn Dictionary,
}
|
||||
|
||||
impl<'p, 'd> TyposParser<'p, 'd> {
    /// Yield every typo found in a UTF-8 buffer.
    pub fn parse_str<'b, 's: 'b>(&'s self, buffer: &'b str) -> impl Iterator<Item = Typo<'b>> {
        self.tokenizer
            .parse_str(buffer)
            .flat_map(move |ident| self.process_ident(ident))
    }

    /// Yield every typo found in a byte buffer (non-UTF-8 counterpart).
    pub fn parse_bytes<'b, 's: 'b>(&'s self, buffer: &'b [u8]) -> impl Iterator<Item = Typo<'b>> {
        self.tokenizer
            .parse_bytes(buffer)
            .flat_map(move |ident| self.process_ident(ident))
    }

    /// Classify a single identifier: valid → nothing, correctable → one typo,
    /// no verdict → split into words and check each word.
    fn process_ident<'i, 's: 'i>(
        &'s self,
        ident: tokens::Identifier<'i>,
    ) -> impl Iterator<Item = Typo<'i>> {
        match self.dictionary.correct_ident(ident) {
            // Known-good identifier: yield nothing.
            Some(crate::Status::Valid) => itertools::Either::Left(None.into_iter()),
            // The identifier as a whole is a typo: yield exactly one item.
            Some(corrections) => {
                let typo = Typo {
                    byte_offset: ident.offset(),
                    typo: ident.token().into(),
                    corrections,
                };
                itertools::Either::Left(Some(typo).into_iter())
            }
            // No verdict for the whole identifier: fall back to per-word checks.
            None => itertools::Either::Right(
                ident
                    .split()
                    .filter_map(move |word| self.process_word(word)),
            ),
        }
    }

    /// Check one word; `Some` only when the dictionary flags it as a typo.
    fn process_word<'w, 's: 'w>(&'s self, word: tokens::Word<'w>) -> Option<Typo<'w>> {
        match self.dictionary.correct_word(word) {
            Some(crate::Status::Valid) => None,
            Some(corrections) => {
                let typo = Typo {
                    byte_offset: word.offset(),
                    typo: word.token().into(),
                    corrections,
                };
                Some(typo)
            }
            None => None,
        }
    }
}
|
||||
|
||||
/// An invalid term found in the buffer.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Typo<'m> {
    /// Byte index of the typo within the checked buffer.
    pub byte_offset: usize,
    /// The misspelled text itself; borrows from the buffer when possible.
    pub typo: Cow<'m, str>,
    /// Dictionary verdict for the term, including any suggested corrections.
    pub corrections: crate::Status<'m>,
}
|
||||
|
||||
impl<'m> Typo<'m> {
    /// Convert into a `Typo` that owns all of its data, detaching it from
    /// the lifetime of the originally checked buffer.
    pub fn into_owned(self) -> Typo<'static> {
        Typo {
            byte_offset: self.byte_offset,
            typo: Cow::Owned(self.typo.into_owned()),
            corrections: self.corrections.into_owned(),
        }
    }

    /// Cheaply re-borrow this typo's data without cloning.
    pub fn borrow(&self) -> Typo<'_> {
        Typo {
            byte_offset: self.byte_offset,
            typo: Cow::Borrowed(self.typo.as_ref()),
            corrections: self.corrections.borrow(),
        }
    }
}
|
||||
|
||||
impl<'m> Default for Typo<'m> {
    /// An empty typo: offset 0, empty text, and an `Invalid` status.
    fn default() -> Self {
        Self {
            byte_offset: 0,
            typo: "".into(),
            corrections: crate::Status::Invalid,
        }
    }
}
|
|
@ -70,14 +70,9 @@ impl FileChecker for Typos {
|
|||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
let parser = typos::ParserBuilder::new()
|
||||
.tokenizer(tokenizer)
|
||||
.dictionary(dictionary)
|
||||
.build();
|
||||
|
||||
if settings.check_filenames {
|
||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||
for typo in parser.parse_str(file_name) {
|
||||
for typo in typos::check_str(file_name, tokenizer, dictionary) {
|
||||
let msg = report::Typo {
|
||||
context: Some(report::PathContext { path }.into()),
|
||||
buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
|
||||
|
@ -97,7 +92,7 @@ impl FileChecker for Typos {
|
|||
reporter.report(msg.into())?;
|
||||
} else {
|
||||
let mut accum_line_num = AccumulateLineNum::new();
|
||||
for typo in parser.parse_bytes(&buffer) {
|
||||
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
|
||||
let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
|
||||
let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
|
||||
let msg = report::Typo {
|
||||
|
@ -129,11 +124,6 @@ impl FileChecker for FixTypos {
|
|||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
let parser = typos::ParserBuilder::new()
|
||||
.tokenizer(tokenizer)
|
||||
.dictionary(dictionary)
|
||||
.build();
|
||||
|
||||
if settings.check_files {
|
||||
let (buffer, content_type) = read_file(path, reporter)?;
|
||||
if !explicit && !settings.binary && content_type.is_binary() {
|
||||
|
@ -142,7 +132,7 @@ impl FileChecker for FixTypos {
|
|||
} else {
|
||||
let mut fixes = Vec::new();
|
||||
let mut accum_line_num = AccumulateLineNum::new();
|
||||
for typo in parser.parse_bytes(&buffer) {
|
||||
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
|
||||
if is_fixable(&typo) {
|
||||
fixes.push(typo.into_owned());
|
||||
} else {
|
||||
|
@ -169,7 +159,7 @@ impl FileChecker for FixTypos {
|
|||
if settings.check_filenames {
|
||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||
let mut fixes = Vec::new();
|
||||
for typo in parser.parse_str(file_name) {
|
||||
for typo in typos::check_str(file_name, tokenizer, dictionary) {
|
||||
if is_fixable(&typo) {
|
||||
fixes.push(typo.into_owned());
|
||||
} else {
|
||||
|
@ -211,11 +201,6 @@ impl FileChecker for DiffTypos {
|
|||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
let parser = typos::ParserBuilder::new()
|
||||
.tokenizer(tokenizer)
|
||||
.dictionary(dictionary)
|
||||
.build();
|
||||
|
||||
let mut content = Vec::new();
|
||||
let mut new_content = Vec::new();
|
||||
if settings.check_files {
|
||||
|
@ -226,7 +211,7 @@ impl FileChecker for DiffTypos {
|
|||
} else {
|
||||
let mut fixes = Vec::new();
|
||||
let mut accum_line_num = AccumulateLineNum::new();
|
||||
for typo in parser.parse_bytes(&buffer) {
|
||||
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
|
||||
if is_fixable(&typo) {
|
||||
fixes.push(typo.into_owned());
|
||||
} else {
|
||||
|
@ -254,7 +239,7 @@ impl FileChecker for DiffTypos {
|
|||
if settings.check_filenames {
|
||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||
let mut fixes = Vec::new();
|
||||
for typo in parser.parse_str(file_name) {
|
||||
for typo in typos::check_str(file_name, tokenizer, dictionary) {
|
||||
if is_fixable(&typo) {
|
||||
fixes.push(typo.into_owned());
|
||||
} else {
|
||||
|
|
Loading…
Reference in a new issue