mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-23 08:02:15 -05:00
refactor(typos): Simplify the top-level API
This commit is contained in:
parent
e1e4ce8b61
commit
b5f606f201
5 changed files with 108 additions and 185 deletions
100
crates/typos/src/check.rs
Normal file
100
crates/typos/src/check.rs
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
use crate::tokens;
|
||||||
|
use crate::Dictionary;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
pub fn check_str<'b, 's: 'b>(
|
||||||
|
buffer: &'b str,
|
||||||
|
tokenizer: &'s tokens::Tokenizer,
|
||||||
|
dictionary: &'s dyn Dictionary,
|
||||||
|
) -> impl Iterator<Item = Typo<'b>> {
|
||||||
|
tokenizer
|
||||||
|
.parse_str(buffer)
|
||||||
|
.flat_map(move |ident| process_ident(ident, dictionary))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn check_bytes<'b, 's: 'b>(
|
||||||
|
buffer: &'b [u8],
|
||||||
|
tokenizer: &'s tokens::Tokenizer,
|
||||||
|
dictionary: &'s dyn Dictionary,
|
||||||
|
) -> impl Iterator<Item = Typo<'b>> {
|
||||||
|
tokenizer
|
||||||
|
.parse_bytes(buffer)
|
||||||
|
.flat_map(move |ident| process_ident(ident, dictionary))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn process_ident<'i, 's: 'i>(
|
||||||
|
ident: tokens::Identifier<'i>,
|
||||||
|
dictionary: &'s dyn Dictionary,
|
||||||
|
) -> impl Iterator<Item = Typo<'i>> {
|
||||||
|
match dictionary.correct_ident(ident) {
|
||||||
|
Some(crate::Status::Valid) => itertools::Either::Left(None.into_iter()),
|
||||||
|
Some(corrections) => {
|
||||||
|
let typo = Typo {
|
||||||
|
byte_offset: ident.offset(),
|
||||||
|
typo: ident.token().into(),
|
||||||
|
corrections,
|
||||||
|
};
|
||||||
|
itertools::Either::Left(Some(typo).into_iter())
|
||||||
|
}
|
||||||
|
None => itertools::Either::Right(
|
||||||
|
ident
|
||||||
|
.split()
|
||||||
|
.filter_map(move |word| process_word(word, dictionary)),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn process_word<'w, 's: 'w>(
|
||||||
|
word: tokens::Word<'w>,
|
||||||
|
dictionary: &'s dyn Dictionary,
|
||||||
|
) -> Option<Typo<'w>> {
|
||||||
|
match dictionary.correct_word(word) {
|
||||||
|
Some(crate::Status::Valid) => None,
|
||||||
|
Some(corrections) => {
|
||||||
|
let typo = Typo {
|
||||||
|
byte_offset: word.offset(),
|
||||||
|
typo: word.token().into(),
|
||||||
|
corrections,
|
||||||
|
};
|
||||||
|
Some(typo)
|
||||||
|
}
|
||||||
|
None => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An invalid term found in the buffer.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Typo<'m> {
    // Byte position of the typo within the scanned buffer.
    pub byte_offset: usize,
    // The misspelled text as it appeared in the buffer.
    pub typo: Cow<'m, str>,
    // The dictionary's verdict, including any suggested corrections.
    pub corrections: crate::Status<'m>,
}
|
||||||
|
|
||||||
|
impl<'m> Typo<'m> {
|
||||||
|
pub fn into_owned(self) -> Typo<'static> {
|
||||||
|
Typo {
|
||||||
|
byte_offset: self.byte_offset,
|
||||||
|
typo: Cow::Owned(self.typo.into_owned()),
|
||||||
|
corrections: self.corrections.into_owned(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn borrow(&self) -> Typo<'_> {
|
||||||
|
Typo {
|
||||||
|
byte_offset: self.byte_offset,
|
||||||
|
typo: Cow::Borrowed(self.typo.as_ref()),
|
||||||
|
corrections: self.corrections.borrow(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'m> Default for Typo<'m> {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
byte_offset: 0,
|
||||||
|
typo: "".into(),
|
||||||
|
corrections: crate::Status::Invalid,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -13,21 +13,6 @@ pub trait Dictionary: Send + Sync {
|
||||||
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
|
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// A dictionary with no entries: every lookup declines to give a verdict.
pub(crate) struct NullDictionary;

impl Dictionary for NullDictionary {
    // Never has an opinion about an identifier.
    fn correct_ident<'s, 'w>(
        &'s self,
        _ident: crate::tokens::Identifier<'w>,
    ) -> Option<Status<'s>> {
        None
    }

    // Never has an opinion about a word.
    fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option<Status<'s>> {
        None
    }
}
|
|
||||||
|
|
||||||
/// Validity of a term in a Dictionary.
|
/// Validity of a term in a Dictionary.
|
||||||
#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)]
|
#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)]
|
||||||
#[serde(rename_all = "snake_case")]
|
#[serde(rename_all = "snake_case")]
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
|
mod check;
|
||||||
mod dict;
|
mod dict;
|
||||||
mod parser;
|
|
||||||
|
|
||||||
pub mod tokens;
|
pub mod tokens;
|
||||||
|
|
||||||
|
pub use check::*;
|
||||||
pub use dict::*;
|
pub use dict::*;
|
||||||
pub use parser::*;
|
|
||||||
|
|
|
@ -1,147 +0,0 @@
|
||||||
use crate::tokens;
|
|
||||||
use crate::Dictionary;
|
|
||||||
use std::borrow::Cow;
|
|
||||||
|
|
||||||
/// Extract typos from the buffer.
#[derive(Clone)]
pub struct ParserBuilder<'p, 'd> {
    // `None` means "use the process-wide default tokenizer" at build time.
    tokenizer: Option<&'p tokens::Tokenizer>,
    // Dictionary consulted for every identifier/word.
    dictionary: &'d dyn Dictionary,
}
|
|
||||||
|
|
||||||
impl<'p> ParserBuilder<'p, 'static> {
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Default::default()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'p, 'd> ParserBuilder<'p, 'd> {
|
|
||||||
/// Set the Tokenizer used when parsing.
|
|
||||||
pub fn tokenizer(mut self, tokenizer: &'p tokens::Tokenizer) -> Self {
|
|
||||||
self.tokenizer = Some(tokenizer);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Set the dictionary used when parsing.
|
|
||||||
pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> {
|
|
||||||
ParserBuilder {
|
|
||||||
tokenizer: self.tokenizer,
|
|
||||||
dictionary,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extract typos from the buffer.
|
|
||||||
pub fn build(&self) -> TyposParser<'p, 'd> {
|
|
||||||
TyposParser {
|
|
||||||
tokenizer: self.tokenizer.unwrap_or(&DEFAULT_TOKENIZER),
|
|
||||||
dictionary: self.dictionary,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'p> Default for ParserBuilder<'p, 'static> {
    // Default: no explicit tokenizer (resolved at `build`) and a dictionary
    // that never reports anything.
    fn default() -> Self {
        Self {
            tokenizer: None,
            dictionary: &crate::NullDictionary,
        }
    }
}
|
|
||||||
|
|
||||||
// Process-wide tokenizer used when a builder was not given one explicitly.
static DEFAULT_TOKENIZER: once_cell::sync::Lazy<tokens::Tokenizer> =
    once_cell::sync::Lazy::new(tokens::Tokenizer::new);
|
|
||||||
|
|
||||||
/// Extract typos from the buffer.
#[derive(Clone)]
pub struct TyposParser<'p, 'd> {
    // Splits buffers into identifiers/words.
    tokenizer: &'p tokens::Tokenizer,
    // Decides which tokens are typos and supplies corrections.
    dictionary: &'d dyn Dictionary,
}
|
|
||||||
|
|
||||||
impl<'p, 'd> TyposParser<'p, 'd> {
|
|
||||||
pub fn parse_str<'b, 's: 'b>(&'s self, buffer: &'b str) -> impl Iterator<Item = Typo<'b>> {
|
|
||||||
self.tokenizer
|
|
||||||
.parse_str(buffer)
|
|
||||||
.flat_map(move |ident| self.process_ident(ident))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse_bytes<'b, 's: 'b>(&'s self, buffer: &'b [u8]) -> impl Iterator<Item = Typo<'b>> {
|
|
||||||
self.tokenizer
|
|
||||||
.parse_bytes(buffer)
|
|
||||||
.flat_map(move |ident| self.process_ident(ident))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn process_ident<'i, 's: 'i>(
|
|
||||||
&'s self,
|
|
||||||
ident: tokens::Identifier<'i>,
|
|
||||||
) -> impl Iterator<Item = Typo<'i>> {
|
|
||||||
match self.dictionary.correct_ident(ident) {
|
|
||||||
Some(crate::Status::Valid) => itertools::Either::Left(None.into_iter()),
|
|
||||||
Some(corrections) => {
|
|
||||||
let typo = Typo {
|
|
||||||
byte_offset: ident.offset(),
|
|
||||||
typo: ident.token().into(),
|
|
||||||
corrections,
|
|
||||||
};
|
|
||||||
itertools::Either::Left(Some(typo).into_iter())
|
|
||||||
}
|
|
||||||
None => itertools::Either::Right(
|
|
||||||
ident
|
|
||||||
.split()
|
|
||||||
.filter_map(move |word| self.process_word(word)),
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn process_word<'w, 's: 'w>(&'s self, word: tokens::Word<'w>) -> Option<Typo<'w>> {
|
|
||||||
match self.dictionary.correct_word(word) {
|
|
||||||
Some(crate::Status::Valid) => None,
|
|
||||||
Some(corrections) => {
|
|
||||||
let typo = Typo {
|
|
||||||
byte_offset: word.offset(),
|
|
||||||
typo: word.token().into(),
|
|
||||||
corrections,
|
|
||||||
};
|
|
||||||
Some(typo)
|
|
||||||
}
|
|
||||||
None => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An invalid term found in the buffer.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Typo<'m> {
    // Byte position of the typo within the scanned buffer.
    pub byte_offset: usize,
    // The misspelled text as it appeared in the buffer.
    pub typo: Cow<'m, str>,
    // The dictionary's verdict, including any suggested corrections.
    pub corrections: crate::Status<'m>,
}
|
|
||||||
|
|
||||||
impl<'m> Typo<'m> {
|
|
||||||
pub fn into_owned(self) -> Typo<'static> {
|
|
||||||
Typo {
|
|
||||||
byte_offset: self.byte_offset,
|
|
||||||
typo: Cow::Owned(self.typo.into_owned()),
|
|
||||||
corrections: self.corrections.into_owned(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn borrow(&self) -> Typo<'_> {
|
|
||||||
Typo {
|
|
||||||
byte_offset: self.byte_offset,
|
|
||||||
typo: Cow::Borrowed(self.typo.as_ref()),
|
|
||||||
corrections: self.corrections.borrow(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'m> Default for Typo<'m> {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self {
|
|
||||||
byte_offset: 0,
|
|
||||||
typo: "".into(),
|
|
||||||
corrections: crate::Status::Invalid,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -70,14 +70,9 @@ impl FileChecker for Typos {
|
||||||
dictionary: &dyn Dictionary,
|
dictionary: &dyn Dictionary,
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), std::io::Error> {
|
) -> Result<(), std::io::Error> {
|
||||||
let parser = typos::ParserBuilder::new()
|
|
||||||
.tokenizer(tokenizer)
|
|
||||||
.dictionary(dictionary)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
if settings.check_filenames {
|
if settings.check_filenames {
|
||||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||||
for typo in parser.parse_str(file_name) {
|
for typo in typos::check_str(file_name, tokenizer, dictionary) {
|
||||||
let msg = report::Typo {
|
let msg = report::Typo {
|
||||||
context: Some(report::PathContext { path }.into()),
|
context: Some(report::PathContext { path }.into()),
|
||||||
buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
|
buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
|
||||||
|
@ -97,7 +92,7 @@ impl FileChecker for Typos {
|
||||||
reporter.report(msg.into())?;
|
reporter.report(msg.into())?;
|
||||||
} else {
|
} else {
|
||||||
let mut accum_line_num = AccumulateLineNum::new();
|
let mut accum_line_num = AccumulateLineNum::new();
|
||||||
for typo in parser.parse_bytes(&buffer) {
|
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
|
||||||
let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
|
let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
|
||||||
let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
|
let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
|
||||||
let msg = report::Typo {
|
let msg = report::Typo {
|
||||||
|
@ -129,11 +124,6 @@ impl FileChecker for FixTypos {
|
||||||
dictionary: &dyn Dictionary,
|
dictionary: &dyn Dictionary,
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), std::io::Error> {
|
) -> Result<(), std::io::Error> {
|
||||||
let parser = typos::ParserBuilder::new()
|
|
||||||
.tokenizer(tokenizer)
|
|
||||||
.dictionary(dictionary)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
if settings.check_files {
|
if settings.check_files {
|
||||||
let (buffer, content_type) = read_file(path, reporter)?;
|
let (buffer, content_type) = read_file(path, reporter)?;
|
||||||
if !explicit && !settings.binary && content_type.is_binary() {
|
if !explicit && !settings.binary && content_type.is_binary() {
|
||||||
|
@ -142,7 +132,7 @@ impl FileChecker for FixTypos {
|
||||||
} else {
|
} else {
|
||||||
let mut fixes = Vec::new();
|
let mut fixes = Vec::new();
|
||||||
let mut accum_line_num = AccumulateLineNum::new();
|
let mut accum_line_num = AccumulateLineNum::new();
|
||||||
for typo in parser.parse_bytes(&buffer) {
|
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
|
||||||
if is_fixable(&typo) {
|
if is_fixable(&typo) {
|
||||||
fixes.push(typo.into_owned());
|
fixes.push(typo.into_owned());
|
||||||
} else {
|
} else {
|
||||||
|
@ -169,7 +159,7 @@ impl FileChecker for FixTypos {
|
||||||
if settings.check_filenames {
|
if settings.check_filenames {
|
||||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||||
let mut fixes = Vec::new();
|
let mut fixes = Vec::new();
|
||||||
for typo in parser.parse_str(file_name) {
|
for typo in typos::check_str(file_name, tokenizer, dictionary) {
|
||||||
if is_fixable(&typo) {
|
if is_fixable(&typo) {
|
||||||
fixes.push(typo.into_owned());
|
fixes.push(typo.into_owned());
|
||||||
} else {
|
} else {
|
||||||
|
@ -211,11 +201,6 @@ impl FileChecker for DiffTypos {
|
||||||
dictionary: &dyn Dictionary,
|
dictionary: &dyn Dictionary,
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), std::io::Error> {
|
) -> Result<(), std::io::Error> {
|
||||||
let parser = typos::ParserBuilder::new()
|
|
||||||
.tokenizer(tokenizer)
|
|
||||||
.dictionary(dictionary)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let mut content = Vec::new();
|
let mut content = Vec::new();
|
||||||
let mut new_content = Vec::new();
|
let mut new_content = Vec::new();
|
||||||
if settings.check_files {
|
if settings.check_files {
|
||||||
|
@ -226,7 +211,7 @@ impl FileChecker for DiffTypos {
|
||||||
} else {
|
} else {
|
||||||
let mut fixes = Vec::new();
|
let mut fixes = Vec::new();
|
||||||
let mut accum_line_num = AccumulateLineNum::new();
|
let mut accum_line_num = AccumulateLineNum::new();
|
||||||
for typo in parser.parse_bytes(&buffer) {
|
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
|
||||||
if is_fixable(&typo) {
|
if is_fixable(&typo) {
|
||||||
fixes.push(typo.into_owned());
|
fixes.push(typo.into_owned());
|
||||||
} else {
|
} else {
|
||||||
|
@ -254,7 +239,7 @@ impl FileChecker for DiffTypos {
|
||||||
if settings.check_filenames {
|
if settings.check_filenames {
|
||||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||||
let mut fixes = Vec::new();
|
let mut fixes = Vec::new();
|
||||||
for typo in parser.parse_str(file_name) {
|
for typo in typos::check_str(file_name, tokenizer, dictionary) {
|
||||||
if is_fixable(&typo) {
|
if is_fixable(&typo) {
|
||||||
fixes.push(typo.into_owned());
|
fixes.push(typo.into_owned());
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Reference in a new issue