refactor(typos): Focus API on primary use case

This commit is contained in:
Ed Page 2021-01-02 12:51:35 -06:00
parent aba85df435
commit 692f0ac095
2 changed files with 9 additions and 60 deletions

View file

@ -2,6 +2,7 @@ use crate::tokens;
use crate::Dictionary; use crate::Dictionary;
use std::borrow::Cow; use std::borrow::Cow;
/// Builder for a parser that extracts typos from a buffer.
#[derive(Clone)] #[derive(Clone)]
pub struct ParserBuilder<'p, 'd> { pub struct ParserBuilder<'p, 'd> {
tokenizer: Option<&'p tokens::Tokenizer>, tokenizer: Option<&'p tokens::Tokenizer>,
@ -30,26 +31,12 @@ impl<'p, 'd> ParserBuilder<'p, 'd> {
} }
/// Extract typos from the buffer. /// Extract typos from the buffer.
pub fn typos(&self) -> TyposParser<'p, 'd> { pub fn build(&self) -> TyposParser<'p, 'd> {
TyposParser { TyposParser {
tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
dictionary: self.dictionary, dictionary: self.dictionary,
} }
} }
/// Build a parser that yields identifiers.
///
/// The returned `IdentifiersParser` borrows the tokenizer configured on this builder,
/// or `DEFAULT_TOKENIZER` when none was set. The builder's dictionary is not passed on.
pub fn identifiers(&self) -> IdentifiersParser<'p> {
IdentifiersParser {
// Fall back to the shared default when the caller never supplied a tokenizer.
tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
}
}
/// Build a parser that yields words.
///
/// The returned `WordsParser` borrows the tokenizer configured on this builder,
/// or `DEFAULT_TOKENIZER` when none was set. The builder's dictionary is not passed on.
pub fn words(&self) -> WordsParser<'p> {
WordsParser {
// Fall back to the shared default when the caller never supplied a tokenizer.
tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
}
}
} }
impl<'p> Default for ParserBuilder<'p, 'static> { impl<'p> Default for ParserBuilder<'p, 'static> {
@ -158,35 +145,3 @@ impl<'m> Default for Typo<'m> {
} }
} }
} }
/// Parser that yields the identifiers found in a buffer.
///
/// Holds nothing but a borrowed tokenizer, which does the actual parsing.
#[derive(Debug, Clone)]
pub struct IdentifiersParser<'p> {
// Tokenizer the parse methods delegate to, borrowed for `'p`.
tokenizer: &'p tokens::Tokenizer,
}
impl<'p> IdentifiersParser<'p> {
/// Iterate over the identifiers in a string slice.
///
/// Forwards to the tokenizer's `parse_str` without further processing. The signature
/// reuses `'p`, so `buffer` must be borrowed for the same lifetime as the tokenizer.
pub fn parse_str(&self, buffer: &'p str) -> impl Iterator<Item = tokens::Identifier<'p>> {
self.tokenizer.parse_str(buffer)
}
/// Iterate over the identifiers in a byte slice.
///
/// Forwards to the tokenizer's `parse_bytes` without further processing. The signature
/// reuses `'p`, so `buffer` must be borrowed for the same lifetime as the tokenizer.
pub fn parse_bytes(&self, buffer: &'p [u8]) -> impl Iterator<Item = tokens::Identifier<'p>> {
self.tokenizer.parse_bytes(buffer)
}
}
/// Parser that yields the words found in a buffer.
///
/// Holds nothing but a borrowed tokenizer; words are obtained by splitting the
/// identifiers that tokenizer produces.
#[derive(Debug, Clone)]
pub struct WordsParser<'p> {
// Tokenizer the parse methods delegate to, borrowed for `'p`.
tokenizer: &'p tokens::Tokenizer,
}
impl<'p> WordsParser<'p> {
/// Iterate over the words in a string slice.
///
/// Tokenizes `buffer` into identifiers, then flattens the result of calling `split()`
/// on each identifier into a single stream of words. The signature reuses `'p`, so
/// `buffer` must be borrowed for the same lifetime as the tokenizer.
pub fn parse_str(&self, buffer: &'p str) -> impl Iterator<Item = tokens::Word<'p>> {
self.tokenizer.parse_str(buffer).flat_map(|i| i.split())
}
/// Iterate over the words in a byte slice.
///
/// Tokenizes `buffer` into identifiers, then flattens the result of calling `split()`
/// on each identifier into a single stream of words. The signature reuses `'p`, so
/// `buffer` must be borrowed for the same lifetime as the tokenizer.
pub fn parse_bytes(&self, buffer: &'p [u8]) -> impl Iterator<Item = tokens::Word<'p>> {
self.tokenizer.parse_bytes(buffer).flat_map(|i| i.split())
}
}

View file

@ -118,7 +118,7 @@ impl Check for Typos {
let parser = typos::ParserBuilder::new() let parser = typos::ParserBuilder::new()
.tokenizer(tokenizer) .tokenizer(tokenizer)
.dictionary(dictionary) .dictionary(dictionary)
.typos(); .build();
if self.check_filenames { if self.check_filenames {
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
@ -180,7 +180,7 @@ impl Check for FixTypos {
let parser = typos::ParserBuilder::new() let parser = typos::ParserBuilder::new()
.tokenizer(tokenizer) .tokenizer(tokenizer)
.dictionary(dictionary) .dictionary(dictionary)
.typos(); .build();
if self.check_files { if self.check_files {
let (buffer, content_type) = read_file(path, reporter)?; let (buffer, content_type) = read_file(path, reporter)?;
@ -265,7 +265,7 @@ impl Check for DiffTypos {
let parser = typos::ParserBuilder::new() let parser = typos::ParserBuilder::new()
.tokenizer(tokenizer) .tokenizer(tokenizer)
.dictionary(dictionary) .dictionary(dictionary)
.typos(); .build();
let mut content = Vec::new(); let mut content = Vec::new();
let mut new_content = Vec::new(); let mut new_content = Vec::new();
@ -379,13 +379,9 @@ impl Check for Identifiers {
_dictionary: &dyn Dictionary, _dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
let parser = typos::ParserBuilder::new()
.tokenizer(tokenizer)
.identifiers();
if self.check_filenames { if self.check_filenames {
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
for word in parser.parse_str(file_name) { for word in tokenizer.parse_str(file_name) {
let msg = report::Parse { let msg = report::Parse {
context: Some(report::PathContext { path }.into()), context: Some(report::PathContext { path }.into()),
kind: report::ParseKind::Identifier, kind: report::ParseKind::Identifier,
@ -402,7 +398,7 @@ impl Check for Identifiers {
let msg = report::BinaryFile { path }; let msg = report::BinaryFile { path };
reporter.report(msg.into())?; reporter.report(msg.into())?;
} else { } else {
for word in parser.parse_bytes(&buffer) { for word in tokenizer.parse_bytes(&buffer) {
// HACK: Don't look up the line_num per entry to better match the performance // HACK: Don't look up the line_num per entry to better match the performance
// of Typos for comparison purposes. We don't really get much out of it // of Typos for comparison purposes. We don't really get much out of it
// anyway. // anyway.
@ -437,11 +433,9 @@ impl Check for Words {
_dictionary: &dyn Dictionary, _dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
let parser = typos::ParserBuilder::new().tokenizer(tokenizer).words();
if self.check_filenames { if self.check_filenames {
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
for word in parser.parse_str(file_name) { for word in tokenizer.parse_str(file_name).flat_map(|i| i.split()) {
let msg = report::Parse { let msg = report::Parse {
context: Some(report::PathContext { path }.into()), context: Some(report::PathContext { path }.into()),
kind: report::ParseKind::Word, kind: report::ParseKind::Word,
@ -458,7 +452,7 @@ impl Check for Words {
let msg = report::BinaryFile { path }; let msg = report::BinaryFile { path };
reporter.report(msg.into())?; reporter.report(msg.into())?;
} else { } else {
for word in parser.parse_bytes(&buffer) { for word in tokenizer.parse_bytes(&buffer).flat_map(|i| i.split()) {
// HACK: Don't look up the line_num per entry to better match the performance // HACK: Don't look up the line_num per entry to better match the performance
// of Typos for comparison purposes. We don't really get much out of it // of Typos for comparison purposes. We don't really get much out of it
// anyway. // anyway.