refactor(typos): Open up the name Parser

commit 1e64080c05 (parent 7fdd0dee16)
Author: Ed Page
Date: 2020-12-28 21:51:44 -06:00
6 changed files with 48 additions and 48 deletions

View file

@@ -9,7 +9,7 @@ use typos::checks::Check;
fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-let parser = typos::tokens::Parser::new();
+let parser = typos::tokens::Tokenizer::new();
let checks = typos::checks::TyposSettings::new().build_identifier_parser();
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
}
@@ -46,7 +46,7 @@ fn parse_idents_corpus_str(b: &mut test::Bencher) {
fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-let parser = typos::tokens::Parser::new();
+let parser = typos::tokens::Tokenizer::new();
let checks = typos::checks::TyposSettings::new().build_identifier_parser();
b.iter(|| {
checks.check_bytes(
@@ -90,7 +90,7 @@ fn parse_idents_corpus_bytes(b: &mut test::Bencher) {
fn bench_parse_word_str(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-let parser = typos::tokens::Parser::new();
+let parser = typos::tokens::Tokenizer::new();
let checks = typos::checks::TyposSettings::new().build_word_parser();
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
}
@@ -127,7 +127,7 @@ fn parse_words_corpus(b: &mut test::Bencher) {
fn bench_typos(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-let parser = typos::tokens::Parser::new();
+let parser = typos::tokens::Tokenizer::new();
let checks = typos::checks::TyposSettings::new().build_typos();
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
}
@@ -168,7 +168,7 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) {
sample_path.write_str(data).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-let parser = typos::tokens::Parser::new();
+let parser = typos::tokens::Tokenizer::new();
let checks = typos::checks::TyposSettings::new().build_typos();
b.iter(|| {
checks.check_file(

View file

@@ -6,19 +6,19 @@ mod data;
#[bench]
fn ident_parse_empty(b: &mut test::Bencher) {
-let parser = typos::tokens::Parser::new();
+let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::EMPTY.as_bytes()).last());
}
#[bench]
fn ident_parse_no_tokens(b: &mut test::Bencher) {
-let parser = typos::tokens::Parser::new();
+let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::NO_TOKENS.as_bytes()).last());
}
#[bench]
fn ident_parse_single_token(b: &mut test::Bencher) {
-let parser = typos::tokens::Parser::new();
+let parser = typos::tokens::Tokenizer::new();
b.iter(|| {
parser.parse_bytes(data::SINGLE_TOKEN.as_bytes()).last();
});
@@ -26,19 +26,19 @@ fn ident_parse_single_token(b: &mut test::Bencher) {
#[bench]
fn ident_parse_sherlock(b: &mut test::Bencher) {
-let parser = typos::tokens::Parser::new();
+let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::SHERLOCK.as_bytes()).last());
}
#[bench]
fn ident_parse_code(b: &mut test::Bencher) {
-let parser = typos::tokens::Parser::new();
+let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::CODE.as_bytes()).last());
}
#[bench]
fn ident_parse_corpus(b: &mut test::Bencher) {
-let parser = typos::tokens::Parser::new();
+let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::CORPUS.as_bytes()).last());
}

View file

@@ -9,7 +9,7 @@ pub trait Check: Send + Sync {
fn check_str(
&self,
buffer: &str,
-parser: &tokens::Parser,
+parser: &tokens::Tokenizer,
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error>;
@@ -17,7 +17,7 @@ pub trait Check: Send + Sync {
fn check_bytes(
&self,
buffer: &[u8],
-parser: &tokens::Parser,
+parser: &tokens::Tokenizer,
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error>;
@@ -31,7 +31,7 @@ pub trait Check: Send + Sync {
fn check_filename(
&self,
path: &std::path::Path,
-parser: &tokens::Parser,
+parser: &tokens::Tokenizer,
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
@@ -54,7 +54,7 @@ pub trait Check: Send + Sync {
&self,
path: &std::path::Path,
explicit: bool,
-parser: &tokens::Parser,
+parser: &tokens::Tokenizer,
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
@@ -172,7 +172,7 @@ impl Check for Typos {
fn check_str(
&self,
buffer: &str,
-parser: &tokens::Parser,
+parser: &tokens::Tokenizer,
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
@@ -217,7 +217,7 @@ impl Check for Typos {
fn check_bytes(
&self,
buffer: &[u8],
-parser: &tokens::Parser,
+parser: &tokens::Tokenizer,
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
@@ -284,7 +284,7 @@ impl Check for ParseIdentifiers {
fn check_str(
&self,
buffer: &str,
-parser: &tokens::Parser,
+parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
@@ -303,7 +303,7 @@ impl Check for ParseIdentifiers {
fn check_bytes(
&self,
buffer: &[u8],
-parser: &tokens::Parser,
+parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
@@ -343,7 +343,7 @@ impl Check for ParseWords {
fn check_str(
&self,
buffer: &str,
-parser: &tokens::Parser,
+parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
@@ -365,7 +365,7 @@ impl Check for ParseWords {
fn check_bytes(
&self,
buffer: &[u8],
-parser: &tokens::Parser,
+parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
@@ -404,7 +404,7 @@ impl Check for Files {
fn check_str(
&self,
_buffer: &str,
-_parser: &tokens::Parser,
+_parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
_reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
@@ -414,7 +414,7 @@ impl Check for Files {
fn check_bytes(
&self,
_buffer: &[u8],
-_parser: &tokens::Parser,
+_parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
_reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
@@ -436,7 +436,7 @@ impl Check for Files {
fn check_filename(
&self,
_path: &std::path::Path,
-_parser: &tokens::Parser,
+_parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
_reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
@@ -447,7 +447,7 @@ impl Check for Files {
&self,
path: &std::path::Path,
_explicit: bool,
-_parser: &tokens::Parser,
+_parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {

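Every `Check` entry point now borrows a `Tokenizer` instead of a `Parser`; the call shape is otherwise unchanged. A minimal sketch of an updated caller, reusing the dictionary and silent reporter that the benches above construct (the misspelled sample string is made up for illustration):

    fn main() -> Result<(), std::io::Error> {
        // Same wiring as before the rename; only the parameter type changed.
        let tokenizer = typos::tokens::Tokenizer::new();
        let dictionary = typos_cli::dict::BuiltIn::new(Default::default());
        let checks = typos::checks::TyposSettings::new().build_typos();
        checks.check_str("finallizer", &tokenizer, &dictionary, &typos::report::PrintSilent)
    }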
View file

@@ -1,5 +1,5 @@
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct ParserBuilder {
+pub struct TokenizerBuilder {
ignore_hex: bool,
leading_digits: bool,
leading_chars: String,
@@ -7,7 +7,7 @@ pub struct ParserBuilder {
include_chars: String,
}
-impl ParserBuilder {
+impl TokenizerBuilder {
pub fn new() -> Self {
Default::default()
}
@@ -37,7 +37,7 @@ impl ParserBuilder {
self
}
-pub fn build(&self) -> Parser {
+pub fn build(&self) -> Tokenizer {
let mut pattern = r#"\b("#.to_owned();
Self::push_pattern(&mut pattern, self.leading_digits, &self.leading_chars);
Self::push_pattern(&mut pattern, self.include_digits, &self.include_chars);
@@ -46,7 +46,7 @@ impl ParserBuilder {
let words_str = regex::Regex::new(&pattern).unwrap();
let words_bytes = regex::bytes::Regex::new(&pattern).unwrap();
-Parser {
+Tokenizer {
words_str,
words_bytes,
// `leading_digits` lets us bypass the regexes since you can't have a decimal or
@@ -69,7 +69,7 @@ impl ParserBuilder {
}
}
-impl Default for ParserBuilder {
+impl Default for TokenizerBuilder {
fn default() -> Self {
Self {
ignore_hex: true,
@@ -82,16 +82,16 @@ impl Default for ParserBuilder {
}
#[derive(Debug, Clone)]
-pub struct Parser {
+pub struct Tokenizer {
words_str: regex::Regex,
words_bytes: regex::bytes::Regex,
ignore_numbers: bool,
ignore_hex: bool,
}
-impl Parser {
+impl Tokenizer {
pub fn new() -> Self {
-ParserBuilder::default().build()
+TokenizerBuilder::default().build()
}
pub fn parse_str<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
@@ -124,7 +124,7 @@ impl Parser {
}
}
-impl Default for Parser {
+impl Default for Tokenizer {
fn default() -> Self {
Self::new()
}
@@ -387,7 +387,7 @@ mod test {
#[test]
fn tokenize_empty_is_empty() {
-let parser = Parser::new();
+let parser = Tokenizer::new();
let input = "";
let expected: Vec<Identifier> = vec![];
@@ -399,7 +399,7 @@ mod test {
#[test]
fn tokenize_word_is_word() {
-let parser = Parser::new();
+let parser = Tokenizer::new();
let input = "word";
let expected: Vec<Identifier> = vec![Identifier::new_unchecked("word", 0)];
@@ -411,7 +411,7 @@ mod test {
#[test]
fn tokenize_space_separated_words() {
-let parser = Parser::new();
+let parser = Tokenizer::new();
let input = "A B";
let expected: Vec<Identifier> = vec![
@@ -426,7 +426,7 @@ mod test {
#[test]
fn tokenize_dot_separated_words() {
-let parser = Parser::new();
+let parser = Tokenizer::new();
let input = "A.B";
let expected: Vec<Identifier> = vec![
@@ -441,7 +441,7 @@ mod test {
#[test]
fn tokenize_namespace_separated_words() {
-let parser = Parser::new();
+let parser = Tokenizer::new();
let input = "A::B";
let expected: Vec<Identifier> = vec![
@@ -456,7 +456,7 @@ mod test {
#[test]
fn tokenize_underscore_doesnt_separate() {
-let parser = Parser::new();
+let parser = Tokenizer::new();
let input = "A_B";
let expected: Vec<Identifier> = vec![Identifier::new_unchecked("A_B", 0)];
@@ -468,7 +468,7 @@ mod test {
#[test]
fn tokenize_ignore_hex_enabled() {
-let parser = ParserBuilder::new().ignore_hex(true).build();
+let parser = TokenizerBuilder::new().ignore_hex(true).build();
let input = "Hello 0xDEADBEEF World";
let expected: Vec<Identifier> = vec![
@@ -483,7 +483,7 @@ mod test {
#[test]
fn tokenize_ignore_hex_disabled() {
-let parser = ParserBuilder::new()
+let parser = TokenizerBuilder::new()
.ignore_hex(false)
.leading_digits(true)
.build();
@@ -523,11 +523,11 @@
&[("A", Case::Scream, 0), ("String", Case::Title, 1)],
),
(
"SimpleXMLParser",
"SimpleXMLTokenizer",
&[
("Simple", Case::Title, 0),
("XML", Case::Scream, 6),
("Parser", Case::Title, 9),
("Tokenizer", Case::Title, 9),
],
),
(

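Only the names change in this file; the builder's options and the tokenizer's behavior carry over as-is. A minimal sketch of the renamed API, restricted to what is visible in this diff (`ignore_hex`, `leading_digits`, `build`, `parse_str`) and assuming `Identifier` implements `Debug`, as the test assertions suggest:

    // Keep hex-like tokens and allow leading digits, mirroring the
    // `tokenize_ignore_hex_disabled` test above.
    let tokenizer = typos::tokens::TokenizerBuilder::new()
        .ignore_hex(false)
        .leading_digits(true)
        .build();
    for identifier in tokenizer.parse_str("Hello 0xDEADBEEF World") {
        println!("{:?}", identifier); // token text plus its offset
    }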
View file

@@ -1,7 +1,7 @@
pub(crate) fn check_path(
walk: ignore::Walk,
checks: &dyn typos::checks::Check,
-parser: &typos::tokens::Parser,
+parser: &typos::tokens::Tokenizer,
dictionary: &dyn typos::Dictionary,
reporter: &dyn typos::report::Report,
) -> Result<(), ignore::Error> {
@@ -14,7 +14,7 @@ pub(crate) fn check_path(
pub(crate) fn check_path_parallel(
walk: ignore::WalkParallel,
checks: &dyn typos::checks::Check,
-parser: &typos::tokens::Parser,
+parser: &typos::tokens::Tokenizer,
dictionary: &dyn typos::Dictionary,
reporter: &dyn typos::report::Report,
) -> Result<(), ignore::Error> {
@@ -37,7 +37,7 @@ pub(crate) fn check_path_parallel(
fn check_entry(
entry: Result<ignore::DirEntry, ignore::Error>,
checks: &dyn typos::checks::Check,
-parser: &typos::tokens::Parser,
+parser: &typos::tokens::Tokenizer,
dictionary: &dyn typos::Dictionary,
reporter: &dyn typos::report::Report,
) -> Result<(), ignore::Error> {

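The walkers change only in the tokenizer type they thread through. A sketch of an updated call site for the crate-internal `check_path`, assuming `checks`, `dictionary`, and `reporter` are built as in the benches, with the walk root "." chosen purely for illustration:

    let tokenizer = typos::tokens::Tokenizer::new();
    let walk = ignore::Walk::new(".");
    // `check_path` is the pub(crate) helper shown above.
    check_path(walk, &checks, &tokenizer, &dictionary, &reporter)?;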
View file

@@ -61,7 +61,7 @@ fn run() -> proc_exit::ExitResult {
config.default.update(&args.overrides);
let config = config;
-let parser = typos::tokens::ParserBuilder::new()
+let parser = typos::tokens::TokenizerBuilder::new()
.ignore_hex(config.default.ignore_hex())
.leading_digits(config.default.identifier_leading_digits())
.leading_chars(config.default.identifier_leading_chars().to_owned())