refactor(typos): Open up the name Parser

This commit is contained in:
Ed Page 2020-12-28 21:51:44 -06:00
parent 7fdd0dee16
commit 1e64080c05
6 changed files with 48 additions and 48 deletions

View file

@ -9,7 +9,7 @@ use typos::checks::Check;
fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) { fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Tokenizer::new();
let checks = typos::checks::TyposSettings::new().build_identifier_parser(); let checks = typos::checks::TyposSettings::new().build_identifier_parser();
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent)); b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
} }
@ -46,7 +46,7 @@ fn parse_idents_corpus_str(b: &mut test::Bencher) {
fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) { fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Tokenizer::new();
let checks = typos::checks::TyposSettings::new().build_identifier_parser(); let checks = typos::checks::TyposSettings::new().build_identifier_parser();
b.iter(|| { b.iter(|| {
checks.check_bytes( checks.check_bytes(
@ -90,7 +90,7 @@ fn parse_idents_corpus_bytes(b: &mut test::Bencher) {
fn bench_parse_word_str(data: &str, b: &mut test::Bencher) { fn bench_parse_word_str(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Tokenizer::new();
let checks = typos::checks::TyposSettings::new().build_word_parser(); let checks = typos::checks::TyposSettings::new().build_word_parser();
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent)); b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
} }
@ -127,7 +127,7 @@ fn parse_words_corpus(b: &mut test::Bencher) {
fn bench_typos(data: &str, b: &mut test::Bencher) { fn bench_typos(data: &str, b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Tokenizer::new();
let checks = typos::checks::TyposSettings::new().build_typos(); let checks = typos::checks::TyposSettings::new().build_typos();
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent)); b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
} }
@ -168,7 +168,7 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) {
sample_path.write_str(data).unwrap(); sample_path.write_str(data).unwrap();
let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Tokenizer::new();
let checks = typos::checks::TyposSettings::new().build_typos(); let checks = typos::checks::TyposSettings::new().build_typos();
b.iter(|| { b.iter(|| {
checks.check_file( checks.check_file(

View file

@ -6,19 +6,19 @@ mod data;
#[bench] #[bench]
fn ident_parse_empty(b: &mut test::Bencher) { fn ident_parse_empty(b: &mut test::Bencher) {
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::EMPTY.as_bytes()).last()); b.iter(|| parser.parse_bytes(data::EMPTY.as_bytes()).last());
} }
#[bench] #[bench]
fn ident_parse_no_tokens(b: &mut test::Bencher) { fn ident_parse_no_tokens(b: &mut test::Bencher) {
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::NO_TOKENS.as_bytes()).last()); b.iter(|| parser.parse_bytes(data::NO_TOKENS.as_bytes()).last());
} }
#[bench] #[bench]
fn ident_parse_single_token(b: &mut test::Bencher) { fn ident_parse_single_token(b: &mut test::Bencher) {
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Tokenizer::new();
b.iter(|| { b.iter(|| {
parser.parse_bytes(data::SINGLE_TOKEN.as_bytes()).last(); parser.parse_bytes(data::SINGLE_TOKEN.as_bytes()).last();
}); });
@ -26,19 +26,19 @@ fn ident_parse_single_token(b: &mut test::Bencher) {
#[bench] #[bench]
fn ident_parse_sherlock(b: &mut test::Bencher) { fn ident_parse_sherlock(b: &mut test::Bencher) {
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::SHERLOCK.as_bytes()).last()); b.iter(|| parser.parse_bytes(data::SHERLOCK.as_bytes()).last());
} }
#[bench] #[bench]
fn ident_parse_code(b: &mut test::Bencher) { fn ident_parse_code(b: &mut test::Bencher) {
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::CODE.as_bytes()).last()); b.iter(|| parser.parse_bytes(data::CODE.as_bytes()).last());
} }
#[bench] #[bench]
fn ident_parse_corpus(b: &mut test::Bencher) { fn ident_parse_corpus(b: &mut test::Bencher) {
let parser = typos::tokens::Parser::new(); let parser = typos::tokens::Tokenizer::new();
b.iter(|| parser.parse_bytes(data::CORPUS.as_bytes()).last()); b.iter(|| parser.parse_bytes(data::CORPUS.as_bytes()).last());
} }

View file

@ -9,7 +9,7 @@ pub trait Check: Send + Sync {
fn check_str( fn check_str(
&self, &self,
buffer: &str, buffer: &str,
parser: &tokens::Parser, parser: &tokens::Tokenizer,
dictionary: &dyn Dictionary, dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<(), std::io::Error>; ) -> Result<(), std::io::Error>;
@ -17,7 +17,7 @@ pub trait Check: Send + Sync {
fn check_bytes( fn check_bytes(
&self, &self,
buffer: &[u8], buffer: &[u8],
parser: &tokens::Parser, parser: &tokens::Tokenizer,
dictionary: &dyn Dictionary, dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<(), std::io::Error>; ) -> Result<(), std::io::Error>;
@ -31,7 +31,7 @@ pub trait Check: Send + Sync {
fn check_filename( fn check_filename(
&self, &self,
path: &std::path::Path, path: &std::path::Path,
parser: &tokens::Parser, parser: &tokens::Tokenizer,
dictionary: &dyn Dictionary, dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
@ -54,7 +54,7 @@ pub trait Check: Send + Sync {
&self, &self,
path: &std::path::Path, path: &std::path::Path,
explicit: bool, explicit: bool,
parser: &tokens::Parser, parser: &tokens::Tokenizer,
dictionary: &dyn Dictionary, dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
@ -172,7 +172,7 @@ impl Check for Typos {
fn check_str( fn check_str(
&self, &self,
buffer: &str, buffer: &str,
parser: &tokens::Parser, parser: &tokens::Tokenizer,
dictionary: &dyn Dictionary, dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
@ -217,7 +217,7 @@ impl Check for Typos {
fn check_bytes( fn check_bytes(
&self, &self,
buffer: &[u8], buffer: &[u8],
parser: &tokens::Parser, parser: &tokens::Tokenizer,
dictionary: &dyn Dictionary, dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
@ -284,7 +284,7 @@ impl Check for ParseIdentifiers {
fn check_str( fn check_str(
&self, &self,
buffer: &str, buffer: &str,
parser: &tokens::Parser, parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary, _dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
@ -303,7 +303,7 @@ impl Check for ParseIdentifiers {
fn check_bytes( fn check_bytes(
&self, &self,
buffer: &[u8], buffer: &[u8],
parser: &tokens::Parser, parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary, _dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
@ -343,7 +343,7 @@ impl Check for ParseWords {
fn check_str( fn check_str(
&self, &self,
buffer: &str, buffer: &str,
parser: &tokens::Parser, parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary, _dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
@ -365,7 +365,7 @@ impl Check for ParseWords {
fn check_bytes( fn check_bytes(
&self, &self,
buffer: &[u8], buffer: &[u8],
parser: &tokens::Parser, parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary, _dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
@ -404,7 +404,7 @@ impl Check for Files {
fn check_str( fn check_str(
&self, &self,
_buffer: &str, _buffer: &str,
_parser: &tokens::Parser, _parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary, _dictionary: &dyn Dictionary,
_reporter: &dyn report::Report, _reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
@ -414,7 +414,7 @@ impl Check for Files {
fn check_bytes( fn check_bytes(
&self, &self,
_buffer: &[u8], _buffer: &[u8],
_parser: &tokens::Parser, _parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary, _dictionary: &dyn Dictionary,
_reporter: &dyn report::Report, _reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
@ -436,7 +436,7 @@ impl Check for Files {
fn check_filename( fn check_filename(
&self, &self,
_path: &std::path::Path, _path: &std::path::Path,
_parser: &tokens::Parser, _parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary, _dictionary: &dyn Dictionary,
_reporter: &dyn report::Report, _reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
@ -447,7 +447,7 @@ impl Check for Files {
&self, &self,
path: &std::path::Path, path: &std::path::Path,
_explicit: bool, _explicit: bool,
_parser: &tokens::Parser, _parser: &tokens::Tokenizer,
_dictionary: &dyn Dictionary, _dictionary: &dyn Dictionary,
reporter: &dyn report::Report, reporter: &dyn report::Report,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {

View file

@ -1,5 +1,5 @@
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ParserBuilder { pub struct TokenizerBuilder {
ignore_hex: bool, ignore_hex: bool,
leading_digits: bool, leading_digits: bool,
leading_chars: String, leading_chars: String,
@ -7,7 +7,7 @@ pub struct ParserBuilder {
include_chars: String, include_chars: String,
} }
impl ParserBuilder { impl TokenizerBuilder {
pub fn new() -> Self { pub fn new() -> Self {
Default::default() Default::default()
} }
@ -37,7 +37,7 @@ impl ParserBuilder {
self self
} }
pub fn build(&self) -> Parser { pub fn build(&self) -> Tokenizer {
let mut pattern = r#"\b("#.to_owned(); let mut pattern = r#"\b("#.to_owned();
Self::push_pattern(&mut pattern, self.leading_digits, &self.leading_chars); Self::push_pattern(&mut pattern, self.leading_digits, &self.leading_chars);
Self::push_pattern(&mut pattern, self.include_digits, &self.include_chars); Self::push_pattern(&mut pattern, self.include_digits, &self.include_chars);
@ -46,7 +46,7 @@ impl ParserBuilder {
let words_str = regex::Regex::new(&pattern).unwrap(); let words_str = regex::Regex::new(&pattern).unwrap();
let words_bytes = regex::bytes::Regex::new(&pattern).unwrap(); let words_bytes = regex::bytes::Regex::new(&pattern).unwrap();
Parser { Tokenizer {
words_str, words_str,
words_bytes, words_bytes,
// `leading_digits` lets us bypass the regexes since you can't have a decimal or // `leading_digits` lets us bypass the regexes since you can't have a decimal or
@ -69,7 +69,7 @@ impl ParserBuilder {
} }
} }
impl Default for ParserBuilder { impl Default for TokenizerBuilder {
fn default() -> Self { fn default() -> Self {
Self { Self {
ignore_hex: true, ignore_hex: true,
@ -82,16 +82,16 @@ impl Default for ParserBuilder {
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Parser { pub struct Tokenizer {
words_str: regex::Regex, words_str: regex::Regex,
words_bytes: regex::bytes::Regex, words_bytes: regex::bytes::Regex,
ignore_numbers: bool, ignore_numbers: bool,
ignore_hex: bool, ignore_hex: bool,
} }
impl Parser { impl Tokenizer {
pub fn new() -> Self { pub fn new() -> Self {
ParserBuilder::default().build() TokenizerBuilder::default().build()
} }
pub fn parse_str<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> { pub fn parse_str<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
@ -124,7 +124,7 @@ impl Parser {
} }
} }
impl Default for Parser { impl Default for Tokenizer {
fn default() -> Self { fn default() -> Self {
Self::new() Self::new()
} }
@ -387,7 +387,7 @@ mod test {
#[test] #[test]
fn tokenize_empty_is_empty() { fn tokenize_empty_is_empty() {
let parser = Parser::new(); let parser = Tokenizer::new();
let input = ""; let input = "";
let expected: Vec<Identifier> = vec![]; let expected: Vec<Identifier> = vec![];
@ -399,7 +399,7 @@ mod test {
#[test] #[test]
fn tokenize_word_is_word() { fn tokenize_word_is_word() {
let parser = Parser::new(); let parser = Tokenizer::new();
let input = "word"; let input = "word";
let expected: Vec<Identifier> = vec![Identifier::new_unchecked("word", 0)]; let expected: Vec<Identifier> = vec![Identifier::new_unchecked("word", 0)];
@ -411,7 +411,7 @@ mod test {
#[test] #[test]
fn tokenize_space_separated_words() { fn tokenize_space_separated_words() {
let parser = Parser::new(); let parser = Tokenizer::new();
let input = "A B"; let input = "A B";
let expected: Vec<Identifier> = vec![ let expected: Vec<Identifier> = vec![
@ -426,7 +426,7 @@ mod test {
#[test] #[test]
fn tokenize_dot_separated_words() { fn tokenize_dot_separated_words() {
let parser = Parser::new(); let parser = Tokenizer::new();
let input = "A.B"; let input = "A.B";
let expected: Vec<Identifier> = vec![ let expected: Vec<Identifier> = vec![
@ -441,7 +441,7 @@ mod test {
#[test] #[test]
fn tokenize_namespace_separated_words() { fn tokenize_namespace_separated_words() {
let parser = Parser::new(); let parser = Tokenizer::new();
let input = "A::B"; let input = "A::B";
let expected: Vec<Identifier> = vec![ let expected: Vec<Identifier> = vec![
@ -456,7 +456,7 @@ mod test {
#[test] #[test]
fn tokenize_underscore_doesnt_separate() { fn tokenize_underscore_doesnt_separate() {
let parser = Parser::new(); let parser = Tokenizer::new();
let input = "A_B"; let input = "A_B";
let expected: Vec<Identifier> = vec![Identifier::new_unchecked("A_B", 0)]; let expected: Vec<Identifier> = vec![Identifier::new_unchecked("A_B", 0)];
@ -468,7 +468,7 @@ mod test {
#[test] #[test]
fn tokenize_ignore_hex_enabled() { fn tokenize_ignore_hex_enabled() {
let parser = ParserBuilder::new().ignore_hex(true).build(); let parser = TokenizerBuilder::new().ignore_hex(true).build();
let input = "Hello 0xDEADBEEF World"; let input = "Hello 0xDEADBEEF World";
let expected: Vec<Identifier> = vec![ let expected: Vec<Identifier> = vec![
@ -483,7 +483,7 @@ mod test {
#[test] #[test]
fn tokenize_ignore_hex_disabled() { fn tokenize_ignore_hex_disabled() {
let parser = ParserBuilder::new() let parser = TokenizerBuilder::new()
.ignore_hex(false) .ignore_hex(false)
.leading_digits(true) .leading_digits(true)
.build(); .build();
@ -523,11 +523,11 @@ mod test {
&[("A", Case::Scream, 0), ("String", Case::Title, 1)], &[("A", Case::Scream, 0), ("String", Case::Title, 1)],
), ),
( (
"SimpleXMLParser", "SimpleXMLTokenizer",
&[ &[
("Simple", Case::Title, 0), ("Simple", Case::Title, 0),
("XML", Case::Scream, 6), ("XML", Case::Scream, 6),
("Parser", Case::Title, 9), ("Tokenizer", Case::Title, 9),
], ],
), ),
( (

View file

@ -1,7 +1,7 @@
pub(crate) fn check_path( pub(crate) fn check_path(
walk: ignore::Walk, walk: ignore::Walk,
checks: &dyn typos::checks::Check, checks: &dyn typos::checks::Check,
parser: &typos::tokens::Parser, parser: &typos::tokens::Tokenizer,
dictionary: &dyn typos::Dictionary, dictionary: &dyn typos::Dictionary,
reporter: &dyn typos::report::Report, reporter: &dyn typos::report::Report,
) -> Result<(), ignore::Error> { ) -> Result<(), ignore::Error> {
@ -14,7 +14,7 @@ pub(crate) fn check_path(
pub(crate) fn check_path_parallel( pub(crate) fn check_path_parallel(
walk: ignore::WalkParallel, walk: ignore::WalkParallel,
checks: &dyn typos::checks::Check, checks: &dyn typos::checks::Check,
parser: &typos::tokens::Parser, parser: &typos::tokens::Tokenizer,
dictionary: &dyn typos::Dictionary, dictionary: &dyn typos::Dictionary,
reporter: &dyn typos::report::Report, reporter: &dyn typos::report::Report,
) -> Result<(), ignore::Error> { ) -> Result<(), ignore::Error> {
@ -37,7 +37,7 @@ pub(crate) fn check_path_parallel(
fn check_entry( fn check_entry(
entry: Result<ignore::DirEntry, ignore::Error>, entry: Result<ignore::DirEntry, ignore::Error>,
checks: &dyn typos::checks::Check, checks: &dyn typos::checks::Check,
parser: &typos::tokens::Parser, parser: &typos::tokens::Tokenizer,
dictionary: &dyn typos::Dictionary, dictionary: &dyn typos::Dictionary,
reporter: &dyn typos::report::Report, reporter: &dyn typos::report::Report,
) -> Result<(), ignore::Error> { ) -> Result<(), ignore::Error> {

View file

@ -61,7 +61,7 @@ fn run() -> proc_exit::ExitResult {
config.default.update(&args.overrides); config.default.update(&args.overrides);
let config = config; let config = config;
let parser = typos::tokens::ParserBuilder::new() let parser = typos::tokens::TokenizerBuilder::new()
.ignore_hex(config.default.ignore_hex()) .ignore_hex(config.default.ignore_hex())
.leading_digits(config.default.identifier_leading_digits()) .leading_digits(config.default.identifier_leading_digits())
.leading_chars(config.default.identifier_leading_chars().to_owned()) .leading_chars(config.default.identifier_leading_chars().to_owned())