mirror of
https://github.com/crate-ci/typos.git
synced 2025-01-09 00:04:49 -05:00
refactor(typos): Open up the name Parser
This commit is contained in:
parent
7fdd0dee16
commit
1e64080c05
6 changed files with 48 additions and 48 deletions
|
@ -9,7 +9,7 @@ use typos::checks::Check;
|
|||
|
||||
fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let parser = typos::tokens::Tokenizer::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_identifier_parser();
|
||||
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
|
||||
}
|
||||
|
@ -46,7 +46,7 @@ fn parse_idents_corpus_str(b: &mut test::Bencher) {
|
|||
|
||||
fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let parser = typos::tokens::Tokenizer::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_identifier_parser();
|
||||
b.iter(|| {
|
||||
checks.check_bytes(
|
||||
|
@ -90,7 +90,7 @@ fn parse_idents_corpus_bytes(b: &mut test::Bencher) {
|
|||
|
||||
fn bench_parse_word_str(data: &str, b: &mut test::Bencher) {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let parser = typos::tokens::Tokenizer::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_word_parser();
|
||||
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
|
||||
}
|
||||
|
@ -127,7 +127,7 @@ fn parse_words_corpus(b: &mut test::Bencher) {
|
|||
|
||||
fn bench_typos(data: &str, b: &mut test::Bencher) {
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let parser = typos::tokens::Tokenizer::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_typos();
|
||||
b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
|
||||
}
|
||||
|
@ -168,7 +168,7 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) {
|
|||
sample_path.write_str(data).unwrap();
|
||||
|
||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let parser = typos::tokens::Tokenizer::new();
|
||||
let checks = typos::checks::TyposSettings::new().build_typos();
|
||||
b.iter(|| {
|
||||
checks.check_file(
|
||||
|
|
|
@ -6,19 +6,19 @@ mod data;
|
|||
|
||||
#[bench]
|
||||
fn ident_parse_empty(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let parser = typos::tokens::Tokenizer::new();
|
||||
b.iter(|| parser.parse_bytes(data::EMPTY.as_bytes()).last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn ident_parse_no_tokens(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let parser = typos::tokens::Tokenizer::new();
|
||||
b.iter(|| parser.parse_bytes(data::NO_TOKENS.as_bytes()).last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn ident_parse_single_token(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let parser = typos::tokens::Tokenizer::new();
|
||||
b.iter(|| {
|
||||
parser.parse_bytes(data::SINGLE_TOKEN.as_bytes()).last();
|
||||
});
|
||||
|
@ -26,19 +26,19 @@ fn ident_parse_single_token(b: &mut test::Bencher) {
|
|||
|
||||
#[bench]
|
||||
fn ident_parse_sherlock(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let parser = typos::tokens::Tokenizer::new();
|
||||
b.iter(|| parser.parse_bytes(data::SHERLOCK.as_bytes()).last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn ident_parse_code(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let parser = typos::tokens::Tokenizer::new();
|
||||
b.iter(|| parser.parse_bytes(data::CODE.as_bytes()).last());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn ident_parse_corpus(b: &mut test::Bencher) {
|
||||
let parser = typos::tokens::Parser::new();
|
||||
let parser = typos::tokens::Tokenizer::new();
|
||||
b.iter(|| parser.parse_bytes(data::CORPUS.as_bytes()).last());
|
||||
}
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ pub trait Check: Send + Sync {
|
|||
fn check_str(
|
||||
&self,
|
||||
buffer: &str,
|
||||
parser: &tokens::Parser,
|
||||
parser: &tokens::Tokenizer,
|
||||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error>;
|
||||
|
@ -17,7 +17,7 @@ pub trait Check: Send + Sync {
|
|||
fn check_bytes(
|
||||
&self,
|
||||
buffer: &[u8],
|
||||
parser: &tokens::Parser,
|
||||
parser: &tokens::Tokenizer,
|
||||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error>;
|
||||
|
@ -31,7 +31,7 @@ pub trait Check: Send + Sync {
|
|||
fn check_filename(
|
||||
&self,
|
||||
path: &std::path::Path,
|
||||
parser: &tokens::Parser,
|
||||
parser: &tokens::Tokenizer,
|
||||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
|
@ -54,7 +54,7 @@ pub trait Check: Send + Sync {
|
|||
&self,
|
||||
path: &std::path::Path,
|
||||
explicit: bool,
|
||||
parser: &tokens::Parser,
|
||||
parser: &tokens::Tokenizer,
|
||||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
|
@ -172,7 +172,7 @@ impl Check for Typos {
|
|||
fn check_str(
|
||||
&self,
|
||||
buffer: &str,
|
||||
parser: &tokens::Parser,
|
||||
parser: &tokens::Tokenizer,
|
||||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
|
@ -217,7 +217,7 @@ impl Check for Typos {
|
|||
fn check_bytes(
|
||||
&self,
|
||||
buffer: &[u8],
|
||||
parser: &tokens::Parser,
|
||||
parser: &tokens::Tokenizer,
|
||||
dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
|
@ -284,7 +284,7 @@ impl Check for ParseIdentifiers {
|
|||
fn check_str(
|
||||
&self,
|
||||
buffer: &str,
|
||||
parser: &tokens::Parser,
|
||||
parser: &tokens::Tokenizer,
|
||||
_dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
|
@ -303,7 +303,7 @@ impl Check for ParseIdentifiers {
|
|||
fn check_bytes(
|
||||
&self,
|
||||
buffer: &[u8],
|
||||
parser: &tokens::Parser,
|
||||
parser: &tokens::Tokenizer,
|
||||
_dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
|
@ -343,7 +343,7 @@ impl Check for ParseWords {
|
|||
fn check_str(
|
||||
&self,
|
||||
buffer: &str,
|
||||
parser: &tokens::Parser,
|
||||
parser: &tokens::Tokenizer,
|
||||
_dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
|
@ -365,7 +365,7 @@ impl Check for ParseWords {
|
|||
fn check_bytes(
|
||||
&self,
|
||||
buffer: &[u8],
|
||||
parser: &tokens::Parser,
|
||||
parser: &tokens::Tokenizer,
|
||||
_dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
|
@ -404,7 +404,7 @@ impl Check for Files {
|
|||
fn check_str(
|
||||
&self,
|
||||
_buffer: &str,
|
||||
_parser: &tokens::Parser,
|
||||
_parser: &tokens::Tokenizer,
|
||||
_dictionary: &dyn Dictionary,
|
||||
_reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
|
@ -414,7 +414,7 @@ impl Check for Files {
|
|||
fn check_bytes(
|
||||
&self,
|
||||
_buffer: &[u8],
|
||||
_parser: &tokens::Parser,
|
||||
_parser: &tokens::Tokenizer,
|
||||
_dictionary: &dyn Dictionary,
|
||||
_reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
|
@ -436,7 +436,7 @@ impl Check for Files {
|
|||
fn check_filename(
|
||||
&self,
|
||||
_path: &std::path::Path,
|
||||
_parser: &tokens::Parser,
|
||||
_parser: &tokens::Tokenizer,
|
||||
_dictionary: &dyn Dictionary,
|
||||
_reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
|
@ -447,7 +447,7 @@ impl Check for Files {
|
|||
&self,
|
||||
path: &std::path::Path,
|
||||
_explicit: bool,
|
||||
_parser: &tokens::Parser,
|
||||
_parser: &tokens::Tokenizer,
|
||||
_dictionary: &dyn Dictionary,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct ParserBuilder {
|
||||
pub struct TokenizerBuilder {
|
||||
ignore_hex: bool,
|
||||
leading_digits: bool,
|
||||
leading_chars: String,
|
||||
|
@ -7,7 +7,7 @@ pub struct ParserBuilder {
|
|||
include_chars: String,
|
||||
}
|
||||
|
||||
impl ParserBuilder {
|
||||
impl TokenizerBuilder {
|
||||
pub fn new() -> Self {
|
||||
Default::default()
|
||||
}
|
||||
|
@ -37,7 +37,7 @@ impl ParserBuilder {
|
|||
self
|
||||
}
|
||||
|
||||
pub fn build(&self) -> Parser {
|
||||
pub fn build(&self) -> Tokenizer {
|
||||
let mut pattern = r#"\b("#.to_owned();
|
||||
Self::push_pattern(&mut pattern, self.leading_digits, &self.leading_chars);
|
||||
Self::push_pattern(&mut pattern, self.include_digits, &self.include_chars);
|
||||
|
@ -46,7 +46,7 @@ impl ParserBuilder {
|
|||
let words_str = regex::Regex::new(&pattern).unwrap();
|
||||
let words_bytes = regex::bytes::Regex::new(&pattern).unwrap();
|
||||
|
||||
Parser {
|
||||
Tokenizer {
|
||||
words_str,
|
||||
words_bytes,
|
||||
// `leading_digits` lets us bypass the regexes since you can't have a decimal or
|
||||
|
@ -69,7 +69,7 @@ impl ParserBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
impl Default for ParserBuilder {
|
||||
impl Default for TokenizerBuilder {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
ignore_hex: true,
|
||||
|
@ -82,16 +82,16 @@ impl Default for ParserBuilder {
|
|||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Parser {
|
||||
pub struct Tokenizer {
|
||||
words_str: regex::Regex,
|
||||
words_bytes: regex::bytes::Regex,
|
||||
ignore_numbers: bool,
|
||||
ignore_hex: bool,
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
impl Tokenizer {
|
||||
pub fn new() -> Self {
|
||||
ParserBuilder::default().build()
|
||||
TokenizerBuilder::default().build()
|
||||
}
|
||||
|
||||
pub fn parse_str<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
|
||||
|
@ -124,7 +124,7 @@ impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
impl Default for Parser {
|
||||
impl Default for Tokenizer {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
|
@ -387,7 +387,7 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn tokenize_empty_is_empty() {
|
||||
let parser = Parser::new();
|
||||
let parser = Tokenizer::new();
|
||||
|
||||
let input = "";
|
||||
let expected: Vec<Identifier> = vec![];
|
||||
|
@ -399,7 +399,7 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn tokenize_word_is_word() {
|
||||
let parser = Parser::new();
|
||||
let parser = Tokenizer::new();
|
||||
|
||||
let input = "word";
|
||||
let expected: Vec<Identifier> = vec![Identifier::new_unchecked("word", 0)];
|
||||
|
@ -411,7 +411,7 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn tokenize_space_separated_words() {
|
||||
let parser = Parser::new();
|
||||
let parser = Tokenizer::new();
|
||||
|
||||
let input = "A B";
|
||||
let expected: Vec<Identifier> = vec![
|
||||
|
@ -426,7 +426,7 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn tokenize_dot_separated_words() {
|
||||
let parser = Parser::new();
|
||||
let parser = Tokenizer::new();
|
||||
|
||||
let input = "A.B";
|
||||
let expected: Vec<Identifier> = vec![
|
||||
|
@ -441,7 +441,7 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn tokenize_namespace_separated_words() {
|
||||
let parser = Parser::new();
|
||||
let parser = Tokenizer::new();
|
||||
|
||||
let input = "A::B";
|
||||
let expected: Vec<Identifier> = vec![
|
||||
|
@ -456,7 +456,7 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn tokenize_underscore_doesnt_separate() {
|
||||
let parser = Parser::new();
|
||||
let parser = Tokenizer::new();
|
||||
|
||||
let input = "A_B";
|
||||
let expected: Vec<Identifier> = vec![Identifier::new_unchecked("A_B", 0)];
|
||||
|
@ -468,7 +468,7 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn tokenize_ignore_hex_enabled() {
|
||||
let parser = ParserBuilder::new().ignore_hex(true).build();
|
||||
let parser = TokenizerBuilder::new().ignore_hex(true).build();
|
||||
|
||||
let input = "Hello 0xDEADBEEF World";
|
||||
let expected: Vec<Identifier> = vec![
|
||||
|
@ -483,7 +483,7 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn tokenize_ignore_hex_disabled() {
|
||||
let parser = ParserBuilder::new()
|
||||
let parser = TokenizerBuilder::new()
|
||||
.ignore_hex(false)
|
||||
.leading_digits(true)
|
||||
.build();
|
||||
|
@ -523,11 +523,11 @@ mod test {
|
|||
&[("A", Case::Scream, 0), ("String", Case::Title, 1)],
|
||||
),
|
||||
(
|
||||
"SimpleXMLParser",
|
||||
"SimpleXMLTokenizer",
|
||||
&[
|
||||
("Simple", Case::Title, 0),
|
||||
("XML", Case::Scream, 6),
|
||||
("Parser", Case::Title, 9),
|
||||
("Tokenizer", Case::Title, 9),
|
||||
],
|
||||
),
|
||||
(
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
pub(crate) fn check_path(
|
||||
walk: ignore::Walk,
|
||||
checks: &dyn typos::checks::Check,
|
||||
parser: &typos::tokens::Parser,
|
||||
parser: &typos::tokens::Tokenizer,
|
||||
dictionary: &dyn typos::Dictionary,
|
||||
reporter: &dyn typos::report::Report,
|
||||
) -> Result<(), ignore::Error> {
|
||||
|
@ -14,7 +14,7 @@ pub(crate) fn check_path(
|
|||
pub(crate) fn check_path_parallel(
|
||||
walk: ignore::WalkParallel,
|
||||
checks: &dyn typos::checks::Check,
|
||||
parser: &typos::tokens::Parser,
|
||||
parser: &typos::tokens::Tokenizer,
|
||||
dictionary: &dyn typos::Dictionary,
|
||||
reporter: &dyn typos::report::Report,
|
||||
) -> Result<(), ignore::Error> {
|
||||
|
@ -37,7 +37,7 @@ pub(crate) fn check_path_parallel(
|
|||
fn check_entry(
|
||||
entry: Result<ignore::DirEntry, ignore::Error>,
|
||||
checks: &dyn typos::checks::Check,
|
||||
parser: &typos::tokens::Parser,
|
||||
parser: &typos::tokens::Tokenizer,
|
||||
dictionary: &dyn typos::Dictionary,
|
||||
reporter: &dyn typos::report::Report,
|
||||
) -> Result<(), ignore::Error> {
|
||||
|
|
|
@ -61,7 +61,7 @@ fn run() -> proc_exit::ExitResult {
|
|||
config.default.update(&args.overrides);
|
||||
let config = config;
|
||||
|
||||
let parser = typos::tokens::ParserBuilder::new()
|
||||
let parser = typos::tokens::TokenizerBuilder::new()
|
||||
.ignore_hex(config.default.ignore_hex())
|
||||
.leading_digits(config.default.identifier_leading_digits())
|
||||
.leading_chars(config.default.identifier_leading_chars().to_owned())
|
||||
|
|
Loading…
Reference in a new issue