From e98fc52b0df66e7ddb7e08ea5ae537888a5fd3e7 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 14 Jul 2023 12:32:07 -0500 Subject: [PATCH 1/7] chore(typos): Add parse tracing --- crates/typos/src/tokens.rs | 269 +++++++++++++++++++++---------------- 1 file changed, 152 insertions(+), 117 deletions(-) diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 6ac6ce1..d36c94c 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -133,6 +133,7 @@ mod parser { use winnow::stream::Stream; use winnow::stream::StreamIsPartial; use winnow::token::*; + use winnow::trace::trace; pub(crate) fn next_identifier(input: T) -> IResult::Slice> where @@ -153,7 +154,7 @@ mod parser { // `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd // or unexpected cases than strip off start characters to a word since we aren't doing a // proper word boundary parse - take_while(1.., is_xid_continue).parse_next(input) + trace("identifier", take_while(1.., is_xid_continue)).parse_next(input) } fn ignore(input: T) -> IResult::Slice> @@ -162,23 +163,26 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - take_many0(alt(( - // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`, - // - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up - // - Make sure you always consume it - terminated(uuid_literal, peek(sep1)), - terminated(hash_literal, peek(sep1)), - terminated(base64_literal, peek(sep1)), // base64 should be quoted or something - terminated(ordinal_literal, peek(sep1)), - terminated(hex_literal, peek(sep1)), - terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words - terminated(email_literal, peek(sep1)), - terminated(url_literal, peek(sep1)), - terminated(css_color, peek(sep1)), - c_escape, - printf, - other, - ))) + trace( + "ignore", + take_many0(alt(( + // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`, + // - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up + // - Make sure you always consume it + terminated(uuid_literal, peek(sep1)), + terminated(hash_literal, peek(sep1)), + terminated(base64_literal, peek(sep1)), // base64 should be quoted or something + terminated(ordinal_literal, peek(sep1)), + terminated(hex_literal, peek(sep1)), + terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words + terminated(email_literal, peek(sep1)), + terminated(url_literal, peek(sep1)), + terminated(css_color, peek(sep1)), + c_escape, + printf, + other, + ))), + ) .parse_next(input) } @@ -201,12 +205,15 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - ( - one_of(|c| !is_xid_continue(c)), - take_while(0.., is_ignore_char), + trace( + "other", + ( + one_of(|c| !is_xid_continue(c)), + take_while(0.., is_ignore_char), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } fn ordinal_literal(input: T) -> IResult::Slice> @@ -221,14 +228,17 @@ mod parser { ['_'].contains(&c) } - ( - take_while(0.., is_sep), - take_while(1.., is_dec_digit), - alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))), - take_while(0.., is_sep), + trace( + "ordinal_literal", + ( + take_while(0.., is_sep), + take_while(1.., is_dec_digit), + alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))), + take_while(0.., is_sep), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } fn dec_literal(input: T) -> IResult::Slice> @@ -237,7 +247,7 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - take_while(1.., is_dec_digit_with_sep).parse_next(input) + trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input) } fn hex_literal(input: T) -> IResult::Slice> @@ -259,12 +269,15 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - preceded( - '#', - alt(( - terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)), - terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)), - )), + trace( + "color", + preceded( + '#', + alt(( + terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)), + terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)), + )), + ), ) .parse_next(input) } @@ -275,31 +288,34 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - alt(( - ( - take_while(8, is_lower_hex_digit), - '-', - take_while(4, is_lower_hex_digit), - '-', - take_while(4, is_lower_hex_digit), - '-', - take_while(4, is_lower_hex_digit), - '-', - take_while(12, is_lower_hex_digit), - ), - ( - take_while(8, is_upper_hex_digit), - '-', - take_while(4, is_upper_hex_digit), - '-', - take_while(4, is_upper_hex_digit), - '-', - take_while(4, is_upper_hex_digit), - '-', - take_while(12, is_upper_hex_digit), - ), - )) - .recognize() + trace( + "uuid", + alt(( + ( + take_while(8, is_lower_hex_digit), + '-', + take_while(4, is_lower_hex_digit), + '-', + take_while(4, is_lower_hex_digit), + '-', + take_while(4, is_lower_hex_digit), + '-', + take_while(12, is_lower_hex_digit), + ), + ( + take_while(8, is_upper_hex_digit), + '-', + take_while(4, is_upper_hex_digit), + '-', + take_while(4, is_upper_hex_digit), + '-', + take_while(4, is_upper_hex_digit), + '-', + take_while(12, is_upper_hex_digit), + ), + )) + .recognize(), + ) .parse_next(input) } @@ -319,10 +335,13 @@ mod parser { // or more. const IGNORE_HEX_MIN: usize = 32; - alt(( - take_while(IGNORE_HEX_MIN.., is_lower_hex_digit), - take_while(IGNORE_HEX_MIN.., is_upper_hex_digit), - )) + trace( + "hash", + alt(( + take_while(IGNORE_HEX_MIN.., is_lower_hex_digit), + take_while(IGNORE_HEX_MIN.., is_upper_hex_digit), + )), + ) .parse_next(input) } @@ -332,32 +351,35 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?; + trace("base64", move |input: T| { + let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?; - const CHUNK: usize = 4; - let padding_offset = input.offset_to(&padding); - let mut padding_len = CHUNK - padding_offset % CHUNK; - if padding_len == CHUNK { - padding_len = 0; - } + const CHUNK: usize = 4; + let padding_offset = input.offset_to(&padding); + let mut padding_len = CHUNK - padding_offset % CHUNK; + if padding_len == CHUNK { + padding_len = 0; + } - if captured.slice_len() < 90 - && padding_len == 0 - && captured - .as_bstr() - .iter() - .all(|c| !['/', '+'].contains(&c.as_char())) - { - return Err(winnow::error::ErrMode::Backtrack( - winnow::error::Error::new(input, winnow::error::ErrorKind::Slice), - )); - } + if captured.slice_len() < 90 + && padding_len == 0 + && captured + .as_bstr() + .iter() + .all(|c| !['/', '+'].contains(&c.as_char())) + { + return Err(winnow::error::ErrMode::Backtrack( + winnow::error::Error::new(input, winnow::error::ErrorKind::Slice), + )); + } - let (after, _) = - take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?; + let (after, _) = + take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?; - let after_offset = input.offset_to(&after); - Ok(input.next_slice(after_offset)) + let after_offset = input.offset_to(&after); + Ok(input.next_slice(after_offset)) + }) + .parse_next(input) } fn email_literal(input: T) -> IResult::Slice> @@ -366,13 +388,16 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - ( - take_while(1.., is_localport_char), - '@', - take_while(1.., is_domain_char), + trace( + "email", + ( + take_while(1.., is_localport_char), + '@', + take_while(1.., is_domain_char), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } fn url_literal(input: T) -> IResult::Slice> @@ -381,24 +406,27 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - ( - opt(terminated( - take_while(1.., is_scheme_char), - // HACK: Technically you can skip `//` if you don't have a domain but that would - // get messy to support. - (':', '/', '/'), - )), + trace( + "url", ( - opt(terminated(url_userinfo, '@')), - take_while(1.., is_domain_char), - opt(preceded(':', take_while(1.., AsChar::is_dec_digit))), - ), - '/', - // HACK: Too lazy to enumerate - take_while(0.., is_path_query_fragment), + opt(terminated( + take_while(1.., is_scheme_char), + // HACK: Technically you can skip `//` if you don't have a domain but that would + // get messy to support. + (':', '/', '/'), + )), + ( + opt(terminated(url_userinfo, '@')), + take_while(1.., is_domain_char), + opt(preceded(':', take_while(1.., AsChar::is_dec_digit))), + ), + '/', + // HACK: Too lazy to enumerate + take_while(0.., is_path_query_fragment), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } fn url_userinfo(input: T) -> IResult::Slice> @@ -407,12 +435,15 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - ( - take_while(1.., is_localport_char), - opt(preceded(':', take_while(0.., is_localport_char))), + trace( + "userinfo", + ( + take_while(1.., is_localport_char), + opt(preceded(':', take_while(0.., is_localport_char))), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } fn c_escape(input: T) -> IResult::Slice> @@ -425,7 +456,11 @@ mod parser { // regular string that does escaping. The escaped letter might be part of a word, or it // might not be. Rather than guess and be wrong part of the time and correct people's words // incorrectly, we opt for just not evaluating it at all. - preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)).parse_next(input) + trace( + "escape", + preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)), + ) + .parse_next(input) } fn printf(input: T) -> IResult::Slice> @@ -434,7 +469,7 @@ mod parser { ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - preceded('%', take_while(1.., is_xid_continue)).parse_next(input) + trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input) } fn take_many0(mut f: F) -> impl FnMut(I) -> IResult::Slice, E> From 0bde06af9adc4e727a893329ce9ceb18fc27a973 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 14 Jul 2023 12:32:16 -0500 Subject: [PATCH 2/7] chore(varcon): Add parse tracing --- crates/varcon-core/src/parser.rs | 275 +++++++++++++++++-------------- 1 file changed, 152 insertions(+), 123 deletions(-) diff --git a/crates/varcon-core/src/parser.rs b/crates/varcon-core/src/parser.rs index 1fcb2d0..4eeb9cc 100644 --- a/crates/varcon-core/src/parser.rs +++ b/crates/varcon-core/src/parser.rs @@ -1,4 +1,5 @@ use winnow::prelude::*; +use winnow::trace::trace; use crate::*; @@ -62,37 +63,41 @@ A Cv: acknowledgment's / Av B C: acknowledgement's impl Cluster { pub fn parse(input: &str) -> IResult<&str, Self> { - let header = ( - "#", - winnow::ascii::space0, - winnow::ascii::not_line_ending, - winnow::ascii::line_ending, - ); - let note = winnow::combinator::preceded( - ("##", winnow::ascii::space0), - winnow::combinator::terminated( + trace("cluster", move |input| { + let header = ( + "#", + winnow::ascii::space0, winnow::ascii::not_line_ending, winnow::ascii::line_ending, - ), - ); - let mut cluster = ( - winnow::combinator::opt(header), - winnow::combinator::repeat( - 1.., - winnow::combinator::terminated(Entry::parse, winnow::ascii::line_ending), - ), - winnow::combinator::repeat(0.., note), - ); - let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = cluster.parse_next(input)?; + ); + let note = winnow::combinator::preceded( + ("##", winnow::ascii::space0), + winnow::combinator::terminated( + winnow::ascii::not_line_ending, + winnow::ascii::line_ending, + ), + ); + let mut cluster = ( + winnow::combinator::opt(header), + winnow::combinator::repeat( + 1.., + winnow::combinator::terminated(Entry::parse, winnow::ascii::line_ending), + ), + winnow::combinator::repeat(0.., note), + ); + let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = + cluster.parse_next(input)?; - let header = header.map(|s| s.2.to_owned()); - let notes = notes.into_iter().map(|s| s.to_owned()).collect(); - let c = Self { - header, - entries, - notes, - }; - Ok((input, c)) + let header = header.map(|s| s.2.to_owned()); + let notes = notes.into_iter().map(|s| s.to_owned()).collect(); + let c = Self { + header, + entries, + notes, + }; + Ok((input, c)) + }) + .parse_next(input) } } @@ -145,64 +150,70 @@ A B C: coloration's / B. Cv: colouration's impl Entry { pub fn parse(input: &str) -> IResult<&str, Self> { - let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0); - let (input, variants) = - winnow::combinator::separated1(Variant::parse, var_sep).parse_next(input)?; + trace("entry", move |input| { + let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0); + let (input, variants) = + winnow::combinator::separated1(Variant::parse, var_sep).parse_next(input)?; - let desc_sep = (winnow::ascii::space0, '|'); - let (input, description) = - winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?; + let desc_sep = (winnow::ascii::space0, '|'); + let (input, description) = + winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?; - let comment_sep = (winnow::ascii::space0, '#'); - let (input, comment) = winnow::combinator::opt(( - comment_sep, - winnow::ascii::space1, - winnow::ascii::not_line_ending, - )) - .parse_next(input)?; + let comment_sep = (winnow::ascii::space0, '#'); + let (input, comment) = winnow::combinator::opt(( + comment_sep, + winnow::ascii::space1, + winnow::ascii::not_line_ending, + )) + .parse_next(input)?; - let mut e = match description { - Some((_, description)) => description, - None => Self { - variants: Vec::new(), - pos: None, - archaic: false, - note: false, - description: None, - comment: None, - }, - }; - e.variants = variants; - e.comment = comment.map(|c| c.2.to_owned()); - Ok((input, e)) + let mut e = match description { + Some((_, description)) => description, + None => Self { + variants: Vec::new(), + pos: None, + archaic: false, + note: false, + description: None, + comment: None, + }, + }; + e.variants = variants; + e.comment = comment.map(|c| c.2.to_owned()); + Ok((input, e)) + }) + .parse_next(input) } fn parse_description(input: &str) -> IResult<&str, Self> { - let (input, (pos, archaic, note, description)) = ( - winnow::combinator::opt((winnow::ascii::space1, Pos::parse)), - winnow::combinator::opt((winnow::ascii::space1, "(-)")), - winnow::combinator::opt((winnow::ascii::space1, "--")), - winnow::combinator::opt(( - winnow::ascii::space1, - winnow::token::take_till0(('\n', '\r', '#')), - )), - ) - .parse_next(input)?; + trace("description", move |input| { + let (input, (pos, archaic, note, description)) = ( + winnow::combinator::opt((winnow::ascii::space1, Pos::parse)), + winnow::combinator::opt((winnow::ascii::space1, "(-)")), + winnow::combinator::opt((winnow::ascii::space1, "--")), + winnow::combinator::opt(( + winnow::ascii::space1, + winnow::token::take_till0(('\n', '\r', '#')), + )), + ) + .parse_next(input)?; - let variants = Vec::new(); - let pos = pos.map(|(_, p)| p); - let archaic = archaic.is_some(); - let note = note.is_some(); - let description = description.map(|(_, d)| d.to_owned()); - let e = Self { - variants, - pos, - archaic, - note, - description, - comment: None, - }; - Ok((input, e)) + let variants = Vec::new(); + let pos = pos.map(|(_, p)| p); + let archaic = archaic.is_some(); + let note = note.is_some(); + let description = description.map(|(_, d)| d.to_owned()); + let e = Self { + variants, + pos, + archaic, + note, + description, + comment: None, + }; + Ok((input, e)) + }) + .parse_next(input) } } @@ -298,19 +309,25 @@ mod test_entry { impl Variant { pub fn parse(input: &str) -> IResult<&str, Self> { - let types = winnow::combinator::separated1(Type::parse, winnow::ascii::space1); - let sep = (":", winnow::ascii::space0); - let (input, (types, word)) = - winnow::combinator::separated_pair(types, sep, word).parse_next(input)?; - let v = Self { types, word }; - Ok((input, v)) + trace("variant", move |input| { + let types = winnow::combinator::separated1(Type::parse, winnow::ascii::space1); + let sep = (":", winnow::ascii::space0); + let (input, (types, word)) = + winnow::combinator::separated_pair(types, sep, word).parse_next(input)?; + let v = Self { types, word }; + Ok((input, v)) + }) + .parse_next(input) } } fn word(input: &str) -> IResult<&str, String> { - winnow::token::take_till1(|item: char| item.is_ascii_whitespace()) - .map(|s: &str| s.to_owned().replace('_', " ")) - .parse_next(input) + trace("word", move |input| { + winnow::token::take_till1(|item: char| item.is_ascii_whitespace()) + .map(|s: &str| s.to_owned().replace('_', " ")) + .parse_next(input) + }) + .parse_next(input) } #[cfg(test)] @@ -382,12 +399,15 @@ mod test_variant { impl Type { pub fn parse(input: &str) -> IResult<&str, Type> { - let (input, category) = Category::parse(input)?; - let (input, tag) = winnow::combinator::opt(Tag::parse).parse_next(input)?; - let (input, num) = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?; - let num = num.map(|s| s.parse().expect("parser ensured its a number")); - let t = Type { category, tag, num }; - Ok((input, t)) + trace("type", move |input| { + let (input, category) = Category::parse(input)?; + let (input, tag) = winnow::combinator::opt(Tag::parse).parse_next(input)?; + let (input, num) = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?; + let num = num.map(|s| s.parse().expect("parser ensured its a number")); + let t = Type { category, tag, num }; + Ok((input, t)) + }) + .parse_next(input) } } @@ -439,18 +459,21 @@ mod test_type { impl Category { pub fn parse(input: &str) -> IResult<&str, Category> { - let symbols = winnow::token::one_of(['A', 'B', 'Z', 'C', 'D', '_']); - symbols - .map(|c| match c { - 'A' => Category::American, - 'B' => Category::BritishIse, - 'Z' => Category::BritishIze, - 'C' => Category::Canadian, - 'D' => Category::Australian, - '_' => Category::Other, - _ => unreachable!("parser won't select this option"), - }) - .parse_next(input) + trace("category", move |input| { + let symbols = winnow::token::one_of(['A', 'B', 'Z', 'C', 'D', '_']); + symbols + .map(|c| match c { + 'A' => Category::American, + 'B' => Category::BritishIse, + 'Z' => Category::BritishIze, + 'C' => Category::Canadian, + 'D' => Category::Australian, + '_' => Category::Other, + _ => unreachable!("parser won't select this option"), + }) + .parse_next(input) + }) + .parse_next(input) } } @@ -475,17 +498,20 @@ mod test_category { impl Tag { pub fn parse(input: &str) -> IResult<&str, Tag> { - let symbols = winnow::token::one_of(['.', 'v', 'V', '-', 'x']); - symbols - .map(|c| match c { - '.' => Tag::Eq, - 'v' => Tag::Variant, - 'V' => Tag::Seldom, - '-' => Tag::Possible, - 'x' => Tag::Improper, - _ => unreachable!("parser won't select this option"), - }) - .parse_next(input) + trace("tag", move |input| { + let symbols = winnow::token::one_of(['.', 'v', 'V', '-', 'x']); + symbols + .map(|c| match c { + '.' => Tag::Eq, + 'v' => Tag::Variant, + 'V' => Tag::Seldom, + '-' => Tag::Possible, + 'x' => Tag::Improper, + _ => unreachable!("parser won't select this option"), + }) + .parse_next(input) + }) + .parse_next(input) } } @@ -510,12 +536,15 @@ mod test_tag { impl Pos { pub fn parse(input: &str) -> IResult<&str, Pos> { - winnow::branch::alt(( - "".value(Pos::Noun), - "".value(Pos::Verb), - "".value(Pos::Adjective), - "".value(Pos::Adverb), - )) + trace("pos", move |input| { + winnow::branch::alt(( + "".value(Pos::Noun), + "".value(Pos::Verb), + "".value(Pos::Adjective), + "".value(Pos::Adverb), + )) + .parse_next(input) + }) .parse_next(input) } } From 9426924f8f4136aae512c2275d6971ac1cf42806 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 14 Jul 2023 12:33:02 -0500 Subject: [PATCH 3/7] fix: Hide optional dependencies --- crates/dictgen/Cargo.toml | 4 ++-- crates/typos-cli/Cargo.toml | 4 ++-- crates/varcon-core/Cargo.toml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/dictgen/Cargo.toml b/crates/dictgen/Cargo.toml index 648a746..3045ba3 100644 --- a/crates/dictgen/Cargo.toml +++ b/crates/dictgen/Cargo.toml @@ -13,8 +13,8 @@ include.workspace = true [features] default = ["std"] std = [] -codegen = ["std", "phf_codegen"] -map = ["phf", "phf_shared"] +codegen = ["std", "dep:phf_codegen"] +map = ["dep:phf", "dep:phf_shared"] [dependencies] unicase = "2.6" diff --git a/crates/typos-cli/Cargo.toml b/crates/typos-cli/Cargo.toml index ad938d4..194dd95 100644 --- a/crates/typos-cli/Cargo.toml +++ b/crates/typos-cli/Cargo.toml @@ -32,8 +32,8 @@ pre-release-replacements = [ [features] default = ["dict", "vars"] -dict = ["typos-dict"] -vars = ["typos-vars"] +dict = ["dep:typos-dict"] +vars = ["dep:typos-vars"] [[bin]] diff --git a/crates/varcon-core/Cargo.toml b/crates/varcon-core/Cargo.toml index e675955..a55eacf 100644 --- a/crates/varcon-core/Cargo.toml +++ b/crates/varcon-core/Cargo.toml @@ -12,8 +12,8 @@ include.workspace = true [features] default = [] -parser = ["winnow"] -flags = ["enumflags2"] +parser = ["dep:winnow"] +flags = ["dep:enumflags2"] [dependencies] winnow = { version = "0.4.9", optional = true } From 6cc3e3f9e033285d3c10dac138a90317172bb800 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 14 Jul 2023 12:44:39 -0500 Subject: [PATCH 4/7] refactor(varcon)!: Upgrade to winnow 0.5 --- Cargo.lock | 15 +++- crates/varcon-core/Cargo.toml | 2 +- crates/varcon-core/src/parser.rs | 143 ++++++++++++++++--------------- 3 files changed, 88 insertions(+), 72 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d492fef..ecb55b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1610,7 +1610,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "winnow", + "winnow 0.4.9", ] [[package]] @@ -1648,7 +1648,7 @@ dependencies = [ "thiserror", "unicode-segmentation", "unicode-xid", - "winnow", + "winnow 0.4.9", ] [[package]] @@ -1831,7 +1831,7 @@ name = "varcon-core" version = "2.2.12" dependencies = [ "enumflags2", - "winnow", + "winnow 0.5.0", ] [[package]] @@ -2127,3 +2127,12 @@ checksum = "81a2094c43cc94775293eaa0e499fbc30048a6d824ac82c0351a8c0bf9112529" dependencies = [ "memchr", ] + +[[package]] +name = "winnow" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7" +dependencies = [ + "memchr", +] diff --git a/crates/varcon-core/Cargo.toml b/crates/varcon-core/Cargo.toml index a55eacf..671ef17 100644 --- a/crates/varcon-core/Cargo.toml +++ b/crates/varcon-core/Cargo.toml @@ -16,7 +16,7 @@ parser = ["dep:winnow"] flags = ["dep:enumflags2"] [dependencies] -winnow = { version = "0.4.9", optional = true } +winnow = { version = "0.5.0", optional = true } enumflags2 = { version = "0.7", optional = true } [package.metadata.docs.rs] diff --git a/crates/varcon-core/src/parser.rs b/crates/varcon-core/src/parser.rs index 4eeb9cc..a770ba9 100644 --- a/crates/varcon-core/src/parser.rs +++ b/crates/varcon-core/src/parser.rs @@ -18,10 +18,8 @@ impl<'i> Iterator for ClusterIter<'i> { type Item = Cluster; fn next(&mut self) -> Option { - let i = self.input.trim_start(); - let (i, c) = Cluster::parse(i).ok()?; - self.input = i; - Some(c) + self.input = self.input.trim_start(); + Cluster::parse.parse_next(&mut self.input).ok() } } @@ -62,8 +60,8 @@ A Cv: acknowledgment's / Av B C: acknowledgement's } impl Cluster { - pub fn parse(input: &str) -> IResult<&str, Self> { - trace("cluster", move |input| { + pub fn parse(input: &mut &str) -> PResult { + trace("cluster", move |input: &mut &str| { let header = ( "#", winnow::ascii::space0, @@ -85,8 +83,7 @@ impl Cluster { ), winnow::combinator::repeat(0.., note), ); - let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = - cluster.parse_next(input)?; + let (header, entries, notes): (_, _, Vec<_>) = cluster.parse_next(input)?; let header = header.map(|s| s.2.to_owned()); let notes = notes.into_iter().map(|s| s.to_owned()).collect(); @@ -95,7 +92,7 @@ impl Cluster { entries, notes, }; - Ok((input, c)) + Ok(c) }) .parse_next(input) } @@ -107,15 +104,16 @@ mod test_cluster { #[test] fn test_basic() { - let (input, actual) = Cluster::parse( - "# acknowledgment (level 35) + let (input, actual) = Cluster::parse + .parse_peek( + "# acknowledgment (level 35) A Cv: acknowledgment / Av B C: acknowledgement A Cv: acknowledgments / Av B C: acknowledgements A Cv: acknowledgment's / Av B C: acknowledgement's ", - ) - .unwrap(); + ) + .unwrap(); assert_eq!(input, "\n"); assert_eq!( actual.header, @@ -127,8 +125,9 @@ A Cv: acknowledgment's / Av B C: acknowledgement's #[test] fn test_notes() { - let (input, actual) = Cluster::parse( - "# coloration (level 50) + let (input, actual) = Cluster::parse + .parse_peek( + "# coloration (level 50) A B C: coloration / B. Cv: colouration A B C: colorations / B. Cv: colourations A B C: coloration's / B. Cv: colouration's @@ -136,8 +135,8 @@ A B C: coloration's / B. Cv: colouration's ## variant for British Engl or some reason ", - ) - .unwrap(); + ) + .unwrap(); assert_eq!(input, "\n"); assert_eq!( actual.header, @@ -149,18 +148,18 @@ A B C: coloration's / B. Cv: colouration's } impl Entry { - pub fn parse(input: &str) -> IResult<&str, Self> { - trace("entry", move |input| { + pub fn parse(input: &mut &str) -> PResult { + trace("entry", move |input: &mut &str| { let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0); - let (input, variants) = + let variants = winnow::combinator::separated1(Variant::parse, var_sep).parse_next(input)?; let desc_sep = (winnow::ascii::space0, '|'); - let (input, description) = + let description = winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?; let comment_sep = (winnow::ascii::space0, '#'); - let (input, comment) = winnow::combinator::opt(( + let comment = winnow::combinator::opt(( comment_sep, winnow::ascii::space1, winnow::ascii::not_line_ending, @@ -180,14 +179,14 @@ impl Entry { }; e.variants = variants; e.comment = comment.map(|c| c.2.to_owned()); - Ok((input, e)) + Ok(e) }) .parse_next(input) } - fn parse_description(input: &str) -> IResult<&str, Self> { - trace("description", move |input| { - let (input, (pos, archaic, note, description)) = ( + fn parse_description(input: &mut &str) -> PResult { + trace("description", move |input: &mut &str| { + let (pos, archaic, note, description) = ( winnow::combinator::opt((winnow::ascii::space1, Pos::parse)), winnow::combinator::opt((winnow::ascii::space1, "(-)")), winnow::combinator::opt((winnow::ascii::space1, "--")), @@ -211,7 +210,7 @@ impl Entry { description, comment: None, }; - Ok((input, e)) + Ok(e) }) .parse_next(input) } @@ -226,8 +225,9 @@ mod test_entry { fn test_variant_only() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = - Entry::parse("A Cv: acknowledgment's / Av B C: acknowledgement's\n").unwrap(); + let (input, actual) = Entry::parse + .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's\n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 2); assert_eq!(actual.pos, None); @@ -240,7 +240,9 @@ mod test_entry { fn test_description() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse("A C: prize / B: prise | otherwise\n").unwrap(); + let (input, actual) = Entry::parse + .parse_peek("A C: prize / B: prise | otherwise\n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 2); assert_eq!(actual.pos, None); @@ -253,7 +255,9 @@ mod test_entry { fn test_pos() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse("A B C: practice / AV Cv: practise | \n").unwrap(); + let (input, actual) = Entry::parse + .parse_peek("A B C: practice / AV Cv: practise | \n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 2); assert_eq!(actual.pos, Some(Pos::Noun)); @@ -266,7 +270,9 @@ mod test_entry { fn test_archaic() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse("A: bark / Av B: barque | (-) ship\n").unwrap(); + let (input, actual) = Entry::parse + .parse_peek("A: bark / Av B: barque | (-) ship\n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 2); assert_eq!(actual.pos, None); @@ -279,7 +285,7 @@ mod test_entry { fn test_note() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse("_: cabbies | -- plural\n").unwrap(); + let (input, actual) = Entry::parse.parse_peek("_: cabbies | -- plural\n").unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 1); assert_eq!(actual.pos, None); @@ -290,7 +296,7 @@ mod test_entry { #[test] fn test_trailing_comment() { - let (input, actual) = Entry::parse( + let (input, actual) = Entry::parse.parse_peek( "A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n", ) .unwrap(); @@ -308,21 +314,21 @@ mod test_entry { } impl Variant { - pub fn parse(input: &str) -> IResult<&str, Self> { - trace("variant", move |input| { + pub fn parse(input: &mut &str) -> PResult { + trace("variant", move |input: &mut &str| { let types = winnow::combinator::separated1(Type::parse, winnow::ascii::space1); let sep = (":", winnow::ascii::space0); - let (input, (types, word)) = + let (types, word) = winnow::combinator::separated_pair(types, sep, word).parse_next(input)?; let v = Self { types, word }; - Ok((input, v)) + Ok(v) }) .parse_next(input) } } -fn word(input: &str) -> IResult<&str, String> { - trace("word", move |input| { +fn word(input: &mut &str) -> PResult { + trace("word", move |input: &mut &str| { winnow::token::take_till1(|item: char| item.is_ascii_whitespace()) .map(|s: &str| s.to_owned().replace('_', " ")) .parse_next(input) @@ -338,7 +344,7 @@ mod test_variant { fn test_valid() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Variant::parse("A Cv: acknowledgment ").unwrap(); + let (input, actual) = Variant::parse.parse_peek("A Cv: acknowledgment ").unwrap(); assert_eq!(input, " "); assert_eq!( actual.types, @@ -360,8 +366,9 @@ mod test_variant { #[test] fn test_extra() { - let (input, actual) = - Variant::parse("A Cv: acknowledgment's / Av B C: acknowledgement's").unwrap(); + let (input, actual) = Variant::parse + .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's") + .unwrap(); assert_eq!(input, " / Av B C: acknowledgement's"); assert_eq!( actual.types, @@ -383,7 +390,7 @@ mod test_variant { #[test] fn test_underscore() { - let (input, actual) = Variant::parse("_: air_gun\n").unwrap(); + let (input, actual) = Variant::parse.parse_peek("_: air_gun\n").unwrap(); assert_eq!(input, "\n"); assert_eq!( actual.types, @@ -398,14 +405,14 @@ mod test_variant { } impl Type { - pub fn parse(input: &str) -> IResult<&str, Type> { - trace("type", move |input| { - let (input, category) = Category::parse(input)?; - let (input, tag) = winnow::combinator::opt(Tag::parse).parse_next(input)?; - let (input, num) = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?; + pub fn parse(input: &mut &str) -> PResult { + trace("type", move |input: &mut &str| { + let category = Category::parse(input)?; + let tag = winnow::combinator::opt(Tag::parse).parse_next(input)?; + let num = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?; let num = num.map(|s| s.parse().expect("parser ensured its a number")); let t = Type { category, tag, num }; - Ok((input, t)) + Ok(t) }) .parse_next(input) } @@ -419,13 +426,13 @@ mod test_type { fn test_valid() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Type::parse("A ").unwrap(); + let (input, actual) = Type::parse.parse_peek("A ").unwrap(); assert_eq!(input, " "); assert_eq!(actual.category, Category::American); assert_eq!(actual.tag, None); assert_eq!(actual.num, None); - let (input, actual) = Type::parse("Bv ").unwrap(); + let (input, actual) = Type::parse.parse_peek("Bv ").unwrap(); assert_eq!(input, " "); assert_eq!(actual.category, Category::BritishIse); assert_eq!(actual.tag, Some(Tag::Variant)); @@ -434,13 +441,13 @@ mod test_type { #[test] fn test_extra() { - let (input, actual) = Type::parse("Z foobar").unwrap(); + let (input, actual) = Type::parse.parse_peek("Z foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual.category, Category::BritishIze); assert_eq!(actual.tag, None); assert_eq!(actual.num, None); - let (input, actual) = Type::parse("C- foobar").unwrap(); + let (input, actual) = Type::parse.parse_peek("C- foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual.category, Category::Canadian); assert_eq!(actual.tag, Some(Tag::Possible)); @@ -449,7 +456,7 @@ mod test_type { #[test] fn test_num() { - let (input, actual) = Type::parse("Av1 ").unwrap(); + let (input, actual) = Type::parse.parse_peek("Av1 ").unwrap(); assert_eq!(input, " "); assert_eq!(actual.category, Category::American); assert_eq!(actual.tag, Some(Tag::Variant)); @@ -458,8 +465,8 @@ mod test_type { } impl Category { - pub fn parse(input: &str) -> IResult<&str, Category> { - trace("category", move |input| { + pub fn parse(input: &mut &str) -> PResult { + trace("category", move |input: &mut &str| { let symbols = winnow::token::one_of(['A', 'B', 'Z', 'C', 'D', '_']); symbols .map(|c| match c { @@ -483,22 +490,22 @@ mod test_category { #[test] fn test_valid() { - let (input, actual) = Category::parse("A").unwrap(); + let (input, actual) = Category::parse.parse_peek("A").unwrap(); assert_eq!(input, ""); assert_eq!(actual, Category::American); } #[test] fn test_extra() { - let (input, actual) = Category::parse("_ foobar").unwrap(); + let (input, actual) = Category::parse.parse_peek("_ foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual, Category::Other); } } impl Tag { - pub fn parse(input: &str) -> IResult<&str, Tag> { - trace("tag", move |input| { + pub fn parse(input: &mut &str) -> PResult { + trace("tag", move |input: &mut &str| { let symbols = winnow::token::one_of(['.', 'v', 'V', '-', 'x']); symbols .map(|c| match c { @@ -521,23 +528,23 @@ mod test_tag { #[test] fn test_valid() { - let (input, actual) = Tag::parse(".").unwrap(); + let (input, actual) = Tag::parse.parse_peek(".").unwrap(); assert_eq!(input, ""); assert_eq!(actual, Tag::Eq); } #[test] fn test_extra() { - let (input, actual) = Tag::parse("x foobar").unwrap(); + let (input, actual) = Tag::parse.parse_peek("x foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual, Tag::Improper); } } impl Pos { - pub fn parse(input: &str) -> IResult<&str, Pos> { - trace("pos", move |input| { - winnow::branch::alt(( + pub fn parse(input: &mut &str) -> PResult { + trace("pos", move |input: &mut &str| { + winnow::combinator::alt(( "".value(Pos::Noun), "".value(Pos::Verb), "".value(Pos::Adjective), @@ -555,14 +562,14 @@ mod test_pos { #[test] fn test_valid() { - let (input, actual) = Pos::parse("").unwrap(); + let (input, actual) = Pos::parse.parse_peek("").unwrap(); assert_eq!(input, ""); assert_eq!(actual, Pos::Noun); } #[test] fn test_extra() { - let (input, actual) = Pos::parse(" foobar").unwrap(); + let (input, actual) = Pos::parse.parse_peek(" foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual, Pos::Adjective); } From 4fd45378562bb9f20c41e7bce9626f6e585c4acc Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 14 Jul 2023 12:48:41 -0500 Subject: [PATCH 5/7] fix(varcon)!: Make API independent of winnow --- crates/varcon-core/src/parser.rs | 113 +++++++++++++++++++++---------- 1 file changed, 77 insertions(+), 36 deletions(-) diff --git a/crates/varcon-core/src/parser.rs b/crates/varcon-core/src/parser.rs index a770ba9..87d63da 100644 --- a/crates/varcon-core/src/parser.rs +++ b/crates/varcon-core/src/parser.rs @@ -19,7 +19,7 @@ impl<'i> Iterator for ClusterIter<'i> { fn next(&mut self) -> Option { self.input = self.input.trim_start(); - Cluster::parse.parse_next(&mut self.input).ok() + Cluster::parse_.parse_next(&mut self.input).ok() } } @@ -60,7 +60,11 @@ A Cv: acknowledgment's / Av B C: acknowledgement's } impl Cluster { - pub fn parse(input: &mut &str) -> PResult { + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) + } + + fn parse_(input: &mut &str) -> PResult { trace("cluster", move |input: &mut &str| { let header = ( "#", @@ -79,7 +83,7 @@ impl Cluster { winnow::combinator::opt(header), winnow::combinator::repeat( 1.., - winnow::combinator::terminated(Entry::parse, winnow::ascii::line_ending), + winnow::combinator::terminated(Entry::parse_, winnow::ascii::line_ending), ), winnow::combinator::repeat(0.., note), ); @@ -104,7 +108,7 @@ mod test_cluster { #[test] fn test_basic() { - let (input, actual) = Cluster::parse + let (input, actual) = Cluster::parse_ .parse_peek( "# acknowledgment (level 35) A Cv: acknowledgment / Av B C: acknowledgement @@ -125,7 +129,7 @@ A Cv: acknowledgment's / Av B C: acknowledgement's #[test] fn test_notes() { - let (input, actual) = Cluster::parse + let (input, actual) = Cluster::parse_ .parse_peek( "# coloration (level 50) A B C: coloration / B. Cv: colouration @@ -148,11 +152,15 @@ A B C: coloration's / B. Cv: colouration's } impl Entry { - pub fn parse(input: &mut &str) -> PResult { + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) + } + + fn parse_(input: &mut &str) -> PResult { trace("entry", move |input: &mut &str| { let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0); let variants = - winnow::combinator::separated1(Variant::parse, var_sep).parse_next(input)?; + winnow::combinator::separated1(Variant::parse_, var_sep).parse_next(input)?; let desc_sep = (winnow::ascii::space0, '|'); let description = @@ -187,7 +195,7 @@ impl Entry { fn parse_description(input: &mut &str) -> PResult { trace("description", move |input: &mut &str| { let (pos, archaic, note, description) = ( - winnow::combinator::opt((winnow::ascii::space1, Pos::parse)), + winnow::combinator::opt((winnow::ascii::space1, Pos::parse_)), winnow::combinator::opt((winnow::ascii::space1, "(-)")), winnow::combinator::opt((winnow::ascii::space1, "--")), winnow::combinator::opt(( @@ -225,7 +233,7 @@ mod test_entry { fn test_variant_only() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse + let (input, actual) = Entry::parse_ .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's\n") .unwrap(); assert_eq!(input, "\n"); @@ -240,7 +248,7 @@ mod test_entry { fn test_description() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse + let (input, actual) = Entry::parse_ .parse_peek("A C: prize / B: prise | otherwise\n") .unwrap(); assert_eq!(input, "\n"); @@ -255,7 +263,7 @@ mod test_entry { fn test_pos() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse + let (input, actual) = Entry::parse_ .parse_peek("A B C: practice / AV Cv: practise | \n") .unwrap(); assert_eq!(input, "\n"); @@ -270,7 +278,7 @@ mod test_entry { fn test_archaic() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse + let (input, actual) = Entry::parse_ .parse_peek("A: bark / Av B: barque | (-) ship\n") .unwrap(); assert_eq!(input, "\n"); @@ -285,7 +293,9 @@ mod test_entry { fn test_note() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse.parse_peek("_: cabbies | -- plural\n").unwrap(); + let (input, actual) = Entry::parse_ + .parse_peek("_: cabbies | -- plural\n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 1); assert_eq!(actual.pos, None); @@ -296,7 +306,7 @@ mod test_entry { #[test] fn test_trailing_comment() { - let (input, actual) = Entry::parse.parse_peek( + let (input, actual) = Entry::parse_.parse_peek( "A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n", ) .unwrap(); @@ -314,9 +324,13 @@ mod test_entry { } impl Variant { - pub fn parse(input: &mut &str) -> PResult { + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) + } + + fn parse_(input: &mut &str) -> PResult { trace("variant", move |input: &mut &str| { - let types = winnow::combinator::separated1(Type::parse, winnow::ascii::space1); + let types = winnow::combinator::separated1(Type::parse_, winnow::ascii::space1); let sep = (":", winnow::ascii::space0); let (types, word) = winnow::combinator::separated_pair(types, sep, word).parse_next(input)?; @@ -344,7 +358,7 @@ mod test_variant { fn test_valid() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Variant::parse.parse_peek("A Cv: acknowledgment ").unwrap(); + let (input, actual) = Variant::parse_.parse_peek("A Cv: acknowledgment ").unwrap(); assert_eq!(input, " "); assert_eq!( actual.types, @@ -366,7 +380,7 @@ mod test_variant { #[test] fn test_extra() { - let (input, actual) = Variant::parse + let (input, actual) = Variant::parse_ .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's") .unwrap(); assert_eq!(input, " / Av B C: acknowledgement's"); @@ -390,7 +404,7 @@ mod test_variant { #[test] fn test_underscore() { - let (input, actual) = Variant::parse.parse_peek("_: air_gun\n").unwrap(); + let (input, actual) = Variant::parse_.parse_peek("_: air_gun\n").unwrap(); assert_eq!(input, "\n"); assert_eq!( actual.types, @@ -405,10 +419,14 @@ mod test_variant { } impl Type { - pub fn parse(input: &mut &str) -> PResult { + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) + } + + fn parse_(input: &mut &str) -> PResult { trace("type", move |input: &mut &str| { - let category = Category::parse(input)?; - let tag = winnow::combinator::opt(Tag::parse).parse_next(input)?; + let category = Category::parse_(input)?; + let tag = winnow::combinator::opt(Tag::parse_).parse_next(input)?; let num = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?; let num = num.map(|s| s.parse().expect("parser ensured its a number")); let t = Type { category, tag, num }; @@ -426,13 +444,13 @@ mod test_type { fn test_valid() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Type::parse.parse_peek("A ").unwrap(); + let (input, actual) = Type::parse_.parse_peek("A ").unwrap(); assert_eq!(input, " "); assert_eq!(actual.category, Category::American); assert_eq!(actual.tag, None); assert_eq!(actual.num, None); - let (input, actual) = Type::parse.parse_peek("Bv ").unwrap(); + let (input, actual) = Type::parse_.parse_peek("Bv ").unwrap(); assert_eq!(input, " "); assert_eq!(actual.category, Category::BritishIse); assert_eq!(actual.tag, Some(Tag::Variant)); @@ -441,13 +459,13 @@ mod test_type { #[test] fn test_extra() { - let (input, actual) = Type::parse.parse_peek("Z foobar").unwrap(); + let (input, actual) = Type::parse_.parse_peek("Z foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual.category, Category::BritishIze); assert_eq!(actual.tag, None); assert_eq!(actual.num, None); - let (input, actual) = Type::parse.parse_peek("C- foobar").unwrap(); + let (input, actual) = Type::parse_.parse_peek("C- foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual.category, Category::Canadian); assert_eq!(actual.tag, Some(Tag::Possible)); @@ -456,7 +474,7 @@ mod test_type { #[test] fn test_num() { - let (input, actual) = Type::parse.parse_peek("Av1 ").unwrap(); + let (input, actual) = Type::parse_.parse_peek("Av1 ").unwrap(); assert_eq!(input, " "); assert_eq!(actual.category, Category::American); assert_eq!(actual.tag, Some(Tag::Variant)); @@ -465,7 +483,11 @@ mod test_type { } impl Category { - pub fn parse(input: &mut &str) -> PResult { + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) + } + + fn parse_(input: &mut &str) -> PResult { trace("category", move |input: &mut &str| { let symbols = winnow::token::one_of(['A', 'B', 'Z', 'C', 'D', '_']); symbols @@ -490,21 +512,25 @@ mod test_category { #[test] fn test_valid() { - let (input, actual) = Category::parse.parse_peek("A").unwrap(); + let (input, actual) = Category::parse_.parse_peek("A").unwrap(); assert_eq!(input, ""); assert_eq!(actual, Category::American); } #[test] fn test_extra() { - let (input, actual) = Category::parse.parse_peek("_ foobar").unwrap(); + let (input, actual) = Category::parse_.parse_peek("_ foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual, Category::Other); } } impl Tag { - pub fn parse(input: &mut &str) -> PResult { + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) + } + + fn parse_(input: &mut &str) -> PResult { trace("tag", move |input: &mut &str| { let symbols = winnow::token::one_of(['.', 'v', 'V', '-', 'x']); symbols @@ -528,21 +554,25 @@ mod test_tag { #[test] fn test_valid() { - let (input, actual) = Tag::parse.parse_peek(".").unwrap(); + let (input, actual) = Tag::parse_.parse_peek(".").unwrap(); assert_eq!(input, ""); assert_eq!(actual, Tag::Eq); } #[test] fn test_extra() { - let (input, actual) = Tag::parse.parse_peek("x foobar").unwrap(); + let (input, actual) = Tag::parse_.parse_peek("x foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual, Tag::Improper); } } impl Pos { - pub fn parse(input: &mut &str) -> PResult { + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) + } + + fn parse_(input: &mut &str) -> PResult { trace("pos", move |input: &mut &str| { winnow::combinator::alt(( "".value(Pos::Noun), @@ -562,15 +592,26 @@ mod test_pos { #[test] fn test_valid() { - let (input, actual) = Pos::parse.parse_peek("").unwrap(); + let (input, actual) = Pos::parse_.parse_peek("").unwrap(); assert_eq!(input, ""); assert_eq!(actual, Pos::Noun); } #[test] fn test_extra() { - let (input, actual) = Pos::parse.parse_peek(" foobar").unwrap(); + let (input, actual) = Pos::parse_.parse_peek(" foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual, Pos::Adjective); } } + +#[derive(Debug)] +pub struct ParseError; + +impl std::fmt::Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "invalid") + } +} + +impl std::error::Error for ParseError {} From 6f40717c8fd228852bcd4056d284aa81f0a90ab0 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 14 Jul 2023 13:28:36 -0500 Subject: [PATCH 6/7] refactor(typos): Switch to BStr for better debugging --- crates/typos/src/tokens.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index d36c94c..f2997cc 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -1,4 +1,5 @@ use bstr::ByteSlice; +use winnow::BStr; /// Define rules for tokenizaing a buffer. #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -48,7 +49,9 @@ impl Tokenizer { let iter = if self.unicode && !ByteSlice::is_ascii(content.as_bytes()) { itertools::Either::Left(unicode_parser::iter_identifiers(content)) } else { - itertools::Either::Right(ascii_parser::iter_identifiers(content.as_bytes())) + itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new( + content.as_bytes(), + ))) }; iter.map(move |identifier| self.transform(identifier, content.as_bytes())) } @@ -58,7 +61,7 @@ impl Tokenizer { let iter = Utf8Chunks::new(content).flat_map(unicode_parser::iter_identifiers); itertools::Either::Left(iter) } else { - itertools::Either::Right(ascii_parser::iter_identifiers(content)) + itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new(content))) }; iter.map(move |identifier| self.transform(identifier, content)) } @@ -630,7 +633,9 @@ mod unicode_parser { mod ascii_parser { use super::parser::next_identifier; - pub(crate) fn iter_identifiers(mut input: &[u8]) -> impl Iterator { + use winnow::BStr; + + pub(crate) fn iter_identifiers(mut input: &BStr) -> impl Iterator { std::iter::from_fn(move || match next_identifier(input) { Ok((i, o)) => { input = i; From b6c78eb8ac8396ed583bee5a9e2ffcf17d74e09e Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 14 Jul 2023 13:26:45 -0500 Subject: [PATCH 7/7] refactor(typos): Upgrade to winnow 0.5 --- Cargo.lock | 2 +- crates/typos/Cargo.toml | 2 +- crates/typos/src/tokens.rs | 71 +++++++++++++++++++------------------- 3 files changed, 38 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ecb55b7..7fc4ab8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1648,7 +1648,7 @@ dependencies = [ "thiserror", "unicode-segmentation", "unicode-xid", - "winnow 0.4.9", + "winnow 0.5.0", ] [[package]] diff --git a/crates/typos/Cargo.toml b/crates/typos/Cargo.toml index 27eb0d7..53ea8fd 100644 --- a/crates/typos/Cargo.toml +++ b/crates/typos/Cargo.toml @@ -14,7 +14,7 @@ include.workspace = true [dependencies] anyhow = "1.0" thiserror = "1.0" -winnow = "0.4.9" +winnow = "0.5.0" unicode-xid = "0.2.4" once_cell = "1.17.2" serde = { version = "1.0", features = ["derive"] } diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index f2997cc..92b2d56 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -129,6 +129,7 @@ impl<'s> Iterator for Utf8Chunks<'s> { mod parser { use winnow::combinator::*; + use winnow::error::ParserError; use winnow::prelude::*; use winnow::stream::AsBStr; use winnow::stream::AsChar; @@ -138,7 +139,7 @@ mod parser { use winnow::token::*; use winnow::trace::trace; - pub(crate) fn next_identifier(input: T) -> IResult::Slice> + pub(crate) fn next_identifier(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -147,7 +148,7 @@ mod parser { preceded(ignore, identifier).parse_next(input) } - fn identifier(input: T) -> IResult::Slice> + fn identifier(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -160,7 +161,7 @@ mod parser { trace("identifier", take_while(1.., is_xid_continue)).parse_next(input) } - fn ignore(input: T) -> IResult::Slice> + fn ignore(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -189,7 +190,7 @@ mod parser { .parse_next(input) } - fn sep1(input: T) -> IResult::Slice> + fn sep1(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -202,7 +203,7 @@ mod parser { .parse_next(input) } - fn other(input: T) -> IResult::Slice> + fn other(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -219,7 +220,7 @@ mod parser { .parse_next(input) } - fn ordinal_literal(input: T) -> IResult::Slice> + fn ordinal_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -244,7 +245,7 @@ mod parser { .parse_next(input) } - fn dec_literal(input: T) -> IResult::Slice> + fn dec_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -253,7 +254,7 @@ mod parser { trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input) } - fn hex_literal(input: T) -> IResult::Slice> + fn hex_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -266,7 +267,7 @@ mod parser { .parse_next(input) } - fn css_color(input: T) -> IResult::Slice> + fn css_color(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -285,7 +286,7 @@ mod parser { .parse_next(input) } - fn uuid_literal(input: T) -> IResult::Slice> + fn uuid_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -322,7 +323,7 @@ mod parser { .parse_next(input) } - fn hash_literal(input: T) -> IResult::Slice> + fn hash_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -348,17 +349,18 @@ mod parser { .parse_next(input) } - fn base64_literal(input: T) -> IResult::Slice> + fn base64_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - trace("base64", move |input: T| { - let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?; + trace("base64", move |input: &mut T| { + let start = input.checkpoint(); + let captured = take_while(1.., is_base64_digit).parse_next(input)?; const CHUNK: usize = 4; - let padding_offset = input.offset_to(&padding); + let padding_offset = input.offset_from(&start); let mut padding_len = CHUNK - padding_offset % CHUNK; if padding_len == CHUNK { padding_len = 0; @@ -371,21 +373,22 @@ mod parser { .iter() .all(|c| !['/', '+'].contains(&c.as_char())) { - return Err(winnow::error::ErrMode::Backtrack( - winnow::error::Error::new(input, winnow::error::ErrorKind::Slice), + return Err(winnow::error::ErrMode::from_error_kind( + input, + winnow::error::ErrorKind::Slice, )); } - let (after, _) = - take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?; + take_while(padding_len..=padding_len, is_base64_padding).parse_next(input)?; - let after_offset = input.offset_to(&after); + let after_offset = input.offset_from(&start); + input.reset(start); Ok(input.next_slice(after_offset)) }) .parse_next(input) } - fn email_literal(input: T) -> IResult::Slice> + fn email_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -403,7 +406,7 @@ mod parser { .parse_next(input) } - fn url_literal(input: T) -> IResult::Slice> + fn url_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -432,7 +435,7 @@ mod parser { .parse_next(input) } - fn url_userinfo(input: T) -> IResult::Slice> + fn url_userinfo(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -449,7 +452,7 @@ mod parser { .parse_next(input) } - fn c_escape(input: T) -> IResult::Slice> + fn c_escape(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -466,7 +469,7 @@ mod parser { .parse_next(input) } - fn printf(input: T) -> IResult::Slice> + fn printf(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -475,13 +478,13 @@ mod parser { trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input) } - fn take_many0(mut f: F) -> impl FnMut(I) -> IResult::Slice, E> + fn take_many0(mut f: F) -> impl Parser::Slice, E> where I: Stream, - F: winnow::Parser::Slice, E>, - E: winnow::error::ParseError, + F: Parser::Slice, E>, + E: ParserError, { - move |i: I| { + move |i: &mut I| { repeat(0.., f.by_ref()) .map(|()| ()) .recognize() @@ -619,9 +622,8 @@ mod unicode_parser { use super::parser::next_identifier; pub(crate) fn iter_identifiers(mut input: &str) -> impl Iterator { - std::iter::from_fn(move || match next_identifier(input) { - Ok((i, o)) => { - input = i; + std::iter::from_fn(move || match next_identifier(&mut input) { + Ok(o) => { debug_assert_ne!(o, ""); Some(o) } @@ -636,9 +638,8 @@ mod ascii_parser { use winnow::BStr; pub(crate) fn iter_identifiers(mut input: &BStr) -> impl Iterator { - std::iter::from_fn(move || match next_identifier(input) { - Ok((i, o)) => { - input = i; + std::iter::from_fn(move || match next_identifier(&mut input) { + Ok(o) => { debug_assert_ne!(o, b""); // This is safe because we've checked that the strings are a subset of ASCII // characters.