diff --git a/Cargo.lock b/Cargo.lock index 34ed77d..7fc4ab8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1610,7 +1610,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "winnow", + "winnow 0.4.9", ] [[package]] @@ -1648,7 +1648,7 @@ dependencies = [ "thiserror", "unicode-segmentation", "unicode-xid", - "winnow", + "winnow 0.5.0", ] [[package]] @@ -1831,7 +1831,7 @@ name = "varcon-core" version = "2.2.12" dependencies = [ "enumflags2", - "winnow", + "winnow 0.5.0", ] [[package]] @@ -2121,9 +2121,18 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winnow" -version = "0.4.6" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699" +checksum = "81a2094c43cc94775293eaa0e499fbc30048a6d824ac82c0351a8c0bf9112529" +dependencies = [ + "memchr", +] + +[[package]] +name = "winnow" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7" dependencies = [ "memchr", ] diff --git a/crates/dictgen/Cargo.toml b/crates/dictgen/Cargo.toml index 648a746..3045ba3 100644 --- a/crates/dictgen/Cargo.toml +++ b/crates/dictgen/Cargo.toml @@ -13,8 +13,8 @@ include.workspace = true [features] default = ["std"] std = [] -codegen = ["std", "phf_codegen"] -map = ["phf", "phf_shared"] +codegen = ["std", "dep:phf_codegen"] +map = ["dep:phf", "dep:phf_shared"] [dependencies] unicase = "2.6" diff --git a/crates/typos-cli/Cargo.toml b/crates/typos-cli/Cargo.toml index ad938d4..194dd95 100644 --- a/crates/typos-cli/Cargo.toml +++ b/crates/typos-cli/Cargo.toml @@ -32,8 +32,8 @@ pre-release-replacements = [ [features] default = ["dict", "vars"] -dict = ["typos-dict"] -vars = ["typos-vars"] +dict = ["dep:typos-dict"] +vars = ["dep:typos-vars"] [[bin]] diff --git a/crates/typos/Cargo.toml b/crates/typos/Cargo.toml index 5aebf42..53ea8fd 100644 --- a/crates/typos/Cargo.toml +++ b/crates/typos/Cargo.toml @@ -14,7 +14,7 @@ include.workspace = true [dependencies] anyhow = "1.0" thiserror = "1.0" -winnow = "0.4.6" +winnow = "0.5.0" unicode-xid = "0.2.4" once_cell = "1.17.2" serde = { version = "1.0", features = ["derive"] } diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 6ac6ce1..92b2d56 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -1,4 +1,5 @@ use bstr::ByteSlice; +use winnow::BStr; /// Define rules for tokenizaing a buffer. #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -48,7 +49,9 @@ impl Tokenizer { let iter = if self.unicode && !ByteSlice::is_ascii(content.as_bytes()) { itertools::Either::Left(unicode_parser::iter_identifiers(content)) } else { - itertools::Either::Right(ascii_parser::iter_identifiers(content.as_bytes())) + itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new( + content.as_bytes(), + ))) }; iter.map(move |identifier| self.transform(identifier, content.as_bytes())) } @@ -58,7 +61,7 @@ impl Tokenizer { let iter = Utf8Chunks::new(content).flat_map(unicode_parser::iter_identifiers); itertools::Either::Left(iter) } else { - itertools::Either::Right(ascii_parser::iter_identifiers(content)) + itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new(content))) }; iter.map(move |identifier| self.transform(identifier, content)) } @@ -126,6 +129,7 @@ impl<'s> Iterator for Utf8Chunks<'s> { mod parser { use winnow::combinator::*; + use winnow::error::ParserError; use winnow::prelude::*; use winnow::stream::AsBStr; use winnow::stream::AsChar; @@ -133,8 +137,9 @@ mod parser { use winnow::stream::Stream; use winnow::stream::StreamIsPartial; use winnow::token::*; + use winnow::trace::trace; - pub(crate) fn next_identifier(input: T) -> IResult::Slice> + pub(crate) fn next_identifier(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -143,7 +148,7 @@ mod parser { preceded(ignore, identifier).parse_next(input) } - fn identifier(input: T) -> IResult::Slice> + fn identifier(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -153,36 +158,39 @@ mod parser { // `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd // or unexpected cases than strip off start characters to a word since we aren't doing a // proper word boundary parse - take_while(1.., is_xid_continue).parse_next(input) + trace("identifier", take_while(1.., is_xid_continue)).parse_next(input) } - fn ignore(input: T) -> IResult::Slice> + fn ignore(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - take_many0(alt(( - // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`, - // - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up - // - Make sure you always consume it - terminated(uuid_literal, peek(sep1)), - terminated(hash_literal, peek(sep1)), - terminated(base64_literal, peek(sep1)), // base64 should be quoted or something - terminated(ordinal_literal, peek(sep1)), - terminated(hex_literal, peek(sep1)), - terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words - terminated(email_literal, peek(sep1)), - terminated(url_literal, peek(sep1)), - terminated(css_color, peek(sep1)), - c_escape, - printf, - other, - ))) + trace( + "ignore", + take_many0(alt(( + // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`, + // - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up + // - Make sure you always consume it + terminated(uuid_literal, peek(sep1)), + terminated(hash_literal, peek(sep1)), + terminated(base64_literal, peek(sep1)), // base64 should be quoted or something + terminated(ordinal_literal, peek(sep1)), + terminated(hex_literal, peek(sep1)), + terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words + terminated(email_literal, peek(sep1)), + terminated(url_literal, peek(sep1)), + terminated(css_color, peek(sep1)), + c_escape, + printf, + other, + ))), + ) .parse_next(input) } - fn sep1(input: T) -> IResult::Slice> + fn sep1(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -195,21 +203,24 @@ mod parser { .parse_next(input) } - fn other(input: T) -> IResult::Slice> + fn other(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - ( - one_of(|c| !is_xid_continue(c)), - take_while(0.., is_ignore_char), + trace( + "other", + ( + one_of(|c| !is_xid_continue(c)), + take_while(0.., is_ignore_char), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } - fn ordinal_literal(input: T) -> IResult::Slice> + fn ordinal_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -221,26 +232,29 @@ mod parser { ['_'].contains(&c) } - ( - take_while(0.., is_sep), - take_while(1.., is_dec_digit), - alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))), - take_while(0.., is_sep), + trace( + "ordinal_literal", + ( + take_while(0.., is_sep), + take_while(1.., is_dec_digit), + alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))), + take_while(0.., is_sep), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } - fn dec_literal(input: T) -> IResult::Slice> + fn dec_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - take_while(1.., is_dec_digit_with_sep).parse_next(input) + trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input) } - fn hex_literal(input: T) -> IResult::Slice> + fn hex_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -253,57 +267,63 @@ mod parser { .parse_next(input) } - fn css_color(input: T) -> IResult::Slice> + fn css_color(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - preceded( - '#', - alt(( - terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)), - terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)), - )), + trace( + "color", + preceded( + '#', + alt(( + terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)), + terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)), + )), + ), ) .parse_next(input) } - fn uuid_literal(input: T) -> IResult::Slice> + fn uuid_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - alt(( - ( - take_while(8, is_lower_hex_digit), - '-', - take_while(4, is_lower_hex_digit), - '-', - take_while(4, is_lower_hex_digit), - '-', - take_while(4, is_lower_hex_digit), - '-', - take_while(12, is_lower_hex_digit), - ), - ( - take_while(8, is_upper_hex_digit), - '-', - take_while(4, is_upper_hex_digit), - '-', - take_while(4, is_upper_hex_digit), - '-', - take_while(4, is_upper_hex_digit), - '-', - take_while(12, is_upper_hex_digit), - ), - )) - .recognize() + trace( + "uuid", + alt(( + ( + take_while(8, is_lower_hex_digit), + '-', + take_while(4, is_lower_hex_digit), + '-', + take_while(4, is_lower_hex_digit), + '-', + take_while(4, is_lower_hex_digit), + '-', + take_while(12, is_lower_hex_digit), + ), + ( + take_while(8, is_upper_hex_digit), + '-', + take_while(4, is_upper_hex_digit), + '-', + take_while(4, is_upper_hex_digit), + '-', + take_while(4, is_upper_hex_digit), + '-', + take_while(12, is_upper_hex_digit), + ), + )) + .recognize(), + ) .parse_next(input) } - fn hash_literal(input: T) -> IResult::Slice> + fn hash_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -319,103 +339,120 @@ mod parser { // or more. const IGNORE_HEX_MIN: usize = 32; - alt(( - take_while(IGNORE_HEX_MIN.., is_lower_hex_digit), - take_while(IGNORE_HEX_MIN.., is_upper_hex_digit), - )) + trace( + "hash", + alt(( + take_while(IGNORE_HEX_MIN.., is_lower_hex_digit), + take_while(IGNORE_HEX_MIN.., is_upper_hex_digit), + )), + ) .parse_next(input) } - fn base64_literal(input: T) -> IResult::Slice> + fn base64_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?; + trace("base64", move |input: &mut T| { + let start = input.checkpoint(); + let captured = take_while(1.., is_base64_digit).parse_next(input)?; - const CHUNK: usize = 4; - let padding_offset = input.offset_to(&padding); - let mut padding_len = CHUNK - padding_offset % CHUNK; - if padding_len == CHUNK { - padding_len = 0; - } + const CHUNK: usize = 4; + let padding_offset = input.offset_from(&start); + let mut padding_len = CHUNK - padding_offset % CHUNK; + if padding_len == CHUNK { + padding_len = 0; + } - if captured.slice_len() < 90 - && padding_len == 0 - && captured - .as_bstr() - .iter() - .all(|c| !['/', '+'].contains(&c.as_char())) - { - return Err(winnow::error::ErrMode::Backtrack( - winnow::error::Error::new(input, winnow::error::ErrorKind::Slice), - )); - } + if captured.slice_len() < 90 + && padding_len == 0 + && captured + .as_bstr() + .iter() + .all(|c| !['/', '+'].contains(&c.as_char())) + { + return Err(winnow::error::ErrMode::from_error_kind( + input, + winnow::error::ErrorKind::Slice, + )); + } - let (after, _) = - take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?; + take_while(padding_len..=padding_len, is_base64_padding).parse_next(input)?; - let after_offset = input.offset_to(&after); - Ok(input.next_slice(after_offset)) + let after_offset = input.offset_from(&start); + input.reset(start); + Ok(input.next_slice(after_offset)) + }) + .parse_next(input) } - fn email_literal(input: T) -> IResult::Slice> + fn email_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - ( - take_while(1.., is_localport_char), - '@', - take_while(1.., is_domain_char), - ) - .recognize() - .parse_next(input) - } - - fn url_literal(input: T) -> IResult::Slice> - where - T: Stream + StreamIsPartial + PartialEq, - ::Slice: AsBStr + SliceLen + Default, - ::Token: AsChar + Copy, - { - ( - opt(terminated( - take_while(1.., is_scheme_char), - // HACK: Technically you can skip `//` if you don't have a domain but that would - // get messy to support. - (':', '/', '/'), - )), + trace( + "email", ( - opt(terminated(url_userinfo, '@')), + take_while(1.., is_localport_char), + '@', take_while(1.., is_domain_char), - opt(preceded(':', take_while(1.., AsChar::is_dec_digit))), - ), - '/', - // HACK: Too lazy to enumerate - take_while(0.., is_path_query_fragment), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } - fn url_userinfo(input: T) -> IResult::Slice> + fn url_literal(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - ( - take_while(1.., is_localport_char), - opt(preceded(':', take_while(0.., is_localport_char))), + trace( + "url", + ( + opt(terminated( + take_while(1.., is_scheme_char), + // HACK: Technically you can skip `//` if you don't have a domain but that would + // get messy to support. + (':', '/', '/'), + )), + ( + opt(terminated(url_userinfo, '@')), + take_while(1.., is_domain_char), + opt(preceded(':', take_while(1.., AsChar::is_dec_digit))), + ), + '/', + // HACK: Too lazy to enumerate + take_while(0.., is_path_query_fragment), + ) + .recognize(), ) - .recognize() - .parse_next(input) + .parse_next(input) } - fn c_escape(input: T) -> IResult::Slice> + fn url_userinfo(input: &mut T) -> PResult<::Slice, ()> + where + T: Stream + StreamIsPartial + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, + { + trace( + "userinfo", + ( + take_while(1.., is_localport_char), + opt(preceded(':', take_while(0.., is_localport_char))), + ) + .recognize(), + ) + .parse_next(input) + } + + fn c_escape(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, @@ -425,25 +462,29 @@ mod parser { // regular string that does escaping. The escaped letter might be part of a word, or it // might not be. Rather than guess and be wrong part of the time and correct people's words // incorrectly, we opt for just not evaluating it at all. - preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)).parse_next(input) + trace( + "escape", + preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)), + ) + .parse_next(input) } - fn printf(input: T) -> IResult::Slice> + fn printf(input: &mut T) -> PResult<::Slice, ()> where T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - preceded('%', take_while(1.., is_xid_continue)).parse_next(input) + trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input) } - fn take_many0(mut f: F) -> impl FnMut(I) -> IResult::Slice, E> + fn take_many0(mut f: F) -> impl Parser::Slice, E> where I: Stream, - F: winnow::Parser::Slice, E>, - E: winnow::error::ParseError, + F: Parser::Slice, E>, + E: ParserError, { - move |i: I| { + move |i: &mut I| { repeat(0.., f.by_ref()) .map(|()| ()) .recognize() @@ -581,9 +622,8 @@ mod unicode_parser { use super::parser::next_identifier; pub(crate) fn iter_identifiers(mut input: &str) -> impl Iterator { - std::iter::from_fn(move || match next_identifier(input) { - Ok((i, o)) => { - input = i; + std::iter::from_fn(move || match next_identifier(&mut input) { + Ok(o) => { debug_assert_ne!(o, ""); Some(o) } @@ -595,10 +635,11 @@ mod unicode_parser { mod ascii_parser { use super::parser::next_identifier; - pub(crate) fn iter_identifiers(mut input: &[u8]) -> impl Iterator { - std::iter::from_fn(move || match next_identifier(input) { - Ok((i, o)) => { - input = i; + use winnow::BStr; + + pub(crate) fn iter_identifiers(mut input: &BStr) -> impl Iterator { + std::iter::from_fn(move || match next_identifier(&mut input) { + Ok(o) => { debug_assert_ne!(o, b""); // This is safe because we've checked that the strings are a subset of ASCII // characters. diff --git a/crates/varcon-core/Cargo.toml b/crates/varcon-core/Cargo.toml index 971c460..671ef17 100644 --- a/crates/varcon-core/Cargo.toml +++ b/crates/varcon-core/Cargo.toml @@ -12,11 +12,11 @@ include.workspace = true [features] default = [] -parser = ["winnow"] -flags = ["enumflags2"] +parser = ["dep:winnow"] +flags = ["dep:enumflags2"] [dependencies] -winnow = { version = "0.4.6", optional = true } +winnow = { version = "0.5.0", optional = true } enumflags2 = { version = "0.7", optional = true } [package.metadata.docs.rs] diff --git a/crates/varcon-core/src/parser.rs b/crates/varcon-core/src/parser.rs index 8378e8c..87d63da 100644 --- a/crates/varcon-core/src/parser.rs +++ b/crates/varcon-core/src/parser.rs @@ -1,4 +1,5 @@ use winnow::prelude::*; +use winnow::trace::trace; use crate::*; @@ -17,10 +18,8 @@ impl<'i> Iterator for ClusterIter<'i> { type Item = Cluster; fn next(&mut self) -> Option { - let i = self.input.trim_start(); - let (i, c) = Cluster::parse(i).ok()?; - self.input = i; - Some(c) + self.input = self.input.trim_start(); + Cluster::parse_.parse_next(&mut self.input).ok() } } @@ -61,38 +60,45 @@ A Cv: acknowledgment's / Av B C: acknowledgement's } impl Cluster { - pub fn parse(input: &str) -> IResult<&str, Self> { - let header = ( - winnow::bytes::tag("#"), - winnow::character::space0, - winnow::character::not_line_ending, - winnow::character::line_ending, - ); - let note = winnow::sequence::preceded( - (winnow::bytes::tag("##"), winnow::character::space0), - winnow::sequence::terminated( - winnow::character::not_line_ending, - winnow::character::line_ending, - ), - ); - let mut cluster = ( - winnow::combinator::opt(header), - winnow::multi::many1(winnow::sequence::terminated( - Entry::parse, - winnow::character::line_ending, - )), - winnow::multi::many0(note), - ); - let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = cluster.parse_next(input)?; + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) + } - let header = header.map(|s| s.2.to_owned()); - let notes = notes.into_iter().map(|s| s.to_owned()).collect(); - let c = Self { - header, - entries, - notes, - }; - Ok((input, c)) + fn parse_(input: &mut &str) -> PResult { + trace("cluster", move |input: &mut &str| { + let header = ( + "#", + winnow::ascii::space0, + winnow::ascii::not_line_ending, + winnow::ascii::line_ending, + ); + let note = winnow::combinator::preceded( + ("##", winnow::ascii::space0), + winnow::combinator::terminated( + winnow::ascii::not_line_ending, + winnow::ascii::line_ending, + ), + ); + let mut cluster = ( + winnow::combinator::opt(header), + winnow::combinator::repeat( + 1.., + winnow::combinator::terminated(Entry::parse_, winnow::ascii::line_ending), + ), + winnow::combinator::repeat(0.., note), + ); + let (header, entries, notes): (_, _, Vec<_>) = cluster.parse_next(input)?; + + let header = header.map(|s| s.2.to_owned()); + let notes = notes.into_iter().map(|s| s.to_owned()).collect(); + let c = Self { + header, + entries, + notes, + }; + Ok(c) + }) + .parse_next(input) } } @@ -102,15 +108,16 @@ mod test_cluster { #[test] fn test_basic() { - let (input, actual) = Cluster::parse( - "# acknowledgment (level 35) + let (input, actual) = Cluster::parse_ + .parse_peek( + "# acknowledgment (level 35) A Cv: acknowledgment / Av B C: acknowledgement A Cv: acknowledgments / Av B C: acknowledgements A Cv: acknowledgment's / Av B C: acknowledgement's ", - ) - .unwrap(); + ) + .unwrap(); assert_eq!(input, "\n"); assert_eq!( actual.header, @@ -122,8 +129,9 @@ A Cv: acknowledgment's / Av B C: acknowledgement's #[test] fn test_notes() { - let (input, actual) = Cluster::parse( - "# coloration (level 50) + let (input, actual) = Cluster::parse_ + .parse_peek( + "# coloration (level 50) A B C: coloration / B. Cv: colouration A B C: colorations / B. Cv: colourations A B C: coloration's / B. Cv: colouration's @@ -131,8 +139,8 @@ A B C: coloration's / B. Cv: colouration's ## variant for British Engl or some reason ", - ) - .unwrap(); + ) + .unwrap(); assert_eq!(input, "\n"); assert_eq!( actual.header, @@ -144,65 +152,75 @@ A B C: coloration's / B. Cv: colouration's } impl Entry { - pub fn parse(input: &str) -> IResult<&str, Self> { - let var_sep = (winnow::character::space0, '/', winnow::character::space0); - let (input, variants) = - winnow::multi::separated1(Variant::parse, var_sep).parse_next(input)?; - - let desc_sep = (winnow::character::space0, '|'); - let (input, description) = - winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?; - - let comment_sep = (winnow::character::space0, '#'); - let (input, comment) = winnow::combinator::opt(( - comment_sep, - winnow::character::space1, - winnow::character::not_line_ending, - )) - .parse_next(input)?; - - let mut e = match description { - Some((_, description)) => description, - None => Self { - variants: Vec::new(), - pos: None, - archaic: false, - note: false, - description: None, - comment: None, - }, - }; - e.variants = variants; - e.comment = comment.map(|c| c.2.to_owned()); - Ok((input, e)) + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) } - fn parse_description(input: &str) -> IResult<&str, Self> { - let (input, (pos, archaic, note, description)) = ( - winnow::combinator::opt((winnow::character::space1, Pos::parse)), - winnow::combinator::opt((winnow::character::space1, "(-)")), - winnow::combinator::opt((winnow::character::space1, "--")), - winnow::combinator::opt(( - winnow::character::space1, - winnow::bytes::take_till0(|c| c == '\n' || c == '\r' || c == '#'), - )), - ) + fn parse_(input: &mut &str) -> PResult { + trace("entry", move |input: &mut &str| { + let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0); + let variants = + winnow::combinator::separated1(Variant::parse_, var_sep).parse_next(input)?; + + let desc_sep = (winnow::ascii::space0, '|'); + let description = + winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?; + + let comment_sep = (winnow::ascii::space0, '#'); + let comment = winnow::combinator::opt(( + comment_sep, + winnow::ascii::space1, + winnow::ascii::not_line_ending, + )) .parse_next(input)?; - let variants = Vec::new(); - let pos = pos.map(|(_, p)| p); - let archaic = archaic.is_some(); - let note = note.is_some(); - let description = description.map(|(_, d)| d.to_owned()); - let e = Self { - variants, - pos, - archaic, - note, - description, - comment: None, - }; - Ok((input, e)) + let mut e = match description { + Some((_, description)) => description, + None => Self { + variants: Vec::new(), + pos: None, + archaic: false, + note: false, + description: None, + comment: None, + }, + }; + e.variants = variants; + e.comment = comment.map(|c| c.2.to_owned()); + Ok(e) + }) + .parse_next(input) + } + + fn parse_description(input: &mut &str) -> PResult { + trace("description", move |input: &mut &str| { + let (pos, archaic, note, description) = ( + winnow::combinator::opt((winnow::ascii::space1, Pos::parse_)), + winnow::combinator::opt((winnow::ascii::space1, "(-)")), + winnow::combinator::opt((winnow::ascii::space1, "--")), + winnow::combinator::opt(( + winnow::ascii::space1, + winnow::token::take_till0(('\n', '\r', '#')), + )), + ) + .parse_next(input)?; + + let variants = Vec::new(); + let pos = pos.map(|(_, p)| p); + let archaic = archaic.is_some(); + let note = note.is_some(); + let description = description.map(|(_, d)| d.to_owned()); + let e = Self { + variants, + pos, + archaic, + note, + description, + comment: None, + }; + Ok(e) + }) + .parse_next(input) } } @@ -215,8 +233,9 @@ mod test_entry { fn test_variant_only() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = - Entry::parse("A Cv: acknowledgment's / Av B C: acknowledgement's\n").unwrap(); + let (input, actual) = Entry::parse_ + .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's\n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 2); assert_eq!(actual.pos, None); @@ -229,7 +248,9 @@ mod test_entry { fn test_description() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse("A C: prize / B: prise | otherwise\n").unwrap(); + let (input, actual) = Entry::parse_ + .parse_peek("A C: prize / B: prise | otherwise\n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 2); assert_eq!(actual.pos, None); @@ -242,7 +263,9 @@ mod test_entry { fn test_pos() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse("A B C: practice / AV Cv: practise | \n").unwrap(); + let (input, actual) = Entry::parse_ + .parse_peek("A B C: practice / AV Cv: practise | \n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 2); assert_eq!(actual.pos, Some(Pos::Noun)); @@ -255,7 +278,9 @@ mod test_entry { fn test_archaic() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse("A: bark / Av B: barque | (-) ship\n").unwrap(); + let (input, actual) = Entry::parse_ + .parse_peek("A: bark / Av B: barque | (-) ship\n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 2); assert_eq!(actual.pos, None); @@ -268,7 +293,9 @@ mod test_entry { fn test_note() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse("_: cabbies | -- plural\n").unwrap(); + let (input, actual) = Entry::parse_ + .parse_peek("_: cabbies | -- plural\n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 1); assert_eq!(actual.pos, None); @@ -279,7 +306,7 @@ mod test_entry { #[test] fn test_trailing_comment() { - let (input, actual) = Entry::parse( + let (input, actual) = Entry::parse_.parse_peek( "A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n", ) .unwrap(); @@ -297,20 +324,30 @@ mod test_entry { } impl Variant { - pub fn parse(input: &str) -> IResult<&str, Self> { - let types = winnow::multi::separated1(Type::parse, winnow::character::space1); - let sep = (winnow::bytes::tag(":"), winnow::character::space0); - let (input, (types, word)) = - winnow::sequence::separated_pair(types, sep, word).parse_next(input)?; - let v = Self { types, word }; - Ok((input, v)) + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) + } + + fn parse_(input: &mut &str) -> PResult { + trace("variant", move |input: &mut &str| { + let types = winnow::combinator::separated1(Type::parse_, winnow::ascii::space1); + let sep = (":", winnow::ascii::space0); + let (types, word) = + winnow::combinator::separated_pair(types, sep, word).parse_next(input)?; + let v = Self { types, word }; + Ok(v) + }) + .parse_next(input) } } -fn word(input: &str) -> IResult<&str, String> { - winnow::bytes::take_till1(|item: char| item.is_ascii_whitespace()) - .map(|s: &str| s.to_owned().replace('_', " ")) - .parse_next(input) +fn word(input: &mut &str) -> PResult { + trace("word", move |input: &mut &str| { + winnow::token::take_till1(|item: char| item.is_ascii_whitespace()) + .map(|s: &str| s.to_owned().replace('_', " ")) + .parse_next(input) + }) + .parse_next(input) } #[cfg(test)] @@ -321,7 +358,7 @@ mod test_variant { fn test_valid() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Variant::parse("A Cv: acknowledgment ").unwrap(); + let (input, actual) = Variant::parse_.parse_peek("A Cv: acknowledgment ").unwrap(); assert_eq!(input, " "); assert_eq!( actual.types, @@ -343,8 +380,9 @@ mod test_variant { #[test] fn test_extra() { - let (input, actual) = - Variant::parse("A Cv: acknowledgment's / Av B C: acknowledgement's").unwrap(); + let (input, actual) = Variant::parse_ + .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's") + .unwrap(); assert_eq!(input, " / Av B C: acknowledgement's"); assert_eq!( actual.types, @@ -366,7 +404,7 @@ mod test_variant { #[test] fn test_underscore() { - let (input, actual) = Variant::parse("_: air_gun\n").unwrap(); + let (input, actual) = Variant::parse_.parse_peek("_: air_gun\n").unwrap(); assert_eq!(input, "\n"); assert_eq!( actual.types, @@ -381,13 +419,20 @@ mod test_variant { } impl Type { - pub fn parse(input: &str) -> IResult<&str, Type> { - let (input, category) = Category::parse(input)?; - let (input, tag) = winnow::combinator::opt(Tag::parse).parse_next(input)?; - let (input, num) = winnow::combinator::opt(winnow::character::digit1).parse_next(input)?; - let num = num.map(|s| s.parse().expect("parser ensured its a number")); - let t = Type { category, tag, num }; - Ok((input, t)) + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) + } + + fn parse_(input: &mut &str) -> PResult { + trace("type", move |input: &mut &str| { + let category = Category::parse_(input)?; + let tag = winnow::combinator::opt(Tag::parse_).parse_next(input)?; + let num = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?; + let num = num.map(|s| s.parse().expect("parser ensured its a number")); + let t = Type { category, tag, num }; + Ok(t) + }) + .parse_next(input) } } @@ -399,13 +444,13 @@ mod test_type { fn test_valid() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Type::parse("A ").unwrap(); + let (input, actual) = Type::parse_.parse_peek("A ").unwrap(); assert_eq!(input, " "); assert_eq!(actual.category, Category::American); assert_eq!(actual.tag, None); assert_eq!(actual.num, None); - let (input, actual) = Type::parse("Bv ").unwrap(); + let (input, actual) = Type::parse_.parse_peek("Bv ").unwrap(); assert_eq!(input, " "); assert_eq!(actual.category, Category::BritishIse); assert_eq!(actual.tag, Some(Tag::Variant)); @@ -414,13 +459,13 @@ mod test_type { #[test] fn test_extra() { - let (input, actual) = Type::parse("Z foobar").unwrap(); + let (input, actual) = Type::parse_.parse_peek("Z foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual.category, Category::BritishIze); assert_eq!(actual.tag, None); assert_eq!(actual.num, None); - let (input, actual) = Type::parse("C- foobar").unwrap(); + let (input, actual) = Type::parse_.parse_peek("C- foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual.category, Category::Canadian); assert_eq!(actual.tag, Some(Tag::Possible)); @@ -429,7 +474,7 @@ mod test_type { #[test] fn test_num() { - let (input, actual) = Type::parse("Av1 ").unwrap(); + let (input, actual) = Type::parse_.parse_peek("Av1 ").unwrap(); assert_eq!(input, " "); assert_eq!(actual.category, Category::American); assert_eq!(actual.tag, Some(Tag::Variant)); @@ -438,19 +483,26 @@ mod test_type { } impl Category { - pub fn parse(input: &str) -> IResult<&str, Category> { - let symbols = winnow::bytes::one_of("ABZCD_"); - symbols - .map(|c| match c { - 'A' => Category::American, - 'B' => Category::BritishIse, - 'Z' => Category::BritishIze, - 'C' => Category::Canadian, - 'D' => Category::Australian, - '_' => Category::Other, - _ => unreachable!("parser won't select this option"), - }) - .parse_next(input) + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) + } + + fn parse_(input: &mut &str) -> PResult { + trace("category", move |input: &mut &str| { + let symbols = winnow::token::one_of(['A', 'B', 'Z', 'C', 'D', '_']); + symbols + .map(|c| match c { + 'A' => Category::American, + 'B' => Category::BritishIse, + 'Z' => Category::BritishIze, + 'C' => Category::Canadian, + 'D' => Category::Australian, + '_' => Category::Other, + _ => unreachable!("parser won't select this option"), + }) + .parse_next(input) + }) + .parse_next(input) } } @@ -460,32 +512,39 @@ mod test_category { #[test] fn test_valid() { - let (input, actual) = Category::parse("A").unwrap(); + let (input, actual) = Category::parse_.parse_peek("A").unwrap(); assert_eq!(input, ""); assert_eq!(actual, Category::American); } #[test] fn test_extra() { - let (input, actual) = Category::parse("_ foobar").unwrap(); + let (input, actual) = Category::parse_.parse_peek("_ foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual, Category::Other); } } impl Tag { - pub fn parse(input: &str) -> IResult<&str, Tag> { - let symbols = winnow::bytes::one_of(".vV-x"); - symbols - .map(|c| match c { - '.' => Tag::Eq, - 'v' => Tag::Variant, - 'V' => Tag::Seldom, - '-' => Tag::Possible, - 'x' => Tag::Improper, - _ => unreachable!("parser won't select this option"), - }) - .parse_next(input) + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) + } + + fn parse_(input: &mut &str) -> PResult { + trace("tag", move |input: &mut &str| { + let symbols = winnow::token::one_of(['.', 'v', 'V', '-', 'x']); + symbols + .map(|c| match c { + '.' => Tag::Eq, + 'v' => Tag::Variant, + 'V' => Tag::Seldom, + '-' => Tag::Possible, + 'x' => Tag::Improper, + _ => unreachable!("parser won't select this option"), + }) + .parse_next(input) + }) + .parse_next(input) } } @@ -495,32 +554,34 @@ mod test_tag { #[test] fn test_valid() { - let (input, actual) = Tag::parse(".").unwrap(); + let (input, actual) = Tag::parse_.parse_peek(".").unwrap(); assert_eq!(input, ""); assert_eq!(actual, Tag::Eq); } #[test] fn test_extra() { - let (input, actual) = Tag::parse("x foobar").unwrap(); + let (input, actual) = Tag::parse_.parse_peek("x foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual, Tag::Improper); } } impl Pos { - pub fn parse(input: &str) -> IResult<&str, Pos> { - use winnow::bytes::tag; - let noun = tag(""); - let verb = tag(""); - let adjective = tag(""); - let adverb = tag(""); - winnow::branch::alt(( - noun.value(Pos::Noun), - verb.value(Pos::Verb), - adjective.value(Pos::Adjective), - adverb.value(Pos::Adverb), - )) + pub fn parse(input: &str) -> Result { + Self::parse_.parse(input).map_err(|_err| ParseError) + } + + fn parse_(input: &mut &str) -> PResult { + trace("pos", move |input: &mut &str| { + winnow::combinator::alt(( + "".value(Pos::Noun), + "".value(Pos::Verb), + "".value(Pos::Adjective), + "".value(Pos::Adverb), + )) + .parse_next(input) + }) .parse_next(input) } } @@ -531,15 +592,26 @@ mod test_pos { #[test] fn test_valid() { - let (input, actual) = Pos::parse("").unwrap(); + let (input, actual) = Pos::parse_.parse_peek("").unwrap(); assert_eq!(input, ""); assert_eq!(actual, Pos::Noun); } #[test] fn test_extra() { - let (input, actual) = Pos::parse(" foobar").unwrap(); + let (input, actual) = Pos::parse_.parse_peek(" foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual, Pos::Adjective); } } + +#[derive(Debug)] +pub struct ParseError; + +impl std::fmt::Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "invalid") + } +} + +impl std::error::Error for ParseError {}