From 15e748d0e56c3ac0ee6210a2c8608ecddf6c2fc7 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Tue, 21 Feb 2023 10:41:45 -0600 Subject: [PATCH 1/2] refactor: Switch to winnow --- Cargo.lock | 29 ++- crates/typos/Cargo.toml | 2 +- crates/typos/src/tokens.rs | 325 +++++++++---------------------- crates/varcon-core/Cargo.toml | 4 +- crates/varcon-core/src/parser.rs | 136 ++++++------- 5 files changed, 175 insertions(+), 321 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 58714b8..be6f54a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -945,12 +945,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "miniz_oxide" version = "0.5.4" @@ -972,16 +966,6 @@ dependencies = [ "unicase", ] -[[package]] -name = "nom" -version = "7.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "nom8" version = "0.2.0" @@ -1622,13 +1606,13 @@ dependencies = [ "anyhow", "bstr 1.1.0", "itertools", - "nom", "once_cell", "serde", "simdutf8", "thiserror", "unicode-segmentation", "unicode-xid", + "winnow", ] [[package]] @@ -1802,7 +1786,7 @@ name = "varcon-core" version = "2.2.7" dependencies = [ "enumflags2", - "nom", + "winnow", ] [[package]] @@ -2000,6 +1984,15 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" +[[package]] +name = "winnow" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efdd927d1a3d5d98abcfc4cf8627371862ee6abfe52a988050621c50c66b4493" +dependencies = [ + "memchr", +] + [[package]] name = "yansi" version = "0.5.1" diff --git a/crates/typos/Cargo.toml b/crates/typos/Cargo.toml index 94e4cf1..355b6a3 100644 --- a/crates/typos/Cargo.toml +++ b/crates/typos/Cargo.toml @@ -14,7 +14,7 @@ include.workspace = true [dependencies] anyhow = "1.0" thiserror = "1.0" -nom = "7.1" +winnow = "0.3.0" unicode-xid = "0.2.4" once_cell = "1.17.0" serde = { version = "1.0", features = ["derive"] } diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 890f66b..a64b54d 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -125,36 +125,31 @@ impl<'s> Iterator for Utf8Chunks<'s> { } mod parser { - use nom::branch::*; - use nom::bytes::complete::*; - use nom::character::complete::*; - use nom::combinator::*; - use nom::sequence::*; - use nom::{AsChar, IResult}; + use winnow::branch::*; + use winnow::bytes::complete::*; + use winnow::character::complete::*; + use winnow::combinator::*; + use winnow::prelude::*; + use winnow::sequence::*; + use winnow::stream::AsBStr; + use winnow::stream::AsChar; + use winnow::stream::SliceLen; + use winnow::stream::Stream; - pub(crate) fn next_identifier(input: T) -> IResult + pub(crate) fn next_identifier(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Slice> - + nom::Slice> - + nom::Offset - + Clone - + Default - + PartialEq - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { preceded(ignore, identifier)(input) } - fn identifier(input: T) -> IResult + fn identifier(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition + std::fmt::Debug, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { // Generally a language would be `{XID_Start}{XID_Continue}*` but going with only // `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd @@ -163,21 +158,11 @@ mod parser { take_while1(is_xid_continue)(input) } - fn ignore(input: T) -> IResult + fn ignore(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Slice> - + nom::Slice> - + nom::Offset - + Clone - + Default - + PartialEq - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { take_many0(alt(( // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`, @@ -198,42 +183,23 @@ mod parser { )))(input) } - fn sep1(input: T) -> IResult + fn sep1(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Slice> - + nom::Slice> - + nom::Offset - + Clone - + Default - + PartialEq - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { alt(( recognize(satisfy(|c| !is_xid_continue(c))), - map(eof, |_| T::default()), + map(eof, |_| ::Slice::default()), ))(input) } - fn other(input: T) -> IResult + fn other(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Slice> - + nom::Slice> - + nom::Offset - + Clone - + PartialEq - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { recognize(tuple(( satisfy(|c| !is_xid_continue(c)), @@ -241,19 +207,11 @@ mod parser { )))(input) } - fn ordinal_literal(input: T) -> IResult + fn ordinal_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { fn is_sep(c: impl AsChar) -> bool { let c = c.as_char(); @@ -274,25 +232,20 @@ mod parser { )))(input) } - fn dec_literal(input: T) -> IResult + fn dec_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition + std::fmt::Debug, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { take_while1(is_dec_digit_with_sep)(input) } - fn hex_literal(input: T) -> IResult + fn hex_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { preceded( pair(char('0'), alt((char('x'), char('X')))), @@ -300,21 +253,11 @@ mod parser { )(input) } - fn css_color(input: T) -> IResult + fn css_color(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + Default - + PartialEq - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { preceded( char('#'), @@ -325,19 +268,11 @@ mod parser { )(input) } - fn uuid_literal(input: T) -> IResult + fn uuid_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { recognize(alt(( tuple(( @@ -365,19 +300,11 @@ mod parser { )))(input) } - fn hash_literal(input: T) -> IResult + fn hash_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { // Size considerations: // - 40 characters holds for a SHA-1 hash from older Git versions. @@ -396,19 +323,11 @@ mod parser { ))(input) } - fn base64_literal(input: T) -> IResult + fn base64_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { let (padding, captured) = take_while1(is_base64_digit)(input.clone())?; @@ -419,37 +338,30 @@ mod parser { padding_len = 0; } - if captured.input_len() < 90 + if captured.slice_len() < 90 && padding_len == 0 && captured - .iter_elements() + .as_bstr() + .iter() .all(|c| !['/', '+'].contains(&c.as_char())) { - return Err(nom::Err::Error(nom::error::Error::new( + return Err(winnow::Err::Backtrack(winnow::error::Error::new( input, - nom::error::ErrorKind::LengthValue, + winnow::error::ErrorKind::LengthValue, ))); } let (after, _) = take_while_m_n(padding_len, padding_len, is_base64_padding)(padding)?; let after_offset = input.offset(&after); - Ok(input.take_split(after_offset)) + Ok(input.next_slice(after_offset)) } - fn email_literal(input: T) -> IResult + fn email_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { recognize(tuple(( take_while1(is_localport_char), @@ -458,19 +370,11 @@ mod parser { )))(input) } - fn url_literal(input: T) -> IResult + fn url_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { recognize(tuple(( opt(terminated( @@ -490,19 +394,11 @@ mod parser { )))(input) } - fn url_userinfo(input: T) -> IResult + fn url_userinfo(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { recognize(tuple(( take_while1(is_localport_char), @@ -510,19 +406,11 @@ mod parser { )))(input) } - fn c_escape(input: T) -> IResult + fn c_escape(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { // We don't know whether the string we are parsing is a literal string (no escaping) or // regular string that does escaping. The escaped letter might be part of a word, or it @@ -531,53 +419,26 @@ mod parser { preceded(take_while1(is_escape), take_while(is_xid_continue))(input) } - fn printf(input: T) -> IResult + fn printf(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { preceded(char('%'), take_while1(is_xid_continue))(input) } - fn take_many0(mut f: F) -> impl FnMut(I) -> IResult + fn take_many0(mut f: F) -> impl FnMut(I) -> IResult::Slice, E> where - I: nom::Offset + nom::InputTake + Clone + PartialEq + std::fmt::Debug, - F: nom::Parser, - E: nom::error::ParseError, + I: Stream, + F: winnow::Parser::Slice, E>, + E: winnow::error::ParseError, { move |i: I| { - let mut current = i.clone(); - loop { - match f.parse(current.clone()) { - Err(nom::Err::Error(_)) => { - let offset = i.offset(¤t); - let (after, before) = i.take_split(offset); - return Ok((after, before)); - } - Err(e) => { - return Err(e); - } - Ok((next, _)) => { - if next == current { - return Err(nom::Err::Error(E::from_error_kind( - i, - nom::error::ErrorKind::Many0, - ))); - } - - current = next; - } - } - } + winnow::multi::many0(f.by_ref()) + .map(|()| ()) + .recognize() + .parse_next(i) } } diff --git a/crates/varcon-core/Cargo.toml b/crates/varcon-core/Cargo.toml index 31a2e49..20ab866 100644 --- a/crates/varcon-core/Cargo.toml +++ b/crates/varcon-core/Cargo.toml @@ -12,11 +12,11 @@ include.workspace = true [features] default = [] -parser = ["nom"] +parser = ["winnow"] flags = ["enumflags2"] [dependencies] -nom = { version = "7", optional = true } +winnow = { version = "0.3.0", optional = true } enumflags2 = { version = "0.7", optional = true } [package.metadata.docs.rs] diff --git a/crates/varcon-core/src/parser.rs b/crates/varcon-core/src/parser.rs index 1be40b3..5fcbc8b 100644 --- a/crates/varcon-core/src/parser.rs +++ b/crates/varcon-core/src/parser.rs @@ -1,6 +1,6 @@ -use nom::IResult; -use nom::InputTakeAtPosition; -use nom::Parser; +use winnow::stream::Stream; +use winnow::IResult; +use winnow::Parser; use crate::*; @@ -64,31 +64,31 @@ A Cv: acknowledgment's / Av B C: acknowledgement's impl Cluster { pub fn parse(input: &str) -> IResult<&str, Self> { - let header = nom::sequence::tuple(( - nom::bytes::streaming::tag("#"), - nom::character::streaming::space0, - nom::character::streaming::not_line_ending, - nom::character::streaming::line_ending, + let header = winnow::sequence::tuple(( + winnow::bytes::streaming::tag("#"), + winnow::character::streaming::space0, + winnow::character::streaming::not_line_ending, + winnow::character::streaming::line_ending, )); - let note = nom::sequence::preceded( - nom::sequence::pair( - nom::bytes::streaming::tag("##"), - nom::character::streaming::space0, + let note = winnow::sequence::preceded( + winnow::sequence::pair( + winnow::bytes::streaming::tag("##"), + winnow::character::streaming::space0, ), - nom::sequence::terminated( - nom::character::streaming::not_line_ending, - nom::character::streaming::line_ending, + winnow::sequence::terminated( + winnow::character::streaming::not_line_ending, + winnow::character::streaming::line_ending, ), ); - let mut cluster = nom::sequence::tuple(( - nom::combinator::opt(header), - nom::multi::many1(nom::sequence::terminated( + let mut cluster = winnow::sequence::tuple(( + winnow::combinator::opt(header), + winnow::multi::many1(winnow::sequence::terminated( Entry::parse, - nom::character::streaming::line_ending, + winnow::character::streaming::line_ending, )), - nom::multi::many0(note), + winnow::multi::many0(note), )); - let (input, (header, entries, notes)) = (cluster)(input)?; + let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = (cluster)(input)?; let header = header.map(|s| s.2.to_owned()); let notes = notes.into_iter().map(|s| s.to_owned()).collect(); @@ -150,28 +150,30 @@ A B C: coloration's / B. Cv: colouration's impl Entry { pub fn parse(input: &str) -> IResult<&str, Self> { - let var_sep = nom::sequence::tuple(( - nom::character::streaming::space0, - nom::bytes::streaming::tag("/"), - nom::character::streaming::space0, + let var_sep = winnow::sequence::tuple(( + winnow::character::streaming::space0, + winnow::bytes::streaming::tag("/"), + winnow::character::streaming::space0, )); - let (input, variants) = nom::multi::separated_list1(var_sep, Variant::parse)(input)?; + let (input, variants) = winnow::multi::separated_list1(var_sep, Variant::parse)(input)?; - let desc_sep = nom::sequence::tuple(( - nom::character::streaming::space0, - nom::bytes::streaming::tag("|"), + let desc_sep = winnow::sequence::tuple(( + winnow::character::streaming::space0, + winnow::bytes::streaming::tag("|"), )); - let (input, description) = - nom::combinator::opt(nom::sequence::tuple((desc_sep, Self::parse_description)))(input)?; + let (input, description) = winnow::combinator::opt(winnow::sequence::tuple(( + desc_sep, + Self::parse_description, + )))(input)?; - let comment_sep = nom::sequence::tuple(( - nom::character::streaming::space0, - nom::bytes::streaming::tag("#"), + let comment_sep = winnow::sequence::tuple(( + winnow::character::streaming::space0, + winnow::bytes::streaming::tag("#"), )); - let (input, comment) = nom::combinator::opt(nom::sequence::tuple(( + let (input, comment) = winnow::combinator::opt(winnow::sequence::tuple(( comment_sep, - nom::character::streaming::space1, - nom::character::streaming::not_line_ending, + winnow::character::streaming::space1, + winnow::character::streaming::not_line_ending, )))(input)?; let mut e = match description { @@ -191,22 +193,22 @@ impl Entry { } fn parse_description(input: &str) -> IResult<&str, Self> { - let (input, (pos, archaic, note, description)) = nom::sequence::tuple(( - nom::combinator::opt(nom::sequence::tuple(( - nom::character::streaming::space1, + let (input, (pos, archaic, note, description)) = winnow::sequence::tuple(( + winnow::combinator::opt(winnow::sequence::tuple(( + winnow::character::streaming::space1, Pos::parse, ))), - nom::combinator::opt(nom::sequence::tuple(( - nom::character::streaming::space1, - nom::bytes::streaming::tag("(-)"), + winnow::combinator::opt(winnow::sequence::tuple(( + winnow::character::streaming::space1, + winnow::bytes::streaming::tag("(-)"), ))), - nom::combinator::opt(nom::sequence::tuple(( - nom::character::streaming::space1, - nom::bytes::streaming::tag("--"), + winnow::combinator::opt(winnow::sequence::tuple(( + winnow::character::streaming::space1, + winnow::bytes::streaming::tag("--"), ))), - nom::combinator::opt(nom::sequence::tuple(( - nom::character::streaming::space1, - nom::bytes::streaming::take_till(|c| c == '\n' || c == '\r' || c == '#'), + winnow::combinator::opt(winnow::sequence::tuple(( + winnow::character::streaming::space1, + winnow::bytes::streaming::take_till(|c| c == '\n' || c == '\r' || c == '#'), ))), ))(input)?; @@ -319,24 +321,22 @@ mod test_entry { impl Variant { pub fn parse(input: &str) -> IResult<&str, Self> { - let types = nom::multi::separated_list1(nom::character::streaming::space1, Type::parse); - let sep = nom::sequence::tuple(( - nom::bytes::streaming::tag(":"), - nom::character::streaming::space0, + let types = + winnow::multi::separated_list1(winnow::character::streaming::space1, Type::parse); + let sep = winnow::sequence::tuple(( + winnow::bytes::streaming::tag(":"), + winnow::character::streaming::space0, )); - let (input, (types, word)) = nom::sequence::separated_pair(types, sep, word)(input)?; + let (input, (types, word)) = winnow::sequence::separated_pair(types, sep, word)(input)?; let v = Self { types, word }; Ok((input, v)) } } fn word(input: &str) -> IResult<&str, String> { - input - .split_at_position1( - |item| item.is_ascii_whitespace(), - nom::error::ErrorKind::Alpha, - ) - .map(|(i, s)| (i, s.to_owned().replace('_', " "))) + winnow::bytes::take_till1(|item: char| item.is_ascii_whitespace()) + .map(|s: &str| s.to_owned().replace('_', " ")) + .parse_next(input) } #[cfg(test)] @@ -409,8 +409,8 @@ mod test_variant { impl Type { pub fn parse(input: &str) -> IResult<&str, Type> { let (input, category) = Category::parse(input)?; - let (input, tag) = nom::combinator::opt(Tag::parse)(input)?; - let (input, num) = nom::combinator::opt(nom::character::streaming::digit1)(input)?; + let (input, tag) = winnow::combinator::opt(Tag::parse)(input)?; + let (input, num) = winnow::combinator::opt(winnow::character::streaming::digit1)(input)?; let num = num.map(|s| s.parse().expect("parser ensured its a number")); let t = Type { category, tag, num }; Ok((input, t)) @@ -465,8 +465,8 @@ mod test_type { impl Category { pub fn parse(input: &str) -> IResult<&str, Category> { - let symbols = nom::character::streaming::one_of("ABZCD_"); - nom::combinator::map(symbols, |c| match c { + let symbols = winnow::character::streaming::one_of("ABZCD_"); + winnow::combinator::map(symbols, |c| match c { 'A' => Category::American, 'B' => Category::BritishIse, 'Z' => Category::BritishIze, @@ -499,8 +499,8 @@ mod test_category { impl Tag { pub fn parse(input: &str) -> IResult<&str, Tag> { - let symbols = nom::character::streaming::one_of(".vV-x"); - nom::combinator::map(symbols, |c| match c { + let symbols = winnow::character::streaming::one_of(".vV-x"); + winnow::combinator::map(symbols, |c| match c { '.' => Tag::Eq, 'v' => Tag::Variant, 'V' => Tag::Seldom, @@ -532,12 +532,12 @@ mod test_tag { impl Pos { pub fn parse(input: &str) -> IResult<&str, Pos> { - use nom::bytes::streaming::tag; + use winnow::bytes::streaming::tag; let noun = tag(""); let verb = tag(""); let adjective = tag(""); let adverb = tag(""); - nom::branch::alt(( + winnow::branch::alt(( noun.map(|_| Pos::Noun), verb.map(|_| Pos::Verb), adjective.map(|_| Pos::Adjective), From d99eb1601be52ac3a67bf72a62fc6541f7b3f96d Mon Sep 17 00:00:00 2001 From: Ed Page Date: Tue, 21 Feb 2023 11:11:24 -0600 Subject: [PATCH 2/2] refactor: Resolve deprecations --- crates/typos/src/tokens.rs | 154 +++++++++++++++--------------- crates/varcon-core/src/parser.rs | 155 +++++++++++++------------------ 2 files changed, 142 insertions(+), 167 deletions(-) diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index a64b54d..c5ae1f8 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -126,8 +126,7 @@ impl<'s> Iterator for Utf8Chunks<'s> { mod parser { use winnow::branch::*; - use winnow::bytes::complete::*; - use winnow::character::complete::*; + use winnow::bytes::*; use winnow::combinator::*; use winnow::prelude::*; use winnow::sequence::*; @@ -135,10 +134,11 @@ mod parser { use winnow::stream::AsChar; use winnow::stream::SliceLen; use winnow::stream::Stream; + use winnow::stream::StreamIsPartial; pub(crate) fn next_identifier(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { @@ -147,7 +147,7 @@ mod parser { fn identifier(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { @@ -160,7 +160,7 @@ mod parser { fn ignore(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { @@ -185,31 +185,30 @@ mod parser { fn sep1(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { alt(( - recognize(satisfy(|c| !is_xid_continue(c))), - map(eof, |_| ::Slice::default()), + one_of(|c| !is_xid_continue(c)).recognize(), + eof.map(|_| ::Slice::default()), ))(input) } fn other(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - recognize(tuple(( - satisfy(|c| !is_xid_continue(c)), - take_while(is_ignore_char), - )))(input) + (one_of(|c| !is_xid_continue(c)), take_while0(is_ignore_char)) + .recognize() + .parse_next(input) } fn ordinal_literal(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { @@ -219,22 +218,19 @@ mod parser { ['_'].contains(&c) } - recognize(tuple(( - take_while(is_sep), + ( + take_while0(is_sep), take_while1(is_dec_digit), - alt(( - pair(char('s'), char('t')), - pair(char('n'), char('d')), - pair(char('r'), char('d')), - pair(char('t'), char('h')), - )), - take_while(is_sep), - )))(input) + alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))), + take_while0(is_sep), + ) + .recognize() + .parse_next(input) } fn dec_literal(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { @@ -243,24 +239,21 @@ mod parser { fn hex_literal(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - preceded( - pair(char('0'), alt((char('x'), char('X')))), - take_while1(is_hex_digit_with_sep), - )(input) + preceded(('0', alt(('x', 'X'))), take_while1(is_hex_digit_with_sep))(input) } fn css_color(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { preceded( - char('#'), + '#', alt(( terminated(take_while_m_n(3, 8, is_lower_hex_digit), peek(sep1)), terminated(take_while_m_n(3, 8, is_upper_hex_digit), peek(sep1)), @@ -270,39 +263,41 @@ mod parser { fn uuid_literal(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - recognize(alt(( - tuple(( + alt(( + ( take_while_m_n(8, 8, is_lower_hex_digit), - char('-'), + '-', take_while_m_n(4, 4, is_lower_hex_digit), - char('-'), + '-', take_while_m_n(4, 4, is_lower_hex_digit), - char('-'), + '-', take_while_m_n(4, 4, is_lower_hex_digit), - char('-'), + '-', take_while_m_n(12, 12, is_lower_hex_digit), - )), - tuple(( + ), + ( take_while_m_n(8, 8, is_upper_hex_digit), - char('-'), + '-', take_while_m_n(4, 4, is_upper_hex_digit), - char('-'), + '-', take_while_m_n(4, 4, is_upper_hex_digit), - char('-'), + '-', take_while_m_n(4, 4, is_upper_hex_digit), - char('-'), + '-', take_while_m_n(12, 12, is_upper_hex_digit), - )), - )))(input) + ), + )) + .recognize() + .parse_next(input) } fn hash_literal(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { @@ -325,14 +320,14 @@ mod parser { fn base64_literal(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { let (padding, captured) = take_while1(is_base64_digit)(input.clone())?; const CHUNK: usize = 4; - let padding_offset = input.offset(&padding); + let padding_offset = input.offset_to(&padding); let mut padding_len = CHUNK - padding_offset % CHUNK; if padding_len == CHUNK { padding_len = 0; @@ -345,70 +340,75 @@ mod parser { .iter() .all(|c| !['/', '+'].contains(&c.as_char())) { - return Err(winnow::Err::Backtrack(winnow::error::Error::new( - input, - winnow::error::ErrorKind::LengthValue, - ))); + return Err(winnow::error::ErrMode::Backtrack( + winnow::error::Error::new(input, winnow::error::ErrorKind::LengthValue), + )); } let (after, _) = take_while_m_n(padding_len, padding_len, is_base64_padding)(padding)?; - let after_offset = input.offset(&after); + let after_offset = input.offset_to(&after); Ok(input.next_slice(after_offset)) } fn email_literal(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - recognize(tuple(( + ( take_while1(is_localport_char), - char('@'), + '@', take_while1(is_domain_char), - )))(input) + ) + .recognize() + .parse_next(input) } fn url_literal(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - recognize(tuple(( + ( opt(terminated( take_while1(is_scheme_char), // HACK: Technically you can skip `//` if you don't have a domain but that would // get messy to support. - tuple((char(':'), char('/'), char('/'))), + (':', '/', '/'), )), - tuple(( - opt(terminated(url_userinfo, char('@'))), + ( + opt(terminated(url_userinfo, '@')), take_while1(is_domain_char), - opt(preceded(char(':'), take_while1(AsChar::is_dec_digit))), - )), - char('/'), + opt(preceded(':', take_while1(AsChar::is_dec_digit))), + ), + '/', // HACK: Too lazy to enumerate - take_while(is_path_query_fragment), - )))(input) + take_while0(is_path_query_fragment), + ) + .recognize() + .parse_next(input) } fn url_userinfo(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - recognize(tuple(( + ( take_while1(is_localport_char), - opt(preceded(char(':'), take_while(is_localport_char))), - )))(input) + opt(preceded(':', take_while0(is_localport_char))), + ) + .recognize() + .parse_next(input) } fn c_escape(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { @@ -416,16 +416,16 @@ mod parser { // regular string that does escaping. The escaped letter might be part of a word, or it // might not be. Rather than guess and be wrong part of the time and correct people's words // incorrectly, we opt for just not evaluating it at all. - preceded(take_while1(is_escape), take_while(is_xid_continue))(input) + preceded(take_while1(is_escape), take_while0(is_xid_continue))(input) } fn printf(input: T) -> IResult::Slice> where - T: Stream + PartialEq, + T: Stream + StreamIsPartial + PartialEq, ::Slice: AsBStr + SliceLen + Default, ::Token: AsChar + Copy, { - preceded(char('%'), take_while1(is_xid_continue))(input) + preceded('%', take_while1(is_xid_continue))(input) } fn take_many0(mut f: F) -> impl FnMut(I) -> IResult::Slice, E> diff --git a/crates/varcon-core/src/parser.rs b/crates/varcon-core/src/parser.rs index 5fcbc8b..72c4bde 100644 --- a/crates/varcon-core/src/parser.rs +++ b/crates/varcon-core/src/parser.rs @@ -1,6 +1,4 @@ -use winnow::stream::Stream; -use winnow::IResult; -use winnow::Parser; +use winnow::prelude::*; use crate::*; @@ -64,31 +62,28 @@ A Cv: acknowledgment's / Av B C: acknowledgement's impl Cluster { pub fn parse(input: &str) -> IResult<&str, Self> { - let header = winnow::sequence::tuple(( - winnow::bytes::streaming::tag("#"), - winnow::character::streaming::space0, - winnow::character::streaming::not_line_ending, - winnow::character::streaming::line_ending, - )); + let header = ( + winnow::bytes::tag("#"), + winnow::character::space0, + winnow::character::not_line_ending, + winnow::character::line_ending, + ); let note = winnow::sequence::preceded( - winnow::sequence::pair( - winnow::bytes::streaming::tag("##"), - winnow::character::streaming::space0, - ), + (winnow::bytes::tag("##"), winnow::character::space0), winnow::sequence::terminated( - winnow::character::streaming::not_line_ending, - winnow::character::streaming::line_ending, + winnow::character::not_line_ending, + winnow::character::line_ending, ), ); - let mut cluster = winnow::sequence::tuple(( + let mut cluster = ( winnow::combinator::opt(header), winnow::multi::many1(winnow::sequence::terminated( Entry::parse, - winnow::character::streaming::line_ending, + winnow::character::line_ending, )), winnow::multi::many0(note), - )); - let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = (cluster)(input)?; + ); + let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = cluster.parse_next(input)?; let header = header.map(|s| s.2.to_owned()); let notes = notes.into_iter().map(|s| s.to_owned()).collect(); @@ -150,31 +145,19 @@ A B C: coloration's / B. Cv: colouration's impl Entry { pub fn parse(input: &str) -> IResult<&str, Self> { - let var_sep = winnow::sequence::tuple(( - winnow::character::streaming::space0, - winnow::bytes::streaming::tag("/"), - winnow::character::streaming::space0, - )); - let (input, variants) = winnow::multi::separated_list1(var_sep, Variant::parse)(input)?; + let var_sep = (winnow::character::space0, '/', winnow::character::space0); + let (input, variants) = winnow::multi::separated1(Variant::parse, var_sep)(input)?; - let desc_sep = winnow::sequence::tuple(( - winnow::character::streaming::space0, - winnow::bytes::streaming::tag("|"), - )); - let (input, description) = winnow::combinator::opt(winnow::sequence::tuple(( - desc_sep, - Self::parse_description, - )))(input)?; + let desc_sep = (winnow::character::space0, '|'); + let (input, description) = + winnow::combinator::opt((desc_sep, Self::parse_description))(input)?; - let comment_sep = winnow::sequence::tuple(( - winnow::character::streaming::space0, - winnow::bytes::streaming::tag("#"), - )); - let (input, comment) = winnow::combinator::opt(winnow::sequence::tuple(( + let comment_sep = (winnow::character::space0, '#'); + let (input, comment) = winnow::combinator::opt(( comment_sep, - winnow::character::streaming::space1, - winnow::character::streaming::not_line_ending, - )))(input)?; + winnow::character::space1, + winnow::character::not_line_ending, + ))(input)?; let mut e = match description { Some((_, description)) => description, @@ -193,24 +176,16 @@ impl Entry { } fn parse_description(input: &str) -> IResult<&str, Self> { - let (input, (pos, archaic, note, description)) = winnow::sequence::tuple(( - winnow::combinator::opt(winnow::sequence::tuple(( - winnow::character::streaming::space1, - Pos::parse, - ))), - winnow::combinator::opt(winnow::sequence::tuple(( - winnow::character::streaming::space1, - winnow::bytes::streaming::tag("(-)"), - ))), - winnow::combinator::opt(winnow::sequence::tuple(( - winnow::character::streaming::space1, - winnow::bytes::streaming::tag("--"), - ))), - winnow::combinator::opt(winnow::sequence::tuple(( - winnow::character::streaming::space1, - winnow::bytes::streaming::take_till(|c| c == '\n' || c == '\r' || c == '#'), - ))), - ))(input)?; + let (input, (pos, archaic, note, description)) = ( + winnow::combinator::opt((winnow::character::space1, Pos::parse)), + winnow::combinator::opt((winnow::character::space1, "(-)")), + winnow::combinator::opt((winnow::character::space1, "--")), + winnow::combinator::opt(( + winnow::character::space1, + winnow::bytes::take_till0(|c| c == '\n' || c == '\r' || c == '#'), + )), + ) + .parse_next(input)?; let variants = Vec::new(); let pos = pos.map(|(_, p)| p); @@ -321,12 +296,8 @@ mod test_entry { impl Variant { pub fn parse(input: &str) -> IResult<&str, Self> { - let types = - winnow::multi::separated_list1(winnow::character::streaming::space1, Type::parse); - let sep = winnow::sequence::tuple(( - winnow::bytes::streaming::tag(":"), - winnow::character::streaming::space0, - )); + let types = winnow::multi::separated1(Type::parse, winnow::character::space1); + let sep = (winnow::bytes::tag(":"), winnow::character::space0); let (input, (types, word)) = winnow::sequence::separated_pair(types, sep, word)(input)?; let v = Self { types, word }; Ok((input, v)) @@ -410,7 +381,7 @@ impl Type { pub fn parse(input: &str) -> IResult<&str, Type> { let (input, category) = Category::parse(input)?; let (input, tag) = winnow::combinator::opt(Tag::parse)(input)?; - let (input, num) = winnow::combinator::opt(winnow::character::streaming::digit1)(input)?; + let (input, num) = winnow::combinator::opt(winnow::character::digit1)(input)?; let num = num.map(|s| s.parse().expect("parser ensured its a number")); let t = Type { category, tag, num }; Ok((input, t)) @@ -465,16 +436,18 @@ mod test_type { impl Category { pub fn parse(input: &str) -> IResult<&str, Category> { - let symbols = winnow::character::streaming::one_of("ABZCD_"); - winnow::combinator::map(symbols, |c| match c { - 'A' => Category::American, - 'B' => Category::BritishIse, - 'Z' => Category::BritishIze, - 'C' => Category::Canadian, - 'D' => Category::Australian, - '_' => Category::Other, - _ => unreachable!("parser won't select this option"), - })(input) + let symbols = winnow::bytes::one_of("ABZCD_"); + symbols + .map(|c| match c { + 'A' => Category::American, + 'B' => Category::BritishIse, + 'Z' => Category::BritishIze, + 'C' => Category::Canadian, + 'D' => Category::Australian, + '_' => Category::Other, + _ => unreachable!("parser won't select this option"), + }) + .parse_next(input) } } @@ -499,15 +472,17 @@ mod test_category { impl Tag { pub fn parse(input: &str) -> IResult<&str, Tag> { - let symbols = winnow::character::streaming::one_of(".vV-x"); - winnow::combinator::map(symbols, |c| match c { - '.' => Tag::Eq, - 'v' => Tag::Variant, - 'V' => Tag::Seldom, - '-' => Tag::Possible, - 'x' => Tag::Improper, - _ => unreachable!("parser won't select this option"), - })(input) + let symbols = winnow::bytes::one_of(".vV-x"); + symbols + .map(|c| match c { + '.' => Tag::Eq, + 'v' => Tag::Variant, + 'V' => Tag::Seldom, + '-' => Tag::Possible, + 'x' => Tag::Improper, + _ => unreachable!("parser won't select this option"), + }) + .parse_next(input) } } @@ -532,16 +507,16 @@ mod test_tag { impl Pos { pub fn parse(input: &str) -> IResult<&str, Pos> { - use winnow::bytes::streaming::tag; + use winnow::bytes::tag; let noun = tag(""); let verb = tag(""); let adjective = tag(""); let adverb = tag(""); winnow::branch::alt(( - noun.map(|_| Pos::Noun), - verb.map(|_| Pos::Verb), - adjective.map(|_| Pos::Adjective), - adverb.map(|_| Pos::Adverb), + noun.value(Pos::Noun), + verb.value(Pos::Verb), + adjective.value(Pos::Adjective), + adverb.value(Pos::Adverb), ))(input) } }