diff --git a/Cargo.lock b/Cargo.lock index 58714b8..be6f54a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -945,12 +945,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "miniz_oxide" version = "0.5.4" @@ -972,16 +966,6 @@ dependencies = [ "unicase", ] -[[package]] -name = "nom" -version = "7.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "nom8" version = "0.2.0" @@ -1622,13 +1606,13 @@ dependencies = [ "anyhow", "bstr 1.1.0", "itertools", - "nom", "once_cell", "serde", "simdutf8", "thiserror", "unicode-segmentation", "unicode-xid", + "winnow", ] [[package]] @@ -1802,7 +1786,7 @@ name = "varcon-core" version = "2.2.7" dependencies = [ "enumflags2", - "nom", + "winnow", ] [[package]] @@ -2000,6 +1984,15 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" +[[package]] +name = "winnow" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efdd927d1a3d5d98abcfc4cf8627371862ee6abfe52a988050621c50c66b4493" +dependencies = [ + "memchr", +] + [[package]] name = "yansi" version = "0.5.1" diff --git a/crates/typos/Cargo.toml b/crates/typos/Cargo.toml index 94e4cf1..355b6a3 100644 --- a/crates/typos/Cargo.toml +++ b/crates/typos/Cargo.toml @@ -14,7 +14,7 @@ include.workspace = true [dependencies] anyhow = "1.0" thiserror = "1.0" -nom = "7.1" +winnow = "0.3.0" unicode-xid = "0.2.4" once_cell = "1.17.0" serde = { version = "1.0", features = ["derive"] } diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 890f66b..a64b54d 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -125,36 +125,31 @@ impl<'s> Iterator for Utf8Chunks<'s> { } mod parser { - use nom::branch::*; - use nom::bytes::complete::*; - use nom::character::complete::*; - use nom::combinator::*; - use nom::sequence::*; - use nom::{AsChar, IResult}; + use winnow::branch::*; + use winnow::bytes::complete::*; + use winnow::character::complete::*; + use winnow::combinator::*; + use winnow::prelude::*; + use winnow::sequence::*; + use winnow::stream::AsBStr; + use winnow::stream::AsChar; + use winnow::stream::SliceLen; + use winnow::stream::Stream; - pub(crate) fn next_identifier(input: T) -> IResult + pub(crate) fn next_identifier(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Slice> - + nom::Slice> - + nom::Offset - + Clone - + Default - + PartialEq - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { preceded(ignore, identifier)(input) } - fn identifier(input: T) -> IResult + fn identifier(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition + std::fmt::Debug, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { // Generally a language would be `{XID_Start}{XID_Continue}*` but going with only // `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd @@ -163,21 +158,11 @@ mod parser { take_while1(is_xid_continue)(input) } - fn ignore(input: T) -> IResult + fn ignore(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Slice> - + nom::Slice> - + nom::Offset - + Clone - + Default - + PartialEq - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { take_many0(alt(( // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`, @@ -198,42 +183,23 @@ mod parser { )))(input) } - fn sep1(input: T) -> IResult + fn sep1(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Slice> - + nom::Slice> - + nom::Offset - + Clone - + Default - + PartialEq - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { alt(( recognize(satisfy(|c| !is_xid_continue(c))), - map(eof, |_| T::default()), + map(eof, |_| ::Slice::default()), ))(input) } - fn other(input: T) -> IResult + fn other(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Slice> - + nom::Slice> - + nom::Offset - + Clone - + PartialEq - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { recognize(tuple(( satisfy(|c| !is_xid_continue(c)), @@ -241,19 +207,11 @@ mod parser { )))(input) } - fn ordinal_literal(input: T) -> IResult + fn ordinal_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { fn is_sep(c: impl AsChar) -> bool { let c = c.as_char(); @@ -274,25 +232,20 @@ mod parser { )))(input) } - fn dec_literal(input: T) -> IResult + fn dec_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition + std::fmt::Debug, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { take_while1(is_dec_digit_with_sep)(input) } - fn hex_literal(input: T) -> IResult + fn hex_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { preceded( pair(char('0'), alt((char('x'), char('X')))), @@ -300,21 +253,11 @@ mod parser { )(input) } - fn css_color(input: T) -> IResult + fn css_color(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + Default - + PartialEq - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { preceded( char('#'), @@ -325,19 +268,11 @@ mod parser { )(input) } - fn uuid_literal(input: T) -> IResult + fn uuid_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { recognize(alt(( tuple(( @@ -365,19 +300,11 @@ mod parser { )))(input) } - fn hash_literal(input: T) -> IResult + fn hash_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { // Size considerations: // - 40 characters holds for a SHA-1 hash from older Git versions. @@ -396,19 +323,11 @@ mod parser { ))(input) } - fn base64_literal(input: T) -> IResult + fn base64_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { let (padding, captured) = take_while1(is_base64_digit)(input.clone())?; @@ -419,37 +338,30 @@ mod parser { padding_len = 0; } - if captured.input_len() < 90 + if captured.slice_len() < 90 && padding_len == 0 && captured - .iter_elements() + .as_bstr() + .iter() .all(|c| !['/', '+'].contains(&c.as_char())) { - return Err(nom::Err::Error(nom::error::Error::new( + return Err(winnow::Err::Backtrack(winnow::error::Error::new( input, - nom::error::ErrorKind::LengthValue, + winnow::error::ErrorKind::LengthValue, ))); } let (after, _) = take_while_m_n(padding_len, padding_len, is_base64_padding)(padding)?; let after_offset = input.offset(&after); - Ok(input.take_split(after_offset)) + Ok(input.next_slice(after_offset)) } - fn email_literal(input: T) -> IResult + fn email_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { recognize(tuple(( take_while1(is_localport_char), @@ -458,19 +370,11 @@ mod parser { )))(input) } - fn url_literal(input: T) -> IResult + fn url_literal(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { recognize(tuple(( opt(terminated( @@ -490,19 +394,11 @@ mod parser { )))(input) } - fn url_userinfo(input: T) -> IResult + fn url_userinfo(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { recognize(tuple(( take_while1(is_localport_char), @@ -510,19 +406,11 @@ mod parser { )))(input) } - fn c_escape(input: T) -> IResult + fn c_escape(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { // We don't know whether the string we are parsing is a literal string (no escaping) or // regular string that does escaping. The escaped letter might be part of a word, or it @@ -531,53 +419,26 @@ mod parser { preceded(take_while1(is_escape), take_while(is_xid_continue))(input) } - fn printf(input: T) -> IResult + fn printf(input: T) -> IResult::Slice> where - T: nom::InputTakeAtPosition - + nom::InputTake - + nom::InputIter - + nom::InputLength - + nom::Offset - + nom::Slice> - + nom::Slice> - + Clone - + std::fmt::Debug, - ::Item: AsChar + Copy, - ::Item: AsChar + Copy, + T: Stream + PartialEq, + ::Slice: AsBStr + SliceLen + Default, + ::Token: AsChar + Copy, { preceded(char('%'), take_while1(is_xid_continue))(input) } - fn take_many0(mut f: F) -> impl FnMut(I) -> IResult + fn take_many0(mut f: F) -> impl FnMut(I) -> IResult::Slice, E> where - I: nom::Offset + nom::InputTake + Clone + PartialEq + std::fmt::Debug, - F: nom::Parser, - E: nom::error::ParseError, + I: Stream, + F: winnow::Parser::Slice, E>, + E: winnow::error::ParseError, { move |i: I| { - let mut current = i.clone(); - loop { - match f.parse(current.clone()) { - Err(nom::Err::Error(_)) => { - let offset = i.offset(¤t); - let (after, before) = i.take_split(offset); - return Ok((after, before)); - } - Err(e) => { - return Err(e); - } - Ok((next, _)) => { - if next == current { - return Err(nom::Err::Error(E::from_error_kind( - i, - nom::error::ErrorKind::Many0, - ))); - } - - current = next; - } - } - } + winnow::multi::many0(f.by_ref()) + .map(|()| ()) + .recognize() + .parse_next(i) } } diff --git a/crates/varcon-core/Cargo.toml b/crates/varcon-core/Cargo.toml index 31a2e49..20ab866 100644 --- a/crates/varcon-core/Cargo.toml +++ b/crates/varcon-core/Cargo.toml @@ -12,11 +12,11 @@ include.workspace = true [features] default = [] -parser = ["nom"] +parser = ["winnow"] flags = ["enumflags2"] [dependencies] -nom = { version = "7", optional = true } +winnow = { version = "0.3.0", optional = true } enumflags2 = { version = "0.7", optional = true } [package.metadata.docs.rs] diff --git a/crates/varcon-core/src/parser.rs b/crates/varcon-core/src/parser.rs index 1be40b3..5fcbc8b 100644 --- a/crates/varcon-core/src/parser.rs +++ b/crates/varcon-core/src/parser.rs @@ -1,6 +1,6 @@ -use nom::IResult; -use nom::InputTakeAtPosition; -use nom::Parser; +use winnow::stream::Stream; +use winnow::IResult; +use winnow::Parser; use crate::*; @@ -64,31 +64,31 @@ A Cv: acknowledgment's / Av B C: acknowledgement's impl Cluster { pub fn parse(input: &str) -> IResult<&str, Self> { - let header = nom::sequence::tuple(( - nom::bytes::streaming::tag("#"), - nom::character::streaming::space0, - nom::character::streaming::not_line_ending, - nom::character::streaming::line_ending, + let header = winnow::sequence::tuple(( + winnow::bytes::streaming::tag("#"), + winnow::character::streaming::space0, + winnow::character::streaming::not_line_ending, + winnow::character::streaming::line_ending, )); - let note = nom::sequence::preceded( - nom::sequence::pair( - nom::bytes::streaming::tag("##"), - nom::character::streaming::space0, + let note = winnow::sequence::preceded( + winnow::sequence::pair( + winnow::bytes::streaming::tag("##"), + winnow::character::streaming::space0, ), - nom::sequence::terminated( - nom::character::streaming::not_line_ending, - nom::character::streaming::line_ending, + winnow::sequence::terminated( + winnow::character::streaming::not_line_ending, + winnow::character::streaming::line_ending, ), ); - let mut cluster = nom::sequence::tuple(( - nom::combinator::opt(header), - nom::multi::many1(nom::sequence::terminated( + let mut cluster = winnow::sequence::tuple(( + winnow::combinator::opt(header), + winnow::multi::many1(winnow::sequence::terminated( Entry::parse, - nom::character::streaming::line_ending, + winnow::character::streaming::line_ending, )), - nom::multi::many0(note), + winnow::multi::many0(note), )); - let (input, (header, entries, notes)) = (cluster)(input)?; + let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = (cluster)(input)?; let header = header.map(|s| s.2.to_owned()); let notes = notes.into_iter().map(|s| s.to_owned()).collect(); @@ -150,28 +150,30 @@ A B C: coloration's / B. Cv: colouration's impl Entry { pub fn parse(input: &str) -> IResult<&str, Self> { - let var_sep = nom::sequence::tuple(( - nom::character::streaming::space0, - nom::bytes::streaming::tag("/"), - nom::character::streaming::space0, + let var_sep = winnow::sequence::tuple(( + winnow::character::streaming::space0, + winnow::bytes::streaming::tag("/"), + winnow::character::streaming::space0, )); - let (input, variants) = nom::multi::separated_list1(var_sep, Variant::parse)(input)?; + let (input, variants) = winnow::multi::separated_list1(var_sep, Variant::parse)(input)?; - let desc_sep = nom::sequence::tuple(( - nom::character::streaming::space0, - nom::bytes::streaming::tag("|"), + let desc_sep = winnow::sequence::tuple(( + winnow::character::streaming::space0, + winnow::bytes::streaming::tag("|"), )); - let (input, description) = - nom::combinator::opt(nom::sequence::tuple((desc_sep, Self::parse_description)))(input)?; + let (input, description) = winnow::combinator::opt(winnow::sequence::tuple(( + desc_sep, + Self::parse_description, + )))(input)?; - let comment_sep = nom::sequence::tuple(( - nom::character::streaming::space0, - nom::bytes::streaming::tag("#"), + let comment_sep = winnow::sequence::tuple(( + winnow::character::streaming::space0, + winnow::bytes::streaming::tag("#"), )); - let (input, comment) = nom::combinator::opt(nom::sequence::tuple(( + let (input, comment) = winnow::combinator::opt(winnow::sequence::tuple(( comment_sep, - nom::character::streaming::space1, - nom::character::streaming::not_line_ending, + winnow::character::streaming::space1, + winnow::character::streaming::not_line_ending, )))(input)?; let mut e = match description { @@ -191,22 +193,22 @@ impl Entry { } fn parse_description(input: &str) -> IResult<&str, Self> { - let (input, (pos, archaic, note, description)) = nom::sequence::tuple(( - nom::combinator::opt(nom::sequence::tuple(( - nom::character::streaming::space1, + let (input, (pos, archaic, note, description)) = winnow::sequence::tuple(( + winnow::combinator::opt(winnow::sequence::tuple(( + winnow::character::streaming::space1, Pos::parse, ))), - nom::combinator::opt(nom::sequence::tuple(( - nom::character::streaming::space1, - nom::bytes::streaming::tag("(-)"), + winnow::combinator::opt(winnow::sequence::tuple(( + winnow::character::streaming::space1, + winnow::bytes::streaming::tag("(-)"), ))), - nom::combinator::opt(nom::sequence::tuple(( - nom::character::streaming::space1, - nom::bytes::streaming::tag("--"), + winnow::combinator::opt(winnow::sequence::tuple(( + winnow::character::streaming::space1, + winnow::bytes::streaming::tag("--"), ))), - nom::combinator::opt(nom::sequence::tuple(( - nom::character::streaming::space1, - nom::bytes::streaming::take_till(|c| c == '\n' || c == '\r' || c == '#'), + winnow::combinator::opt(winnow::sequence::tuple(( + winnow::character::streaming::space1, + winnow::bytes::streaming::take_till(|c| c == '\n' || c == '\r' || c == '#'), ))), ))(input)?; @@ -319,24 +321,22 @@ mod test_entry { impl Variant { pub fn parse(input: &str) -> IResult<&str, Self> { - let types = nom::multi::separated_list1(nom::character::streaming::space1, Type::parse); - let sep = nom::sequence::tuple(( - nom::bytes::streaming::tag(":"), - nom::character::streaming::space0, + let types = + winnow::multi::separated_list1(winnow::character::streaming::space1, Type::parse); + let sep = winnow::sequence::tuple(( + winnow::bytes::streaming::tag(":"), + winnow::character::streaming::space0, )); - let (input, (types, word)) = nom::sequence::separated_pair(types, sep, word)(input)?; + let (input, (types, word)) = winnow::sequence::separated_pair(types, sep, word)(input)?; let v = Self { types, word }; Ok((input, v)) } } fn word(input: &str) -> IResult<&str, String> { - input - .split_at_position1( - |item| item.is_ascii_whitespace(), - nom::error::ErrorKind::Alpha, - ) - .map(|(i, s)| (i, s.to_owned().replace('_', " "))) + winnow::bytes::take_till1(|item: char| item.is_ascii_whitespace()) + .map(|s: &str| s.to_owned().replace('_', " ")) + .parse_next(input) } #[cfg(test)] @@ -409,8 +409,8 @@ mod test_variant { impl Type { pub fn parse(input: &str) -> IResult<&str, Type> { let (input, category) = Category::parse(input)?; - let (input, tag) = nom::combinator::opt(Tag::parse)(input)?; - let (input, num) = nom::combinator::opt(nom::character::streaming::digit1)(input)?; + let (input, tag) = winnow::combinator::opt(Tag::parse)(input)?; + let (input, num) = winnow::combinator::opt(winnow::character::streaming::digit1)(input)?; let num = num.map(|s| s.parse().expect("parser ensured its a number")); let t = Type { category, tag, num }; Ok((input, t)) @@ -465,8 +465,8 @@ mod test_type { impl Category { pub fn parse(input: &str) -> IResult<&str, Category> { - let symbols = nom::character::streaming::one_of("ABZCD_"); - nom::combinator::map(symbols, |c| match c { + let symbols = winnow::character::streaming::one_of("ABZCD_"); + winnow::combinator::map(symbols, |c| match c { 'A' => Category::American, 'B' => Category::BritishIse, 'Z' => Category::BritishIze, @@ -499,8 +499,8 @@ mod test_category { impl Tag { pub fn parse(input: &str) -> IResult<&str, Tag> { - let symbols = nom::character::streaming::one_of(".vV-x"); - nom::combinator::map(symbols, |c| match c { + let symbols = winnow::character::streaming::one_of(".vV-x"); + winnow::combinator::map(symbols, |c| match c { '.' => Tag::Eq, 'v' => Tag::Variant, 'V' => Tag::Seldom, @@ -532,12 +532,12 @@ mod test_tag { impl Pos { pub fn parse(input: &str) -> IResult<&str, Pos> { - use nom::bytes::streaming::tag; + use winnow::bytes::streaming::tag; let noun = tag(""); let verb = tag(""); let adjective = tag(""); let adverb = tag(""); - nom::branch::alt(( + winnow::branch::alt(( noun.map(|_| Pos::Noun), verb.map(|_| Pos::Verb), adjective.map(|_| Pos::Adjective),