Merge pull request #664 from epage/winnow

refactor: Switch to winnow
This commit is contained in:
Ed Page 2023-02-22 11:25:02 -06:00 committed by GitHub
commit 5fc5101baf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 237 additions and 408 deletions

29
Cargo.lock generated
View file

@ -945,12 +945,6 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]] [[package]]
name = "miniz_oxide" name = "miniz_oxide"
version = "0.5.4" version = "0.5.4"
@ -972,16 +966,6 @@ dependencies = [
"unicase", "unicase",
] ]
[[package]]
name = "nom"
version = "7.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]] [[package]]
name = "nom8" name = "nom8"
version = "0.2.0" version = "0.2.0"
@ -1622,13 +1606,13 @@ dependencies = [
"anyhow", "anyhow",
"bstr 1.1.0", "bstr 1.1.0",
"itertools", "itertools",
"nom",
"once_cell", "once_cell",
"serde", "serde",
"simdutf8", "simdutf8",
"thiserror", "thiserror",
"unicode-segmentation", "unicode-segmentation",
"unicode-xid", "unicode-xid",
"winnow",
] ]
[[package]] [[package]]
@ -1802,7 +1786,7 @@ name = "varcon-core"
version = "2.2.7" version = "2.2.7"
dependencies = [ dependencies = [
"enumflags2", "enumflags2",
"nom", "winnow",
] ]
[[package]] [[package]]
@ -2000,6 +1984,15 @@ version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5"
[[package]]
name = "winnow"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efdd927d1a3d5d98abcfc4cf8627371862ee6abfe52a988050621c50c66b4493"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "yansi" name = "yansi"
version = "0.5.1" version = "0.5.1"

View file

@ -14,7 +14,7 @@ include.workspace = true
[dependencies] [dependencies]
anyhow = "1.0" anyhow = "1.0"
thiserror = "1.0" thiserror = "1.0"
nom = "7.1" winnow = "0.3.0"
unicode-xid = "0.2.4" unicode-xid = "0.2.4"
once_cell = "1.17.0" once_cell = "1.17.0"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }

View file

@ -125,36 +125,31 @@ impl<'s> Iterator for Utf8Chunks<'s> {
} }
mod parser { mod parser {
use nom::branch::*; use winnow::branch::*;
use nom::bytes::complete::*; use winnow::bytes::*;
use nom::character::complete::*; use winnow::combinator::*;
use nom::combinator::*; use winnow::prelude::*;
use nom::sequence::*; use winnow::sequence::*;
use nom::{AsChar, IResult}; use winnow::stream::AsBStr;
use winnow::stream::AsChar;
use winnow::stream::SliceLen;
use winnow::stream::Stream;
use winnow::stream::StreamIsPartial;
pub(crate) fn next_identifier<T>(input: T) -> IResult<T, T> pub(crate) fn next_identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Slice<std::ops::RangeFrom<usize>>
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Offset
+ Clone
+ Default
+ PartialEq
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
preceded(ignore, identifier)(input) preceded(ignore, identifier)(input)
} }
fn identifier<T>(input: T) -> IResult<T, T> fn identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition + std::fmt::Debug, T: Stream + StreamIsPartial + PartialEq,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{ {
// Generally a language would be `{XID_Start}{XID_Continue}*` but going with only // Generally a language would be `{XID_Start}{XID_Continue}*` but going with only
// `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd // `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
@ -163,21 +158,11 @@ mod parser {
take_while1(is_xid_continue)(input) take_while1(is_xid_continue)(input)
} }
fn ignore<T>(input: T) -> IResult<T, T> fn ignore<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Slice<std::ops::RangeFrom<usize>>
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Offset
+ Clone
+ Default
+ PartialEq
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
take_many0(alt(( take_many0(alt((
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`, // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
@ -198,62 +183,34 @@ mod parser {
)))(input) )))(input)
} }
fn sep1<T>(input: T) -> IResult<T, T> fn sep1<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Slice<std::ops::RangeFrom<usize>>
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Offset
+ Clone
+ Default
+ PartialEq
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
alt(( alt((
recognize(satisfy(|c| !is_xid_continue(c))), one_of(|c| !is_xid_continue(c)).recognize(),
map(eof, |_| T::default()), eof.map(|_| <T as Stream>::Slice::default()),
))(input) ))(input)
} }
fn other<T>(input: T) -> IResult<T, T> fn other<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Slice<std::ops::RangeFrom<usize>>
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Offset
+ Clone
+ PartialEq
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
recognize(tuple(( (one_of(|c| !is_xid_continue(c)), take_while0(is_ignore_char))
satisfy(|c| !is_xid_continue(c)), .recognize()
take_while(is_ignore_char), .parse_next(input)
)))(input)
} }
fn ordinal_literal<T>(input: T) -> IResult<T, T> fn ordinal_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Offset
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Slice<std::ops::RangeFrom<usize>>
+ Clone
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
fn is_sep(c: impl AsChar) -> bool { fn is_sep(c: impl AsChar) -> bool {
let c = c.as_char(); let c = c.as_char();
@ -261,63 +218,42 @@ mod parser {
['_'].contains(&c) ['_'].contains(&c)
} }
recognize(tuple(( (
take_while(is_sep), take_while0(is_sep),
take_while1(is_dec_digit), take_while1(is_dec_digit),
alt(( alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
pair(char('s'), char('t')), take_while0(is_sep),
pair(char('n'), char('d')), )
pair(char('r'), char('d')), .recognize()
pair(char('t'), char('h')), .parse_next(input)
)),
take_while(is_sep),
)))(input)
} }
fn dec_literal<T>(input: T) -> IResult<T, T> fn dec_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition + std::fmt::Debug, T: Stream + StreamIsPartial + PartialEq,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{ {
take_while1(is_dec_digit_with_sep)(input) take_while1(is_dec_digit_with_sep)(input)
} }
fn hex_literal<T>(input: T) -> IResult<T, T> fn hex_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Slice<std::ops::RangeFrom<usize>>
+ Clone
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
preceded( preceded(('0', alt(('x', 'X'))), take_while1(is_hex_digit_with_sep))(input)
pair(char('0'), alt((char('x'), char('X')))),
take_while1(is_hex_digit_with_sep),
)(input)
} }
fn css_color<T>(input: T) -> IResult<T, T> fn css_color<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Offset
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Slice<std::ops::RangeFrom<usize>>
+ Clone
+ Default
+ PartialEq
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
preceded( preceded(
char('#'), '#',
alt(( alt((
terminated(take_while_m_n(3, 8, is_lower_hex_digit), peek(sep1)), terminated(take_while_m_n(3, 8, is_lower_hex_digit), peek(sep1)),
terminated(take_while_m_n(3, 8, is_upper_hex_digit), peek(sep1)), terminated(take_while_m_n(3, 8, is_upper_hex_digit), peek(sep1)),
@ -325,59 +261,45 @@ mod parser {
)(input) )(input)
} }
fn uuid_literal<T>(input: T) -> IResult<T, T> fn uuid_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Offset
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Slice<std::ops::RangeFrom<usize>>
+ Clone
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
recognize(alt(( alt((
tuple(( (
take_while_m_n(8, 8, is_lower_hex_digit), take_while_m_n(8, 8, is_lower_hex_digit),
char('-'), '-',
take_while_m_n(4, 4, is_lower_hex_digit), take_while_m_n(4, 4, is_lower_hex_digit),
char('-'), '-',
take_while_m_n(4, 4, is_lower_hex_digit), take_while_m_n(4, 4, is_lower_hex_digit),
char('-'), '-',
take_while_m_n(4, 4, is_lower_hex_digit), take_while_m_n(4, 4, is_lower_hex_digit),
char('-'), '-',
take_while_m_n(12, 12, is_lower_hex_digit), take_while_m_n(12, 12, is_lower_hex_digit),
)), ),
tuple(( (
take_while_m_n(8, 8, is_upper_hex_digit), take_while_m_n(8, 8, is_upper_hex_digit),
char('-'), '-',
take_while_m_n(4, 4, is_upper_hex_digit), take_while_m_n(4, 4, is_upper_hex_digit),
char('-'), '-',
take_while_m_n(4, 4, is_upper_hex_digit), take_while_m_n(4, 4, is_upper_hex_digit),
char('-'), '-',
take_while_m_n(4, 4, is_upper_hex_digit), take_while_m_n(4, 4, is_upper_hex_digit),
char('-'), '-',
take_while_m_n(12, 12, is_upper_hex_digit), take_while_m_n(12, 12, is_upper_hex_digit),
)), ),
)))(input) ))
.recognize()
.parse_next(input)
} }
fn hash_literal<T>(input: T) -> IResult<T, T> fn hash_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Offset
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Slice<std::ops::RangeFrom<usize>>
+ Clone
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
// Size considerations: // Size considerations:
// - 40 characters holds for a SHA-1 hash from older Git versions. // - 40 characters holds for a SHA-1 hash from older Git versions.
@ -396,188 +318,127 @@ mod parser {
))(input) ))(input)
} }
fn base64_literal<T>(input: T) -> IResult<T, T> fn base64_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Offset
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Slice<std::ops::RangeFrom<usize>>
+ Clone
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
let (padding, captured) = take_while1(is_base64_digit)(input.clone())?; let (padding, captured) = take_while1(is_base64_digit)(input.clone())?;
const CHUNK: usize = 4; const CHUNK: usize = 4;
let padding_offset = input.offset(&padding); let padding_offset = input.offset_to(&padding);
let mut padding_len = CHUNK - padding_offset % CHUNK; let mut padding_len = CHUNK - padding_offset % CHUNK;
if padding_len == CHUNK { if padding_len == CHUNK {
padding_len = 0; padding_len = 0;
} }
if captured.input_len() < 90 if captured.slice_len() < 90
&& padding_len == 0 && padding_len == 0
&& captured && captured
.iter_elements() .as_bstr()
.iter()
.all(|c| !['/', '+'].contains(&c.as_char())) .all(|c| !['/', '+'].contains(&c.as_char()))
{ {
return Err(nom::Err::Error(nom::error::Error::new( return Err(winnow::error::ErrMode::Backtrack(
input, winnow::error::Error::new(input, winnow::error::ErrorKind::LengthValue),
nom::error::ErrorKind::LengthValue, ));
)));
} }
let (after, _) = take_while_m_n(padding_len, padding_len, is_base64_padding)(padding)?; let (after, _) = take_while_m_n(padding_len, padding_len, is_base64_padding)(padding)?;
let after_offset = input.offset(&after); let after_offset = input.offset_to(&after);
Ok(input.take_split(after_offset)) Ok(input.next_slice(after_offset))
} }
fn email_literal<T>(input: T) -> IResult<T, T> fn email_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Offset
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Slice<std::ops::RangeFrom<usize>>
+ Clone
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
recognize(tuple(( (
take_while1(is_localport_char), take_while1(is_localport_char),
char('@'), '@',
take_while1(is_domain_char), take_while1(is_domain_char),
)))(input) )
.recognize()
.parse_next(input)
} }
fn url_literal<T>(input: T) -> IResult<T, T> fn url_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Offset
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Slice<std::ops::RangeFrom<usize>>
+ Clone
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
recognize(tuple(( (
opt(terminated( opt(terminated(
take_while1(is_scheme_char), take_while1(is_scheme_char),
// HACK: Technically you can skip `//` if you don't have a domain but that would // HACK: Technically you can skip `//` if you don't have a domain but that would
// get messy to support. // get messy to support.
tuple((char(':'), char('/'), char('/'))), (':', '/', '/'),
)), )),
tuple(( (
opt(terminated(url_userinfo, char('@'))), opt(terminated(url_userinfo, '@')),
take_while1(is_domain_char), take_while1(is_domain_char),
opt(preceded(char(':'), take_while1(AsChar::is_dec_digit))), opt(preceded(':', take_while1(AsChar::is_dec_digit))),
)), ),
char('/'), '/',
// HACK: Too lazy to enumerate // HACK: Too lazy to enumerate
take_while(is_path_query_fragment), take_while0(is_path_query_fragment),
)))(input) )
.recognize()
.parse_next(input)
} }
fn url_userinfo<T>(input: T) -> IResult<T, T> fn url_userinfo<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Offset
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Slice<std::ops::RangeFrom<usize>>
+ Clone
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
recognize(tuple(( (
take_while1(is_localport_char), take_while1(is_localport_char),
opt(preceded(char(':'), take_while(is_localport_char))), opt(preceded(':', take_while0(is_localport_char))),
)))(input) )
.recognize()
.parse_next(input)
} }
fn c_escape<T>(input: T) -> IResult<T, T> fn c_escape<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Offset
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Slice<std::ops::RangeFrom<usize>>
+ Clone
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
// We don't know whether the string we are parsing is a literal string (no escaping) or // We don't know whether the string we are parsing is a literal string (no escaping) or
// regular string that does escaping. The escaped letter might be part of a word, or it // regular string that does escaping. The escaped letter might be part of a word, or it
// might not be. Rather than guess and be wrong part of the time and correct people's words // might not be. Rather than guess and be wrong part of the time and correct people's words
// incorrectly, we opt for just not evaluating it at all. // incorrectly, we opt for just not evaluating it at all.
preceded(take_while1(is_escape), take_while(is_xid_continue))(input) preceded(take_while1(is_escape), take_while0(is_xid_continue))(input)
} }
fn printf<T>(input: T) -> IResult<T, T> fn printf<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where where
T: nom::InputTakeAtPosition T: Stream + StreamIsPartial + PartialEq,
+ nom::InputTake <T as Stream>::Slice: AsBStr + SliceLen + Default,
+ nom::InputIter <T as Stream>::Token: AsChar + Copy,
+ nom::InputLength
+ nom::Offset
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Slice<std::ops::RangeFrom<usize>>
+ Clone
+ std::fmt::Debug,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{ {
preceded(char('%'), take_while1(is_xid_continue))(input) preceded('%', take_while1(is_xid_continue))(input)
} }
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, I, E> fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, <I as Stream>::Slice, E>
where where
I: nom::Offset + nom::InputTake + Clone + PartialEq + std::fmt::Debug, I: Stream,
F: nom::Parser<I, I, E>, F: winnow::Parser<I, <I as Stream>::Slice, E>,
E: nom::error::ParseError<I>, E: winnow::error::ParseError<I>,
{ {
move |i: I| { move |i: I| {
let mut current = i.clone(); winnow::multi::many0(f.by_ref())
loop { .map(|()| ())
match f.parse(current.clone()) { .recognize()
Err(nom::Err::Error(_)) => { .parse_next(i)
let offset = i.offset(&current);
let (after, before) = i.take_split(offset);
return Ok((after, before));
}
Err(e) => {
return Err(e);
}
Ok((next, _)) => {
if next == current {
return Err(nom::Err::Error(E::from_error_kind(
i,
nom::error::ErrorKind::Many0,
)));
}
current = next;
}
}
}
} }
} }

View file

@ -12,11 +12,11 @@ include.workspace = true
[features] [features]
default = [] default = []
parser = ["nom"] parser = ["winnow"]
flags = ["enumflags2"] flags = ["enumflags2"]
[dependencies] [dependencies]
nom = { version = "7", optional = true } winnow = { version = "0.3.0", optional = true }
enumflags2 = { version = "0.7", optional = true } enumflags2 = { version = "0.7", optional = true }
[package.metadata.docs.rs] [package.metadata.docs.rs]

View file

@ -1,6 +1,4 @@
use nom::IResult; use winnow::prelude::*;
use nom::InputTakeAtPosition;
use nom::Parser;
use crate::*; use crate::*;
@ -64,31 +62,28 @@ A Cv: acknowledgment's / Av B C: acknowledgement's
impl Cluster { impl Cluster {
pub fn parse(input: &str) -> IResult<&str, Self> { pub fn parse(input: &str) -> IResult<&str, Self> {
let header = nom::sequence::tuple(( let header = (
nom::bytes::streaming::tag("#"), winnow::bytes::tag("#"),
nom::character::streaming::space0, winnow::character::space0,
nom::character::streaming::not_line_ending, winnow::character::not_line_ending,
nom::character::streaming::line_ending, winnow::character::line_ending,
)); );
let note = nom::sequence::preceded( let note = winnow::sequence::preceded(
nom::sequence::pair( (winnow::bytes::tag("##"), winnow::character::space0),
nom::bytes::streaming::tag("##"), winnow::sequence::terminated(
nom::character::streaming::space0, winnow::character::not_line_ending,
), winnow::character::line_ending,
nom::sequence::terminated(
nom::character::streaming::not_line_ending,
nom::character::streaming::line_ending,
), ),
); );
let mut cluster = nom::sequence::tuple(( let mut cluster = (
nom::combinator::opt(header), winnow::combinator::opt(header),
nom::multi::many1(nom::sequence::terminated( winnow::multi::many1(winnow::sequence::terminated(
Entry::parse, Entry::parse,
nom::character::streaming::line_ending, winnow::character::line_ending,
)), )),
nom::multi::many0(note), winnow::multi::many0(note),
)); );
let (input, (header, entries, notes)) = (cluster)(input)?; let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = cluster.parse_next(input)?;
let header = header.map(|s| s.2.to_owned()); let header = header.map(|s| s.2.to_owned());
let notes = notes.into_iter().map(|s| s.to_owned()).collect(); let notes = notes.into_iter().map(|s| s.to_owned()).collect();
@ -150,29 +145,19 @@ A B C: coloration's / B. Cv: colouration's
impl Entry { impl Entry {
pub fn parse(input: &str) -> IResult<&str, Self> { pub fn parse(input: &str) -> IResult<&str, Self> {
let var_sep = nom::sequence::tuple(( let var_sep = (winnow::character::space0, '/', winnow::character::space0);
nom::character::streaming::space0, let (input, variants) = winnow::multi::separated1(Variant::parse, var_sep)(input)?;
nom::bytes::streaming::tag("/"),
nom::character::streaming::space0,
));
let (input, variants) = nom::multi::separated_list1(var_sep, Variant::parse)(input)?;
let desc_sep = nom::sequence::tuple(( let desc_sep = (winnow::character::space0, '|');
nom::character::streaming::space0,
nom::bytes::streaming::tag("|"),
));
let (input, description) = let (input, description) =
nom::combinator::opt(nom::sequence::tuple((desc_sep, Self::parse_description)))(input)?; winnow::combinator::opt((desc_sep, Self::parse_description))(input)?;
let comment_sep = nom::sequence::tuple(( let comment_sep = (winnow::character::space0, '#');
nom::character::streaming::space0, let (input, comment) = winnow::combinator::opt((
nom::bytes::streaming::tag("#"),
));
let (input, comment) = nom::combinator::opt(nom::sequence::tuple((
comment_sep, comment_sep,
nom::character::streaming::space1, winnow::character::space1,
nom::character::streaming::not_line_ending, winnow::character::not_line_ending,
)))(input)?; ))(input)?;
let mut e = match description { let mut e = match description {
Some((_, description)) => description, Some((_, description)) => description,
@ -191,24 +176,16 @@ impl Entry {
} }
fn parse_description(input: &str) -> IResult<&str, Self> { fn parse_description(input: &str) -> IResult<&str, Self> {
let (input, (pos, archaic, note, description)) = nom::sequence::tuple(( let (input, (pos, archaic, note, description)) = (
nom::combinator::opt(nom::sequence::tuple(( winnow::combinator::opt((winnow::character::space1, Pos::parse)),
nom::character::streaming::space1, winnow::combinator::opt((winnow::character::space1, "(-)")),
Pos::parse, winnow::combinator::opt((winnow::character::space1, "--")),
))), winnow::combinator::opt((
nom::combinator::opt(nom::sequence::tuple(( winnow::character::space1,
nom::character::streaming::space1, winnow::bytes::take_till0(|c| c == '\n' || c == '\r' || c == '#'),
nom::bytes::streaming::tag("(-)"), )),
))), )
nom::combinator::opt(nom::sequence::tuple(( .parse_next(input)?;
nom::character::streaming::space1,
nom::bytes::streaming::tag("--"),
))),
nom::combinator::opt(nom::sequence::tuple((
nom::character::streaming::space1,
nom::bytes::streaming::take_till(|c| c == '\n' || c == '\r' || c == '#'),
))),
))(input)?;
let variants = Vec::new(); let variants = Vec::new();
let pos = pos.map(|(_, p)| p); let pos = pos.map(|(_, p)| p);
@ -319,24 +296,18 @@ mod test_entry {
impl Variant { impl Variant {
pub fn parse(input: &str) -> IResult<&str, Self> { pub fn parse(input: &str) -> IResult<&str, Self> {
let types = nom::multi::separated_list1(nom::character::streaming::space1, Type::parse); let types = winnow::multi::separated1(Type::parse, winnow::character::space1);
let sep = nom::sequence::tuple(( let sep = (winnow::bytes::tag(":"), winnow::character::space0);
nom::bytes::streaming::tag(":"), let (input, (types, word)) = winnow::sequence::separated_pair(types, sep, word)(input)?;
nom::character::streaming::space0,
));
let (input, (types, word)) = nom::sequence::separated_pair(types, sep, word)(input)?;
let v = Self { types, word }; let v = Self { types, word };
Ok((input, v)) Ok((input, v))
} }
} }
fn word(input: &str) -> IResult<&str, String> { fn word(input: &str) -> IResult<&str, String> {
input winnow::bytes::take_till1(|item: char| item.is_ascii_whitespace())
.split_at_position1( .map(|s: &str| s.to_owned().replace('_', " "))
|item| item.is_ascii_whitespace(), .parse_next(input)
nom::error::ErrorKind::Alpha,
)
.map(|(i, s)| (i, s.to_owned().replace('_', " ")))
} }
#[cfg(test)] #[cfg(test)]
@ -409,8 +380,8 @@ mod test_variant {
impl Type { impl Type {
pub fn parse(input: &str) -> IResult<&str, Type> { pub fn parse(input: &str) -> IResult<&str, Type> {
let (input, category) = Category::parse(input)?; let (input, category) = Category::parse(input)?;
let (input, tag) = nom::combinator::opt(Tag::parse)(input)?; let (input, tag) = winnow::combinator::opt(Tag::parse)(input)?;
let (input, num) = nom::combinator::opt(nom::character::streaming::digit1)(input)?; let (input, num) = winnow::combinator::opt(winnow::character::digit1)(input)?;
let num = num.map(|s| s.parse().expect("parser ensured its a number")); let num = num.map(|s| s.parse().expect("parser ensured its a number"));
let t = Type { category, tag, num }; let t = Type { category, tag, num };
Ok((input, t)) Ok((input, t))
@ -465,16 +436,18 @@ mod test_type {
impl Category { impl Category {
pub fn parse(input: &str) -> IResult<&str, Category> { pub fn parse(input: &str) -> IResult<&str, Category> {
let symbols = nom::character::streaming::one_of("ABZCD_"); let symbols = winnow::bytes::one_of("ABZCD_");
nom::combinator::map(symbols, |c| match c { symbols
'A' => Category::American, .map(|c| match c {
'B' => Category::BritishIse, 'A' => Category::American,
'Z' => Category::BritishIze, 'B' => Category::BritishIse,
'C' => Category::Canadian, 'Z' => Category::BritishIze,
'D' => Category::Australian, 'C' => Category::Canadian,
'_' => Category::Other, 'D' => Category::Australian,
_ => unreachable!("parser won't select this option"), '_' => Category::Other,
})(input) _ => unreachable!("parser won't select this option"),
})
.parse_next(input)
} }
} }
@ -499,15 +472,17 @@ mod test_category {
impl Tag { impl Tag {
pub fn parse(input: &str) -> IResult<&str, Tag> { pub fn parse(input: &str) -> IResult<&str, Tag> {
let symbols = nom::character::streaming::one_of(".vV-x"); let symbols = winnow::bytes::one_of(".vV-x");
nom::combinator::map(symbols, |c| match c { symbols
'.' => Tag::Eq, .map(|c| match c {
'v' => Tag::Variant, '.' => Tag::Eq,
'V' => Tag::Seldom, 'v' => Tag::Variant,
'-' => Tag::Possible, 'V' => Tag::Seldom,
'x' => Tag::Improper, '-' => Tag::Possible,
_ => unreachable!("parser won't select this option"), 'x' => Tag::Improper,
})(input) _ => unreachable!("parser won't select this option"),
})
.parse_next(input)
} }
} }
@ -532,16 +507,16 @@ mod test_tag {
impl Pos { impl Pos {
pub fn parse(input: &str) -> IResult<&str, Pos> { pub fn parse(input: &str) -> IResult<&str, Pos> {
use nom::bytes::streaming::tag; use winnow::bytes::tag;
let noun = tag("<N>"); let noun = tag("<N>");
let verb = tag("<V>"); let verb = tag("<V>");
let adjective = tag("<Adj>"); let adjective = tag("<Adj>");
let adverb = tag("<Adv>"); let adverb = tag("<Adv>");
nom::branch::alt(( winnow::branch::alt((
noun.map(|_| Pos::Noun), noun.value(Pos::Noun),
verb.map(|_| Pos::Verb), verb.value(Pos::Verb),
adjective.map(|_| Pos::Adjective), adjective.value(Pos::Adjective),
adverb.map(|_| Pos::Adverb), adverb.value(Pos::Adverb),
))(input) ))(input)
} }
} }