Merge pull request #780 from epage/winnow

refactor(typos): Upgrade to winnow 0.5
This commit is contained in:
Ed Page 2023-07-14 13:57:26 -05:00 committed by GitHub
commit 383d51ddf1
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 464 additions and 342 deletions

19
Cargo.lock generated
View file

@ -1610,7 +1610,7 @@ dependencies = [
"serde", "serde",
"serde_spanned", "serde_spanned",
"toml_datetime", "toml_datetime",
"winnow", "winnow 0.4.9",
] ]
[[package]] [[package]]
@ -1648,7 +1648,7 @@ dependencies = [
"thiserror", "thiserror",
"unicode-segmentation", "unicode-segmentation",
"unicode-xid", "unicode-xid",
"winnow", "winnow 0.5.0",
] ]
[[package]] [[package]]
@ -1831,7 +1831,7 @@ name = "varcon-core"
version = "2.2.12" version = "2.2.12"
dependencies = [ dependencies = [
"enumflags2", "enumflags2",
"winnow", "winnow 0.5.0",
] ]
[[package]] [[package]]
@ -2121,9 +2121,18 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
[[package]] [[package]]
name = "winnow" name = "winnow"
version = "0.4.6" version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699" checksum = "81a2094c43cc94775293eaa0e499fbc30048a6d824ac82c0351a8c0bf9112529"
dependencies = [
"memchr",
]
[[package]]
name = "winnow"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7"
dependencies = [ dependencies = [
"memchr", "memchr",
] ]

View file

@ -13,8 +13,8 @@ include.workspace = true
[features] [features]
default = ["std"] default = ["std"]
std = [] std = []
codegen = ["std", "phf_codegen"] codegen = ["std", "dep:phf_codegen"]
map = ["phf", "phf_shared"] map = ["dep:phf", "dep:phf_shared"]
[dependencies] [dependencies]
unicase = "2.6" unicase = "2.6"

View file

@ -32,8 +32,8 @@ pre-release-replacements = [
[features] [features]
default = ["dict", "vars"] default = ["dict", "vars"]
dict = ["typos-dict"] dict = ["dep:typos-dict"]
vars = ["typos-vars"] vars = ["dep:typos-vars"]
[[bin]] [[bin]]

View file

@ -14,7 +14,7 @@ include.workspace = true
[dependencies] [dependencies]
anyhow = "1.0" anyhow = "1.0"
thiserror = "1.0" thiserror = "1.0"
winnow = "0.4.6" winnow = "0.5.0"
unicode-xid = "0.2.4" unicode-xid = "0.2.4"
once_cell = "1.17.2" once_cell = "1.17.2"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }

View file

@ -1,4 +1,5 @@
use bstr::ByteSlice; use bstr::ByteSlice;
use winnow::BStr;
/// Define rules for tokenizing a buffer. /// Define rules for tokenizing a buffer.
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@ -48,7 +49,9 @@ impl Tokenizer {
let iter = if self.unicode && !ByteSlice::is_ascii(content.as_bytes()) { let iter = if self.unicode && !ByteSlice::is_ascii(content.as_bytes()) {
itertools::Either::Left(unicode_parser::iter_identifiers(content)) itertools::Either::Left(unicode_parser::iter_identifiers(content))
} else { } else {
itertools::Either::Right(ascii_parser::iter_identifiers(content.as_bytes())) itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new(
content.as_bytes(),
)))
}; };
iter.map(move |identifier| self.transform(identifier, content.as_bytes())) iter.map(move |identifier| self.transform(identifier, content.as_bytes()))
} }
@ -58,7 +61,7 @@ impl Tokenizer {
let iter = Utf8Chunks::new(content).flat_map(unicode_parser::iter_identifiers); let iter = Utf8Chunks::new(content).flat_map(unicode_parser::iter_identifiers);
itertools::Either::Left(iter) itertools::Either::Left(iter)
} else { } else {
itertools::Either::Right(ascii_parser::iter_identifiers(content)) itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new(content)))
}; };
iter.map(move |identifier| self.transform(identifier, content)) iter.map(move |identifier| self.transform(identifier, content))
} }
@ -126,6 +129,7 @@ impl<'s> Iterator for Utf8Chunks<'s> {
mod parser { mod parser {
use winnow::combinator::*; use winnow::combinator::*;
use winnow::error::ParserError;
use winnow::prelude::*; use winnow::prelude::*;
use winnow::stream::AsBStr; use winnow::stream::AsBStr;
use winnow::stream::AsChar; use winnow::stream::AsChar;
@ -133,8 +137,9 @@ mod parser {
use winnow::stream::Stream; use winnow::stream::Stream;
use winnow::stream::StreamIsPartial; use winnow::stream::StreamIsPartial;
use winnow::token::*; use winnow::token::*;
use winnow::trace::trace;
pub(crate) fn next_identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice> pub(crate) fn next_identifier<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -143,7 +148,7 @@ mod parser {
preceded(ignore, identifier).parse_next(input) preceded(ignore, identifier).parse_next(input)
} }
fn identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn identifier<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -153,15 +158,17 @@ mod parser {
// `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd // `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
// or unexpected cases than strip off start characters to a word since we aren't doing a // or unexpected cases than strip off start characters to a word since we aren't doing a
// proper word boundary parse // proper word boundary parse
take_while(1.., is_xid_continue).parse_next(input) trace("identifier", take_while(1.., is_xid_continue)).parse_next(input)
} }
fn ignore<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn ignore<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
trace(
"ignore",
take_many0(alt(( take_many0(alt((
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`, // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up // - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
@ -178,11 +185,12 @@ mod parser {
c_escape, c_escape,
printf, printf,
other, other,
))) ))),
)
.parse_next(input) .parse_next(input)
} }
fn sep1<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn sep1<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -195,21 +203,24 @@ mod parser {
.parse_next(input) .parse_next(input)
} }
fn other<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn other<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
trace(
"other",
( (
one_of(|c| !is_xid_continue(c)), one_of(|c| !is_xid_continue(c)),
take_while(0.., is_ignore_char), take_while(0.., is_ignore_char),
) )
.recognize() .recognize(),
)
.parse_next(input) .parse_next(input)
} }
fn ordinal_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn ordinal_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -221,26 +232,29 @@ mod parser {
['_'].contains(&c) ['_'].contains(&c)
} }
trace(
"ordinal_literal",
( (
take_while(0.., is_sep), take_while(0.., is_sep),
take_while(1.., is_dec_digit), take_while(1.., is_dec_digit),
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))), alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
take_while(0.., is_sep), take_while(0.., is_sep),
) )
.recognize() .recognize(),
)
.parse_next(input) .parse_next(input)
} }
fn dec_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn dec_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
take_while(1.., is_dec_digit_with_sep).parse_next(input) trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input)
} }
fn hex_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn hex_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -253,28 +267,33 @@ mod parser {
.parse_next(input) .parse_next(input)
} }
fn css_color<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn css_color<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
trace(
"color",
preceded( preceded(
'#', '#',
alt(( alt((
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)), terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)), terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
)), )),
),
) )
.parse_next(input) .parse_next(input)
} }
fn uuid_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn uuid_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
trace(
"uuid",
alt(( alt((
( (
take_while(8, is_lower_hex_digit), take_while(8, is_lower_hex_digit),
@ -299,11 +318,12 @@ mod parser {
take_while(12, is_upper_hex_digit), take_while(12, is_upper_hex_digit),
), ),
)) ))
.recognize() .recognize(),
)
.parse_next(input) .parse_next(input)
} }
fn hash_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn hash_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -319,23 +339,28 @@ mod parser {
// or more. // or more.
const IGNORE_HEX_MIN: usize = 32; const IGNORE_HEX_MIN: usize = 32;
trace(
"hash",
alt(( alt((
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit), take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit), take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
)) )),
)
.parse_next(input) .parse_next(input)
} }
fn base64_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn base64_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?; trace("base64", move |input: &mut T| {
let start = input.checkpoint();
let captured = take_while(1.., is_base64_digit).parse_next(input)?;
const CHUNK: usize = 4; const CHUNK: usize = 4;
let padding_offset = input.offset_to(&padding); let padding_offset = input.offset_from(&start);
let mut padding_len = CHUNK - padding_offset % CHUNK; let mut padding_len = CHUNK - padding_offset % CHUNK;
if padding_len == CHUNK { if padding_len == CHUNK {
padding_len = 0; padding_len = 0;
@ -348,39 +373,47 @@ mod parser {
.iter() .iter()
.all(|c| !['/', '+'].contains(&c.as_char())) .all(|c| !['/', '+'].contains(&c.as_char()))
{ {
return Err(winnow::error::ErrMode::Backtrack( return Err(winnow::error::ErrMode::from_error_kind(
winnow::error::Error::new(input, winnow::error::ErrorKind::Slice), input,
winnow::error::ErrorKind::Slice,
)); ));
} }
let (after, _) = take_while(padding_len..=padding_len, is_base64_padding).parse_next(input)?;
take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
let after_offset = input.offset_to(&after); let after_offset = input.offset_from(&start);
input.reset(start);
Ok(input.next_slice(after_offset)) Ok(input.next_slice(after_offset))
})
.parse_next(input)
} }
fn email_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn email_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
trace(
"email",
( (
take_while(1.., is_localport_char), take_while(1.., is_localport_char),
'@', '@',
take_while(1.., is_domain_char), take_while(1.., is_domain_char),
) )
.recognize() .recognize(),
)
.parse_next(input) .parse_next(input)
} }
fn url_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn url_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
trace(
"url",
( (
opt(terminated( opt(terminated(
take_while(1.., is_scheme_char), take_while(1.., is_scheme_char),
@ -397,25 +430,29 @@ mod parser {
// HACK: Too lazy to enumerate // HACK: Too lazy to enumerate
take_while(0.., is_path_query_fragment), take_while(0.., is_path_query_fragment),
) )
.recognize() .recognize(),
)
.parse_next(input) .parse_next(input)
} }
fn url_userinfo<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn url_userinfo<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
trace(
"userinfo",
( (
take_while(1.., is_localport_char), take_while(1.., is_localport_char),
opt(preceded(':', take_while(0.., is_localport_char))), opt(preceded(':', take_while(0.., is_localport_char))),
) )
.recognize() .recognize(),
)
.parse_next(input) .parse_next(input)
} }
fn c_escape<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn c_escape<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -425,25 +462,29 @@ mod parser {
// regular string that does escaping. The escaped letter might be part of a word, or it // regular string that does escaping. The escaped letter might be part of a word, or it
// might not be. Rather than guess and be wrong part of the time and correct people's words // might not be. Rather than guess and be wrong part of the time and correct people's words
// incorrectly, we opt for just not evaluating it at all. // incorrectly, we opt for just not evaluating it at all.
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)).parse_next(input) trace(
"escape",
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)),
)
.parse_next(input)
} }
fn printf<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn printf<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
preceded('%', take_while(1.., is_xid_continue)).parse_next(input) trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input)
} }
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, <I as Stream>::Slice, E> fn take_many0<I, E, F>(mut f: F) -> impl Parser<I, <I as Stream>::Slice, E>
where where
I: Stream, I: Stream,
F: winnow::Parser<I, <I as Stream>::Slice, E>, F: Parser<I, <I as Stream>::Slice, E>,
E: winnow::error::ParseError<I>, E: ParserError<I>,
{ {
move |i: I| { move |i: &mut I| {
repeat(0.., f.by_ref()) repeat(0.., f.by_ref())
.map(|()| ()) .map(|()| ())
.recognize() .recognize()
@ -581,9 +622,8 @@ mod unicode_parser {
use super::parser::next_identifier; use super::parser::next_identifier;
pub(crate) fn iter_identifiers(mut input: &str) -> impl Iterator<Item = &str> { pub(crate) fn iter_identifiers(mut input: &str) -> impl Iterator<Item = &str> {
std::iter::from_fn(move || match next_identifier(input) { std::iter::from_fn(move || match next_identifier(&mut input) {
Ok((i, o)) => { Ok(o) => {
input = i;
debug_assert_ne!(o, ""); debug_assert_ne!(o, "");
Some(o) Some(o)
} }
@ -595,10 +635,11 @@ mod unicode_parser {
mod ascii_parser { mod ascii_parser {
use super::parser::next_identifier; use super::parser::next_identifier;
pub(crate) fn iter_identifiers(mut input: &[u8]) -> impl Iterator<Item = &str> { use winnow::BStr;
std::iter::from_fn(move || match next_identifier(input) {
Ok((i, o)) => { pub(crate) fn iter_identifiers(mut input: &BStr) -> impl Iterator<Item = &str> {
input = i; std::iter::from_fn(move || match next_identifier(&mut input) {
Ok(o) => {
debug_assert_ne!(o, b""); debug_assert_ne!(o, b"");
// This is safe because we've checked that the strings are a subset of ASCII // This is safe because we've checked that the strings are a subset of ASCII
// characters. // characters.

View file

@ -12,11 +12,11 @@ include.workspace = true
[features] [features]
default = [] default = []
parser = ["winnow"] parser = ["dep:winnow"]
flags = ["enumflags2"] flags = ["dep:enumflags2"]
[dependencies] [dependencies]
winnow = { version = "0.4.6", optional = true } winnow = { version = "0.5.0", optional = true }
enumflags2 = { version = "0.7", optional = true } enumflags2 = { version = "0.7", optional = true }
[package.metadata.docs.rs] [package.metadata.docs.rs]

View file

@ -1,4 +1,5 @@
use winnow::prelude::*; use winnow::prelude::*;
use winnow::trace::trace;
use crate::*; use crate::*;
@ -17,10 +18,8 @@ impl<'i> Iterator for ClusterIter<'i> {
type Item = Cluster; type Item = Cluster;
fn next(&mut self) -> Option<Cluster> { fn next(&mut self) -> Option<Cluster> {
let i = self.input.trim_start(); self.input = self.input.trim_start();
let (i, c) = Cluster::parse(i).ok()?; Cluster::parse_.parse_next(&mut self.input).ok()
self.input = i;
Some(c)
} }
} }
@ -61,29 +60,34 @@ A Cv: acknowledgment's / Av B C: acknowledgement's
} }
impl Cluster { impl Cluster {
pub fn parse(input: &str) -> IResult<&str, Self> { pub fn parse(input: &str) -> Result<Self, ParseError> {
Self::parse_.parse(input).map_err(|_err| ParseError)
}
fn parse_(input: &mut &str) -> PResult<Self, ()> {
trace("cluster", move |input: &mut &str| {
let header = ( let header = (
winnow::bytes::tag("#"), "#",
winnow::character::space0, winnow::ascii::space0,
winnow::character::not_line_ending, winnow::ascii::not_line_ending,
winnow::character::line_ending, winnow::ascii::line_ending,
); );
let note = winnow::sequence::preceded( let note = winnow::combinator::preceded(
(winnow::bytes::tag("##"), winnow::character::space0), ("##", winnow::ascii::space0),
winnow::sequence::terminated( winnow::combinator::terminated(
winnow::character::not_line_ending, winnow::ascii::not_line_ending,
winnow::character::line_ending, winnow::ascii::line_ending,
), ),
); );
let mut cluster = ( let mut cluster = (
winnow::combinator::opt(header), winnow::combinator::opt(header),
winnow::multi::many1(winnow::sequence::terminated( winnow::combinator::repeat(
Entry::parse, 1..,
winnow::character::line_ending, winnow::combinator::terminated(Entry::parse_, winnow::ascii::line_ending),
)), ),
winnow::multi::many0(note), winnow::combinator::repeat(0.., note),
); );
let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = cluster.parse_next(input)?; let (header, entries, notes): (_, _, Vec<_>) = cluster.parse_next(input)?;
let header = header.map(|s| s.2.to_owned()); let header = header.map(|s| s.2.to_owned());
let notes = notes.into_iter().map(|s| s.to_owned()).collect(); let notes = notes.into_iter().map(|s| s.to_owned()).collect();
@ -92,7 +96,9 @@ impl Cluster {
entries, entries,
notes, notes,
}; };
Ok((input, c)) Ok(c)
})
.parse_next(input)
} }
} }
@ -102,7 +108,8 @@ mod test_cluster {
#[test] #[test]
fn test_basic() { fn test_basic() {
let (input, actual) = Cluster::parse( let (input, actual) = Cluster::parse_
.parse_peek(
"# acknowledgment <verified> (level 35) "# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements A Cv: acknowledgments / Av B C: acknowledgements
@ -122,7 +129,8 @@ A Cv: acknowledgment's / Av B C: acknowledgement's
#[test] #[test]
fn test_notes() { fn test_notes() {
let (input, actual) = Cluster::parse( let (input, actual) = Cluster::parse_
.parse_peek(
"# coloration <verified> (level 50) "# coloration <verified> (level 50)
A B C: coloration / B. Cv: colouration A B C: coloration / B. Cv: colouration
A B C: colorations / B. Cv: colourations A B C: colorations / B. Cv: colourations
@ -144,20 +152,25 @@ A B C: coloration's / B. Cv: colouration's
} }
impl Entry { impl Entry {
pub fn parse(input: &str) -> IResult<&str, Self> { pub fn parse(input: &str) -> Result<Self, ParseError> {
let var_sep = (winnow::character::space0, '/', winnow::character::space0); Self::parse_.parse(input).map_err(|_err| ParseError)
let (input, variants) = }
winnow::multi::separated1(Variant::parse, var_sep).parse_next(input)?;
let desc_sep = (winnow::character::space0, '|'); fn parse_(input: &mut &str) -> PResult<Self, ()> {
let (input, description) = trace("entry", move |input: &mut &str| {
let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0);
let variants =
winnow::combinator::separated1(Variant::parse_, var_sep).parse_next(input)?;
let desc_sep = (winnow::ascii::space0, '|');
let description =
winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?; winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?;
let comment_sep = (winnow::character::space0, '#'); let comment_sep = (winnow::ascii::space0, '#');
let (input, comment) = winnow::combinator::opt(( let comment = winnow::combinator::opt((
comment_sep, comment_sep,
winnow::character::space1, winnow::ascii::space1,
winnow::character::not_line_ending, winnow::ascii::not_line_ending,
)) ))
.parse_next(input)?; .parse_next(input)?;
@ -174,17 +187,20 @@ impl Entry {
}; };
e.variants = variants; e.variants = variants;
e.comment = comment.map(|c| c.2.to_owned()); e.comment = comment.map(|c| c.2.to_owned());
Ok((input, e)) Ok(e)
})
.parse_next(input)
} }
fn parse_description(input: &str) -> IResult<&str, Self> { fn parse_description(input: &mut &str) -> PResult<Self, ()> {
let (input, (pos, archaic, note, description)) = ( trace("description", move |input: &mut &str| {
winnow::combinator::opt((winnow::character::space1, Pos::parse)), let (pos, archaic, note, description) = (
winnow::combinator::opt((winnow::character::space1, "(-)")), winnow::combinator::opt((winnow::ascii::space1, Pos::parse_)),
winnow::combinator::opt((winnow::character::space1, "--")), winnow::combinator::opt((winnow::ascii::space1, "(-)")),
winnow::combinator::opt((winnow::ascii::space1, "--")),
winnow::combinator::opt(( winnow::combinator::opt((
winnow::character::space1, winnow::ascii::space1,
winnow::bytes::take_till0(|c| c == '\n' || c == '\r' || c == '#'), winnow::token::take_till0(('\n', '\r', '#')),
)), )),
) )
.parse_next(input)?; .parse_next(input)?;
@ -202,7 +218,9 @@ impl Entry {
description, description,
comment: None, comment: None,
}; };
Ok((input, e)) Ok(e)
})
.parse_next(input)
} }
} }
@ -215,8 +233,9 @@ mod test_entry {
fn test_variant_only() { fn test_variant_only() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = let (input, actual) = Entry::parse_
Entry::parse("A Cv: acknowledgment's / Av B C: acknowledgement's\n").unwrap(); .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's\n")
.unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 2); assert_eq!(actual.variants.len(), 2);
assert_eq!(actual.pos, None); assert_eq!(actual.pos, None);
@ -229,7 +248,9 @@ mod test_entry {
fn test_description() { fn test_description() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = Entry::parse("A C: prize / B: prise | otherwise\n").unwrap(); let (input, actual) = Entry::parse_
.parse_peek("A C: prize / B: prise | otherwise\n")
.unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 2); assert_eq!(actual.variants.len(), 2);
assert_eq!(actual.pos, None); assert_eq!(actual.pos, None);
@ -242,7 +263,9 @@ mod test_entry {
fn test_pos() { fn test_pos() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = Entry::parse("A B C: practice / AV Cv: practise | <N>\n").unwrap(); let (input, actual) = Entry::parse_
.parse_peek("A B C: practice / AV Cv: practise | <N>\n")
.unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 2); assert_eq!(actual.variants.len(), 2);
assert_eq!(actual.pos, Some(Pos::Noun)); assert_eq!(actual.pos, Some(Pos::Noun));
@ -255,7 +278,9 @@ mod test_entry {
fn test_archaic() { fn test_archaic() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = Entry::parse("A: bark / Av B: barque | (-) ship\n").unwrap(); let (input, actual) = Entry::parse_
.parse_peek("A: bark / Av B: barque | (-) ship\n")
.unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 2); assert_eq!(actual.variants.len(), 2);
assert_eq!(actual.pos, None); assert_eq!(actual.pos, None);
@ -268,7 +293,9 @@ mod test_entry {
fn test_note() { fn test_note() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = Entry::parse("_: cabbies | -- plural\n").unwrap(); let (input, actual) = Entry::parse_
.parse_peek("_: cabbies | -- plural\n")
.unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 1); assert_eq!(actual.variants.len(), 1);
assert_eq!(actual.pos, None); assert_eq!(actual.pos, None);
@ -279,7 +306,7 @@ mod test_entry {
#[test] #[test]
fn test_trailing_comment() { fn test_trailing_comment() {
let (input, actual) = Entry::parse( let (input, actual) = Entry::parse_.parse_peek(
"A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n", "A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n",
) )
.unwrap(); .unwrap();
@ -297,20 +324,30 @@ mod test_entry {
} }
impl Variant { impl Variant {
pub fn parse(input: &str) -> IResult<&str, Self> { pub fn parse(input: &str) -> Result<Self, ParseError> {
let types = winnow::multi::separated1(Type::parse, winnow::character::space1); Self::parse_.parse(input).map_err(|_err| ParseError)
let sep = (winnow::bytes::tag(":"), winnow::character::space0); }
let (input, (types, word)) =
winnow::sequence::separated_pair(types, sep, word).parse_next(input)?; fn parse_(input: &mut &str) -> PResult<Self, ()> {
trace("variant", move |input: &mut &str| {
let types = winnow::combinator::separated1(Type::parse_, winnow::ascii::space1);
let sep = (":", winnow::ascii::space0);
let (types, word) =
winnow::combinator::separated_pair(types, sep, word).parse_next(input)?;
let v = Self { types, word }; let v = Self { types, word };
Ok((input, v)) Ok(v)
})
.parse_next(input)
} }
} }
fn word(input: &str) -> IResult<&str, String> { fn word(input: &mut &str) -> PResult<String, ()> {
winnow::bytes::take_till1(|item: char| item.is_ascii_whitespace()) trace("word", move |input: &mut &str| {
winnow::token::take_till1(|item: char| item.is_ascii_whitespace())
.map(|s: &str| s.to_owned().replace('_', " ")) .map(|s: &str| s.to_owned().replace('_', " "))
.parse_next(input) .parse_next(input)
})
.parse_next(input)
} }
#[cfg(test)] #[cfg(test)]
@ -321,7 +358,7 @@ mod test_variant {
fn test_valid() { fn test_valid() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = Variant::parse("A Cv: acknowledgment ").unwrap(); let (input, actual) = Variant::parse_.parse_peek("A Cv: acknowledgment ").unwrap();
assert_eq!(input, " "); assert_eq!(input, " ");
assert_eq!( assert_eq!(
actual.types, actual.types,
@ -343,8 +380,9 @@ mod test_variant {
#[test] #[test]
fn test_extra() { fn test_extra() {
let (input, actual) = let (input, actual) = Variant::parse_
Variant::parse("A Cv: acknowledgment's / Av B C: acknowledgement's").unwrap(); .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's")
.unwrap();
assert_eq!(input, " / Av B C: acknowledgement's"); assert_eq!(input, " / Av B C: acknowledgement's");
assert_eq!( assert_eq!(
actual.types, actual.types,
@ -366,7 +404,7 @@ mod test_variant {
#[test] #[test]
fn test_underscore() { fn test_underscore() {
let (input, actual) = Variant::parse("_: air_gun\n").unwrap(); let (input, actual) = Variant::parse_.parse_peek("_: air_gun\n").unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!( assert_eq!(
actual.types, actual.types,
@ -381,13 +419,20 @@ mod test_variant {
} }
impl Type { impl Type {
pub fn parse(input: &str) -> IResult<&str, Type> { pub fn parse(input: &str) -> Result<Self, ParseError> {
let (input, category) = Category::parse(input)?; Self::parse_.parse(input).map_err(|_err| ParseError)
let (input, tag) = winnow::combinator::opt(Tag::parse).parse_next(input)?; }
let (input, num) = winnow::combinator::opt(winnow::character::digit1).parse_next(input)?;
fn parse_(input: &mut &str) -> PResult<Type, ()> {
trace("type", move |input: &mut &str| {
let category = Category::parse_(input)?;
let tag = winnow::combinator::opt(Tag::parse_).parse_next(input)?;
let num = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?;
let num = num.map(|s| s.parse().expect("parser ensured its a number")); let num = num.map(|s| s.parse().expect("parser ensured its a number"));
let t = Type { category, tag, num }; let t = Type { category, tag, num };
Ok((input, t)) Ok(t)
})
.parse_next(input)
} }
} }
@ -399,13 +444,13 @@ mod test_type {
fn test_valid() { fn test_valid() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = Type::parse("A ").unwrap(); let (input, actual) = Type::parse_.parse_peek("A ").unwrap();
assert_eq!(input, " "); assert_eq!(input, " ");
assert_eq!(actual.category, Category::American); assert_eq!(actual.category, Category::American);
assert_eq!(actual.tag, None); assert_eq!(actual.tag, None);
assert_eq!(actual.num, None); assert_eq!(actual.num, None);
let (input, actual) = Type::parse("Bv ").unwrap(); let (input, actual) = Type::parse_.parse_peek("Bv ").unwrap();
assert_eq!(input, " "); assert_eq!(input, " ");
assert_eq!(actual.category, Category::BritishIse); assert_eq!(actual.category, Category::BritishIse);
assert_eq!(actual.tag, Some(Tag::Variant)); assert_eq!(actual.tag, Some(Tag::Variant));
@ -414,13 +459,13 @@ mod test_type {
#[test] #[test]
fn test_extra() { fn test_extra() {
let (input, actual) = Type::parse("Z foobar").unwrap(); let (input, actual) = Type::parse_.parse_peek("Z foobar").unwrap();
assert_eq!(input, " foobar"); assert_eq!(input, " foobar");
assert_eq!(actual.category, Category::BritishIze); assert_eq!(actual.category, Category::BritishIze);
assert_eq!(actual.tag, None); assert_eq!(actual.tag, None);
assert_eq!(actual.num, None); assert_eq!(actual.num, None);
let (input, actual) = Type::parse("C- foobar").unwrap(); let (input, actual) = Type::parse_.parse_peek("C- foobar").unwrap();
assert_eq!(input, " foobar"); assert_eq!(input, " foobar");
assert_eq!(actual.category, Category::Canadian); assert_eq!(actual.category, Category::Canadian);
assert_eq!(actual.tag, Some(Tag::Possible)); assert_eq!(actual.tag, Some(Tag::Possible));
@ -429,7 +474,7 @@ mod test_type {
#[test] #[test]
fn test_num() { fn test_num() {
let (input, actual) = Type::parse("Av1 ").unwrap(); let (input, actual) = Type::parse_.parse_peek("Av1 ").unwrap();
assert_eq!(input, " "); assert_eq!(input, " ");
assert_eq!(actual.category, Category::American); assert_eq!(actual.category, Category::American);
assert_eq!(actual.tag, Some(Tag::Variant)); assert_eq!(actual.tag, Some(Tag::Variant));
@ -438,8 +483,13 @@ mod test_type {
} }
impl Category { impl Category {
pub fn parse(input: &str) -> IResult<&str, Category> { pub fn parse(input: &str) -> Result<Self, ParseError> {
let symbols = winnow::bytes::one_of("ABZCD_"); Self::parse_.parse(input).map_err(|_err| ParseError)
}
fn parse_(input: &mut &str) -> PResult<Self, ()> {
trace("category", move |input: &mut &str| {
let symbols = winnow::token::one_of(['A', 'B', 'Z', 'C', 'D', '_']);
symbols symbols
.map(|c| match c { .map(|c| match c {
'A' => Category::American, 'A' => Category::American,
@ -451,6 +501,8 @@ impl Category {
_ => unreachable!("parser won't select this option"), _ => unreachable!("parser won't select this option"),
}) })
.parse_next(input) .parse_next(input)
})
.parse_next(input)
} }
} }
@ -460,22 +512,27 @@ mod test_category {
#[test]
fn test_valid() {
    // A lone category letter is consumed completely.
    let (input, actual) = Category::parse_.parse_peek("A").unwrap();
    assert_eq!(input, "");
    assert_eq!(actual, Category::American);
}
#[test]
fn test_extra() {
    // Only the category symbol is consumed; the trailing text is left over.
    let (input, actual) = Category::parse_.parse_peek("_ foobar").unwrap();
    assert_eq!(input, " foobar");
    assert_eq!(actual, Category::Other);
}
} }
impl Tag { impl Tag {
pub fn parse(input: &str) -> IResult<&str, Tag> { pub fn parse(input: &str) -> Result<Self, ParseError> {
let symbols = winnow::bytes::one_of(".vV-x"); Self::parse_.parse(input).map_err(|_err| ParseError)
}
fn parse_(input: &mut &str) -> PResult<Self, ()> {
trace("tag", move |input: &mut &str| {
let symbols = winnow::token::one_of(['.', 'v', 'V', '-', 'x']);
symbols symbols
.map(|c| match c { .map(|c| match c {
'.' => Tag::Eq, '.' => Tag::Eq,
@ -486,6 +543,8 @@ impl Tag {
_ => unreachable!("parser won't select this option"), _ => unreachable!("parser won't select this option"),
}) })
.parse_next(input) .parse_next(input)
})
.parse_next(input)
} }
} }
@ -495,33 +554,35 @@ mod test_tag {
#[test]
fn test_valid() {
    // A lone tag symbol is consumed completely.
    let (input, actual) = Tag::parse_.parse_peek(".").unwrap();
    assert_eq!(input, "");
    assert_eq!(actual, Tag::Eq);
}
#[test]
fn test_extra() {
    // Only the tag symbol is consumed; the trailing text is left over.
    let (input, actual) = Tag::parse_.parse_peek("x foobar").unwrap();
    assert_eq!(input, " foobar");
    assert_eq!(actual, Tag::Improper);
}
} }
impl Pos { impl Pos {
pub fn parse(input: &str) -> IResult<&str, Pos> { pub fn parse(input: &str) -> Result<Self, ParseError> {
use winnow::bytes::tag; Self::parse_.parse(input).map_err(|_err| ParseError)
let noun = tag("<N>"); }
let verb = tag("<V>");
let adjective = tag("<Adj>"); fn parse_(input: &mut &str) -> PResult<Self, ()> {
let adverb = tag("<Adv>"); trace("pos", move |input: &mut &str| {
winnow::branch::alt(( winnow::combinator::alt((
noun.value(Pos::Noun), "<N>".value(Pos::Noun),
verb.value(Pos::Verb), "<V>".value(Pos::Verb),
adjective.value(Pos::Adjective), "<Adj>".value(Pos::Adjective),
adverb.value(Pos::Adverb), "<Adv>".value(Pos::Adverb),
)) ))
.parse_next(input) .parse_next(input)
})
.parse_next(input)
} }
} }
@ -531,15 +592,26 @@ mod test_pos {
#[test]
fn test_valid() {
    // A lone marker is consumed completely.
    let (input, actual) = Pos::parse_.parse_peek("<N>").unwrap();
    assert_eq!(input, "");
    assert_eq!(actual, Pos::Noun);
}
#[test]
fn test_extra() {
    // Only the marker is consumed; the trailing text is left over.
    let (input, actual) = Pos::parse_.parse_peek("<Adj> foobar").unwrap();
    assert_eq!(input, " foobar");
    assert_eq!(actual, Pos::Adjective);
}
} }
/// Opaque error returned by the public `parse` entry points.
///
/// It carries no detail about what went wrong or where.
#[derive(Debug)]
pub struct ParseError;

impl std::error::Error for ParseError {}

impl std::fmt::Display for ParseError {
    /// Always renders the fixed message `invalid`.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str("invalid")
    }
}