Merge pull request #780 from epage/winnow

refactor(typos): Upgrade to winnow 0.5
This commit is contained in:
Ed Page 2023-07-14 13:57:26 -05:00 committed by GitHub
commit 383d51ddf1
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 464 additions and 342 deletions

19
Cargo.lock generated
View file

@ -1610,7 +1610,7 @@ dependencies = [
"serde", "serde",
"serde_spanned", "serde_spanned",
"toml_datetime", "toml_datetime",
"winnow", "winnow 0.4.9",
] ]
[[package]] [[package]]
@ -1648,7 +1648,7 @@ dependencies = [
"thiserror", "thiserror",
"unicode-segmentation", "unicode-segmentation",
"unicode-xid", "unicode-xid",
"winnow", "winnow 0.5.0",
] ]
[[package]] [[package]]
@ -1831,7 +1831,7 @@ name = "varcon-core"
version = "2.2.12" version = "2.2.12"
dependencies = [ dependencies = [
"enumflags2", "enumflags2",
"winnow", "winnow 0.5.0",
] ]
[[package]] [[package]]
@ -2121,9 +2121,18 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
[[package]] [[package]]
name = "winnow" name = "winnow"
version = "0.4.6" version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699" checksum = "81a2094c43cc94775293eaa0e499fbc30048a6d824ac82c0351a8c0bf9112529"
dependencies = [
"memchr",
]
[[package]]
name = "winnow"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7"
dependencies = [ dependencies = [
"memchr", "memchr",
] ]

View file

@ -13,8 +13,8 @@ include.workspace = true
[features] [features]
default = ["std"] default = ["std"]
std = [] std = []
codegen = ["std", "phf_codegen"] codegen = ["std", "dep:phf_codegen"]
map = ["phf", "phf_shared"] map = ["dep:phf", "dep:phf_shared"]
[dependencies] [dependencies]
unicase = "2.6" unicase = "2.6"

View file

@ -32,8 +32,8 @@ pre-release-replacements = [
[features] [features]
default = ["dict", "vars"] default = ["dict", "vars"]
dict = ["typos-dict"] dict = ["dep:typos-dict"]
vars = ["typos-vars"] vars = ["dep:typos-vars"]
[[bin]] [[bin]]

View file

@ -14,7 +14,7 @@ include.workspace = true
[dependencies] [dependencies]
anyhow = "1.0" anyhow = "1.0"
thiserror = "1.0" thiserror = "1.0"
winnow = "0.4.6" winnow = "0.5.0"
unicode-xid = "0.2.4" unicode-xid = "0.2.4"
once_cell = "1.17.2" once_cell = "1.17.2"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }

View file

@ -1,4 +1,5 @@
use bstr::ByteSlice; use bstr::ByteSlice;
use winnow::BStr;
/// Define rules for tokenizing a buffer. /// Define rules for tokenizing a buffer.
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@ -48,7 +49,9 @@ impl Tokenizer {
let iter = if self.unicode && !ByteSlice::is_ascii(content.as_bytes()) { let iter = if self.unicode && !ByteSlice::is_ascii(content.as_bytes()) {
itertools::Either::Left(unicode_parser::iter_identifiers(content)) itertools::Either::Left(unicode_parser::iter_identifiers(content))
} else { } else {
itertools::Either::Right(ascii_parser::iter_identifiers(content.as_bytes())) itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new(
content.as_bytes(),
)))
}; };
iter.map(move |identifier| self.transform(identifier, content.as_bytes())) iter.map(move |identifier| self.transform(identifier, content.as_bytes()))
} }
@ -58,7 +61,7 @@ impl Tokenizer {
let iter = Utf8Chunks::new(content).flat_map(unicode_parser::iter_identifiers); let iter = Utf8Chunks::new(content).flat_map(unicode_parser::iter_identifiers);
itertools::Either::Left(iter) itertools::Either::Left(iter)
} else { } else {
itertools::Either::Right(ascii_parser::iter_identifiers(content)) itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new(content)))
}; };
iter.map(move |identifier| self.transform(identifier, content)) iter.map(move |identifier| self.transform(identifier, content))
} }
@ -126,6 +129,7 @@ impl<'s> Iterator for Utf8Chunks<'s> {
mod parser { mod parser {
use winnow::combinator::*; use winnow::combinator::*;
use winnow::error::ParserError;
use winnow::prelude::*; use winnow::prelude::*;
use winnow::stream::AsBStr; use winnow::stream::AsBStr;
use winnow::stream::AsChar; use winnow::stream::AsChar;
@ -133,8 +137,9 @@ mod parser {
use winnow::stream::Stream; use winnow::stream::Stream;
use winnow::stream::StreamIsPartial; use winnow::stream::StreamIsPartial;
use winnow::token::*; use winnow::token::*;
use winnow::trace::trace;
pub(crate) fn next_identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice> pub(crate) fn next_identifier<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -143,7 +148,7 @@ mod parser {
preceded(ignore, identifier).parse_next(input) preceded(ignore, identifier).parse_next(input)
} }
fn identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn identifier<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -153,36 +158,39 @@ mod parser {
// `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd // `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
// or unexpected cases than strip off start characters to a word since we aren't doing a // or unexpected cases than strip off start characters to a word since we aren't doing a
// proper word boundary parse // proper word boundary parse
take_while(1.., is_xid_continue).parse_next(input) trace("identifier", take_while(1.., is_xid_continue)).parse_next(input)
} }
fn ignore<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn ignore<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
take_many0(alt(( trace(
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`, "ignore",
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up take_many0(alt((
// - Make sure you always consume it // CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
terminated(uuid_literal, peek(sep1)), // - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
terminated(hash_literal, peek(sep1)), // - Make sure you always consume it
terminated(base64_literal, peek(sep1)), // base64 should be quoted or something terminated(uuid_literal, peek(sep1)),
terminated(ordinal_literal, peek(sep1)), terminated(hash_literal, peek(sep1)),
terminated(hex_literal, peek(sep1)), terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words terminated(ordinal_literal, peek(sep1)),
terminated(email_literal, peek(sep1)), terminated(hex_literal, peek(sep1)),
terminated(url_literal, peek(sep1)), terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
terminated(css_color, peek(sep1)), terminated(email_literal, peek(sep1)),
c_escape, terminated(url_literal, peek(sep1)),
printf, terminated(css_color, peek(sep1)),
other, c_escape,
))) printf,
other,
))),
)
.parse_next(input) .parse_next(input)
} }
fn sep1<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn sep1<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -195,21 +203,24 @@ mod parser {
.parse_next(input) .parse_next(input)
} }
fn other<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn other<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
( trace(
one_of(|c| !is_xid_continue(c)), "other",
take_while(0.., is_ignore_char), (
one_of(|c| !is_xid_continue(c)),
take_while(0.., is_ignore_char),
)
.recognize(),
) )
.recognize() .parse_next(input)
.parse_next(input)
} }
fn ordinal_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn ordinal_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -221,26 +232,29 @@ mod parser {
['_'].contains(&c) ['_'].contains(&c)
} }
( trace(
take_while(0.., is_sep), "ordinal_literal",
take_while(1.., is_dec_digit), (
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))), take_while(0.., is_sep),
take_while(0.., is_sep), take_while(1.., is_dec_digit),
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
take_while(0.., is_sep),
)
.recognize(),
) )
.recognize() .parse_next(input)
.parse_next(input)
} }
fn dec_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn dec_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
take_while(1.., is_dec_digit_with_sep).parse_next(input) trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input)
} }
fn hex_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn hex_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -253,57 +267,63 @@ mod parser {
.parse_next(input) .parse_next(input)
} }
fn css_color<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn css_color<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
preceded( trace(
'#', "color",
alt(( preceded(
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)), '#',
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)), alt((
)), terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
)),
),
) )
.parse_next(input) .parse_next(input)
} }
fn uuid_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn uuid_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
alt(( trace(
( "uuid",
take_while(8, is_lower_hex_digit), alt((
'-', (
take_while(4, is_lower_hex_digit), take_while(8, is_lower_hex_digit),
'-', '-',
take_while(4, is_lower_hex_digit), take_while(4, is_lower_hex_digit),
'-', '-',
take_while(4, is_lower_hex_digit), take_while(4, is_lower_hex_digit),
'-', '-',
take_while(12, is_lower_hex_digit), take_while(4, is_lower_hex_digit),
), '-',
( take_while(12, is_lower_hex_digit),
take_while(8, is_upper_hex_digit), ),
'-', (
take_while(4, is_upper_hex_digit), take_while(8, is_upper_hex_digit),
'-', '-',
take_while(4, is_upper_hex_digit), take_while(4, is_upper_hex_digit),
'-', '-',
take_while(4, is_upper_hex_digit), take_while(4, is_upper_hex_digit),
'-', '-',
take_while(12, is_upper_hex_digit), take_while(4, is_upper_hex_digit),
), '-',
)) take_while(12, is_upper_hex_digit),
.recognize() ),
))
.recognize(),
)
.parse_next(input) .parse_next(input)
} }
fn hash_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn hash_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -319,103 +339,120 @@ mod parser {
// or more. // or more.
const IGNORE_HEX_MIN: usize = 32; const IGNORE_HEX_MIN: usize = 32;
alt(( trace(
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit), "hash",
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit), alt((
)) take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
)),
)
.parse_next(input) .parse_next(input)
} }
fn base64_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn base64_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?; trace("base64", move |input: &mut T| {
let start = input.checkpoint();
let captured = take_while(1.., is_base64_digit).parse_next(input)?;
const CHUNK: usize = 4; const CHUNK: usize = 4;
let padding_offset = input.offset_to(&padding); let padding_offset = input.offset_from(&start);
let mut padding_len = CHUNK - padding_offset % CHUNK; let mut padding_len = CHUNK - padding_offset % CHUNK;
if padding_len == CHUNK { if padding_len == CHUNK {
padding_len = 0; padding_len = 0;
} }
if captured.slice_len() < 90 if captured.slice_len() < 90
&& padding_len == 0 && padding_len == 0
&& captured && captured
.as_bstr() .as_bstr()
.iter() .iter()
.all(|c| !['/', '+'].contains(&c.as_char())) .all(|c| !['/', '+'].contains(&c.as_char()))
{ {
return Err(winnow::error::ErrMode::Backtrack( return Err(winnow::error::ErrMode::from_error_kind(
winnow::error::Error::new(input, winnow::error::ErrorKind::Slice), input,
)); winnow::error::ErrorKind::Slice,
} ));
}
let (after, _) = take_while(padding_len..=padding_len, is_base64_padding).parse_next(input)?;
take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
let after_offset = input.offset_to(&after); let after_offset = input.offset_from(&start);
Ok(input.next_slice(after_offset)) input.reset(start);
Ok(input.next_slice(after_offset))
})
.parse_next(input)
} }
fn email_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn email_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
( trace(
take_while(1.., is_localport_char), "email",
'@',
take_while(1.., is_domain_char),
)
.recognize()
.parse_next(input)
}
fn url_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
(
opt(terminated(
take_while(1.., is_scheme_char),
// HACK: Technically you can skip `//` if you don't have a domain but that would
// get messy to support.
(':', '/', '/'),
)),
( (
opt(terminated(url_userinfo, '@')), take_while(1.., is_localport_char),
'@',
take_while(1.., is_domain_char), take_while(1.., is_domain_char),
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))), )
), .recognize(),
'/',
// HACK: Too lazy to enumerate
take_while(0.., is_path_query_fragment),
) )
.recognize() .parse_next(input)
.parse_next(input)
} }
fn url_userinfo<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn url_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
( trace(
take_while(1.., is_localport_char), "url",
opt(preceded(':', take_while(0.., is_localport_char))), (
opt(terminated(
take_while(1.., is_scheme_char),
// HACK: Technically you can skip `//` if you don't have a domain but that would
// get messy to support.
(':', '/', '/'),
)),
(
opt(terminated(url_userinfo, '@')),
take_while(1.., is_domain_char),
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
),
'/',
// HACK: Too lazy to enumerate
take_while(0.., is_path_query_fragment),
)
.recognize(),
) )
.recognize() .parse_next(input)
.parse_next(input)
} }
fn c_escape<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn url_userinfo<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
trace(
"userinfo",
(
take_while(1.., is_localport_char),
opt(preceded(':', take_while(0.., is_localport_char))),
)
.recognize(),
)
.parse_next(input)
}
fn c_escape<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -425,25 +462,29 @@ mod parser {
// regular string that does escaping. The escaped letter might be part of a word, or it // regular string that does escaping. The escaped letter might be part of a word, or it
// might not be. Rather than guess and be wrong part of the time and correct people's words // might not be. Rather than guess and be wrong part of the time and correct people's words
// incorrectly, we opt for just not evaluating it at all. // incorrectly, we opt for just not evaluating it at all.
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)).parse_next(input) trace(
"escape",
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)),
)
.parse_next(input)
} }
fn printf<T>(input: T) -> IResult<T, <T as Stream>::Slice> fn printf<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where where
T: Stream + StreamIsPartial + PartialEq, T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default, <T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy, <T as Stream>::Token: AsChar + Copy,
{ {
preceded('%', take_while(1.., is_xid_continue)).parse_next(input) trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input)
} }
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, <I as Stream>::Slice, E> fn take_many0<I, E, F>(mut f: F) -> impl Parser<I, <I as Stream>::Slice, E>
where where
I: Stream, I: Stream,
F: winnow::Parser<I, <I as Stream>::Slice, E>, F: Parser<I, <I as Stream>::Slice, E>,
E: winnow::error::ParseError<I>, E: ParserError<I>,
{ {
move |i: I| { move |i: &mut I| {
repeat(0.., f.by_ref()) repeat(0.., f.by_ref())
.map(|()| ()) .map(|()| ())
.recognize() .recognize()
@ -581,9 +622,8 @@ mod unicode_parser {
use super::parser::next_identifier; use super::parser::next_identifier;
pub(crate) fn iter_identifiers(mut input: &str) -> impl Iterator<Item = &str> { pub(crate) fn iter_identifiers(mut input: &str) -> impl Iterator<Item = &str> {
std::iter::from_fn(move || match next_identifier(input) { std::iter::from_fn(move || match next_identifier(&mut input) {
Ok((i, o)) => { Ok(o) => {
input = i;
debug_assert_ne!(o, ""); debug_assert_ne!(o, "");
Some(o) Some(o)
} }
@ -595,10 +635,11 @@ mod unicode_parser {
mod ascii_parser { mod ascii_parser {
use super::parser::next_identifier; use super::parser::next_identifier;
pub(crate) fn iter_identifiers(mut input: &[u8]) -> impl Iterator<Item = &str> { use winnow::BStr;
std::iter::from_fn(move || match next_identifier(input) {
Ok((i, o)) => { pub(crate) fn iter_identifiers(mut input: &BStr) -> impl Iterator<Item = &str> {
input = i; std::iter::from_fn(move || match next_identifier(&mut input) {
Ok(o) => {
debug_assert_ne!(o, b""); debug_assert_ne!(o, b"");
// This is safe because we've checked that the strings are a subset of ASCII // This is safe because we've checked that the strings are a subset of ASCII
// characters. // characters.

View file

@ -12,11 +12,11 @@ include.workspace = true
[features] [features]
default = [] default = []
parser = ["winnow"] parser = ["dep:winnow"]
flags = ["enumflags2"] flags = ["dep:enumflags2"]
[dependencies] [dependencies]
winnow = { version = "0.4.6", optional = true } winnow = { version = "0.5.0", optional = true }
enumflags2 = { version = "0.7", optional = true } enumflags2 = { version = "0.7", optional = true }
[package.metadata.docs.rs] [package.metadata.docs.rs]

View file

@ -1,4 +1,5 @@
use winnow::prelude::*; use winnow::prelude::*;
use winnow::trace::trace;
use crate::*; use crate::*;
@ -17,10 +18,8 @@ impl<'i> Iterator for ClusterIter<'i> {
type Item = Cluster; type Item = Cluster;
fn next(&mut self) -> Option<Cluster> { fn next(&mut self) -> Option<Cluster> {
let i = self.input.trim_start(); self.input = self.input.trim_start();
let (i, c) = Cluster::parse(i).ok()?; Cluster::parse_.parse_next(&mut self.input).ok()
self.input = i;
Some(c)
} }
} }
@ -61,38 +60,45 @@ A Cv: acknowledgment's / Av B C: acknowledgement's
} }
impl Cluster { impl Cluster {
pub fn parse(input: &str) -> IResult<&str, Self> { pub fn parse(input: &str) -> Result<Self, ParseError> {
let header = ( Self::parse_.parse(input).map_err(|_err| ParseError)
winnow::bytes::tag("#"), }
winnow::character::space0,
winnow::character::not_line_ending,
winnow::character::line_ending,
);
let note = winnow::sequence::preceded(
(winnow::bytes::tag("##"), winnow::character::space0),
winnow::sequence::terminated(
winnow::character::not_line_ending,
winnow::character::line_ending,
),
);
let mut cluster = (
winnow::combinator::opt(header),
winnow::multi::many1(winnow::sequence::terminated(
Entry::parse,
winnow::character::line_ending,
)),
winnow::multi::many0(note),
);
let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = cluster.parse_next(input)?;
let header = header.map(|s| s.2.to_owned()); fn parse_(input: &mut &str) -> PResult<Self, ()> {
let notes = notes.into_iter().map(|s| s.to_owned()).collect(); trace("cluster", move |input: &mut &str| {
let c = Self { let header = (
header, "#",
entries, winnow::ascii::space0,
notes, winnow::ascii::not_line_ending,
}; winnow::ascii::line_ending,
Ok((input, c)) );
let note = winnow::combinator::preceded(
("##", winnow::ascii::space0),
winnow::combinator::terminated(
winnow::ascii::not_line_ending,
winnow::ascii::line_ending,
),
);
let mut cluster = (
winnow::combinator::opt(header),
winnow::combinator::repeat(
1..,
winnow::combinator::terminated(Entry::parse_, winnow::ascii::line_ending),
),
winnow::combinator::repeat(0.., note),
);
let (header, entries, notes): (_, _, Vec<_>) = cluster.parse_next(input)?;
let header = header.map(|s| s.2.to_owned());
let notes = notes.into_iter().map(|s| s.to_owned()).collect();
let c = Self {
header,
entries,
notes,
};
Ok(c)
})
.parse_next(input)
} }
} }
@ -102,15 +108,16 @@ mod test_cluster {
#[test] #[test]
fn test_basic() { fn test_basic() {
let (input, actual) = Cluster::parse( let (input, actual) = Cluster::parse_
"# acknowledgment <verified> (level 35) .parse_peek(
"# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's A Cv: acknowledgment's / Av B C: acknowledgement's
", ",
) )
.unwrap(); .unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!( assert_eq!(
actual.header, actual.header,
@ -122,8 +129,9 @@ A Cv: acknowledgment's / Av B C: acknowledgement's
#[test] #[test]
fn test_notes() { fn test_notes() {
let (input, actual) = Cluster::parse( let (input, actual) = Cluster::parse_
"# coloration <verified> (level 50) .parse_peek(
"# coloration <verified> (level 50)
A B C: coloration / B. Cv: colouration A B C: coloration / B. Cv: colouration
A B C: colorations / B. Cv: colourations A B C: colorations / B. Cv: colourations
A B C: coloration's / B. Cv: colouration's A B C: coloration's / B. Cv: colouration's
@ -131,8 +139,8 @@ A B C: coloration's / B. Cv: colouration's
## variant for British Engl or some reason ## variant for British Engl or some reason
", ",
) )
.unwrap(); .unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!( assert_eq!(
actual.header, actual.header,
@ -144,65 +152,75 @@ A B C: coloration's / B. Cv: colouration's
} }
impl Entry { impl Entry {
pub fn parse(input: &str) -> IResult<&str, Self> { pub fn parse(input: &str) -> Result<Self, ParseError> {
let var_sep = (winnow::character::space0, '/', winnow::character::space0); Self::parse_.parse(input).map_err(|_err| ParseError)
let (input, variants) =
winnow::multi::separated1(Variant::parse, var_sep).parse_next(input)?;
let desc_sep = (winnow::character::space0, '|');
let (input, description) =
winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?;
let comment_sep = (winnow::character::space0, '#');
let (input, comment) = winnow::combinator::opt((
comment_sep,
winnow::character::space1,
winnow::character::not_line_ending,
))
.parse_next(input)?;
let mut e = match description {
Some((_, description)) => description,
None => Self {
variants: Vec::new(),
pos: None,
archaic: false,
note: false,
description: None,
comment: None,
},
};
e.variants = variants;
e.comment = comment.map(|c| c.2.to_owned());
Ok((input, e))
} }
fn parse_description(input: &str) -> IResult<&str, Self> { fn parse_(input: &mut &str) -> PResult<Self, ()> {
let (input, (pos, archaic, note, description)) = ( trace("entry", move |input: &mut &str| {
winnow::combinator::opt((winnow::character::space1, Pos::parse)), let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0);
winnow::combinator::opt((winnow::character::space1, "(-)")), let variants =
winnow::combinator::opt((winnow::character::space1, "--")), winnow::combinator::separated1(Variant::parse_, var_sep).parse_next(input)?;
winnow::combinator::opt((
winnow::character::space1, let desc_sep = (winnow::ascii::space0, '|');
winnow::bytes::take_till0(|c| c == '\n' || c == '\r' || c == '#'), let description =
)), winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?;
)
let comment_sep = (winnow::ascii::space0, '#');
let comment = winnow::combinator::opt((
comment_sep,
winnow::ascii::space1,
winnow::ascii::not_line_ending,
))
.parse_next(input)?; .parse_next(input)?;
let variants = Vec::new(); let mut e = match description {
let pos = pos.map(|(_, p)| p); Some((_, description)) => description,
let archaic = archaic.is_some(); None => Self {
let note = note.is_some(); variants: Vec::new(),
let description = description.map(|(_, d)| d.to_owned()); pos: None,
let e = Self { archaic: false,
variants, note: false,
pos, description: None,
archaic, comment: None,
note, },
description, };
comment: None, e.variants = variants;
}; e.comment = comment.map(|c| c.2.to_owned());
Ok((input, e)) Ok(e)
})
.parse_next(input)
}
fn parse_description(input: &mut &str) -> PResult<Self, ()> {
trace("description", move |input: &mut &str| {
let (pos, archaic, note, description) = (
winnow::combinator::opt((winnow::ascii::space1, Pos::parse_)),
winnow::combinator::opt((winnow::ascii::space1, "(-)")),
winnow::combinator::opt((winnow::ascii::space1, "--")),
winnow::combinator::opt((
winnow::ascii::space1,
winnow::token::take_till0(('\n', '\r', '#')),
)),
)
.parse_next(input)?;
let variants = Vec::new();
let pos = pos.map(|(_, p)| p);
let archaic = archaic.is_some();
let note = note.is_some();
let description = description.map(|(_, d)| d.to_owned());
let e = Self {
variants,
pos,
archaic,
note,
description,
comment: None,
};
Ok(e)
})
.parse_next(input)
} }
} }
@ -215,8 +233,9 @@ mod test_entry {
fn test_variant_only() { fn test_variant_only() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = let (input, actual) = Entry::parse_
Entry::parse("A Cv: acknowledgment's / Av B C: acknowledgement's\n").unwrap(); .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's\n")
.unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 2); assert_eq!(actual.variants.len(), 2);
assert_eq!(actual.pos, None); assert_eq!(actual.pos, None);
@ -229,7 +248,9 @@ mod test_entry {
fn test_description() { fn test_description() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = Entry::parse("A C: prize / B: prise | otherwise\n").unwrap(); let (input, actual) = Entry::parse_
.parse_peek("A C: prize / B: prise | otherwise\n")
.unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 2); assert_eq!(actual.variants.len(), 2);
assert_eq!(actual.pos, None); assert_eq!(actual.pos, None);
@ -242,7 +263,9 @@ mod test_entry {
fn test_pos() { fn test_pos() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = Entry::parse("A B C: practice / AV Cv: practise | <N>\n").unwrap(); let (input, actual) = Entry::parse_
.parse_peek("A B C: practice / AV Cv: practise | <N>\n")
.unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 2); assert_eq!(actual.variants.len(), 2);
assert_eq!(actual.pos, Some(Pos::Noun)); assert_eq!(actual.pos, Some(Pos::Noun));
@ -255,7 +278,9 @@ mod test_entry {
fn test_archaic() { fn test_archaic() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = Entry::parse("A: bark / Av B: barque | (-) ship\n").unwrap(); let (input, actual) = Entry::parse_
.parse_peek("A: bark / Av B: barque | (-) ship\n")
.unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 2); assert_eq!(actual.variants.len(), 2);
assert_eq!(actual.pos, None); assert_eq!(actual.pos, None);
@ -268,7 +293,9 @@ mod test_entry {
fn test_note() { fn test_note() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = Entry::parse("_: cabbies | -- plural\n").unwrap(); let (input, actual) = Entry::parse_
.parse_peek("_: cabbies | -- plural\n")
.unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 1); assert_eq!(actual.variants.len(), 1);
assert_eq!(actual.pos, None); assert_eq!(actual.pos, None);
@ -279,7 +306,7 @@ mod test_entry {
#[test] #[test]
fn test_trailing_comment() { fn test_trailing_comment() {
let (input, actual) = Entry::parse( let (input, actual) = Entry::parse_.parse_peek(
"A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n", "A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n",
) )
.unwrap(); .unwrap();
@ -297,20 +324,30 @@ mod test_entry {
} }
impl Variant { impl Variant {
pub fn parse(input: &str) -> IResult<&str, Self> { pub fn parse(input: &str) -> Result<Self, ParseError> {
let types = winnow::multi::separated1(Type::parse, winnow::character::space1); Self::parse_.parse(input).map_err(|_err| ParseError)
let sep = (winnow::bytes::tag(":"), winnow::character::space0); }
let (input, (types, word)) =
winnow::sequence::separated_pair(types, sep, word).parse_next(input)?; fn parse_(input: &mut &str) -> PResult<Self, ()> {
let v = Self { types, word }; trace("variant", move |input: &mut &str| {
Ok((input, v)) let types = winnow::combinator::separated1(Type::parse_, winnow::ascii::space1);
let sep = (":", winnow::ascii::space0);
let (types, word) =
winnow::combinator::separated_pair(types, sep, word).parse_next(input)?;
let v = Self { types, word };
Ok(v)
})
.parse_next(input)
} }
} }
fn word(input: &str) -> IResult<&str, String> { fn word(input: &mut &str) -> PResult<String, ()> {
winnow::bytes::take_till1(|item: char| item.is_ascii_whitespace()) trace("word", move |input: &mut &str| {
.map(|s: &str| s.to_owned().replace('_', " ")) winnow::token::take_till1(|item: char| item.is_ascii_whitespace())
.parse_next(input) .map(|s: &str| s.to_owned().replace('_', " "))
.parse_next(input)
})
.parse_next(input)
} }
#[cfg(test)] #[cfg(test)]
@ -321,7 +358,7 @@ mod test_variant {
fn test_valid() { fn test_valid() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = Variant::parse("A Cv: acknowledgment ").unwrap(); let (input, actual) = Variant::parse_.parse_peek("A Cv: acknowledgment ").unwrap();
assert_eq!(input, " "); assert_eq!(input, " ");
assert_eq!( assert_eq!(
actual.types, actual.types,
@ -343,8 +380,9 @@ mod test_variant {
#[test] #[test]
fn test_extra() { fn test_extra() {
let (input, actual) = let (input, actual) = Variant::parse_
Variant::parse("A Cv: acknowledgment's / Av B C: acknowledgement's").unwrap(); .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's")
.unwrap();
assert_eq!(input, " / Av B C: acknowledgement's"); assert_eq!(input, " / Av B C: acknowledgement's");
assert_eq!( assert_eq!(
actual.types, actual.types,
@ -366,7 +404,7 @@ mod test_variant {
#[test] #[test]
fn test_underscore() { fn test_underscore() {
let (input, actual) = Variant::parse("_: air_gun\n").unwrap(); let (input, actual) = Variant::parse_.parse_peek("_: air_gun\n").unwrap();
assert_eq!(input, "\n"); assert_eq!(input, "\n");
assert_eq!( assert_eq!(
actual.types, actual.types,
@ -381,13 +419,20 @@ mod test_variant {
} }
impl Type { impl Type {
pub fn parse(input: &str) -> IResult<&str, Type> { pub fn parse(input: &str) -> Result<Self, ParseError> {
let (input, category) = Category::parse(input)?; Self::parse_.parse(input).map_err(|_err| ParseError)
let (input, tag) = winnow::combinator::opt(Tag::parse).parse_next(input)?; }
let (input, num) = winnow::combinator::opt(winnow::character::digit1).parse_next(input)?;
let num = num.map(|s| s.parse().expect("parser ensured its a number")); fn parse_(input: &mut &str) -> PResult<Type, ()> {
let t = Type { category, tag, num }; trace("type", move |input: &mut &str| {
Ok((input, t)) let category = Category::parse_(input)?;
let tag = winnow::combinator::opt(Tag::parse_).parse_next(input)?;
let num = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?;
let num = num.map(|s| s.parse().expect("parser ensured its a number"));
let t = Type { category, tag, num };
Ok(t)
})
.parse_next(input)
} }
} }
@ -399,13 +444,13 @@ mod test_type {
fn test_valid() { fn test_valid() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases. // cases.
let (input, actual) = Type::parse("A ").unwrap(); let (input, actual) = Type::parse_.parse_peek("A ").unwrap();
assert_eq!(input, " "); assert_eq!(input, " ");
assert_eq!(actual.category, Category::American); assert_eq!(actual.category, Category::American);
assert_eq!(actual.tag, None); assert_eq!(actual.tag, None);
assert_eq!(actual.num, None); assert_eq!(actual.num, None);
let (input, actual) = Type::parse("Bv ").unwrap(); let (input, actual) = Type::parse_.parse_peek("Bv ").unwrap();
assert_eq!(input, " "); assert_eq!(input, " ");
assert_eq!(actual.category, Category::BritishIse); assert_eq!(actual.category, Category::BritishIse);
assert_eq!(actual.tag, Some(Tag::Variant)); assert_eq!(actual.tag, Some(Tag::Variant));
@ -414,13 +459,13 @@ mod test_type {
#[test] #[test]
fn test_extra() { fn test_extra() {
let (input, actual) = Type::parse("Z foobar").unwrap(); let (input, actual) = Type::parse_.parse_peek("Z foobar").unwrap();
assert_eq!(input, " foobar"); assert_eq!(input, " foobar");
assert_eq!(actual.category, Category::BritishIze); assert_eq!(actual.category, Category::BritishIze);
assert_eq!(actual.tag, None); assert_eq!(actual.tag, None);
assert_eq!(actual.num, None); assert_eq!(actual.num, None);
let (input, actual) = Type::parse("C- foobar").unwrap(); let (input, actual) = Type::parse_.parse_peek("C- foobar").unwrap();
assert_eq!(input, " foobar"); assert_eq!(input, " foobar");
assert_eq!(actual.category, Category::Canadian); assert_eq!(actual.category, Category::Canadian);
assert_eq!(actual.tag, Some(Tag::Possible)); assert_eq!(actual.tag, Some(Tag::Possible));
@ -429,7 +474,7 @@ mod test_type {
#[test] #[test]
fn test_num() { fn test_num() {
let (input, actual) = Type::parse("Av1 ").unwrap(); let (input, actual) = Type::parse_.parse_peek("Av1 ").unwrap();
assert_eq!(input, " "); assert_eq!(input, " ");
assert_eq!(actual.category, Category::American); assert_eq!(actual.category, Category::American);
assert_eq!(actual.tag, Some(Tag::Variant)); assert_eq!(actual.tag, Some(Tag::Variant));
@ -438,19 +483,26 @@ mod test_type {
} }
impl Category { impl Category {
pub fn parse(input: &str) -> IResult<&str, Category> { pub fn parse(input: &str) -> Result<Self, ParseError> {
let symbols = winnow::bytes::one_of("ABZCD_"); Self::parse_.parse(input).map_err(|_err| ParseError)
symbols }
.map(|c| match c {
'A' => Category::American, fn parse_(input: &mut &str) -> PResult<Self, ()> {
'B' => Category::BritishIse, trace("category", move |input: &mut &str| {
'Z' => Category::BritishIze, let symbols = winnow::token::one_of(['A', 'B', 'Z', 'C', 'D', '_']);
'C' => Category::Canadian, symbols
'D' => Category::Australian, .map(|c| match c {
'_' => Category::Other, 'A' => Category::American,
_ => unreachable!("parser won't select this option"), 'B' => Category::BritishIse,
}) 'Z' => Category::BritishIze,
.parse_next(input) 'C' => Category::Canadian,
'D' => Category::Australian,
'_' => Category::Other,
_ => unreachable!("parser won't select this option"),
})
.parse_next(input)
})
.parse_next(input)
} }
} }
@ -460,32 +512,39 @@ mod test_category {
#[test] #[test]
fn test_valid() { fn test_valid() {
let (input, actual) = Category::parse("A").unwrap(); let (input, actual) = Category::parse_.parse_peek("A").unwrap();
assert_eq!(input, ""); assert_eq!(input, "");
assert_eq!(actual, Category::American); assert_eq!(actual, Category::American);
} }
#[test] #[test]
fn test_extra() { fn test_extra() {
let (input, actual) = Category::parse("_ foobar").unwrap(); let (input, actual) = Category::parse_.parse_peek("_ foobar").unwrap();
assert_eq!(input, " foobar"); assert_eq!(input, " foobar");
assert_eq!(actual, Category::Other); assert_eq!(actual, Category::Other);
} }
} }
impl Tag { impl Tag {
pub fn parse(input: &str) -> IResult<&str, Tag> { pub fn parse(input: &str) -> Result<Self, ParseError> {
let symbols = winnow::bytes::one_of(".vV-x"); Self::parse_.parse(input).map_err(|_err| ParseError)
symbols }
.map(|c| match c {
'.' => Tag::Eq, fn parse_(input: &mut &str) -> PResult<Self, ()> {
'v' => Tag::Variant, trace("tag", move |input: &mut &str| {
'V' => Tag::Seldom, let symbols = winnow::token::one_of(['.', 'v', 'V', '-', 'x']);
'-' => Tag::Possible, symbols
'x' => Tag::Improper, .map(|c| match c {
_ => unreachable!("parser won't select this option"), '.' => Tag::Eq,
}) 'v' => Tag::Variant,
.parse_next(input) 'V' => Tag::Seldom,
'-' => Tag::Possible,
'x' => Tag::Improper,
_ => unreachable!("parser won't select this option"),
})
.parse_next(input)
})
.parse_next(input)
} }
} }
@ -495,32 +554,34 @@ mod test_tag {
#[test] #[test]
fn test_valid() { fn test_valid() {
let (input, actual) = Tag::parse(".").unwrap(); let (input, actual) = Tag::parse_.parse_peek(".").unwrap();
assert_eq!(input, ""); assert_eq!(input, "");
assert_eq!(actual, Tag::Eq); assert_eq!(actual, Tag::Eq);
} }
#[test] #[test]
fn test_extra() { fn test_extra() {
let (input, actual) = Tag::parse("x foobar").unwrap(); let (input, actual) = Tag::parse_.parse_peek("x foobar").unwrap();
assert_eq!(input, " foobar"); assert_eq!(input, " foobar");
assert_eq!(actual, Tag::Improper); assert_eq!(actual, Tag::Improper);
} }
} }
impl Pos { impl Pos {
pub fn parse(input: &str) -> IResult<&str, Pos> { pub fn parse(input: &str) -> Result<Self, ParseError> {
use winnow::bytes::tag; Self::parse_.parse(input).map_err(|_err| ParseError)
let noun = tag("<N>"); }
let verb = tag("<V>");
let adjective = tag("<Adj>"); fn parse_(input: &mut &str) -> PResult<Self, ()> {
let adverb = tag("<Adv>"); trace("pos", move |input: &mut &str| {
winnow::branch::alt(( winnow::combinator::alt((
noun.value(Pos::Noun), "<N>".value(Pos::Noun),
verb.value(Pos::Verb), "<V>".value(Pos::Verb),
adjective.value(Pos::Adjective), "<Adj>".value(Pos::Adjective),
adverb.value(Pos::Adverb), "<Adv>".value(Pos::Adverb),
)) ))
.parse_next(input)
})
.parse_next(input) .parse_next(input)
} }
} }
@ -531,15 +592,26 @@ mod test_pos {
#[test] #[test]
fn test_valid() { fn test_valid() {
let (input, actual) = Pos::parse("<N>").unwrap(); let (input, actual) = Pos::parse_.parse_peek("<N>").unwrap();
assert_eq!(input, ""); assert_eq!(input, "");
assert_eq!(actual, Pos::Noun); assert_eq!(actual, Pos::Noun);
} }
#[test] #[test]
fn test_extra() { fn test_extra() {
let (input, actual) = Pos::parse("<Adj> foobar").unwrap(); let (input, actual) = Pos::parse_.parse_peek("<Adj> foobar").unwrap();
assert_eq!(input, " foobar"); assert_eq!(input, " foobar");
assert_eq!(actual, Pos::Adjective); assert_eq!(actual, Pos::Adjective);
} }
} }
/// Error returned by the public `parse` entry points when the input is rejected.
///
/// This is a unit type: the `parse` methods in this file discard the underlying
/// parser error (`map_err(|_err| ParseError)`), so no position or cause is carried.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError;

impl std::fmt::Display for ParseError {
    /// Writes the fixed, detail-free message `invalid`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("invalid")
    }
}

impl std::error::Error for ParseError {}