mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-22 23:52:12 -05:00
Merge pull request #780 from epage/winnow
refactor(typos): Upgrade to winnow 0.5
This commit is contained in:
commit
383d51ddf1
7 changed files with 464 additions and 342 deletions
19
Cargo.lock
generated
19
Cargo.lock
generated
|
@ -1610,7 +1610,7 @@ dependencies = [
|
|||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"winnow",
|
||||
"winnow 0.4.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1648,7 +1648,7 @@ dependencies = [
|
|||
"thiserror",
|
||||
"unicode-segmentation",
|
||||
"unicode-xid",
|
||||
"winnow",
|
||||
"winnow 0.5.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1831,7 +1831,7 @@ name = "varcon-core"
|
|||
version = "2.2.12"
|
||||
dependencies = [
|
||||
"enumflags2",
|
||||
"winnow",
|
||||
"winnow 0.5.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -2121,9 +2121,18 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
|
|||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.4.6"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699"
|
||||
checksum = "81a2094c43cc94775293eaa0e499fbc30048a6d824ac82c0351a8c0bf9112529"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
|
|
@ -13,8 +13,8 @@ include.workspace = true
|
|||
[features]
|
||||
default = ["std"]
|
||||
std = []
|
||||
codegen = ["std", "phf_codegen"]
|
||||
map = ["phf", "phf_shared"]
|
||||
codegen = ["std", "dep:phf_codegen"]
|
||||
map = ["dep:phf", "dep:phf_shared"]
|
||||
|
||||
[dependencies]
|
||||
unicase = "2.6"
|
||||
|
|
|
@ -32,8 +32,8 @@ pre-release-replacements = [
|
|||
|
||||
[features]
|
||||
default = ["dict", "vars"]
|
||||
dict = ["typos-dict"]
|
||||
vars = ["typos-vars"]
|
||||
dict = ["dep:typos-dict"]
|
||||
vars = ["dep:typos-vars"]
|
||||
|
||||
|
||||
[[bin]]
|
||||
|
|
|
@ -14,7 +14,7 @@ include.workspace = true
|
|||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
thiserror = "1.0"
|
||||
winnow = "0.4.6"
|
||||
winnow = "0.5.0"
|
||||
unicode-xid = "0.2.4"
|
||||
once_cell = "1.17.2"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use bstr::ByteSlice;
|
||||
use winnow::BStr;
|
||||
|
||||
/// Define rules for tokenizing a buffer.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
|
@ -48,7 +49,9 @@ impl Tokenizer {
|
|||
let iter = if self.unicode && !ByteSlice::is_ascii(content.as_bytes()) {
|
||||
itertools::Either::Left(unicode_parser::iter_identifiers(content))
|
||||
} else {
|
||||
itertools::Either::Right(ascii_parser::iter_identifiers(content.as_bytes()))
|
||||
itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new(
|
||||
content.as_bytes(),
|
||||
)))
|
||||
};
|
||||
iter.map(move |identifier| self.transform(identifier, content.as_bytes()))
|
||||
}
|
||||
|
@ -58,7 +61,7 @@ impl Tokenizer {
|
|||
let iter = Utf8Chunks::new(content).flat_map(unicode_parser::iter_identifiers);
|
||||
itertools::Either::Left(iter)
|
||||
} else {
|
||||
itertools::Either::Right(ascii_parser::iter_identifiers(content))
|
||||
itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new(content)))
|
||||
};
|
||||
iter.map(move |identifier| self.transform(identifier, content))
|
||||
}
|
||||
|
@ -126,6 +129,7 @@ impl<'s> Iterator for Utf8Chunks<'s> {
|
|||
|
||||
mod parser {
|
||||
use winnow::combinator::*;
|
||||
use winnow::error::ParserError;
|
||||
use winnow::prelude::*;
|
||||
use winnow::stream::AsBStr;
|
||||
use winnow::stream::AsChar;
|
||||
|
@ -133,8 +137,9 @@ mod parser {
|
|||
use winnow::stream::Stream;
|
||||
use winnow::stream::StreamIsPartial;
|
||||
use winnow::token::*;
|
||||
use winnow::trace::trace;
|
||||
|
||||
pub(crate) fn next_identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
pub(crate) fn next_identifier<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
|
@ -143,7 +148,7 @@ mod parser {
|
|||
preceded(ignore, identifier).parse_next(input)
|
||||
}
|
||||
|
||||
fn identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn identifier<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
|
@ -153,15 +158,17 @@ mod parser {
|
|||
// `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
|
||||
// or unexpected cases than strip off start characters to a word since we aren't doing a
|
||||
// proper word boundary parse
|
||||
take_while(1.., is_xid_continue).parse_next(input)
|
||||
trace("identifier", take_while(1.., is_xid_continue)).parse_next(input)
|
||||
}
|
||||
|
||||
fn ignore<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn ignore<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
trace(
|
||||
"ignore",
|
||||
take_many0(alt((
|
||||
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
|
||||
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
|
||||
|
@ -178,11 +185,12 @@ mod parser {
|
|||
c_escape,
|
||||
printf,
|
||||
other,
|
||||
)))
|
||||
))),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn sep1<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn sep1<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
|
@ -195,21 +203,24 @@ mod parser {
|
|||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn other<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn other<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
trace(
|
||||
"other",
|
||||
(
|
||||
one_of(|c| !is_xid_continue(c)),
|
||||
take_while(0.., is_ignore_char),
|
||||
)
|
||||
.recognize()
|
||||
.recognize(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn ordinal_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn ordinal_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
|
@ -221,26 +232,29 @@ mod parser {
|
|||
['_'].contains(&c)
|
||||
}
|
||||
|
||||
trace(
|
||||
"ordinal_literal",
|
||||
(
|
||||
take_while(0.., is_sep),
|
||||
take_while(1.., is_dec_digit),
|
||||
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
|
||||
take_while(0.., is_sep),
|
||||
)
|
||||
.recognize()
|
||||
.recognize(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn dec_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn dec_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
take_while(1.., is_dec_digit_with_sep).parse_next(input)
|
||||
trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input)
|
||||
}
|
||||
|
||||
fn hex_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn hex_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
|
@ -253,28 +267,33 @@ mod parser {
|
|||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn css_color<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn css_color<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
trace(
|
||||
"color",
|
||||
preceded(
|
||||
'#',
|
||||
alt((
|
||||
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
|
||||
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
|
||||
)),
|
||||
),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn uuid_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn uuid_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
trace(
|
||||
"uuid",
|
||||
alt((
|
||||
(
|
||||
take_while(8, is_lower_hex_digit),
|
||||
|
@ -299,11 +318,12 @@ mod parser {
|
|||
take_while(12, is_upper_hex_digit),
|
||||
),
|
||||
))
|
||||
.recognize()
|
||||
.recognize(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn hash_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn hash_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
|
@ -319,23 +339,28 @@ mod parser {
|
|||
// or more.
|
||||
|
||||
const IGNORE_HEX_MIN: usize = 32;
|
||||
trace(
|
||||
"hash",
|
||||
alt((
|
||||
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
|
||||
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
|
||||
))
|
||||
)),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn base64_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn base64_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?;
|
||||
trace("base64", move |input: &mut T| {
|
||||
let start = input.checkpoint();
|
||||
let captured = take_while(1.., is_base64_digit).parse_next(input)?;
|
||||
|
||||
const CHUNK: usize = 4;
|
||||
let padding_offset = input.offset_to(&padding);
|
||||
let padding_offset = input.offset_from(&start);
|
||||
let mut padding_len = CHUNK - padding_offset % CHUNK;
|
||||
if padding_len == CHUNK {
|
||||
padding_len = 0;
|
||||
|
@ -348,39 +373,47 @@ mod parser {
|
|||
.iter()
|
||||
.all(|c| !['/', '+'].contains(&c.as_char()))
|
||||
{
|
||||
return Err(winnow::error::ErrMode::Backtrack(
|
||||
winnow::error::Error::new(input, winnow::error::ErrorKind::Slice),
|
||||
return Err(winnow::error::ErrMode::from_error_kind(
|
||||
input,
|
||||
winnow::error::ErrorKind::Slice,
|
||||
));
|
||||
}
|
||||
|
||||
let (after, _) =
|
||||
take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
|
||||
take_while(padding_len..=padding_len, is_base64_padding).parse_next(input)?;
|
||||
|
||||
let after_offset = input.offset_to(&after);
|
||||
let after_offset = input.offset_from(&start);
|
||||
input.reset(start);
|
||||
Ok(input.next_slice(after_offset))
|
||||
})
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn email_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn email_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
trace(
|
||||
"email",
|
||||
(
|
||||
take_while(1.., is_localport_char),
|
||||
'@',
|
||||
take_while(1.., is_domain_char),
|
||||
)
|
||||
.recognize()
|
||||
.recognize(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn url_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn url_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
trace(
|
||||
"url",
|
||||
(
|
||||
opt(terminated(
|
||||
take_while(1.., is_scheme_char),
|
||||
|
@ -397,25 +430,29 @@ mod parser {
|
|||
// HACK: Too lazy to enumerate
|
||||
take_while(0.., is_path_query_fragment),
|
||||
)
|
||||
.recognize()
|
||||
.recognize(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn url_userinfo<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn url_userinfo<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
trace(
|
||||
"userinfo",
|
||||
(
|
||||
take_while(1.., is_localport_char),
|
||||
opt(preceded(':', take_while(0.., is_localport_char))),
|
||||
)
|
||||
.recognize()
|
||||
.recognize(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn c_escape<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn c_escape<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
|
@ -425,25 +462,29 @@ mod parser {
|
|||
// regular string that does escaping. The escaped letter might be part of a word, or it
|
||||
// might not be. Rather than guess and be wrong part of the time and correct people's words
|
||||
// incorrectly, we opt for just not evaluating it at all.
|
||||
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)).parse_next(input)
|
||||
trace(
|
||||
"escape",
|
||||
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn printf<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
fn printf<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
where
|
||||
T: Stream + StreamIsPartial + PartialEq,
|
||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
preceded('%', take_while(1.., is_xid_continue)).parse_next(input)
|
||||
trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input)
|
||||
}
|
||||
|
||||
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, <I as Stream>::Slice, E>
|
||||
fn take_many0<I, E, F>(mut f: F) -> impl Parser<I, <I as Stream>::Slice, E>
|
||||
where
|
||||
I: Stream,
|
||||
F: winnow::Parser<I, <I as Stream>::Slice, E>,
|
||||
E: winnow::error::ParseError<I>,
|
||||
F: Parser<I, <I as Stream>::Slice, E>,
|
||||
E: ParserError<I>,
|
||||
{
|
||||
move |i: I| {
|
||||
move |i: &mut I| {
|
||||
repeat(0.., f.by_ref())
|
||||
.map(|()| ())
|
||||
.recognize()
|
||||
|
@ -581,9 +622,8 @@ mod unicode_parser {
|
|||
use super::parser::next_identifier;
|
||||
|
||||
pub(crate) fn iter_identifiers(mut input: &str) -> impl Iterator<Item = &str> {
|
||||
std::iter::from_fn(move || match next_identifier(input) {
|
||||
Ok((i, o)) => {
|
||||
input = i;
|
||||
std::iter::from_fn(move || match next_identifier(&mut input) {
|
||||
Ok(o) => {
|
||||
debug_assert_ne!(o, "");
|
||||
Some(o)
|
||||
}
|
||||
|
@ -595,10 +635,11 @@ mod unicode_parser {
|
|||
mod ascii_parser {
|
||||
use super::parser::next_identifier;
|
||||
|
||||
pub(crate) fn iter_identifiers(mut input: &[u8]) -> impl Iterator<Item = &str> {
|
||||
std::iter::from_fn(move || match next_identifier(input) {
|
||||
Ok((i, o)) => {
|
||||
input = i;
|
||||
use winnow::BStr;
|
||||
|
||||
pub(crate) fn iter_identifiers(mut input: &BStr) -> impl Iterator<Item = &str> {
|
||||
std::iter::from_fn(move || match next_identifier(&mut input) {
|
||||
Ok(o) => {
|
||||
debug_assert_ne!(o, b"");
|
||||
// This is safe because we've checked that the strings are a subset of ASCII
|
||||
// characters.
|
||||
|
|
|
@ -12,11 +12,11 @@ include.workspace = true
|
|||
|
||||
[features]
|
||||
default = []
|
||||
parser = ["winnow"]
|
||||
flags = ["enumflags2"]
|
||||
parser = ["dep:winnow"]
|
||||
flags = ["dep:enumflags2"]
|
||||
|
||||
[dependencies]
|
||||
winnow = { version = "0.4.6", optional = true }
|
||||
winnow = { version = "0.5.0", optional = true }
|
||||
enumflags2 = { version = "0.7", optional = true }
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use winnow::prelude::*;
|
||||
use winnow::trace::trace;
|
||||
|
||||
use crate::*;
|
||||
|
||||
|
@ -17,10 +18,8 @@ impl<'i> Iterator for ClusterIter<'i> {
|
|||
type Item = Cluster;
|
||||
|
||||
fn next(&mut self) -> Option<Cluster> {
|
||||
let i = self.input.trim_start();
|
||||
let (i, c) = Cluster::parse(i).ok()?;
|
||||
self.input = i;
|
||||
Some(c)
|
||||
self.input = self.input.trim_start();
|
||||
Cluster::parse_.parse_next(&mut self.input).ok()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -61,29 +60,34 @@ A Cv: acknowledgment's / Av B C: acknowledgement's
|
|||
}
|
||||
|
||||
impl Cluster {
|
||||
pub fn parse(input: &str) -> IResult<&str, Self> {
|
||||
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||
}
|
||||
|
||||
fn parse_(input: &mut &str) -> PResult<Self, ()> {
|
||||
trace("cluster", move |input: &mut &str| {
|
||||
let header = (
|
||||
winnow::bytes::tag("#"),
|
||||
winnow::character::space0,
|
||||
winnow::character::not_line_ending,
|
||||
winnow::character::line_ending,
|
||||
"#",
|
||||
winnow::ascii::space0,
|
||||
winnow::ascii::not_line_ending,
|
||||
winnow::ascii::line_ending,
|
||||
);
|
||||
let note = winnow::sequence::preceded(
|
||||
(winnow::bytes::tag("##"), winnow::character::space0),
|
||||
winnow::sequence::terminated(
|
||||
winnow::character::not_line_ending,
|
||||
winnow::character::line_ending,
|
||||
let note = winnow::combinator::preceded(
|
||||
("##", winnow::ascii::space0),
|
||||
winnow::combinator::terminated(
|
||||
winnow::ascii::not_line_ending,
|
||||
winnow::ascii::line_ending,
|
||||
),
|
||||
);
|
||||
let mut cluster = (
|
||||
winnow::combinator::opt(header),
|
||||
winnow::multi::many1(winnow::sequence::terminated(
|
||||
Entry::parse,
|
||||
winnow::character::line_ending,
|
||||
)),
|
||||
winnow::multi::many0(note),
|
||||
winnow::combinator::repeat(
|
||||
1..,
|
||||
winnow::combinator::terminated(Entry::parse_, winnow::ascii::line_ending),
|
||||
),
|
||||
winnow::combinator::repeat(0.., note),
|
||||
);
|
||||
let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = cluster.parse_next(input)?;
|
||||
let (header, entries, notes): (_, _, Vec<_>) = cluster.parse_next(input)?;
|
||||
|
||||
let header = header.map(|s| s.2.to_owned());
|
||||
let notes = notes.into_iter().map(|s| s.to_owned()).collect();
|
||||
|
@ -92,7 +96,9 @@ impl Cluster {
|
|||
entries,
|
||||
notes,
|
||||
};
|
||||
Ok((input, c))
|
||||
Ok(c)
|
||||
})
|
||||
.parse_next(input)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -102,7 +108,8 @@ mod test_cluster {
|
|||
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
let (input, actual) = Cluster::parse(
|
||||
let (input, actual) = Cluster::parse_
|
||||
.parse_peek(
|
||||
"# acknowledgment <verified> (level 35)
|
||||
A Cv: acknowledgment / Av B C: acknowledgement
|
||||
A Cv: acknowledgments / Av B C: acknowledgements
|
||||
|
@ -122,7 +129,8 @@ A Cv: acknowledgment's / Av B C: acknowledgement's
|
|||
|
||||
#[test]
|
||||
fn test_notes() {
|
||||
let (input, actual) = Cluster::parse(
|
||||
let (input, actual) = Cluster::parse_
|
||||
.parse_peek(
|
||||
"# coloration <verified> (level 50)
|
||||
A B C: coloration / B. Cv: colouration
|
||||
A B C: colorations / B. Cv: colourations
|
||||
|
@ -144,20 +152,25 @@ A B C: coloration's / B. Cv: colouration's
|
|||
}
|
||||
|
||||
impl Entry {
|
||||
pub fn parse(input: &str) -> IResult<&str, Self> {
|
||||
let var_sep = (winnow::character::space0, '/', winnow::character::space0);
|
||||
let (input, variants) =
|
||||
winnow::multi::separated1(Variant::parse, var_sep).parse_next(input)?;
|
||||
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||
}
|
||||
|
||||
let desc_sep = (winnow::character::space0, '|');
|
||||
let (input, description) =
|
||||
fn parse_(input: &mut &str) -> PResult<Self, ()> {
|
||||
trace("entry", move |input: &mut &str| {
|
||||
let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0);
|
||||
let variants =
|
||||
winnow::combinator::separated1(Variant::parse_, var_sep).parse_next(input)?;
|
||||
|
||||
let desc_sep = (winnow::ascii::space0, '|');
|
||||
let description =
|
||||
winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?;
|
||||
|
||||
let comment_sep = (winnow::character::space0, '#');
|
||||
let (input, comment) = winnow::combinator::opt((
|
||||
let comment_sep = (winnow::ascii::space0, '#');
|
||||
let comment = winnow::combinator::opt((
|
||||
comment_sep,
|
||||
winnow::character::space1,
|
||||
winnow::character::not_line_ending,
|
||||
winnow::ascii::space1,
|
||||
winnow::ascii::not_line_ending,
|
||||
))
|
||||
.parse_next(input)?;
|
||||
|
||||
|
@ -174,17 +187,20 @@ impl Entry {
|
|||
};
|
||||
e.variants = variants;
|
||||
e.comment = comment.map(|c| c.2.to_owned());
|
||||
Ok((input, e))
|
||||
Ok(e)
|
||||
})
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn parse_description(input: &str) -> IResult<&str, Self> {
|
||||
let (input, (pos, archaic, note, description)) = (
|
||||
winnow::combinator::opt((winnow::character::space1, Pos::parse)),
|
||||
winnow::combinator::opt((winnow::character::space1, "(-)")),
|
||||
winnow::combinator::opt((winnow::character::space1, "--")),
|
||||
fn parse_description(input: &mut &str) -> PResult<Self, ()> {
|
||||
trace("description", move |input: &mut &str| {
|
||||
let (pos, archaic, note, description) = (
|
||||
winnow::combinator::opt((winnow::ascii::space1, Pos::parse_)),
|
||||
winnow::combinator::opt((winnow::ascii::space1, "(-)")),
|
||||
winnow::combinator::opt((winnow::ascii::space1, "--")),
|
||||
winnow::combinator::opt((
|
||||
winnow::character::space1,
|
||||
winnow::bytes::take_till0(|c| c == '\n' || c == '\r' || c == '#'),
|
||||
winnow::ascii::space1,
|
||||
winnow::token::take_till0(('\n', '\r', '#')),
|
||||
)),
|
||||
)
|
||||
.parse_next(input)?;
|
||||
|
@ -202,7 +218,9 @@ impl Entry {
|
|||
description,
|
||||
comment: None,
|
||||
};
|
||||
Ok((input, e))
|
||||
Ok(e)
|
||||
})
|
||||
.parse_next(input)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -215,8 +233,9 @@ mod test_entry {
|
|||
fn test_variant_only() {
|
||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||
// cases.
|
||||
let (input, actual) =
|
||||
Entry::parse("A Cv: acknowledgment's / Av B C: acknowledgement's\n").unwrap();
|
||||
let (input, actual) = Entry::parse_
|
||||
.parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's\n")
|
||||
.unwrap();
|
||||
assert_eq!(input, "\n");
|
||||
assert_eq!(actual.variants.len(), 2);
|
||||
assert_eq!(actual.pos, None);
|
||||
|
@ -229,7 +248,9 @@ mod test_entry {
|
|||
fn test_description() {
|
||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||
// cases.
|
||||
let (input, actual) = Entry::parse("A C: prize / B: prise | otherwise\n").unwrap();
|
||||
let (input, actual) = Entry::parse_
|
||||
.parse_peek("A C: prize / B: prise | otherwise\n")
|
||||
.unwrap();
|
||||
assert_eq!(input, "\n");
|
||||
assert_eq!(actual.variants.len(), 2);
|
||||
assert_eq!(actual.pos, None);
|
||||
|
@ -242,7 +263,9 @@ mod test_entry {
|
|||
fn test_pos() {
|
||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||
// cases.
|
||||
let (input, actual) = Entry::parse("A B C: practice / AV Cv: practise | <N>\n").unwrap();
|
||||
let (input, actual) = Entry::parse_
|
||||
.parse_peek("A B C: practice / AV Cv: practise | <N>\n")
|
||||
.unwrap();
|
||||
assert_eq!(input, "\n");
|
||||
assert_eq!(actual.variants.len(), 2);
|
||||
assert_eq!(actual.pos, Some(Pos::Noun));
|
||||
|
@ -255,7 +278,9 @@ mod test_entry {
|
|||
fn test_archaic() {
|
||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||
// cases.
|
||||
let (input, actual) = Entry::parse("A: bark / Av B: barque | (-) ship\n").unwrap();
|
||||
let (input, actual) = Entry::parse_
|
||||
.parse_peek("A: bark / Av B: barque | (-) ship\n")
|
||||
.unwrap();
|
||||
assert_eq!(input, "\n");
|
||||
assert_eq!(actual.variants.len(), 2);
|
||||
assert_eq!(actual.pos, None);
|
||||
|
@ -268,7 +293,9 @@ mod test_entry {
|
|||
fn test_note() {
|
||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||
// cases.
|
||||
let (input, actual) = Entry::parse("_: cabbies | -- plural\n").unwrap();
|
||||
let (input, actual) = Entry::parse_
|
||||
.parse_peek("_: cabbies | -- plural\n")
|
||||
.unwrap();
|
||||
assert_eq!(input, "\n");
|
||||
assert_eq!(actual.variants.len(), 1);
|
||||
assert_eq!(actual.pos, None);
|
||||
|
@ -279,7 +306,7 @@ mod test_entry {
|
|||
|
||||
#[test]
|
||||
fn test_trailing_comment() {
|
||||
let (input, actual) = Entry::parse(
|
||||
let (input, actual) = Entry::parse_.parse_peek(
|
||||
"A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n",
|
||||
)
|
||||
.unwrap();
|
||||
|
@ -297,20 +324,30 @@ mod test_entry {
|
|||
}
|
||||
|
||||
impl Variant {
|
||||
pub fn parse(input: &str) -> IResult<&str, Self> {
|
||||
let types = winnow::multi::separated1(Type::parse, winnow::character::space1);
|
||||
let sep = (winnow::bytes::tag(":"), winnow::character::space0);
|
||||
let (input, (types, word)) =
|
||||
winnow::sequence::separated_pair(types, sep, word).parse_next(input)?;
|
||||
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||
}
|
||||
|
||||
fn parse_(input: &mut &str) -> PResult<Self, ()> {
|
||||
trace("variant", move |input: &mut &str| {
|
||||
let types = winnow::combinator::separated1(Type::parse_, winnow::ascii::space1);
|
||||
let sep = (":", winnow::ascii::space0);
|
||||
let (types, word) =
|
||||
winnow::combinator::separated_pair(types, sep, word).parse_next(input)?;
|
||||
let v = Self { types, word };
|
||||
Ok((input, v))
|
||||
Ok(v)
|
||||
})
|
||||
.parse_next(input)
|
||||
}
|
||||
}
|
||||
|
||||
fn word(input: &str) -> IResult<&str, String> {
|
||||
winnow::bytes::take_till1(|item: char| item.is_ascii_whitespace())
|
||||
fn word(input: &mut &str) -> PResult<String, ()> {
|
||||
trace("word", move |input: &mut &str| {
|
||||
winnow::token::take_till1(|item: char| item.is_ascii_whitespace())
|
||||
.map(|s: &str| s.to_owned().replace('_', " "))
|
||||
.parse_next(input)
|
||||
})
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -321,7 +358,7 @@ mod test_variant {
|
|||
fn test_valid() {
|
||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||
// cases.
|
||||
let (input, actual) = Variant::parse("A Cv: acknowledgment ").unwrap();
|
||||
let (input, actual) = Variant::parse_.parse_peek("A Cv: acknowledgment ").unwrap();
|
||||
assert_eq!(input, " ");
|
||||
assert_eq!(
|
||||
actual.types,
|
||||
|
@ -343,8 +380,9 @@ mod test_variant {
|
|||
|
||||
#[test]
|
||||
fn test_extra() {
|
||||
let (input, actual) =
|
||||
Variant::parse("A Cv: acknowledgment's / Av B C: acknowledgement's").unwrap();
|
||||
let (input, actual) = Variant::parse_
|
||||
.parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's")
|
||||
.unwrap();
|
||||
assert_eq!(input, " / Av B C: acknowledgement's");
|
||||
assert_eq!(
|
||||
actual.types,
|
||||
|
@ -366,7 +404,7 @@ mod test_variant {
|
|||
|
||||
#[test]
|
||||
fn test_underscore() {
|
||||
let (input, actual) = Variant::parse("_: air_gun\n").unwrap();
|
||||
let (input, actual) = Variant::parse_.parse_peek("_: air_gun\n").unwrap();
|
||||
assert_eq!(input, "\n");
|
||||
assert_eq!(
|
||||
actual.types,
|
||||
|
@ -381,13 +419,20 @@ mod test_variant {
|
|||
}
|
||||
|
||||
impl Type {
|
||||
pub fn parse(input: &str) -> IResult<&str, Type> {
|
||||
let (input, category) = Category::parse(input)?;
|
||||
let (input, tag) = winnow::combinator::opt(Tag::parse).parse_next(input)?;
|
||||
let (input, num) = winnow::combinator::opt(winnow::character::digit1).parse_next(input)?;
|
||||
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||
}
|
||||
|
||||
fn parse_(input: &mut &str) -> PResult<Type, ()> {
|
||||
trace("type", move |input: &mut &str| {
|
||||
let category = Category::parse_(input)?;
|
||||
let tag = winnow::combinator::opt(Tag::parse_).parse_next(input)?;
|
||||
let num = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?;
|
||||
let num = num.map(|s| s.parse().expect("parser ensured its a number"));
|
||||
let t = Type { category, tag, num };
|
||||
Ok((input, t))
|
||||
Ok(t)
|
||||
})
|
||||
.parse_next(input)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -399,13 +444,13 @@ mod test_type {
|
|||
fn test_valid() {
|
||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||
// cases.
|
||||
let (input, actual) = Type::parse("A ").unwrap();
|
||||
let (input, actual) = Type::parse_.parse_peek("A ").unwrap();
|
||||
assert_eq!(input, " ");
|
||||
assert_eq!(actual.category, Category::American);
|
||||
assert_eq!(actual.tag, None);
|
||||
assert_eq!(actual.num, None);
|
||||
|
||||
let (input, actual) = Type::parse("Bv ").unwrap();
|
||||
let (input, actual) = Type::parse_.parse_peek("Bv ").unwrap();
|
||||
assert_eq!(input, " ");
|
||||
assert_eq!(actual.category, Category::BritishIse);
|
||||
assert_eq!(actual.tag, Some(Tag::Variant));
|
||||
|
@ -414,13 +459,13 @@ mod test_type {
|
|||
|
||||
#[test]
|
||||
fn test_extra() {
|
||||
let (input, actual) = Type::parse("Z foobar").unwrap();
|
||||
let (input, actual) = Type::parse_.parse_peek("Z foobar").unwrap();
|
||||
assert_eq!(input, " foobar");
|
||||
assert_eq!(actual.category, Category::BritishIze);
|
||||
assert_eq!(actual.tag, None);
|
||||
assert_eq!(actual.num, None);
|
||||
|
||||
let (input, actual) = Type::parse("C- foobar").unwrap();
|
||||
let (input, actual) = Type::parse_.parse_peek("C- foobar").unwrap();
|
||||
assert_eq!(input, " foobar");
|
||||
assert_eq!(actual.category, Category::Canadian);
|
||||
assert_eq!(actual.tag, Some(Tag::Possible));
|
||||
|
@ -429,7 +474,7 @@ mod test_type {
|
|||
|
||||
#[test]
|
||||
fn test_num() {
|
||||
let (input, actual) = Type::parse("Av1 ").unwrap();
|
||||
let (input, actual) = Type::parse_.parse_peek("Av1 ").unwrap();
|
||||
assert_eq!(input, " ");
|
||||
assert_eq!(actual.category, Category::American);
|
||||
assert_eq!(actual.tag, Some(Tag::Variant));
|
||||
|
@ -438,8 +483,13 @@ mod test_type {
|
|||
}
|
||||
|
||||
impl Category {
|
||||
pub fn parse(input: &str) -> IResult<&str, Category> {
|
||||
let symbols = winnow::bytes::one_of("ABZCD_");
|
||||
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||
}
|
||||
|
||||
fn parse_(input: &mut &str) -> PResult<Self, ()> {
|
||||
trace("category", move |input: &mut &str| {
|
||||
let symbols = winnow::token::one_of(['A', 'B', 'Z', 'C', 'D', '_']);
|
||||
symbols
|
||||
.map(|c| match c {
|
||||
'A' => Category::American,
|
||||
|
@ -451,6 +501,8 @@ impl Category {
|
|||
_ => unreachable!("parser won't select this option"),
|
||||
})
|
||||
.parse_next(input)
|
||||
})
|
||||
.parse_next(input)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -460,22 +512,27 @@ mod test_category {
|
|||
|
||||
// A lone "A" parses to `Category::American`. `parse_peek` hands back the
// unconsumed input next to the value, and nothing may be left over here.
#[test]
|
||||
fn test_valid() {
|
||||
let (input, actual) = Category::parse("A").unwrap();
|
||||
let (input, actual) = Category::parse_.parse_peek("A").unwrap();
|
||||
assert_eq!(input, "");
|
||||
assert_eq!(actual, Category::American);
|
||||
}
|
||||
|
||||
// Only the leading "_" is consumed (yielding `Category::Other`); the trailing
// " foobar" must be returned untouched as the remaining input.
#[test]
|
||||
fn test_extra() {
|
||||
let (input, actual) = Category::parse("_ foobar").unwrap();
|
||||
let (input, actual) = Category::parse_.parse_peek("_ foobar").unwrap();
|
||||
assert_eq!(input, " foobar");
|
||||
assert_eq!(actual, Category::Other);
|
||||
}
|
||||
}
|
||||
|
||||
impl Tag {
|
||||
pub fn parse(input: &str) -> IResult<&str, Tag> {
|
||||
let symbols = winnow::bytes::one_of(".vV-x");
|
||||
/// Parses `input` as a [`Tag`].
///
/// Delegates to the private `parse_` parser via winnow's `Parser::parse`,
/// which only succeeds when the entire input is consumed.
///
/// # Errors
///
/// Every parser failure is replaced by the unit [`ParseError`]; the
/// underlying winnow error is discarded.
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||
}
|
||||
|
||||
fn parse_(input: &mut &str) -> PResult<Self, ()> {
|
||||
trace("tag", move |input: &mut &str| {
|
||||
let symbols = winnow::token::one_of(['.', 'v', 'V', '-', 'x']);
|
||||
symbols
|
||||
.map(|c| match c {
|
||||
'.' => Tag::Eq,
|
||||
|
@ -486,6 +543,8 @@ impl Tag {
|
|||
_ => unreachable!("parser won't select this option"),
|
||||
})
|
||||
.parse_next(input)
|
||||
})
|
||||
.parse_next(input)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -495,33 +554,35 @@ mod test_tag {
|
|||
|
||||
// A lone "." parses to `Tag::Eq`. `parse_peek` hands back the unconsumed
// input next to the value, and nothing may be left over here.
#[test]
|
||||
fn test_valid() {
|
||||
let (input, actual) = Tag::parse(".").unwrap();
|
||||
let (input, actual) = Tag::parse_.parse_peek(".").unwrap();
|
||||
assert_eq!(input, "");
|
||||
assert_eq!(actual, Tag::Eq);
|
||||
}
|
||||
|
||||
// Only the leading "x" is consumed (yielding `Tag::Improper`); the trailing
// " foobar" must be returned untouched as the remaining input.
#[test]
|
||||
fn test_extra() {
|
||||
let (input, actual) = Tag::parse("x foobar").unwrap();
|
||||
let (input, actual) = Tag::parse_.parse_peek("x foobar").unwrap();
|
||||
assert_eq!(input, " foobar");
|
||||
assert_eq!(actual, Tag::Improper);
|
||||
}
|
||||
}
|
||||
|
||||
impl Pos {
|
||||
pub fn parse(input: &str) -> IResult<&str, Pos> {
|
||||
use winnow::bytes::tag;
|
||||
let noun = tag("<N>");
|
||||
let verb = tag("<V>");
|
||||
let adjective = tag("<Adj>");
|
||||
let adverb = tag("<Adv>");
|
||||
winnow::branch::alt((
|
||||
noun.value(Pos::Noun),
|
||||
verb.value(Pos::Verb),
|
||||
adjective.value(Pos::Adjective),
|
||||
adverb.value(Pos::Adverb),
|
||||
/// Parses `input` as a [`Pos`] (part-of-speech marker).
///
/// Delegates to the private `parse_` parser via winnow's `Parser::parse`,
/// which only succeeds when the entire input is consumed.
///
/// # Errors
///
/// Every parser failure is replaced by the unit [`ParseError`]; the
/// underlying winnow error is discarded.
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||
}
|
||||
|
||||
/// Streaming parser for a part-of-speech marker.
///
/// Matches one of the literals `<N>`, `<V>`, `<Adj>` or `<Adv>` at the front
/// of `input` and yields the corresponding [`Pos`] variant, advancing `input`
/// past the matched text. Unlike `parse`, trailing input is allowed.
///
/// The body is wrapped in `trace("pos", ..)`, which labels this parser "pos"
/// (presumably winnow's `trace` combinator; the import is outside this view).
fn parse_(input: &mut &str) -> PResult<Self, ()> {
|
||||
trace("pos", move |input: &mut &str| {
|
||||
// `alt` tries the alternatives in the order listed and keeps the first match.
winnow::combinator::alt((
|
||||
"<N>".value(Pos::Noun),
|
||||
"<V>".value(Pos::Verb),
|
||||
"<Adj>".value(Pos::Adjective),
|
||||
"<Adv>".value(Pos::Adverb),
|
||||
))
|
||||
.parse_next(input)
|
||||
})
|
||||
.parse_next(input)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -531,15 +592,26 @@ mod test_pos {
|
|||
|
||||
// A lone "<N>" parses to `Pos::Noun`. `parse_peek` hands back the unconsumed
// input next to the value, and nothing may be left over here.
#[test]
|
||||
fn test_valid() {
|
||||
let (input, actual) = Pos::parse("<N>").unwrap();
|
||||
let (input, actual) = Pos::parse_.parse_peek("<N>").unwrap();
|
||||
assert_eq!(input, "");
|
||||
assert_eq!(actual, Pos::Noun);
|
||||
}
|
||||
|
||||
// Only the leading "<Adj>" is consumed (yielding `Pos::Adjective`); the
// trailing " foobar" must be returned untouched as the remaining input.
#[test]
|
||||
fn test_extra() {
|
||||
let (input, actual) = Pos::parse("<Adj> foobar").unwrap();
|
||||
let (input, actual) = Pos::parse_.parse_peek("<Adj> foobar").unwrap();
|
||||
assert_eq!(input, " foobar");
|
||||
assert_eq!(actual, Pos::Adjective);
|
||||
}
|
||||
}
|
||||
|
||||
/// Opaque error returned by `Category::parse`, `Tag::parse` and `Pos::parse`.
///
/// It carries no detail: those functions replace whatever the underlying
/// parser reported with this unit value, and its `Display` output is the
/// fixed string `"invalid"`.
#[derive(Debug)]
|
||||
pub struct ParseError;
|
||||
|
||||
// User-facing rendering of `ParseError`: always the literal text "invalid",
// because the error type has no fields to report.
impl std::fmt::Display for ParseError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "invalid")
|
||||
}
|
||||
}
|
||||
|
||||
// Empty impl: `ParseError` relies on the trait's default methods, which is
// enough for it to be used wherever a `std::error::Error` is expected.
impl std::error::Error for ParseError {}
|
||||
|
|
Loading…
Reference in a new issue