Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Ed Page 2023-07-14 14:00:31 -05:00
commit a2c9d2076a
7 changed files with 459 additions and 332 deletions

15
Cargo.lock generated
View file

@ -1610,7 +1610,7 @@ dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"winnow",
"winnow 0.4.9",
]
[[package]]
@ -1648,7 +1648,7 @@ dependencies = [
"thiserror",
"unicode-segmentation",
"unicode-xid",
"winnow",
"winnow 0.5.0",
]
[[package]]
@ -1831,7 +1831,7 @@ name = "varcon-core"
version = "3.0.0"
dependencies = [
"enumflags2",
"winnow",
"winnow 0.5.0",
]
[[package]]
@ -2127,3 +2127,12 @@ checksum = "81a2094c43cc94775293eaa0e499fbc30048a6d824ac82c0351a8c0bf9112529"
dependencies = [
"memchr",
]
[[package]]
name = "winnow"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7"
dependencies = [
"memchr",
]

View file

@ -13,8 +13,8 @@ include.workspace = true
[features]
default = ["std"]
std = []
codegen = ["std", "phf_codegen"]
map = ["phf", "phf_shared"]
codegen = ["std", "dep:phf_codegen"]
map = ["dep:phf", "dep:phf_shared"]
[dependencies]
unicase = "2.6"

View file

@ -32,8 +32,8 @@ pre-release-replacements = [
[features]
default = ["dict", "vars"]
dict = ["typos-dict"]
vars = ["typos-vars"]
dict = ["dep:typos-dict"]
vars = ["dep:typos-vars"]
[[bin]]

View file

@ -14,7 +14,7 @@ include.workspace = true
[dependencies]
anyhow = "1.0"
thiserror = "1.0"
winnow = "0.4.9"
winnow = "0.5.0"
unicode-xid = "0.2.4"
once_cell = "1.17.2"
serde = { version = "1.0", features = ["derive"] }

View file

@ -1,4 +1,5 @@
use bstr::ByteSlice;
use winnow::BStr;
/// Define rules for tokenizing a buffer.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@ -48,7 +49,9 @@ impl Tokenizer {
let iter = if self.unicode && !ByteSlice::is_ascii(content.as_bytes()) {
itertools::Either::Left(unicode_parser::iter_identifiers(content))
} else {
itertools::Either::Right(ascii_parser::iter_identifiers(content.as_bytes()))
itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new(
content.as_bytes(),
)))
};
iter.map(move |identifier| self.transform(identifier, content.as_bytes()))
}
@ -58,7 +61,7 @@ impl Tokenizer {
let iter = Utf8Chunks::new(content).flat_map(unicode_parser::iter_identifiers);
itertools::Either::Left(iter)
} else {
itertools::Either::Right(ascii_parser::iter_identifiers(content))
itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new(content)))
};
iter.map(move |identifier| self.transform(identifier, content))
}
@ -126,6 +129,7 @@ impl<'s> Iterator for Utf8Chunks<'s> {
mod parser {
use winnow::combinator::*;
use winnow::error::ParserError;
use winnow::prelude::*;
use winnow::stream::AsBStr;
use winnow::stream::AsChar;
@ -133,8 +137,9 @@ mod parser {
use winnow::stream::Stream;
use winnow::stream::StreamIsPartial;
use winnow::token::*;
use winnow::trace::trace;
pub(crate) fn next_identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
pub(crate) fn next_identifier<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -143,7 +148,7 @@ mod parser {
preceded(ignore, identifier).parse_next(input)
}
fn identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn identifier<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -153,36 +158,39 @@ mod parser {
// `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
// or unexpected cases than strip off start characters to a word since we aren't doing a
// proper word boundary parse
take_while(1.., is_xid_continue).parse_next(input)
trace("identifier", take_while(1.., is_xid_continue)).parse_next(input)
}
fn ignore<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn ignore<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
take_many0(alt((
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
// - Make sure you always consume it
terminated(uuid_literal, peek(sep1)),
terminated(hash_literal, peek(sep1)),
terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
terminated(ordinal_literal, peek(sep1)),
terminated(hex_literal, peek(sep1)),
terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
terminated(email_literal, peek(sep1)),
terminated(url_literal, peek(sep1)),
terminated(css_color, peek(sep1)),
c_escape,
printf,
other,
)))
trace(
"ignore",
take_many0(alt((
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
// - Make sure you always consume it
terminated(uuid_literal, peek(sep1)),
terminated(hash_literal, peek(sep1)),
terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
terminated(ordinal_literal, peek(sep1)),
terminated(hex_literal, peek(sep1)),
terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
terminated(email_literal, peek(sep1)),
terminated(url_literal, peek(sep1)),
terminated(css_color, peek(sep1)),
c_escape,
printf,
other,
))),
)
.parse_next(input)
}
fn sep1<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn sep1<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -195,21 +203,24 @@ mod parser {
.parse_next(input)
}
fn other<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn other<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
(
one_of(|c| !is_xid_continue(c)),
take_while(0.., is_ignore_char),
trace(
"other",
(
one_of(|c| !is_xid_continue(c)),
take_while(0.., is_ignore_char),
)
.recognize(),
)
.recognize()
.parse_next(input)
.parse_next(input)
}
fn ordinal_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn ordinal_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -221,26 +232,29 @@ mod parser {
['_'].contains(&c)
}
(
take_while(0.., is_sep),
take_while(1.., is_dec_digit),
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
take_while(0.., is_sep),
trace(
"ordinal_literal",
(
take_while(0.., is_sep),
take_while(1.., is_dec_digit),
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
take_while(0.., is_sep),
)
.recognize(),
)
.recognize()
.parse_next(input)
.parse_next(input)
}
fn dec_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn dec_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
take_while(1.., is_dec_digit_with_sep).parse_next(input)
trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input)
}
fn hex_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn hex_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -253,57 +267,63 @@ mod parser {
.parse_next(input)
}
fn css_color<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn css_color<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
preceded(
'#',
alt((
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
)),
trace(
"color",
preceded(
'#',
alt((
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
)),
),
)
.parse_next(input)
}
fn uuid_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn uuid_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
alt((
(
take_while(8, is_lower_hex_digit),
'-',
take_while(4, is_lower_hex_digit),
'-',
take_while(4, is_lower_hex_digit),
'-',
take_while(4, is_lower_hex_digit),
'-',
take_while(12, is_lower_hex_digit),
),
(
take_while(8, is_upper_hex_digit),
'-',
take_while(4, is_upper_hex_digit),
'-',
take_while(4, is_upper_hex_digit),
'-',
take_while(4, is_upper_hex_digit),
'-',
take_while(12, is_upper_hex_digit),
),
))
.recognize()
trace(
"uuid",
alt((
(
take_while(8, is_lower_hex_digit),
'-',
take_while(4, is_lower_hex_digit),
'-',
take_while(4, is_lower_hex_digit),
'-',
take_while(4, is_lower_hex_digit),
'-',
take_while(12, is_lower_hex_digit),
),
(
take_while(8, is_upper_hex_digit),
'-',
take_while(4, is_upper_hex_digit),
'-',
take_while(4, is_upper_hex_digit),
'-',
take_while(4, is_upper_hex_digit),
'-',
take_while(12, is_upper_hex_digit),
),
))
.recognize(),
)
.parse_next(input)
}
fn hash_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn hash_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -319,103 +339,120 @@ mod parser {
// or more.
const IGNORE_HEX_MIN: usize = 32;
alt((
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
))
trace(
"hash",
alt((
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
)),
)
.parse_next(input)
}
fn base64_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn base64_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?;
trace("base64", move |input: &mut T| {
let start = input.checkpoint();
let captured = take_while(1.., is_base64_digit).parse_next(input)?;
const CHUNK: usize = 4;
let padding_offset = input.offset_to(&padding);
let mut padding_len = CHUNK - padding_offset % CHUNK;
if padding_len == CHUNK {
padding_len = 0;
}
const CHUNK: usize = 4;
let padding_offset = input.offset_from(&start);
let mut padding_len = CHUNK - padding_offset % CHUNK;
if padding_len == CHUNK {
padding_len = 0;
}
if captured.slice_len() < 90
&& padding_len == 0
&& captured
.as_bstr()
.iter()
.all(|c| !['/', '+'].contains(&c.as_char()))
{
return Err(winnow::error::ErrMode::Backtrack(
winnow::error::Error::new(input, winnow::error::ErrorKind::Slice),
));
}
if captured.slice_len() < 90
&& padding_len == 0
&& captured
.as_bstr()
.iter()
.all(|c| !['/', '+'].contains(&c.as_char()))
{
return Err(winnow::error::ErrMode::from_error_kind(
input,
winnow::error::ErrorKind::Slice,
));
}
let (after, _) =
take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
take_while(padding_len..=padding_len, is_base64_padding).parse_next(input)?;
let after_offset = input.offset_to(&after);
Ok(input.next_slice(after_offset))
let after_offset = input.offset_from(&start);
input.reset(start);
Ok(input.next_slice(after_offset))
})
.parse_next(input)
}
fn email_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn email_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
(
take_while(1.., is_localport_char),
'@',
take_while(1.., is_domain_char),
)
.recognize()
.parse_next(input)
}
fn url_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
(
opt(terminated(
take_while(1.., is_scheme_char),
// HACK: Technically you can skip `//` if you don't have a domain but that would
// get messy to support.
(':', '/', '/'),
)),
trace(
"email",
(
opt(terminated(url_userinfo, '@')),
take_while(1.., is_localport_char),
'@',
take_while(1.., is_domain_char),
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
),
'/',
// HACK: Too lazy to enumerate
take_while(0.., is_path_query_fragment),
)
.recognize(),
)
.recognize()
.parse_next(input)
.parse_next(input)
}
fn url_userinfo<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn url_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
(
take_while(1.., is_localport_char),
opt(preceded(':', take_while(0.., is_localport_char))),
trace(
"url",
(
opt(terminated(
take_while(1.., is_scheme_char),
// HACK: Technically you can skip `//` if you don't have a domain but that would
// get messy to support.
(':', '/', '/'),
)),
(
opt(terminated(url_userinfo, '@')),
take_while(1.., is_domain_char),
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
),
'/',
// HACK: Too lazy to enumerate
take_while(0.., is_path_query_fragment),
)
.recognize(),
)
.recognize()
.parse_next(input)
.parse_next(input)
}
fn c_escape<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn url_userinfo<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
trace(
"userinfo",
(
take_while(1.., is_localport_char),
opt(preceded(':', take_while(0.., is_localport_char))),
)
.recognize(),
)
.parse_next(input)
}
fn c_escape<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
@ -425,25 +462,29 @@ mod parser {
// regular string that does escaping. The escaped letter might be part of a word, or it
// might not be. Rather than guess and be wrong part of the time and correct people's words
// incorrectly, we opt for just not evaluating it at all.
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)).parse_next(input)
trace(
"escape",
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)),
)
.parse_next(input)
}
fn printf<T>(input: T) -> IResult<T, <T as Stream>::Slice>
fn printf<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Stream + StreamIsPartial + PartialEq,
<T as Stream>::Slice: AsBStr + SliceLen + Default,
<T as Stream>::Token: AsChar + Copy,
{
preceded('%', take_while(1.., is_xid_continue)).parse_next(input)
trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input)
}
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, <I as Stream>::Slice, E>
fn take_many0<I, E, F>(mut f: F) -> impl Parser<I, <I as Stream>::Slice, E>
where
I: Stream,
F: winnow::Parser<I, <I as Stream>::Slice, E>,
E: winnow::error::ParseError<I>,
F: Parser<I, <I as Stream>::Slice, E>,
E: ParserError<I>,
{
move |i: I| {
move |i: &mut I| {
repeat(0.., f.by_ref())
.map(|()| ())
.recognize()
@ -581,9 +622,8 @@ mod unicode_parser {
use super::parser::next_identifier;
pub(crate) fn iter_identifiers(mut input: &str) -> impl Iterator<Item = &str> {
std::iter::from_fn(move || match next_identifier(input) {
Ok((i, o)) => {
input = i;
std::iter::from_fn(move || match next_identifier(&mut input) {
Ok(o) => {
debug_assert_ne!(o, "");
Some(o)
}
@ -595,10 +635,11 @@ mod unicode_parser {
mod ascii_parser {
use super::parser::next_identifier;
pub(crate) fn iter_identifiers(mut input: &[u8]) -> impl Iterator<Item = &str> {
std::iter::from_fn(move || match next_identifier(input) {
Ok((i, o)) => {
input = i;
use winnow::BStr;
pub(crate) fn iter_identifiers(mut input: &BStr) -> impl Iterator<Item = &str> {
std::iter::from_fn(move || match next_identifier(&mut input) {
Ok(o) => {
debug_assert_ne!(o, b"");
// This is safe because we've checked that the strings are a subset of ASCII
// characters.

View file

@ -12,11 +12,11 @@ include.workspace = true
[features]
default = []
parser = ["winnow"]
flags = ["enumflags2"]
parser = ["dep:winnow"]
flags = ["dep:enumflags2"]
[dependencies]
winnow = { version = "0.4.9", optional = true }
winnow = { version = "0.5.0", optional = true }
enumflags2 = { version = "0.7", optional = true }
[package.metadata.docs.rs]

View file

@ -1,4 +1,5 @@
use winnow::prelude::*;
use winnow::trace::trace;
use crate::*;
@ -17,10 +18,8 @@ impl<'i> Iterator for ClusterIter<'i> {
type Item = Cluster;
fn next(&mut self) -> Option<Cluster> {
let i = self.input.trim_start();
let (i, c) = Cluster::parse(i).ok()?;
self.input = i;
Some(c)
self.input = self.input.trim_start();
Cluster::parse_.parse_next(&mut self.input).ok()
}
}
@ -61,38 +60,45 @@ A Cv: acknowledgment's / Av B C: acknowledgement's
}
impl Cluster {
pub fn parse(input: &str) -> IResult<&str, Self> {
let header = (
"#",
winnow::ascii::space0,
winnow::ascii::not_line_ending,
winnow::ascii::line_ending,
);
let note = winnow::combinator::preceded(
("##", winnow::ascii::space0),
winnow::combinator::terminated(
pub fn parse(input: &str) -> Result<Self, ParseError> {
Self::parse_.parse(input).map_err(|_err| ParseError)
}
fn parse_(input: &mut &str) -> PResult<Self, ()> {
trace("cluster", move |input: &mut &str| {
let header = (
"#",
winnow::ascii::space0,
winnow::ascii::not_line_ending,
winnow::ascii::line_ending,
),
);
let mut cluster = (
winnow::combinator::opt(header),
winnow::combinator::repeat(
1..,
winnow::combinator::terminated(Entry::parse, winnow::ascii::line_ending),
),
winnow::combinator::repeat(0.., note),
);
let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = cluster.parse_next(input)?;
);
let note = winnow::combinator::preceded(
("##", winnow::ascii::space0),
winnow::combinator::terminated(
winnow::ascii::not_line_ending,
winnow::ascii::line_ending,
),
);
let mut cluster = (
winnow::combinator::opt(header),
winnow::combinator::repeat(
1..,
winnow::combinator::terminated(Entry::parse_, winnow::ascii::line_ending),
),
winnow::combinator::repeat(0.., note),
);
let (header, entries, notes): (_, _, Vec<_>) = cluster.parse_next(input)?;
let header = header.map(|s| s.2.to_owned());
let notes = notes.into_iter().map(|s| s.to_owned()).collect();
let c = Self {
header,
entries,
notes,
};
Ok((input, c))
let header = header.map(|s| s.2.to_owned());
let notes = notes.into_iter().map(|s| s.to_owned()).collect();
let c = Self {
header,
entries,
notes,
};
Ok(c)
})
.parse_next(input)
}
}
@ -102,15 +108,16 @@ mod test_cluster {
#[test]
fn test_basic() {
let (input, actual) = Cluster::parse(
"# acknowledgment <verified> (level 35)
let (input, actual) = Cluster::parse_
.parse_peek(
"# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's
",
)
.unwrap();
)
.unwrap();
assert_eq!(input, "\n");
assert_eq!(
actual.header,
@ -122,8 +129,9 @@ A Cv: acknowledgment's / Av B C: acknowledgement's
#[test]
fn test_notes() {
let (input, actual) = Cluster::parse(
"# coloration <verified> (level 50)
let (input, actual) = Cluster::parse_
.parse_peek(
"# coloration <verified> (level 50)
A B C: coloration / B. Cv: colouration
A B C: colorations / B. Cv: colourations
A B C: coloration's / B. Cv: colouration's
@ -131,8 +139,8 @@ A B C: coloration's / B. Cv: colouration's
## variant for British Engl or some reason
",
)
.unwrap();
)
.unwrap();
assert_eq!(input, "\n");
assert_eq!(
actual.header,
@ -144,65 +152,75 @@ A B C: coloration's / B. Cv: colouration's
}
impl Entry {
pub fn parse(input: &str) -> IResult<&str, Self> {
let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0);
let (input, variants) =
winnow::combinator::separated1(Variant::parse, var_sep).parse_next(input)?;
let desc_sep = (winnow::ascii::space0, '|');
let (input, description) =
winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?;
let comment_sep = (winnow::ascii::space0, '#');
let (input, comment) = winnow::combinator::opt((
comment_sep,
winnow::ascii::space1,
winnow::ascii::not_line_ending,
))
.parse_next(input)?;
let mut e = match description {
Some((_, description)) => description,
None => Self {
variants: Vec::new(),
pos: None,
archaic: false,
note: false,
description: None,
comment: None,
},
};
e.variants = variants;
e.comment = comment.map(|c| c.2.to_owned());
Ok((input, e))
pub fn parse(input: &str) -> Result<Self, ParseError> {
Self::parse_.parse(input).map_err(|_err| ParseError)
}
fn parse_description(input: &str) -> IResult<&str, Self> {
let (input, (pos, archaic, note, description)) = (
winnow::combinator::opt((winnow::ascii::space1, Pos::parse)),
winnow::combinator::opt((winnow::ascii::space1, "(-)")),
winnow::combinator::opt((winnow::ascii::space1, "--")),
winnow::combinator::opt((
fn parse_(input: &mut &str) -> PResult<Self, ()> {
trace("entry", move |input: &mut &str| {
let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0);
let variants =
winnow::combinator::separated1(Variant::parse_, var_sep).parse_next(input)?;
let desc_sep = (winnow::ascii::space0, '|');
let description =
winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?;
let comment_sep = (winnow::ascii::space0, '#');
let comment = winnow::combinator::opt((
comment_sep,
winnow::ascii::space1,
winnow::token::take_till0(('\n', '\r', '#')),
)),
)
winnow::ascii::not_line_ending,
))
.parse_next(input)?;
let variants = Vec::new();
let pos = pos.map(|(_, p)| p);
let archaic = archaic.is_some();
let note = note.is_some();
let description = description.map(|(_, d)| d.to_owned());
let e = Self {
variants,
pos,
archaic,
note,
description,
comment: None,
};
Ok((input, e))
let mut e = match description {
Some((_, description)) => description,
None => Self {
variants: Vec::new(),
pos: None,
archaic: false,
note: false,
description: None,
comment: None,
},
};
e.variants = variants;
e.comment = comment.map(|c| c.2.to_owned());
Ok(e)
})
.parse_next(input)
}
fn parse_description(input: &mut &str) -> PResult<Self, ()> {
trace("description", move |input: &mut &str| {
let (pos, archaic, note, description) = (
winnow::combinator::opt((winnow::ascii::space1, Pos::parse_)),
winnow::combinator::opt((winnow::ascii::space1, "(-)")),
winnow::combinator::opt((winnow::ascii::space1, "--")),
winnow::combinator::opt((
winnow::ascii::space1,
winnow::token::take_till0(('\n', '\r', '#')),
)),
)
.parse_next(input)?;
let variants = Vec::new();
let pos = pos.map(|(_, p)| p);
let archaic = archaic.is_some();
let note = note.is_some();
let description = description.map(|(_, d)| d.to_owned());
let e = Self {
variants,
pos,
archaic,
note,
description,
comment: None,
};
Ok(e)
})
.parse_next(input)
}
}
@ -215,8 +233,9 @@ mod test_entry {
fn test_variant_only() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases.
let (input, actual) =
Entry::parse("A Cv: acknowledgment's / Av B C: acknowledgement's\n").unwrap();
let (input, actual) = Entry::parse_
.parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's\n")
.unwrap();
assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 2);
assert_eq!(actual.pos, None);
@ -229,7 +248,9 @@ mod test_entry {
fn test_description() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases.
let (input, actual) = Entry::parse("A C: prize / B: prise | otherwise\n").unwrap();
let (input, actual) = Entry::parse_
.parse_peek("A C: prize / B: prise | otherwise\n")
.unwrap();
assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 2);
assert_eq!(actual.pos, None);
@ -242,7 +263,9 @@ mod test_entry {
fn test_pos() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases.
let (input, actual) = Entry::parse("A B C: practice / AV Cv: practise | <N>\n").unwrap();
let (input, actual) = Entry::parse_
.parse_peek("A B C: practice / AV Cv: practise | <N>\n")
.unwrap();
assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 2);
assert_eq!(actual.pos, Some(Pos::Noun));
@ -255,7 +278,9 @@ mod test_entry {
fn test_archaic() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases.
let (input, actual) = Entry::parse("A: bark / Av B: barque | (-) ship\n").unwrap();
let (input, actual) = Entry::parse_
.parse_peek("A: bark / Av B: barque | (-) ship\n")
.unwrap();
assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 2);
assert_eq!(actual.pos, None);
@ -268,7 +293,9 @@ mod test_entry {
fn test_note() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases.
let (input, actual) = Entry::parse("_: cabbies | -- plural\n").unwrap();
let (input, actual) = Entry::parse_
.parse_peek("_: cabbies | -- plural\n")
.unwrap();
assert_eq!(input, "\n");
assert_eq!(actual.variants.len(), 1);
assert_eq!(actual.pos, None);
@ -279,7 +306,7 @@ mod test_entry {
#[test]
fn test_trailing_comment() {
let (input, actual) = Entry::parse(
let (input, actual) = Entry::parse_.parse_peek(
"A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n",
)
.unwrap();
@ -297,20 +324,30 @@ mod test_entry {
}
impl Variant {
pub fn parse(input: &str) -> IResult<&str, Self> {
let types = winnow::combinator::separated1(Type::parse, winnow::ascii::space1);
let sep = (":", winnow::ascii::space0);
let (input, (types, word)) =
winnow::combinator::separated_pair(types, sep, word).parse_next(input)?;
let v = Self { types, word };
Ok((input, v))
pub fn parse(input: &str) -> Result<Self, ParseError> {
Self::parse_.parse(input).map_err(|_err| ParseError)
}
fn parse_(input: &mut &str) -> PResult<Self, ()> {
trace("variant", move |input: &mut &str| {
let types = winnow::combinator::separated1(Type::parse_, winnow::ascii::space1);
let sep = (":", winnow::ascii::space0);
let (types, word) =
winnow::combinator::separated_pair(types, sep, word).parse_next(input)?;
let v = Self { types, word };
Ok(v)
})
.parse_next(input)
}
}
fn word(input: &str) -> IResult<&str, String> {
winnow::token::take_till1(|item: char| item.is_ascii_whitespace())
.map(|s: &str| s.to_owned().replace('_', " "))
.parse_next(input)
fn word(input: &mut &str) -> PResult<String, ()> {
trace("word", move |input: &mut &str| {
winnow::token::take_till1(|item: char| item.is_ascii_whitespace())
.map(|s: &str| s.to_owned().replace('_', " "))
.parse_next(input)
})
.parse_next(input)
}
#[cfg(test)]
@ -321,7 +358,7 @@ mod test_variant {
fn test_valid() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases.
let (input, actual) = Variant::parse("A Cv: acknowledgment ").unwrap();
let (input, actual) = Variant::parse_.parse_peek("A Cv: acknowledgment ").unwrap();
assert_eq!(input, " ");
assert_eq!(
actual.types,
@ -343,8 +380,9 @@ mod test_variant {
#[test]
fn test_extra() {
let (input, actual) =
Variant::parse("A Cv: acknowledgment's / Av B C: acknowledgement's").unwrap();
let (input, actual) = Variant::parse_
.parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's")
.unwrap();
assert_eq!(input, " / Av B C: acknowledgement's");
assert_eq!(
actual.types,
@ -366,7 +404,7 @@ mod test_variant {
#[test]
fn test_underscore() {
let (input, actual) = Variant::parse("_: air_gun\n").unwrap();
let (input, actual) = Variant::parse_.parse_peek("_: air_gun\n").unwrap();
assert_eq!(input, "\n");
assert_eq!(
actual.types,
@ -381,13 +419,20 @@ mod test_variant {
}
impl Type {
pub fn parse(input: &str) -> IResult<&str, Type> {
let (input, category) = Category::parse(input)?;
let (input, tag) = winnow::combinator::opt(Tag::parse).parse_next(input)?;
let (input, num) = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?;
let num = num.map(|s| s.parse().expect("parser ensured its a number"));
let t = Type { category, tag, num };
Ok((input, t))
pub fn parse(input: &str) -> Result<Self, ParseError> {
Self::parse_.parse(input).map_err(|_err| ParseError)
}
fn parse_(input: &mut &str) -> PResult<Type, ()> {
trace("type", move |input: &mut &str| {
let category = Category::parse_(input)?;
let tag = winnow::combinator::opt(Tag::parse_).parse_next(input)?;
let num = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?;
let num = num.map(|s| s.parse().expect("parser ensured its a number"));
let t = Type { category, tag, num };
Ok(t)
})
.parse_next(input)
}
}
@ -399,13 +444,13 @@ mod test_type {
fn test_valid() {
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
// cases.
let (input, actual) = Type::parse("A ").unwrap();
let (input, actual) = Type::parse_.parse_peek("A ").unwrap();
assert_eq!(input, " ");
assert_eq!(actual.category, Category::American);
assert_eq!(actual.tag, None);
assert_eq!(actual.num, None);
let (input, actual) = Type::parse("Bv ").unwrap();
let (input, actual) = Type::parse_.parse_peek("Bv ").unwrap();
assert_eq!(input, " ");
assert_eq!(actual.category, Category::BritishIse);
assert_eq!(actual.tag, Some(Tag::Variant));
@ -414,13 +459,13 @@ mod test_type {
#[test]
fn test_extra() {
let (input, actual) = Type::parse("Z foobar").unwrap();
let (input, actual) = Type::parse_.parse_peek("Z foobar").unwrap();
assert_eq!(input, " foobar");
assert_eq!(actual.category, Category::BritishIze);
assert_eq!(actual.tag, None);
assert_eq!(actual.num, None);
let (input, actual) = Type::parse("C- foobar").unwrap();
let (input, actual) = Type::parse_.parse_peek("C- foobar").unwrap();
assert_eq!(input, " foobar");
assert_eq!(actual.category, Category::Canadian);
assert_eq!(actual.tag, Some(Tag::Possible));
@ -429,7 +474,7 @@ mod test_type {
#[test]
fn test_num() {
let (input, actual) = Type::parse("Av1 ").unwrap();
let (input, actual) = Type::parse_.parse_peek("Av1 ").unwrap();
assert_eq!(input, " ");
assert_eq!(actual.category, Category::American);
assert_eq!(actual.tag, Some(Tag::Variant));
@ -438,19 +483,26 @@ mod test_type {
}
impl Category {
pub fn parse(input: &str) -> IResult<&str, Category> {
let symbols = winnow::token::one_of(['A', 'B', 'Z', 'C', 'D', '_']);
symbols
.map(|c| match c {
'A' => Category::American,
'B' => Category::BritishIse,
'Z' => Category::BritishIze,
'C' => Category::Canadian,
'D' => Category::Australian,
'_' => Category::Other,
_ => unreachable!("parser won't select this option"),
})
.parse_next(input)
pub fn parse(input: &str) -> Result<Self, ParseError> {
Self::parse_.parse(input).map_err(|_err| ParseError)
}
fn parse_(input: &mut &str) -> PResult<Self, ()> {
trace("category", move |input: &mut &str| {
let symbols = winnow::token::one_of(['A', 'B', 'Z', 'C', 'D', '_']);
symbols
.map(|c| match c {
'A' => Category::American,
'B' => Category::BritishIse,
'Z' => Category::BritishIze,
'C' => Category::Canadian,
'D' => Category::Australian,
'_' => Category::Other,
_ => unreachable!("parser won't select this option"),
})
.parse_next(input)
})
.parse_next(input)
}
}
@ -460,32 +512,39 @@ mod test_category {
#[test]
fn test_valid() {
let (input, actual) = Category::parse("A").unwrap();
let (input, actual) = Category::parse_.parse_peek("A").unwrap();
assert_eq!(input, "");
assert_eq!(actual, Category::American);
}
#[test]
fn test_extra() {
let (input, actual) = Category::parse("_ foobar").unwrap();
let (input, actual) = Category::parse_.parse_peek("_ foobar").unwrap();
assert_eq!(input, " foobar");
assert_eq!(actual, Category::Other);
}
}
impl Tag {
pub fn parse(input: &str) -> IResult<&str, Tag> {
let symbols = winnow::token::one_of(['.', 'v', 'V', '-', 'x']);
symbols
.map(|c| match c {
'.' => Tag::Eq,
'v' => Tag::Variant,
'V' => Tag::Seldom,
'-' => Tag::Possible,
'x' => Tag::Improper,
_ => unreachable!("parser won't select this option"),
})
.parse_next(input)
pub fn parse(input: &str) -> Result<Self, ParseError> {
Self::parse_.parse(input).map_err(|_err| ParseError)
}
fn parse_(input: &mut &str) -> PResult<Self, ()> {
trace("tag", move |input: &mut &str| {
let symbols = winnow::token::one_of(['.', 'v', 'V', '-', 'x']);
symbols
.map(|c| match c {
'.' => Tag::Eq,
'v' => Tag::Variant,
'V' => Tag::Seldom,
'-' => Tag::Possible,
'x' => Tag::Improper,
_ => unreachable!("parser won't select this option"),
})
.parse_next(input)
})
.parse_next(input)
}
}
@ -495,27 +554,34 @@ mod test_tag {
#[test]
fn test_valid() {
let (input, actual) = Tag::parse(".").unwrap();
let (input, actual) = Tag::parse_.parse_peek(".").unwrap();
assert_eq!(input, "");
assert_eq!(actual, Tag::Eq);
}
#[test]
fn test_extra() {
let (input, actual) = Tag::parse("x foobar").unwrap();
let (input, actual) = Tag::parse_.parse_peek("x foobar").unwrap();
assert_eq!(input, " foobar");
assert_eq!(actual, Tag::Improper);
}
}
impl Pos {
pub fn parse(input: &str) -> IResult<&str, Pos> {
winnow::branch::alt((
"<N>".value(Pos::Noun),
"<V>".value(Pos::Verb),
"<Adj>".value(Pos::Adjective),
"<Adv>".value(Pos::Adverb),
))
pub fn parse(input: &str) -> Result<Self, ParseError> {
Self::parse_.parse(input).map_err(|_err| ParseError)
}
fn parse_(input: &mut &str) -> PResult<Self, ()> {
trace("pos", move |input: &mut &str| {
winnow::combinator::alt((
"<N>".value(Pos::Noun),
"<V>".value(Pos::Verb),
"<Adj>".value(Pos::Adjective),
"<Adv>".value(Pos::Adverb),
))
.parse_next(input)
})
.parse_next(input)
}
}
@ -526,15 +592,26 @@ mod test_pos {
#[test]
fn test_valid() {
let (input, actual) = Pos::parse("<N>").unwrap();
let (input, actual) = Pos::parse_.parse_peek("<N>").unwrap();
assert_eq!(input, "");
assert_eq!(actual, Pos::Noun);
}
#[test]
fn test_extra() {
let (input, actual) = Pos::parse("<Adj> foobar").unwrap();
let (input, actual) = Pos::parse_.parse_peek("<Adj> foobar").unwrap();
assert_eq!(input, " foobar");
assert_eq!(actual, Pos::Adjective);
}
}
/// Opaque error produced when input fails to parse.
///
/// The type carries no information about where or why parsing failed; its
/// `Display` rendering is always the fixed text `invalid`.
#[derive(Debug)]
pub struct ParseError;

impl std::error::Error for ParseError {}

impl std::fmt::Display for ParseError {
    /// Writes the constant message `invalid` to `formatter`.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The message is a constant, so it is written directly rather than
        // going through the formatting machinery.
        formatter.write_str("invalid")
    }
}