mirror of
https://github.com/crate-ci/typos.git
synced 2025-01-11 01:01:36 -05:00
Merge pull request #780 from epage/winnow
refactor(typos): Upgrade to winnow 0.5
This commit is contained in:
commit
383d51ddf1
7 changed files with 464 additions and 342 deletions
19
Cargo.lock
generated
19
Cargo.lock
generated
|
@ -1610,7 +1610,7 @@ dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
"serde_spanned",
|
"serde_spanned",
|
||||||
"toml_datetime",
|
"toml_datetime",
|
||||||
"winnow",
|
"winnow 0.4.9",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1648,7 +1648,7 @@ dependencies = [
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"unicode-segmentation",
|
"unicode-segmentation",
|
||||||
"unicode-xid",
|
"unicode-xid",
|
||||||
"winnow",
|
"winnow 0.5.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1831,7 +1831,7 @@ name = "varcon-core"
|
||||||
version = "2.2.12"
|
version = "2.2.12"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"enumflags2",
|
"enumflags2",
|
||||||
"winnow",
|
"winnow 0.5.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -2121,9 +2121,18 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winnow"
|
name = "winnow"
|
||||||
version = "0.4.6"
|
version = "0.4.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699"
|
checksum = "81a2094c43cc94775293eaa0e499fbc30048a6d824ac82c0351a8c0bf9112529"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winnow"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"memchr",
|
"memchr",
|
||||||
]
|
]
|
||||||
|
|
|
@ -13,8 +13,8 @@ include.workspace = true
|
||||||
[features]
|
[features]
|
||||||
default = ["std"]
|
default = ["std"]
|
||||||
std = []
|
std = []
|
||||||
codegen = ["std", "phf_codegen"]
|
codegen = ["std", "dep:phf_codegen"]
|
||||||
map = ["phf", "phf_shared"]
|
map = ["dep:phf", "dep:phf_shared"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
unicase = "2.6"
|
unicase = "2.6"
|
||||||
|
|
|
@ -32,8 +32,8 @@ pre-release-replacements = [
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["dict", "vars"]
|
default = ["dict", "vars"]
|
||||||
dict = ["typos-dict"]
|
dict = ["dep:typos-dict"]
|
||||||
vars = ["typos-vars"]
|
vars = ["dep:typos-vars"]
|
||||||
|
|
||||||
|
|
||||||
[[bin]]
|
[[bin]]
|
||||||
|
|
|
@ -14,7 +14,7 @@ include.workspace = true
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
thiserror = "1.0"
|
thiserror = "1.0"
|
||||||
winnow = "0.4.6"
|
winnow = "0.5.0"
|
||||||
unicode-xid = "0.2.4"
|
unicode-xid = "0.2.4"
|
||||||
once_cell = "1.17.2"
|
once_cell = "1.17.2"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
use bstr::ByteSlice;
|
use bstr::ByteSlice;
|
||||||
|
use winnow::BStr;
|
||||||
|
|
||||||
/// Define rules for tokenizaing a buffer.
|
/// Define rules for tokenizaing a buffer.
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
|
@ -48,7 +49,9 @@ impl Tokenizer {
|
||||||
let iter = if self.unicode && !ByteSlice::is_ascii(content.as_bytes()) {
|
let iter = if self.unicode && !ByteSlice::is_ascii(content.as_bytes()) {
|
||||||
itertools::Either::Left(unicode_parser::iter_identifiers(content))
|
itertools::Either::Left(unicode_parser::iter_identifiers(content))
|
||||||
} else {
|
} else {
|
||||||
itertools::Either::Right(ascii_parser::iter_identifiers(content.as_bytes()))
|
itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new(
|
||||||
|
content.as_bytes(),
|
||||||
|
)))
|
||||||
};
|
};
|
||||||
iter.map(move |identifier| self.transform(identifier, content.as_bytes()))
|
iter.map(move |identifier| self.transform(identifier, content.as_bytes()))
|
||||||
}
|
}
|
||||||
|
@ -58,7 +61,7 @@ impl Tokenizer {
|
||||||
let iter = Utf8Chunks::new(content).flat_map(unicode_parser::iter_identifiers);
|
let iter = Utf8Chunks::new(content).flat_map(unicode_parser::iter_identifiers);
|
||||||
itertools::Either::Left(iter)
|
itertools::Either::Left(iter)
|
||||||
} else {
|
} else {
|
||||||
itertools::Either::Right(ascii_parser::iter_identifiers(content))
|
itertools::Either::Right(ascii_parser::iter_identifiers(BStr::new(content)))
|
||||||
};
|
};
|
||||||
iter.map(move |identifier| self.transform(identifier, content))
|
iter.map(move |identifier| self.transform(identifier, content))
|
||||||
}
|
}
|
||||||
|
@ -126,6 +129,7 @@ impl<'s> Iterator for Utf8Chunks<'s> {
|
||||||
|
|
||||||
mod parser {
|
mod parser {
|
||||||
use winnow::combinator::*;
|
use winnow::combinator::*;
|
||||||
|
use winnow::error::ParserError;
|
||||||
use winnow::prelude::*;
|
use winnow::prelude::*;
|
||||||
use winnow::stream::AsBStr;
|
use winnow::stream::AsBStr;
|
||||||
use winnow::stream::AsChar;
|
use winnow::stream::AsChar;
|
||||||
|
@ -133,8 +137,9 @@ mod parser {
|
||||||
use winnow::stream::Stream;
|
use winnow::stream::Stream;
|
||||||
use winnow::stream::StreamIsPartial;
|
use winnow::stream::StreamIsPartial;
|
||||||
use winnow::token::*;
|
use winnow::token::*;
|
||||||
|
use winnow::trace::trace;
|
||||||
|
|
||||||
pub(crate) fn next_identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
pub(crate) fn next_identifier<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
|
@ -143,7 +148,7 @@ mod parser {
|
||||||
preceded(ignore, identifier).parse_next(input)
|
preceded(ignore, identifier).parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn identifier<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
|
@ -153,36 +158,39 @@ mod parser {
|
||||||
// `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
|
// `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
|
||||||
// or unexpected cases than strip off start characters to a word since we aren't doing a
|
// or unexpected cases than strip off start characters to a word since we aren't doing a
|
||||||
// proper word boundary parse
|
// proper word boundary parse
|
||||||
take_while(1.., is_xid_continue).parse_next(input)
|
trace("identifier", take_while(1.., is_xid_continue)).parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ignore<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn ignore<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
take_many0(alt((
|
trace(
|
||||||
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
|
"ignore",
|
||||||
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
|
take_many0(alt((
|
||||||
// - Make sure you always consume it
|
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
|
||||||
terminated(uuid_literal, peek(sep1)),
|
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
|
||||||
terminated(hash_literal, peek(sep1)),
|
// - Make sure you always consume it
|
||||||
terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
|
terminated(uuid_literal, peek(sep1)),
|
||||||
terminated(ordinal_literal, peek(sep1)),
|
terminated(hash_literal, peek(sep1)),
|
||||||
terminated(hex_literal, peek(sep1)),
|
terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
|
||||||
terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
|
terminated(ordinal_literal, peek(sep1)),
|
||||||
terminated(email_literal, peek(sep1)),
|
terminated(hex_literal, peek(sep1)),
|
||||||
terminated(url_literal, peek(sep1)),
|
terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
|
||||||
terminated(css_color, peek(sep1)),
|
terminated(email_literal, peek(sep1)),
|
||||||
c_escape,
|
terminated(url_literal, peek(sep1)),
|
||||||
printf,
|
terminated(css_color, peek(sep1)),
|
||||||
other,
|
c_escape,
|
||||||
)))
|
printf,
|
||||||
|
other,
|
||||||
|
))),
|
||||||
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sep1<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn sep1<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
|
@ -195,21 +203,24 @@ mod parser {
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn other<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn other<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
(
|
trace(
|
||||||
one_of(|c| !is_xid_continue(c)),
|
"other",
|
||||||
take_while(0.., is_ignore_char),
|
(
|
||||||
|
one_of(|c| !is_xid_continue(c)),
|
||||||
|
take_while(0.., is_ignore_char),
|
||||||
|
)
|
||||||
|
.recognize(),
|
||||||
)
|
)
|
||||||
.recognize()
|
.parse_next(input)
|
||||||
.parse_next(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ordinal_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn ordinal_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
|
@ -221,26 +232,29 @@ mod parser {
|
||||||
['_'].contains(&c)
|
['_'].contains(&c)
|
||||||
}
|
}
|
||||||
|
|
||||||
(
|
trace(
|
||||||
take_while(0.., is_sep),
|
"ordinal_literal",
|
||||||
take_while(1.., is_dec_digit),
|
(
|
||||||
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
|
take_while(0.., is_sep),
|
||||||
take_while(0.., is_sep),
|
take_while(1.., is_dec_digit),
|
||||||
|
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
|
||||||
|
take_while(0.., is_sep),
|
||||||
|
)
|
||||||
|
.recognize(),
|
||||||
)
|
)
|
||||||
.recognize()
|
.parse_next(input)
|
||||||
.parse_next(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn dec_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn dec_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
take_while(1.., is_dec_digit_with_sep).parse_next(input)
|
trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn hex_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn hex_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
|
@ -253,57 +267,63 @@ mod parser {
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn css_color<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn css_color<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
preceded(
|
trace(
|
||||||
'#',
|
"color",
|
||||||
alt((
|
preceded(
|
||||||
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
|
'#',
|
||||||
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
|
alt((
|
||||||
)),
|
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
|
||||||
|
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
|
||||||
|
)),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn uuid_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn uuid_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
alt((
|
trace(
|
||||||
(
|
"uuid",
|
||||||
take_while(8, is_lower_hex_digit),
|
alt((
|
||||||
'-',
|
(
|
||||||
take_while(4, is_lower_hex_digit),
|
take_while(8, is_lower_hex_digit),
|
||||||
'-',
|
'-',
|
||||||
take_while(4, is_lower_hex_digit),
|
take_while(4, is_lower_hex_digit),
|
||||||
'-',
|
'-',
|
||||||
take_while(4, is_lower_hex_digit),
|
take_while(4, is_lower_hex_digit),
|
||||||
'-',
|
'-',
|
||||||
take_while(12, is_lower_hex_digit),
|
take_while(4, is_lower_hex_digit),
|
||||||
),
|
'-',
|
||||||
(
|
take_while(12, is_lower_hex_digit),
|
||||||
take_while(8, is_upper_hex_digit),
|
),
|
||||||
'-',
|
(
|
||||||
take_while(4, is_upper_hex_digit),
|
take_while(8, is_upper_hex_digit),
|
||||||
'-',
|
'-',
|
||||||
take_while(4, is_upper_hex_digit),
|
take_while(4, is_upper_hex_digit),
|
||||||
'-',
|
'-',
|
||||||
take_while(4, is_upper_hex_digit),
|
take_while(4, is_upper_hex_digit),
|
||||||
'-',
|
'-',
|
||||||
take_while(12, is_upper_hex_digit),
|
take_while(4, is_upper_hex_digit),
|
||||||
),
|
'-',
|
||||||
))
|
take_while(12, is_upper_hex_digit),
|
||||||
.recognize()
|
),
|
||||||
|
))
|
||||||
|
.recognize(),
|
||||||
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn hash_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn hash_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
|
@ -319,103 +339,120 @@ mod parser {
|
||||||
// or more.
|
// or more.
|
||||||
|
|
||||||
const IGNORE_HEX_MIN: usize = 32;
|
const IGNORE_HEX_MIN: usize = 32;
|
||||||
alt((
|
trace(
|
||||||
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
|
"hash",
|
||||||
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
|
alt((
|
||||||
))
|
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
|
||||||
|
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
|
||||||
|
)),
|
||||||
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn base64_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn base64_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?;
|
trace("base64", move |input: &mut T| {
|
||||||
|
let start = input.checkpoint();
|
||||||
|
let captured = take_while(1.., is_base64_digit).parse_next(input)?;
|
||||||
|
|
||||||
const CHUNK: usize = 4;
|
const CHUNK: usize = 4;
|
||||||
let padding_offset = input.offset_to(&padding);
|
let padding_offset = input.offset_from(&start);
|
||||||
let mut padding_len = CHUNK - padding_offset % CHUNK;
|
let mut padding_len = CHUNK - padding_offset % CHUNK;
|
||||||
if padding_len == CHUNK {
|
if padding_len == CHUNK {
|
||||||
padding_len = 0;
|
padding_len = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if captured.slice_len() < 90
|
if captured.slice_len() < 90
|
||||||
&& padding_len == 0
|
&& padding_len == 0
|
||||||
&& captured
|
&& captured
|
||||||
.as_bstr()
|
.as_bstr()
|
||||||
.iter()
|
.iter()
|
||||||
.all(|c| !['/', '+'].contains(&c.as_char()))
|
.all(|c| !['/', '+'].contains(&c.as_char()))
|
||||||
{
|
{
|
||||||
return Err(winnow::error::ErrMode::Backtrack(
|
return Err(winnow::error::ErrMode::from_error_kind(
|
||||||
winnow::error::Error::new(input, winnow::error::ErrorKind::Slice),
|
input,
|
||||||
));
|
winnow::error::ErrorKind::Slice,
|
||||||
}
|
));
|
||||||
|
}
|
||||||
|
|
||||||
let (after, _) =
|
take_while(padding_len..=padding_len, is_base64_padding).parse_next(input)?;
|
||||||
take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
|
|
||||||
|
|
||||||
let after_offset = input.offset_to(&after);
|
let after_offset = input.offset_from(&start);
|
||||||
Ok(input.next_slice(after_offset))
|
input.reset(start);
|
||||||
|
Ok(input.next_slice(after_offset))
|
||||||
|
})
|
||||||
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn email_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn email_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
(
|
trace(
|
||||||
take_while(1.., is_localport_char),
|
"email",
|
||||||
'@',
|
|
||||||
take_while(1.., is_domain_char),
|
|
||||||
)
|
|
||||||
.recognize()
|
|
||||||
.parse_next(input)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn url_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
|
||||||
where
|
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
|
||||||
{
|
|
||||||
(
|
|
||||||
opt(terminated(
|
|
||||||
take_while(1.., is_scheme_char),
|
|
||||||
// HACK: Technically you can skip `//` if you don't have a domain but that would
|
|
||||||
// get messy to support.
|
|
||||||
(':', '/', '/'),
|
|
||||||
)),
|
|
||||||
(
|
(
|
||||||
opt(terminated(url_userinfo, '@')),
|
take_while(1.., is_localport_char),
|
||||||
|
'@',
|
||||||
take_while(1.., is_domain_char),
|
take_while(1.., is_domain_char),
|
||||||
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
|
)
|
||||||
),
|
.recognize(),
|
||||||
'/',
|
|
||||||
// HACK: Too lazy to enumerate
|
|
||||||
take_while(0.., is_path_query_fragment),
|
|
||||||
)
|
)
|
||||||
.recognize()
|
.parse_next(input)
|
||||||
.parse_next(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn url_userinfo<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn url_literal<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
(
|
trace(
|
||||||
take_while(1.., is_localport_char),
|
"url",
|
||||||
opt(preceded(':', take_while(0.., is_localport_char))),
|
(
|
||||||
|
opt(terminated(
|
||||||
|
take_while(1.., is_scheme_char),
|
||||||
|
// HACK: Technically you can skip `//` if you don't have a domain but that would
|
||||||
|
// get messy to support.
|
||||||
|
(':', '/', '/'),
|
||||||
|
)),
|
||||||
|
(
|
||||||
|
opt(terminated(url_userinfo, '@')),
|
||||||
|
take_while(1.., is_domain_char),
|
||||||
|
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
|
||||||
|
),
|
||||||
|
'/',
|
||||||
|
// HACK: Too lazy to enumerate
|
||||||
|
take_while(0.., is_path_query_fragment),
|
||||||
|
)
|
||||||
|
.recognize(),
|
||||||
)
|
)
|
||||||
.recognize()
|
.parse_next(input)
|
||||||
.parse_next(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn c_escape<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn url_userinfo<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
|
where
|
||||||
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
|
{
|
||||||
|
trace(
|
||||||
|
"userinfo",
|
||||||
|
(
|
||||||
|
take_while(1.., is_localport_char),
|
||||||
|
opt(preceded(':', take_while(0.., is_localport_char))),
|
||||||
|
)
|
||||||
|
.recognize(),
|
||||||
|
)
|
||||||
|
.parse_next(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn c_escape<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
|
@ -425,25 +462,29 @@ mod parser {
|
||||||
// regular string that does escaping. The escaped letter might be part of a word, or it
|
// regular string that does escaping. The escaped letter might be part of a word, or it
|
||||||
// might not be. Rather than guess and be wrong part of the time and correct people's words
|
// might not be. Rather than guess and be wrong part of the time and correct people's words
|
||||||
// incorrectly, we opt for just not evaluating it at all.
|
// incorrectly, we opt for just not evaluating it at all.
|
||||||
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)).parse_next(input)
|
trace(
|
||||||
|
"escape",
|
||||||
|
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)),
|
||||||
|
)
|
||||||
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn printf<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn printf<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Stream + StreamIsPartial + PartialEq,
|
T: Stream + StreamIsPartial + PartialEq,
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
preceded('%', take_while(1.., is_xid_continue)).parse_next(input)
|
trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, <I as Stream>::Slice, E>
|
fn take_many0<I, E, F>(mut f: F) -> impl Parser<I, <I as Stream>::Slice, E>
|
||||||
where
|
where
|
||||||
I: Stream,
|
I: Stream,
|
||||||
F: winnow::Parser<I, <I as Stream>::Slice, E>,
|
F: Parser<I, <I as Stream>::Slice, E>,
|
||||||
E: winnow::error::ParseError<I>,
|
E: ParserError<I>,
|
||||||
{
|
{
|
||||||
move |i: I| {
|
move |i: &mut I| {
|
||||||
repeat(0.., f.by_ref())
|
repeat(0.., f.by_ref())
|
||||||
.map(|()| ())
|
.map(|()| ())
|
||||||
.recognize()
|
.recognize()
|
||||||
|
@ -581,9 +622,8 @@ mod unicode_parser {
|
||||||
use super::parser::next_identifier;
|
use super::parser::next_identifier;
|
||||||
|
|
||||||
pub(crate) fn iter_identifiers(mut input: &str) -> impl Iterator<Item = &str> {
|
pub(crate) fn iter_identifiers(mut input: &str) -> impl Iterator<Item = &str> {
|
||||||
std::iter::from_fn(move || match next_identifier(input) {
|
std::iter::from_fn(move || match next_identifier(&mut input) {
|
||||||
Ok((i, o)) => {
|
Ok(o) => {
|
||||||
input = i;
|
|
||||||
debug_assert_ne!(o, "");
|
debug_assert_ne!(o, "");
|
||||||
Some(o)
|
Some(o)
|
||||||
}
|
}
|
||||||
|
@ -595,10 +635,11 @@ mod unicode_parser {
|
||||||
mod ascii_parser {
|
mod ascii_parser {
|
||||||
use super::parser::next_identifier;
|
use super::parser::next_identifier;
|
||||||
|
|
||||||
pub(crate) fn iter_identifiers(mut input: &[u8]) -> impl Iterator<Item = &str> {
|
use winnow::BStr;
|
||||||
std::iter::from_fn(move || match next_identifier(input) {
|
|
||||||
Ok((i, o)) => {
|
pub(crate) fn iter_identifiers(mut input: &BStr) -> impl Iterator<Item = &str> {
|
||||||
input = i;
|
std::iter::from_fn(move || match next_identifier(&mut input) {
|
||||||
|
Ok(o) => {
|
||||||
debug_assert_ne!(o, b"");
|
debug_assert_ne!(o, b"");
|
||||||
// This is safe because we've checked that the strings are a subset of ASCII
|
// This is safe because we've checked that the strings are a subset of ASCII
|
||||||
// characters.
|
// characters.
|
||||||
|
|
|
@ -12,11 +12,11 @@ include.workspace = true
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = []
|
default = []
|
||||||
parser = ["winnow"]
|
parser = ["dep:winnow"]
|
||||||
flags = ["enumflags2"]
|
flags = ["dep:enumflags2"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
winnow = { version = "0.4.6", optional = true }
|
winnow = { version = "0.5.0", optional = true }
|
||||||
enumflags2 = { version = "0.7", optional = true }
|
enumflags2 = { version = "0.7", optional = true }
|
||||||
|
|
||||||
[package.metadata.docs.rs]
|
[package.metadata.docs.rs]
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
use winnow::prelude::*;
|
use winnow::prelude::*;
|
||||||
|
use winnow::trace::trace;
|
||||||
|
|
||||||
use crate::*;
|
use crate::*;
|
||||||
|
|
||||||
|
@ -17,10 +18,8 @@ impl<'i> Iterator for ClusterIter<'i> {
|
||||||
type Item = Cluster;
|
type Item = Cluster;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Cluster> {
|
fn next(&mut self) -> Option<Cluster> {
|
||||||
let i = self.input.trim_start();
|
self.input = self.input.trim_start();
|
||||||
let (i, c) = Cluster::parse(i).ok()?;
|
Cluster::parse_.parse_next(&mut self.input).ok()
|
||||||
self.input = i;
|
|
||||||
Some(c)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -61,38 +60,45 @@ A Cv: acknowledgment's / Av B C: acknowledgement's
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Cluster {
|
impl Cluster {
|
||||||
pub fn parse(input: &str) -> IResult<&str, Self> {
|
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||||
let header = (
|
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||||
winnow::bytes::tag("#"),
|
}
|
||||||
winnow::character::space0,
|
|
||||||
winnow::character::not_line_ending,
|
|
||||||
winnow::character::line_ending,
|
|
||||||
);
|
|
||||||
let note = winnow::sequence::preceded(
|
|
||||||
(winnow::bytes::tag("##"), winnow::character::space0),
|
|
||||||
winnow::sequence::terminated(
|
|
||||||
winnow::character::not_line_ending,
|
|
||||||
winnow::character::line_ending,
|
|
||||||
),
|
|
||||||
);
|
|
||||||
let mut cluster = (
|
|
||||||
winnow::combinator::opt(header),
|
|
||||||
winnow::multi::many1(winnow::sequence::terminated(
|
|
||||||
Entry::parse,
|
|
||||||
winnow::character::line_ending,
|
|
||||||
)),
|
|
||||||
winnow::multi::many0(note),
|
|
||||||
);
|
|
||||||
let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = cluster.parse_next(input)?;
|
|
||||||
|
|
||||||
let header = header.map(|s| s.2.to_owned());
|
fn parse_(input: &mut &str) -> PResult<Self, ()> {
|
||||||
let notes = notes.into_iter().map(|s| s.to_owned()).collect();
|
trace("cluster", move |input: &mut &str| {
|
||||||
let c = Self {
|
let header = (
|
||||||
header,
|
"#",
|
||||||
entries,
|
winnow::ascii::space0,
|
||||||
notes,
|
winnow::ascii::not_line_ending,
|
||||||
};
|
winnow::ascii::line_ending,
|
||||||
Ok((input, c))
|
);
|
||||||
|
let note = winnow::combinator::preceded(
|
||||||
|
("##", winnow::ascii::space0),
|
||||||
|
winnow::combinator::terminated(
|
||||||
|
winnow::ascii::not_line_ending,
|
||||||
|
winnow::ascii::line_ending,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
let mut cluster = (
|
||||||
|
winnow::combinator::opt(header),
|
||||||
|
winnow::combinator::repeat(
|
||||||
|
1..,
|
||||||
|
winnow::combinator::terminated(Entry::parse_, winnow::ascii::line_ending),
|
||||||
|
),
|
||||||
|
winnow::combinator::repeat(0.., note),
|
||||||
|
);
|
||||||
|
let (header, entries, notes): (_, _, Vec<_>) = cluster.parse_next(input)?;
|
||||||
|
|
||||||
|
let header = header.map(|s| s.2.to_owned());
|
||||||
|
let notes = notes.into_iter().map(|s| s.to_owned()).collect();
|
||||||
|
let c = Self {
|
||||||
|
header,
|
||||||
|
entries,
|
||||||
|
notes,
|
||||||
|
};
|
||||||
|
Ok(c)
|
||||||
|
})
|
||||||
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -102,15 +108,16 @@ mod test_cluster {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_basic() {
|
fn test_basic() {
|
||||||
let (input, actual) = Cluster::parse(
|
let (input, actual) = Cluster::parse_
|
||||||
"# acknowledgment <verified> (level 35)
|
.parse_peek(
|
||||||
|
"# acknowledgment <verified> (level 35)
|
||||||
A Cv: acknowledgment / Av B C: acknowledgement
|
A Cv: acknowledgment / Av B C: acknowledgement
|
||||||
A Cv: acknowledgments / Av B C: acknowledgements
|
A Cv: acknowledgments / Av B C: acknowledgements
|
||||||
A Cv: acknowledgment's / Av B C: acknowledgement's
|
A Cv: acknowledgment's / Av B C: acknowledgement's
|
||||||
|
|
||||||
",
|
",
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(input, "\n");
|
assert_eq!(input, "\n");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
actual.header,
|
actual.header,
|
||||||
|
@ -122,8 +129,9 @@ A Cv: acknowledgment's / Av B C: acknowledgement's
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_notes() {
|
fn test_notes() {
|
||||||
let (input, actual) = Cluster::parse(
|
let (input, actual) = Cluster::parse_
|
||||||
"# coloration <verified> (level 50)
|
.parse_peek(
|
||||||
|
"# coloration <verified> (level 50)
|
||||||
A B C: coloration / B. Cv: colouration
|
A B C: coloration / B. Cv: colouration
|
||||||
A B C: colorations / B. Cv: colourations
|
A B C: colorations / B. Cv: colourations
|
||||||
A B C: coloration's / B. Cv: colouration's
|
A B C: coloration's / B. Cv: colouration's
|
||||||
|
@ -131,8 +139,8 @@ A B C: coloration's / B. Cv: colouration's
|
||||||
## variant for British Engl or some reason
|
## variant for British Engl or some reason
|
||||||
|
|
||||||
",
|
",
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(input, "\n");
|
assert_eq!(input, "\n");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
actual.header,
|
actual.header,
|
||||||
|
@ -144,65 +152,75 @@ A B C: coloration's / B. Cv: colouration's
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Entry {
|
impl Entry {
|
||||||
pub fn parse(input: &str) -> IResult<&str, Self> {
|
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||||
let var_sep = (winnow::character::space0, '/', winnow::character::space0);
|
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||||
let (input, variants) =
|
|
||||||
winnow::multi::separated1(Variant::parse, var_sep).parse_next(input)?;
|
|
||||||
|
|
||||||
let desc_sep = (winnow::character::space0, '|');
|
|
||||||
let (input, description) =
|
|
||||||
winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?;
|
|
||||||
|
|
||||||
let comment_sep = (winnow::character::space0, '#');
|
|
||||||
let (input, comment) = winnow::combinator::opt((
|
|
||||||
comment_sep,
|
|
||||||
winnow::character::space1,
|
|
||||||
winnow::character::not_line_ending,
|
|
||||||
))
|
|
||||||
.parse_next(input)?;
|
|
||||||
|
|
||||||
let mut e = match description {
|
|
||||||
Some((_, description)) => description,
|
|
||||||
None => Self {
|
|
||||||
variants: Vec::new(),
|
|
||||||
pos: None,
|
|
||||||
archaic: false,
|
|
||||||
note: false,
|
|
||||||
description: None,
|
|
||||||
comment: None,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
e.variants = variants;
|
|
||||||
e.comment = comment.map(|c| c.2.to_owned());
|
|
||||||
Ok((input, e))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_description(input: &str) -> IResult<&str, Self> {
|
fn parse_(input: &mut &str) -> PResult<Self, ()> {
|
||||||
let (input, (pos, archaic, note, description)) = (
|
trace("entry", move |input: &mut &str| {
|
||||||
winnow::combinator::opt((winnow::character::space1, Pos::parse)),
|
let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0);
|
||||||
winnow::combinator::opt((winnow::character::space1, "(-)")),
|
let variants =
|
||||||
winnow::combinator::opt((winnow::character::space1, "--")),
|
winnow::combinator::separated1(Variant::parse_, var_sep).parse_next(input)?;
|
||||||
winnow::combinator::opt((
|
|
||||||
winnow::character::space1,
|
let desc_sep = (winnow::ascii::space0, '|');
|
||||||
winnow::bytes::take_till0(|c| c == '\n' || c == '\r' || c == '#'),
|
let description =
|
||||||
)),
|
winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?;
|
||||||
)
|
|
||||||
|
let comment_sep = (winnow::ascii::space0, '#');
|
||||||
|
let comment = winnow::combinator::opt((
|
||||||
|
comment_sep,
|
||||||
|
winnow::ascii::space1,
|
||||||
|
winnow::ascii::not_line_ending,
|
||||||
|
))
|
||||||
.parse_next(input)?;
|
.parse_next(input)?;
|
||||||
|
|
||||||
let variants = Vec::new();
|
let mut e = match description {
|
||||||
let pos = pos.map(|(_, p)| p);
|
Some((_, description)) => description,
|
||||||
let archaic = archaic.is_some();
|
None => Self {
|
||||||
let note = note.is_some();
|
variants: Vec::new(),
|
||||||
let description = description.map(|(_, d)| d.to_owned());
|
pos: None,
|
||||||
let e = Self {
|
archaic: false,
|
||||||
variants,
|
note: false,
|
||||||
pos,
|
description: None,
|
||||||
archaic,
|
comment: None,
|
||||||
note,
|
},
|
||||||
description,
|
};
|
||||||
comment: None,
|
e.variants = variants;
|
||||||
};
|
e.comment = comment.map(|c| c.2.to_owned());
|
||||||
Ok((input, e))
|
Ok(e)
|
||||||
|
})
|
||||||
|
.parse_next(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_description(input: &mut &str) -> PResult<Self, ()> {
|
||||||
|
trace("description", move |input: &mut &str| {
|
||||||
|
let (pos, archaic, note, description) = (
|
||||||
|
winnow::combinator::opt((winnow::ascii::space1, Pos::parse_)),
|
||||||
|
winnow::combinator::opt((winnow::ascii::space1, "(-)")),
|
||||||
|
winnow::combinator::opt((winnow::ascii::space1, "--")),
|
||||||
|
winnow::combinator::opt((
|
||||||
|
winnow::ascii::space1,
|
||||||
|
winnow::token::take_till0(('\n', '\r', '#')),
|
||||||
|
)),
|
||||||
|
)
|
||||||
|
.parse_next(input)?;
|
||||||
|
|
||||||
|
let variants = Vec::new();
|
||||||
|
let pos = pos.map(|(_, p)| p);
|
||||||
|
let archaic = archaic.is_some();
|
||||||
|
let note = note.is_some();
|
||||||
|
let description = description.map(|(_, d)| d.to_owned());
|
||||||
|
let e = Self {
|
||||||
|
variants,
|
||||||
|
pos,
|
||||||
|
archaic,
|
||||||
|
note,
|
||||||
|
description,
|
||||||
|
comment: None,
|
||||||
|
};
|
||||||
|
Ok(e)
|
||||||
|
})
|
||||||
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -215,8 +233,9 @@ mod test_entry {
|
||||||
fn test_variant_only() {
|
fn test_variant_only() {
|
||||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||||
// cases.
|
// cases.
|
||||||
let (input, actual) =
|
let (input, actual) = Entry::parse_
|
||||||
Entry::parse("A Cv: acknowledgment's / Av B C: acknowledgement's\n").unwrap();
|
.parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's\n")
|
||||||
|
.unwrap();
|
||||||
assert_eq!(input, "\n");
|
assert_eq!(input, "\n");
|
||||||
assert_eq!(actual.variants.len(), 2);
|
assert_eq!(actual.variants.len(), 2);
|
||||||
assert_eq!(actual.pos, None);
|
assert_eq!(actual.pos, None);
|
||||||
|
@ -229,7 +248,9 @@ mod test_entry {
|
||||||
fn test_description() {
|
fn test_description() {
|
||||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||||
// cases.
|
// cases.
|
||||||
let (input, actual) = Entry::parse("A C: prize / B: prise | otherwise\n").unwrap();
|
let (input, actual) = Entry::parse_
|
||||||
|
.parse_peek("A C: prize / B: prise | otherwise\n")
|
||||||
|
.unwrap();
|
||||||
assert_eq!(input, "\n");
|
assert_eq!(input, "\n");
|
||||||
assert_eq!(actual.variants.len(), 2);
|
assert_eq!(actual.variants.len(), 2);
|
||||||
assert_eq!(actual.pos, None);
|
assert_eq!(actual.pos, None);
|
||||||
|
@ -242,7 +263,9 @@ mod test_entry {
|
||||||
fn test_pos() {
|
fn test_pos() {
|
||||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||||
// cases.
|
// cases.
|
||||||
let (input, actual) = Entry::parse("A B C: practice / AV Cv: practise | <N>\n").unwrap();
|
let (input, actual) = Entry::parse_
|
||||||
|
.parse_peek("A B C: practice / AV Cv: practise | <N>\n")
|
||||||
|
.unwrap();
|
||||||
assert_eq!(input, "\n");
|
assert_eq!(input, "\n");
|
||||||
assert_eq!(actual.variants.len(), 2);
|
assert_eq!(actual.variants.len(), 2);
|
||||||
assert_eq!(actual.pos, Some(Pos::Noun));
|
assert_eq!(actual.pos, Some(Pos::Noun));
|
||||||
|
@ -255,7 +278,9 @@ mod test_entry {
|
||||||
fn test_archaic() {
|
fn test_archaic() {
|
||||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||||
// cases.
|
// cases.
|
||||||
let (input, actual) = Entry::parse("A: bark / Av B: barque | (-) ship\n").unwrap();
|
let (input, actual) = Entry::parse_
|
||||||
|
.parse_peek("A: bark / Av B: barque | (-) ship\n")
|
||||||
|
.unwrap();
|
||||||
assert_eq!(input, "\n");
|
assert_eq!(input, "\n");
|
||||||
assert_eq!(actual.variants.len(), 2);
|
assert_eq!(actual.variants.len(), 2);
|
||||||
assert_eq!(actual.pos, None);
|
assert_eq!(actual.pos, None);
|
||||||
|
@ -268,7 +293,9 @@ mod test_entry {
|
||||||
fn test_note() {
|
fn test_note() {
|
||||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||||
// cases.
|
// cases.
|
||||||
let (input, actual) = Entry::parse("_: cabbies | -- plural\n").unwrap();
|
let (input, actual) = Entry::parse_
|
||||||
|
.parse_peek("_: cabbies | -- plural\n")
|
||||||
|
.unwrap();
|
||||||
assert_eq!(input, "\n");
|
assert_eq!(input, "\n");
|
||||||
assert_eq!(actual.variants.len(), 1);
|
assert_eq!(actual.variants.len(), 1);
|
||||||
assert_eq!(actual.pos, None);
|
assert_eq!(actual.pos, None);
|
||||||
|
@ -279,7 +306,7 @@ mod test_entry {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_trailing_comment() {
|
fn test_trailing_comment() {
|
||||||
let (input, actual) = Entry::parse(
|
let (input, actual) = Entry::parse_.parse_peek(
|
||||||
"A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n",
|
"A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n",
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
@ -297,20 +324,30 @@ mod test_entry {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Variant {
|
impl Variant {
|
||||||
pub fn parse(input: &str) -> IResult<&str, Self> {
|
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||||
let types = winnow::multi::separated1(Type::parse, winnow::character::space1);
|
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||||
let sep = (winnow::bytes::tag(":"), winnow::character::space0);
|
}
|
||||||
let (input, (types, word)) =
|
|
||||||
winnow::sequence::separated_pair(types, sep, word).parse_next(input)?;
|
fn parse_(input: &mut &str) -> PResult<Self, ()> {
|
||||||
let v = Self { types, word };
|
trace("variant", move |input: &mut &str| {
|
||||||
Ok((input, v))
|
let types = winnow::combinator::separated1(Type::parse_, winnow::ascii::space1);
|
||||||
|
let sep = (":", winnow::ascii::space0);
|
||||||
|
let (types, word) =
|
||||||
|
winnow::combinator::separated_pair(types, sep, word).parse_next(input)?;
|
||||||
|
let v = Self { types, word };
|
||||||
|
Ok(v)
|
||||||
|
})
|
||||||
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn word(input: &str) -> IResult<&str, String> {
|
fn word(input: &mut &str) -> PResult<String, ()> {
|
||||||
winnow::bytes::take_till1(|item: char| item.is_ascii_whitespace())
|
trace("word", move |input: &mut &str| {
|
||||||
.map(|s: &str| s.to_owned().replace('_', " "))
|
winnow::token::take_till1(|item: char| item.is_ascii_whitespace())
|
||||||
.parse_next(input)
|
.map(|s: &str| s.to_owned().replace('_', " "))
|
||||||
|
.parse_next(input)
|
||||||
|
})
|
||||||
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -321,7 +358,7 @@ mod test_variant {
|
||||||
fn test_valid() {
|
fn test_valid() {
|
||||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||||
// cases.
|
// cases.
|
||||||
let (input, actual) = Variant::parse("A Cv: acknowledgment ").unwrap();
|
let (input, actual) = Variant::parse_.parse_peek("A Cv: acknowledgment ").unwrap();
|
||||||
assert_eq!(input, " ");
|
assert_eq!(input, " ");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
actual.types,
|
actual.types,
|
||||||
|
@ -343,8 +380,9 @@ mod test_variant {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_extra() {
|
fn test_extra() {
|
||||||
let (input, actual) =
|
let (input, actual) = Variant::parse_
|
||||||
Variant::parse("A Cv: acknowledgment's / Av B C: acknowledgement's").unwrap();
|
.parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's")
|
||||||
|
.unwrap();
|
||||||
assert_eq!(input, " / Av B C: acknowledgement's");
|
assert_eq!(input, " / Av B C: acknowledgement's");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
actual.types,
|
actual.types,
|
||||||
|
@ -366,7 +404,7 @@ mod test_variant {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_underscore() {
|
fn test_underscore() {
|
||||||
let (input, actual) = Variant::parse("_: air_gun\n").unwrap();
|
let (input, actual) = Variant::parse_.parse_peek("_: air_gun\n").unwrap();
|
||||||
assert_eq!(input, "\n");
|
assert_eq!(input, "\n");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
actual.types,
|
actual.types,
|
||||||
|
@ -381,13 +419,20 @@ mod test_variant {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Type {
|
impl Type {
|
||||||
pub fn parse(input: &str) -> IResult<&str, Type> {
|
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||||
let (input, category) = Category::parse(input)?;
|
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||||
let (input, tag) = winnow::combinator::opt(Tag::parse).parse_next(input)?;
|
}
|
||||||
let (input, num) = winnow::combinator::opt(winnow::character::digit1).parse_next(input)?;
|
|
||||||
let num = num.map(|s| s.parse().expect("parser ensured its a number"));
|
fn parse_(input: &mut &str) -> PResult<Type, ()> {
|
||||||
let t = Type { category, tag, num };
|
trace("type", move |input: &mut &str| {
|
||||||
Ok((input, t))
|
let category = Category::parse_(input)?;
|
||||||
|
let tag = winnow::combinator::opt(Tag::parse_).parse_next(input)?;
|
||||||
|
let num = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?;
|
||||||
|
let num = num.map(|s| s.parse().expect("parser ensured its a number"));
|
||||||
|
let t = Type { category, tag, num };
|
||||||
|
Ok(t)
|
||||||
|
})
|
||||||
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -399,13 +444,13 @@ mod test_type {
|
||||||
fn test_valid() {
|
fn test_valid() {
|
||||||
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
// Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
|
||||||
// cases.
|
// cases.
|
||||||
let (input, actual) = Type::parse("A ").unwrap();
|
let (input, actual) = Type::parse_.parse_peek("A ").unwrap();
|
||||||
assert_eq!(input, " ");
|
assert_eq!(input, " ");
|
||||||
assert_eq!(actual.category, Category::American);
|
assert_eq!(actual.category, Category::American);
|
||||||
assert_eq!(actual.tag, None);
|
assert_eq!(actual.tag, None);
|
||||||
assert_eq!(actual.num, None);
|
assert_eq!(actual.num, None);
|
||||||
|
|
||||||
let (input, actual) = Type::parse("Bv ").unwrap();
|
let (input, actual) = Type::parse_.parse_peek("Bv ").unwrap();
|
||||||
assert_eq!(input, " ");
|
assert_eq!(input, " ");
|
||||||
assert_eq!(actual.category, Category::BritishIse);
|
assert_eq!(actual.category, Category::BritishIse);
|
||||||
assert_eq!(actual.tag, Some(Tag::Variant));
|
assert_eq!(actual.tag, Some(Tag::Variant));
|
||||||
|
@ -414,13 +459,13 @@ mod test_type {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_extra() {
|
fn test_extra() {
|
||||||
let (input, actual) = Type::parse("Z foobar").unwrap();
|
let (input, actual) = Type::parse_.parse_peek("Z foobar").unwrap();
|
||||||
assert_eq!(input, " foobar");
|
assert_eq!(input, " foobar");
|
||||||
assert_eq!(actual.category, Category::BritishIze);
|
assert_eq!(actual.category, Category::BritishIze);
|
||||||
assert_eq!(actual.tag, None);
|
assert_eq!(actual.tag, None);
|
||||||
assert_eq!(actual.num, None);
|
assert_eq!(actual.num, None);
|
||||||
|
|
||||||
let (input, actual) = Type::parse("C- foobar").unwrap();
|
let (input, actual) = Type::parse_.parse_peek("C- foobar").unwrap();
|
||||||
assert_eq!(input, " foobar");
|
assert_eq!(input, " foobar");
|
||||||
assert_eq!(actual.category, Category::Canadian);
|
assert_eq!(actual.category, Category::Canadian);
|
||||||
assert_eq!(actual.tag, Some(Tag::Possible));
|
assert_eq!(actual.tag, Some(Tag::Possible));
|
||||||
|
@ -429,7 +474,7 @@ mod test_type {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_num() {
|
fn test_num() {
|
||||||
let (input, actual) = Type::parse("Av1 ").unwrap();
|
let (input, actual) = Type::parse_.parse_peek("Av1 ").unwrap();
|
||||||
assert_eq!(input, " ");
|
assert_eq!(input, " ");
|
||||||
assert_eq!(actual.category, Category::American);
|
assert_eq!(actual.category, Category::American);
|
||||||
assert_eq!(actual.tag, Some(Tag::Variant));
|
assert_eq!(actual.tag, Some(Tag::Variant));
|
||||||
|
@ -438,19 +483,26 @@ mod test_type {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Category {
|
impl Category {
|
||||||
pub fn parse(input: &str) -> IResult<&str, Category> {
|
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||||
let symbols = winnow::bytes::one_of("ABZCD_");
|
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||||
symbols
|
}
|
||||||
.map(|c| match c {
|
|
||||||
'A' => Category::American,
|
fn parse_(input: &mut &str) -> PResult<Self, ()> {
|
||||||
'B' => Category::BritishIse,
|
trace("category", move |input: &mut &str| {
|
||||||
'Z' => Category::BritishIze,
|
let symbols = winnow::token::one_of(['A', 'B', 'Z', 'C', 'D', '_']);
|
||||||
'C' => Category::Canadian,
|
symbols
|
||||||
'D' => Category::Australian,
|
.map(|c| match c {
|
||||||
'_' => Category::Other,
|
'A' => Category::American,
|
||||||
_ => unreachable!("parser won't select this option"),
|
'B' => Category::BritishIse,
|
||||||
})
|
'Z' => Category::BritishIze,
|
||||||
.parse_next(input)
|
'C' => Category::Canadian,
|
||||||
|
'D' => Category::Australian,
|
||||||
|
'_' => Category::Other,
|
||||||
|
_ => unreachable!("parser won't select this option"),
|
||||||
|
})
|
||||||
|
.parse_next(input)
|
||||||
|
})
|
||||||
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -460,32 +512,39 @@ mod test_category {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_valid() {
|
fn test_valid() {
|
||||||
let (input, actual) = Category::parse("A").unwrap();
|
let (input, actual) = Category::parse_.parse_peek("A").unwrap();
|
||||||
assert_eq!(input, "");
|
assert_eq!(input, "");
|
||||||
assert_eq!(actual, Category::American);
|
assert_eq!(actual, Category::American);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_extra() {
|
fn test_extra() {
|
||||||
let (input, actual) = Category::parse("_ foobar").unwrap();
|
let (input, actual) = Category::parse_.parse_peek("_ foobar").unwrap();
|
||||||
assert_eq!(input, " foobar");
|
assert_eq!(input, " foobar");
|
||||||
assert_eq!(actual, Category::Other);
|
assert_eq!(actual, Category::Other);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Tag {
|
impl Tag {
|
||||||
pub fn parse(input: &str) -> IResult<&str, Tag> {
|
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||||
let symbols = winnow::bytes::one_of(".vV-x");
|
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||||
symbols
|
}
|
||||||
.map(|c| match c {
|
|
||||||
'.' => Tag::Eq,
|
fn parse_(input: &mut &str) -> PResult<Self, ()> {
|
||||||
'v' => Tag::Variant,
|
trace("tag", move |input: &mut &str| {
|
||||||
'V' => Tag::Seldom,
|
let symbols = winnow::token::one_of(['.', 'v', 'V', '-', 'x']);
|
||||||
'-' => Tag::Possible,
|
symbols
|
||||||
'x' => Tag::Improper,
|
.map(|c| match c {
|
||||||
_ => unreachable!("parser won't select this option"),
|
'.' => Tag::Eq,
|
||||||
})
|
'v' => Tag::Variant,
|
||||||
.parse_next(input)
|
'V' => Tag::Seldom,
|
||||||
|
'-' => Tag::Possible,
|
||||||
|
'x' => Tag::Improper,
|
||||||
|
_ => unreachable!("parser won't select this option"),
|
||||||
|
})
|
||||||
|
.parse_next(input)
|
||||||
|
})
|
||||||
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -495,32 +554,34 @@ mod test_tag {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_valid() {
|
fn test_valid() {
|
||||||
let (input, actual) = Tag::parse(".").unwrap();
|
let (input, actual) = Tag::parse_.parse_peek(".").unwrap();
|
||||||
assert_eq!(input, "");
|
assert_eq!(input, "");
|
||||||
assert_eq!(actual, Tag::Eq);
|
assert_eq!(actual, Tag::Eq);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_extra() {
|
fn test_extra() {
|
||||||
let (input, actual) = Tag::parse("x foobar").unwrap();
|
let (input, actual) = Tag::parse_.parse_peek("x foobar").unwrap();
|
||||||
assert_eq!(input, " foobar");
|
assert_eq!(input, " foobar");
|
||||||
assert_eq!(actual, Tag::Improper);
|
assert_eq!(actual, Tag::Improper);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Pos {
|
impl Pos {
|
||||||
pub fn parse(input: &str) -> IResult<&str, Pos> {
|
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||||
use winnow::bytes::tag;
|
Self::parse_.parse(input).map_err(|_err| ParseError)
|
||||||
let noun = tag("<N>");
|
}
|
||||||
let verb = tag("<V>");
|
|
||||||
let adjective = tag("<Adj>");
|
fn parse_(input: &mut &str) -> PResult<Self, ()> {
|
||||||
let adverb = tag("<Adv>");
|
trace("pos", move |input: &mut &str| {
|
||||||
winnow::branch::alt((
|
winnow::combinator::alt((
|
||||||
noun.value(Pos::Noun),
|
"<N>".value(Pos::Noun),
|
||||||
verb.value(Pos::Verb),
|
"<V>".value(Pos::Verb),
|
||||||
adjective.value(Pos::Adjective),
|
"<Adj>".value(Pos::Adjective),
|
||||||
adverb.value(Pos::Adverb),
|
"<Adv>".value(Pos::Adverb),
|
||||||
))
|
))
|
||||||
|
.parse_next(input)
|
||||||
|
})
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -531,15 +592,26 @@ mod test_pos {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_valid() {
|
fn test_valid() {
|
||||||
let (input, actual) = Pos::parse("<N>").unwrap();
|
let (input, actual) = Pos::parse_.parse_peek("<N>").unwrap();
|
||||||
assert_eq!(input, "");
|
assert_eq!(input, "");
|
||||||
assert_eq!(actual, Pos::Noun);
|
assert_eq!(actual, Pos::Noun);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_extra() {
|
fn test_extra() {
|
||||||
let (input, actual) = Pos::parse("<Adj> foobar").unwrap();
|
let (input, actual) = Pos::parse_.parse_peek("<Adj> foobar").unwrap();
|
||||||
assert_eq!(input, " foobar");
|
assert_eq!(input, " foobar");
|
||||||
assert_eq!(actual, Pos::Adjective);
|
assert_eq!(actual, Pos::Adjective);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct ParseError;
|
||||||
|
|
||||||
|
impl std::fmt::Display for ParseError {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "invalid")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::error::Error for ParseError {}
|
||||||
|
|
Loading…
Reference in a new issue