From 6cc3e3f9e033285d3c10dac138a90317172bb800 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 14 Jul 2023 12:44:39 -0500 Subject: [PATCH] refactor(varcon)!: Upgrade to winnow 0.5 --- Cargo.lock | 15 +++- crates/varcon-core/Cargo.toml | 2 +- crates/varcon-core/src/parser.rs | 143 ++++++++++++++++--------------- 3 files changed, 88 insertions(+), 72 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d492fef..ecb55b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1610,7 +1610,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "winnow", + "winnow 0.4.9", ] [[package]] @@ -1648,7 +1648,7 @@ dependencies = [ "thiserror", "unicode-segmentation", "unicode-xid", - "winnow", + "winnow 0.4.9", ] [[package]] @@ -1831,7 +1831,7 @@ name = "varcon-core" version = "2.2.12" dependencies = [ "enumflags2", - "winnow", + "winnow 0.5.0", ] [[package]] @@ -2127,3 +2127,12 @@ checksum = "81a2094c43cc94775293eaa0e499fbc30048a6d824ac82c0351a8c0bf9112529" dependencies = [ "memchr", ] + +[[package]] +name = "winnow" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7" +dependencies = [ + "memchr", +] diff --git a/crates/varcon-core/Cargo.toml b/crates/varcon-core/Cargo.toml index a55eacf..671ef17 100644 --- a/crates/varcon-core/Cargo.toml +++ b/crates/varcon-core/Cargo.toml @@ -16,7 +16,7 @@ parser = ["dep:winnow"] flags = ["dep:enumflags2"] [dependencies] -winnow = { version = "0.4.9", optional = true } +winnow = { version = "0.5.0", optional = true } enumflags2 = { version = "0.7", optional = true } [package.metadata.docs.rs] diff --git a/crates/varcon-core/src/parser.rs b/crates/varcon-core/src/parser.rs index 4eeb9cc..a770ba9 100644 --- a/crates/varcon-core/src/parser.rs +++ b/crates/varcon-core/src/parser.rs @@ -18,10 +18,8 @@ impl<'i> Iterator for ClusterIter<'i> { type Item = Cluster; fn next(&mut self) -> Option { - let i = self.input.trim_start(); - let (i, c) = Cluster::parse(i).ok()?; - self.input = i; - Some(c) + self.input = self.input.trim_start(); + Cluster::parse.parse_next(&mut self.input).ok() } } @@ -62,8 +60,8 @@ A Cv: acknowledgment's / Av B C: acknowledgement's } impl Cluster { - pub fn parse(input: &str) -> IResult<&str, Self> { - trace("cluster", move |input| { + pub fn parse(input: &mut &str) -> PResult { + trace("cluster", move |input: &mut &str| { let header = ( "#", winnow::ascii::space0, @@ -85,8 +83,7 @@ impl Cluster { ), winnow::combinator::repeat(0.., note), ); - let (input, (header, entries, notes)): (_, (_, _, Vec<_>)) = - cluster.parse_next(input)?; + let (header, entries, notes): (_, _, Vec<_>) = cluster.parse_next(input)?; let header = header.map(|s| s.2.to_owned()); let notes = notes.into_iter().map(|s| s.to_owned()).collect(); @@ -95,7 +92,7 @@ impl Cluster { entries, notes, }; - Ok((input, c)) + Ok(c) }) .parse_next(input) } @@ -107,15 +104,16 @@ mod test_cluster { #[test] fn test_basic() { - let (input, actual) = Cluster::parse( - "# acknowledgment (level 35) + let (input, actual) = Cluster::parse + .parse_peek( + "# acknowledgment (level 35) A Cv: acknowledgment / Av B C: acknowledgement A Cv: acknowledgments / Av B C: acknowledgements A Cv: acknowledgment's / Av B C: acknowledgement's ", - ) - .unwrap(); + ) + .unwrap(); assert_eq!(input, "\n"); assert_eq!( actual.header, @@ -127,8 +125,9 @@ A Cv: acknowledgment's / Av B C: acknowledgement's #[test] fn test_notes() { - let (input, actual) = Cluster::parse( - "# coloration (level 50) + let (input, actual) = Cluster::parse + .parse_peek( + "# coloration (level 50) A B C: coloration / B. Cv: colouration A B C: colorations / B. Cv: colourations A B C: coloration's / B. Cv: colouration's @@ -136,8 +135,8 @@ A B C: coloration's / B. Cv: colouration's ## variant for British Engl or some reason ", - ) - .unwrap(); + ) + .unwrap(); assert_eq!(input, "\n"); assert_eq!( actual.header, @@ -149,18 +148,18 @@ A B C: coloration's / B. Cv: colouration's } impl Entry { - pub fn parse(input: &str) -> IResult<&str, Self> { - trace("entry", move |input| { + pub fn parse(input: &mut &str) -> PResult { + trace("entry", move |input: &mut &str| { let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0); - let (input, variants) = + let variants = winnow::combinator::separated1(Variant::parse, var_sep).parse_next(input)?; let desc_sep = (winnow::ascii::space0, '|'); - let (input, description) = + let description = winnow::combinator::opt((desc_sep, Self::parse_description)).parse_next(input)?; let comment_sep = (winnow::ascii::space0, '#'); - let (input, comment) = winnow::combinator::opt(( + let comment = winnow::combinator::opt(( comment_sep, winnow::ascii::space1, winnow::ascii::not_line_ending, @@ -180,14 +179,14 @@ impl Entry { }; e.variants = variants; e.comment = comment.map(|c| c.2.to_owned()); - Ok((input, e)) + Ok(e) }) .parse_next(input) } - fn parse_description(input: &str) -> IResult<&str, Self> { - trace("description", move |input| { - let (input, (pos, archaic, note, description)) = ( + fn parse_description(input: &mut &str) -> PResult { + trace("description", move |input: &mut &str| { + let (pos, archaic, note, description) = ( winnow::combinator::opt((winnow::ascii::space1, Pos::parse)), winnow::combinator::opt((winnow::ascii::space1, "(-)")), winnow::combinator::opt((winnow::ascii::space1, "--")), @@ -211,7 +210,7 @@ impl Entry { description, comment: None, }; - Ok((input, e)) + Ok(e) }) .parse_next(input) } @@ -226,8 +225,9 @@ mod test_entry { fn test_variant_only() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = - Entry::parse("A Cv: acknowledgment's / Av B C: acknowledgement's\n").unwrap(); + let (input, actual) = Entry::parse + .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's\n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 2); assert_eq!(actual.pos, None); @@ -240,7 +240,9 @@ mod test_entry { fn test_description() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse("A C: prize / B: prise | otherwise\n").unwrap(); + let (input, actual) = Entry::parse + .parse_peek("A C: prize / B: prise | otherwise\n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 2); assert_eq!(actual.pos, None); @@ -253,7 +255,9 @@ mod test_entry { fn test_pos() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse("A B C: practice / AV Cv: practise | \n").unwrap(); + let (input, actual) = Entry::parse + .parse_peek("A B C: practice / AV Cv: practise | \n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 2); assert_eq!(actual.pos, Some(Pos::Noun)); @@ -266,7 +270,9 @@ mod test_entry { fn test_archaic() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse("A: bark / Av B: barque | (-) ship\n").unwrap(); + let (input, actual) = Entry::parse + .parse_peek("A: bark / Av B: barque | (-) ship\n") + .unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 2); assert_eq!(actual.pos, None); @@ -279,7 +285,7 @@ mod test_entry { fn test_note() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Entry::parse("_: cabbies | -- plural\n").unwrap(); + let (input, actual) = Entry::parse.parse_peek("_: cabbies | -- plural\n").unwrap(); assert_eq!(input, "\n"); assert_eq!(actual.variants.len(), 1); assert_eq!(actual.pos, None); @@ -290,7 +296,7 @@ mod test_entry { #[test] fn test_trailing_comment() { - let (input, actual) = Entry::parse( + let (input, actual) = Entry::parse.parse_peek( "A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n", ) .unwrap(); @@ -308,21 +314,21 @@ mod test_entry { } impl Variant { - pub fn parse(input: &str) -> IResult<&str, Self> { - trace("variant", move |input| { + pub fn parse(input: &mut &str) -> PResult { + trace("variant", move |input: &mut &str| { let types = winnow::combinator::separated1(Type::parse, winnow::ascii::space1); let sep = (":", winnow::ascii::space0); - let (input, (types, word)) = + let (types, word) = winnow::combinator::separated_pair(types, sep, word).parse_next(input)?; let v = Self { types, word }; - Ok((input, v)) + Ok(v) }) .parse_next(input) } } -fn word(input: &str) -> IResult<&str, String> { - trace("word", move |input| { +fn word(input: &mut &str) -> PResult { + trace("word", move |input: &mut &str| { winnow::token::take_till1(|item: char| item.is_ascii_whitespace()) .map(|s: &str| s.to_owned().replace('_', " ")) .parse_next(input) @@ -338,7 +344,7 @@ mod test_variant { fn test_valid() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Variant::parse("A Cv: acknowledgment ").unwrap(); + let (input, actual) = Variant::parse.parse_peek("A Cv: acknowledgment ").unwrap(); assert_eq!(input, " "); assert_eq!( actual.types, @@ -360,8 +366,9 @@ mod test_variant { #[test] fn test_extra() { - let (input, actual) = - Variant::parse("A Cv: acknowledgment's / Av B C: acknowledgement's").unwrap(); + let (input, actual) = Variant::parse + .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's") + .unwrap(); assert_eq!(input, " / Av B C: acknowledgement's"); assert_eq!( actual.types, @@ -383,7 +390,7 @@ mod test_variant { #[test] fn test_underscore() { - let (input, actual) = Variant::parse("_: air_gun\n").unwrap(); + let (input, actual) = Variant::parse.parse_peek("_: air_gun\n").unwrap(); assert_eq!(input, "\n"); assert_eq!( actual.types, @@ -398,14 +405,14 @@ mod test_variant { } impl Type { - pub fn parse(input: &str) -> IResult<&str, Type> { - trace("type", move |input| { - let (input, category) = Category::parse(input)?; - let (input, tag) = winnow::combinator::opt(Tag::parse).parse_next(input)?; - let (input, num) = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?; + pub fn parse(input: &mut &str) -> PResult { + trace("type", move |input: &mut &str| { + let category = Category::parse(input)?; + let tag = winnow::combinator::opt(Tag::parse).parse_next(input)?; + let num = winnow::combinator::opt(winnow::ascii::digit1).parse_next(input)?; let num = num.map(|s| s.parse().expect("parser ensured its a number")); let t = Type { category, tag, num }; - Ok((input, t)) + Ok(t) }) .parse_next(input) } @@ -419,13 +426,13 @@ mod test_type { fn test_valid() { // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use // cases. - let (input, actual) = Type::parse("A ").unwrap(); + let (input, actual) = Type::parse.parse_peek("A ").unwrap(); assert_eq!(input, " "); assert_eq!(actual.category, Category::American); assert_eq!(actual.tag, None); assert_eq!(actual.num, None); - let (input, actual) = Type::parse("Bv ").unwrap(); + let (input, actual) = Type::parse.parse_peek("Bv ").unwrap(); assert_eq!(input, " "); assert_eq!(actual.category, Category::BritishIse); assert_eq!(actual.tag, Some(Tag::Variant)); @@ -434,13 +441,13 @@ mod test_type { #[test] fn test_extra() { - let (input, actual) = Type::parse("Z foobar").unwrap(); + let (input, actual) = Type::parse.parse_peek("Z foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual.category, Category::BritishIze); assert_eq!(actual.tag, None); assert_eq!(actual.num, None); - let (input, actual) = Type::parse("C- foobar").unwrap(); + let (input, actual) = Type::parse.parse_peek("C- foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual.category, Category::Canadian); assert_eq!(actual.tag, Some(Tag::Possible)); @@ -449,7 +456,7 @@ mod test_type { #[test] fn test_num() { - let (input, actual) = Type::parse("Av1 ").unwrap(); + let (input, actual) = Type::parse.parse_peek("Av1 ").unwrap(); assert_eq!(input, " "); assert_eq!(actual.category, Category::American); assert_eq!(actual.tag, Some(Tag::Variant)); @@ -458,8 +465,8 @@ mod test_type { } impl Category { - pub fn parse(input: &str) -> IResult<&str, Category> { - trace("category", move |input| { + pub fn parse(input: &mut &str) -> PResult { + trace("category", move |input: &mut &str| { let symbols = winnow::token::one_of(['A', 'B', 'Z', 'C', 'D', '_']); symbols .map(|c| match c { @@ -483,22 +490,22 @@ mod test_category { #[test] fn test_valid() { - let (input, actual) = Category::parse("A").unwrap(); + let (input, actual) = Category::parse.parse_peek("A").unwrap(); assert_eq!(input, ""); assert_eq!(actual, Category::American); } #[test] fn test_extra() { - let (input, actual) = Category::parse("_ foobar").unwrap(); + let (input, actual) = Category::parse.parse_peek("_ foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual, Category::Other); } } impl Tag { - pub fn parse(input: &str) -> IResult<&str, Tag> { - trace("tag", move |input| { + pub fn parse(input: &mut &str) -> PResult { + trace("tag", move |input: &mut &str| { let symbols = winnow::token::one_of(['.', 'v', 'V', '-', 'x']); symbols .map(|c| match c { @@ -521,23 +528,23 @@ mod test_tag { #[test] fn test_valid() { - let (input, actual) = Tag::parse(".").unwrap(); + let (input, actual) = Tag::parse.parse_peek(".").unwrap(); assert_eq!(input, ""); assert_eq!(actual, Tag::Eq); } #[test] fn test_extra() { - let (input, actual) = Tag::parse("x foobar").unwrap(); + let (input, actual) = Tag::parse.parse_peek("x foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual, Tag::Improper); } } impl Pos { - pub fn parse(input: &str) -> IResult<&str, Pos> { - trace("pos", move |input| { - winnow::branch::alt(( + pub fn parse(input: &mut &str) -> PResult { + trace("pos", move |input: &mut &str| { + winnow::combinator::alt(( "".value(Pos::Noun), "".value(Pos::Verb), "".value(Pos::Adjective), @@ -555,14 +562,14 @@ mod test_pos { #[test] fn test_valid() { - let (input, actual) = Pos::parse("").unwrap(); + let (input, actual) = Pos::parse.parse_peek("").unwrap(); assert_eq!(input, ""); assert_eq!(actual, Pos::Noun); } #[test] fn test_extra() { - let (input, actual) = Pos::parse(" foobar").unwrap(); + let (input, actual) = Pos::parse.parse_peek(" foobar").unwrap(); assert_eq!(input, " foobar"); assert_eq!(actual, Pos::Adjective); }