From 0c49c3ea2be49ae173fd6d366616bddf74b2b628 Mon Sep 17 00:00:00 2001
From: Ed Page
Date: Mon, 24 Jan 2022 20:01:06 -0600
Subject: [PATCH] fix(parser): Allow markdown formatting around ordinals

Fixes #409
---
 crates/typos/src/tokens.rs | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs
index a340729..da61571 100644
--- a/crates/typos/src/tokens.rs
+++ b/crates/typos/src/tokens.rs
@@ -216,7 +216,14 @@ mod parser {
         <T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
         <T as nom::InputIter>::Item: AsChar + Copy,
     {
-        terminated(
+        fn is_sep(c: impl AsChar) -> bool {
+            let c = c.as_char();
+            // Avoid markdown throwing off our ordinal detection
+            ['_'].contains(&c)
+        }
+
+        recognize(tuple((
+            take_while(is_sep),
             take_while1(is_dec_digit),
             alt((
                 pair(char('s'), char('t')),
@@ -224,7 +231,8 @@ mod parser {
                 pair(char('r'), char('d')),
                 pair(char('t'), char('h')),
             )),
-        )(input)
+            take_while(is_sep),
+        )))(input)
     }
 
     fn dec_literal<T>(input: T) -> IResult<T, T>
@@ -935,10 +943,10 @@ mod test {
     fn tokenize_ignore_ordinal() {
         let parser = TokenizerBuilder::new().build();
 
-        let input = "Hello 1st 2nd 3rd 4th World";
+        let input = "Hello 1st 2nd 3rd 4th __5th__ World";
         let expected: Vec<Identifier> = vec![
             Identifier::new_unchecked("Hello", Case::None, 0),
-            Identifier::new_unchecked("World", Case::None, 22),
+            Identifier::new_unchecked("World", Case::None, 30),
         ];
         let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
         assert_eq!(expected, actual);