From e20879dae1da3f0cb20de70b02821031ec07c068 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Tue, 14 Sep 2021 08:53:31 -0500 Subject: [PATCH] fix: Reduce false positives from ordinals Just ignoring them since our focus is on programmer typos and these can't be identifiers. This is simpler and is less work at runtime. Fixes #331 --- crates/typos/src/tokens.rs | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 35f483a..7c1ce31 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -182,6 +182,7 @@ mod parser { terminated(hash_literal, sep1), terminated(hex_literal, sep1), terminated(dec_literal, sep1), + terminated(ordinal_literal, sep1), terminated(base64_literal, sep1), terminated(email_literal, sep1), terminated(url_literal, sep1), @@ -199,6 +200,30 @@ mod parser { take_while1(is_ignore_char)(input) } + fn ordinal_literal<T>(input: T) -> IResult<T, T> + where + T: nom::InputTakeAtPosition + + nom::InputTake + + nom::InputIter + + nom::InputLength + + nom::Offset + + nom::Slice<std::ops::RangeTo<usize>> + + nom::Slice<std::ops::RangeFrom<usize>> + + Clone, + <T as nom::InputTakeAtPosition>::Item: AsChar + Copy, + <T as nom::InputIter>::Item: AsChar + Copy, + { + terminated( + take_while1(is_dec_digit), + alt(( + pair(char('s'), char('t')), + pair(char('n'), char('d')), + pair(char('r'), char('d')), + pair(char('t'), char('h')), + )), + )(input) + } + fn dec_literal<T>(input: T) -> IResult<T, T> where T: nom::InputTakeAtPosition, @@ -434,6 +459,11 @@ mod parser { } } + #[inline] + fn is_dec_digit(i: impl AsChar + Copy) -> bool { + i.is_dec_digit() + } + #[inline] fn is_dec_digit_with_sep(i: impl AsChar + Copy) -> bool { i.is_dec_digit() || is_digit_sep(i.as_char()) @@ -884,6 +914,21 @@ mod test { assert_eq!(expected, actual); } + #[test] + fn tokenize_ignore_ordinal() { + let parser = TokenizerBuilder::new().build(); + + let input = "Hello 1st 2nd 3rd 4th World"; + let expected: Vec<Identifier> = vec![ + Identifier::new_unchecked("Hello", Case::None, 0), 
Identifier::new_unchecked("World", Case::None, 22), + ]; + let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); + assert_eq!(expected, actual); + let actual: Vec<_> = parser.parse_str(input).collect(); + assert_eq!(expected, actual); + } + #[test] fn tokenize_ignore_hex() { let parser = TokenizerBuilder::new().build();