Mirror of https://github.com/crate-ci/typos.git (synced 2024-12-22 23:52:12 -05:00)
fix: Reduce false positives from ordinals
Just ignore them, since our focus is on programmer typos and ordinals can't be identifiers. This is simpler and less work at runtime.

Fixes #331
This commit is contained in:
parent 894c8d71ca
commit e20879dae1

1 changed file with 45 additions and 0 deletions
@@ -182,6 +182,7 @@ mod parser {
             terminated(hash_literal, sep1),
             terminated(hex_literal, sep1),
             terminated(dec_literal, sep1),
+            terminated(ordinal_literal, sep1),
             terminated(base64_literal, sep1),
             terminated(email_literal, sep1),
             terminated(url_literal, sep1),
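For context: nom's `alt` tries its branches in order and falls through to the next branch when one fails with a recoverable error, which is presumably why the new `terminated(ordinal_literal, sep1)` branch can sit after `terminated(dec_literal, sep1)`: if the decimal branch matches the digits of "1st" but `sep1` then rejects the "st", `alt` backtracks and the ordinal branch gets its turn. A minimal standalone sketch of that fallthrough, with illustrative names only (`multispace1` stands in for the crate's `sep1`, which may behave differently):

use nom::branch::alt;
use nom::bytes::complete::{tag, take_while1};
use nom::character::complete::multispace1;
use nom::sequence::terminated;
use nom::IResult;

// Plain digits must be followed by whitespace; "1st" is a separate branch.
fn digits_then_sep(input: &str) -> IResult<&str, &str> {
    terminated(take_while1(|c: char| c.is_ascii_digit()), multispace1)(input)
}

fn demo(input: &str) -> IResult<&str, &str> {
    alt((digits_then_sep, terminated(tag("1st"), multispace1)))(input)
}

fn main() {
    // The digit branch consumes "1" but then fails on "st", so `alt`
    // backtracks and the "1st" branch matches instead.
    assert_eq!(demo("1st next"), Ok(("next", "1st")));
}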
@@ -199,6 +200,30 @@ mod parser {
         take_while1(is_ignore_char)(input)
     }
 
+    fn ordinal_literal<T>(input: T) -> IResult<T, T>
+    where
+        T: nom::InputTakeAtPosition
+            + nom::InputTake
+            + nom::InputIter
+            + nom::InputLength
+            + nom::Offset
+            + nom::Slice<std::ops::RangeTo<usize>>
+            + nom::Slice<std::ops::RangeFrom<usize>>
+            + Clone,
+        <T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
+        <T as nom::InputIter>::Item: AsChar + Copy,
+    {
+        terminated(
+            take_while1(is_dec_digit),
+            alt((
+                pair(char('s'), char('t')),
+                pair(char('n'), char('d')),
+                pair(char('r'), char('d')),
+                pair(char('t'), char('h')),
+            )),
+        )(input)
+    }
+
     fn dec_literal<T>(input: T) -> IResult<T, T>
     where
         T: nom::InputTakeAtPosition,
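For readers less familiar with nom, the same rule specialized to `&str` looks roughly like this minimal sketch (assuming nom 7-style module paths; the crate's actual imports and nom version may differ). `terminated` yields only the digit run, with the st/nd/rd/th suffix consumed along the way:

use nom::branch::alt;
use nom::bytes::complete::take_while1;
use nom::character::complete::char;
use nom::sequence::{pair, terminated};
use nom::IResult;

// One or more ASCII digits followed by one of the suffixes st/nd/rd/th.
fn ordinal(input: &str) -> IResult<&str, &str> {
    terminated(
        take_while1(|c: char| c.is_ascii_digit()),
        alt((
            pair(char('s'), char('t')),
            pair(char('n'), char('d')),
            pair(char('r'), char('d')),
            pair(char('t'), char('h')),
        )),
    )(input)
}

fn main() {
    assert_eq!(ordinal("2nd item"), Ok((" item", "2")));
    assert!(ordinal("first").is_err()); // no leading digits
    assert!(ordinal("2n").is_err()); // incomplete suffix
}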
@@ -434,6 +459,11 @@ mod parser {
         }
     }
 
+    #[inline]
+    fn is_dec_digit(i: impl AsChar + Copy) -> bool {
+        i.is_dec_digit()
+    }
+
     #[inline]
     fn is_dec_digit_with_sep(i: impl AsChar + Copy) -> bool {
         i.is_dec_digit() || is_digit_sep(i.as_char())
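Note that `ordinal_literal` uses the new, stricter `is_dec_digit` rather than the pre-existing `is_dec_digit_with_sep`, so digit separators do not extend an ordinal's digit run. A small illustrative contrast (the '_' separator below is an assumption for the example; the crate's `is_digit_sep` may accept a different set):

// Illustrative predicates only; not the crate's actual implementations.
fn is_dec_digit(c: char) -> bool {
    c.is_ascii_digit()
}

fn is_dec_digit_with_sep(c: char) -> bool {
    is_dec_digit(c) || c == '_'
}

fn main() {
    assert!("1234".chars().all(is_dec_digit));
    assert!(!"1_000".chars().all(is_dec_digit)); // a separator ends the ordinal's digit run
    assert!("1_000".chars().all(is_dec_digit_with_sep));
}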
@@ -884,6 +914,21 @@ mod test {
         assert_eq!(expected, actual);
     }
 
+    #[test]
+    fn tokenize_ignore_ordinal() {
+        let parser = TokenizerBuilder::new().build();
+
+        let input = "Hello 1st 2nd 3rd 4th World";
+        let expected: Vec<Identifier> = vec![
+            Identifier::new_unchecked("Hello", Case::None, 0),
+            Identifier::new_unchecked("World", Case::None, 22),
+        ];
+        let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
+        assert_eq!(expected, actual);
+        let actual: Vec<_> = parser.parse_str(input).collect();
+        assert_eq!(expected, actual);
+    }
+
     #[test]
     fn tokenize_ignore_hex() {
         let parser = TokenizerBuilder::new().build();
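The new test checks that the ordinals disappear from the identifier stream entirely rather than being split into pieces like "1" and "st", and that the reported offsets still refer to positions in the original input: "World" starts at byte 22 of "Hello 1st 2nd 3rd 4th World".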