fix: Reduce false positives from ordinals

Ordinals (e.g. `1st`, `2nd`, `3rd`, `4th`) are now simply ignored, since our
focus is on programmer typos and these can't be identifiers.  This is simpler
and is less work at runtime.

Fixes #331
This commit is contained in:
Ed Page 2021-09-14 08:53:31 -05:00
parent 894c8d71ca
commit e20879dae1

View file

@ -182,6 +182,7 @@ mod parser {
terminated(hash_literal, sep1), terminated(hash_literal, sep1),
terminated(hex_literal, sep1), terminated(hex_literal, sep1),
terminated(dec_literal, sep1), terminated(dec_literal, sep1),
terminated(ordinal_literal, sep1),
terminated(base64_literal, sep1), terminated(base64_literal, sep1),
terminated(email_literal, sep1), terminated(email_literal, sep1),
terminated(url_literal, sep1), terminated(url_literal, sep1),
@ -199,6 +200,30 @@ mod parser {
take_while1(is_ignore_char)(input) take_while1(is_ignore_char)(input)
} }
fn ordinal_literal<T>(input: T) -> IResult<T, T>
where
T: nom::InputTakeAtPosition
+ nom::InputTake
+ nom::InputIter
+ nom::InputLength
+ nom::Offset
+ nom::Slice<std::ops::RangeTo<usize>>
+ nom::Slice<std::ops::RangeFrom<usize>>
+ Clone,
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
<T as nom::InputIter>::Item: AsChar + Copy,
{
terminated(
take_while1(is_dec_digit),
alt((
pair(char('s'), char('t')),
pair(char('n'), char('d')),
pair(char('r'), char('d')),
pair(char('t'), char('h')),
)),
)(input)
}
fn dec_literal<T>(input: T) -> IResult<T, T> fn dec_literal<T>(input: T) -> IResult<T, T>
where where
T: nom::InputTakeAtPosition, T: nom::InputTakeAtPosition,
@ -434,6 +459,11 @@ mod parser {
} }
} }
#[inline]
fn is_dec_digit(i: impl AsChar + Copy) -> bool {
i.is_dec_digit()
}
#[inline] #[inline]
fn is_dec_digit_with_sep(i: impl AsChar + Copy) -> bool { fn is_dec_digit_with_sep(i: impl AsChar + Copy) -> bool {
i.is_dec_digit() || is_digit_sep(i.as_char()) i.is_dec_digit() || is_digit_sep(i.as_char())
@ -884,6 +914,21 @@ mod test {
assert_eq!(expected, actual); assert_eq!(expected, actual);
} }
#[test]
fn tokenize_ignore_ordinal() {
let parser = TokenizerBuilder::new().build();
let input = "Hello 1st 2nd 3rd 4th World";
let expected: Vec<Identifier> = vec![
Identifier::new_unchecked("Hello", Case::None, 0),
Identifier::new_unchecked("World", Case::None, 22),
];
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
assert_eq!(expected, actual);
let actual: Vec<_> = parser.parse_str(input).collect();
assert_eq!(expected, actual);
}
#[test] #[test]
fn tokenize_ignore_hex() { fn tokenize_ignore_hex() {
let parser = TokenizerBuilder::new().build(); let parser = TokenizerBuilder::new().build();