diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 5795e19..02c5429 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -186,6 +186,7 @@ mod parser { terminated(hex_literal, sep1), terminated(dec_literal, sep1), terminated(base64_literal, sep1), + terminated(email_literal, sep1), )))(input) } @@ -305,6 +306,27 @@ mod parser { Ok(input.take_split(after_offset)) } + fn email_literal(input: T) -> IResult + where + T: nom::InputTakeAtPosition + + nom::InputTake + + nom::InputIter + + nom::InputLength + + nom::Offset + + nom::Slice> + + nom::Slice> + + std::fmt::Debug + + Clone, + ::Item: AsChar + Copy, + ::Item: AsChar + Copy, + { + recognize(tuple(( + take_while1(is_email_localport_char), + char('@'), + take_while1(is_email_domain_char), + )))(input) + } + fn take_many0(mut f: F) -> impl FnMut(I) -> IResult where I: nom::Offset + nom::InputTake + Clone + PartialEq + std::fmt::Debug, @@ -338,19 +360,23 @@ mod parser { } } + #[inline] fn is_dec_digit_with_sep(i: impl AsChar + Copy) -> bool { i.is_dec_digit() || is_digit_sep(i.as_char()) } + #[inline] fn is_hex_digit_with_sep(i: impl AsChar + Copy) -> bool { i.is_hex_digit() || is_digit_sep(i.as_char()) } + #[inline] fn is_lower_hex_digit(i: impl AsChar + Copy) -> bool { let c = i.as_char(); ('a'..='f').contains(&c) || ('0'..='9').contains(&c) } + #[inline] fn is_base64_digit(i: impl AsChar + Copy) -> bool { let c = i.as_char(); ('a'..='z').contains(&c) @@ -360,11 +386,31 @@ mod parser { || c == '/' } + #[inline] fn is_base64_padding(i: impl AsChar + Copy) -> bool { let c = i.as_char(); c == '=' } + #[inline] + fn is_email_localport_char(i: impl AsChar + Copy) -> bool { + let c = i.as_char(); + ('a'..='z').contains(&c) + || ('A'..='Z').contains(&c) + || ('0'..='9').contains(&c) + || "!#$%&'*+-/=?^_`{|}~().".find(c).is_some() + } + + #[inline] + fn is_email_domain_char(i: impl AsChar + Copy) -> bool { + let c = i.as_char(); + ('a'..='z').contains(&c) + || ('A'..='Z').contains(&c) + || ('0'..='9').contains(&c) + || "-().".find(c).is_some() + } + + #[inline] fn is_xid_continue(i: impl AsChar + Copy) -> bool { let c = i.as_char(); unicode_xid::UnicodeXID::is_xid_continue(c) @@ -799,6 +845,21 @@ mod test { assert_eq!(expected, actual); } + #[test] + fn tokenize_ignore_email() { + let parser = TokenizerBuilder::new().build(); + + let input = "Good example@example.com Bye"; + let expected: Vec = vec![ + Identifier::new_unchecked("Good", Case::None, 0), + Identifier::new_unchecked("Bye", Case::None, 25), + ]; + let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); + assert_eq!(expected, actual); + let actual: Vec<_> = parser.parse_str(input).collect(); + assert_eq!(expected, actual); + } + #[test] fn tokenize_leading_digits() { let parser = TokenizerBuilder::new().build();