mirror of
https://github.com/crate-ci/typos.git
synced 2025-01-13 18:21:21 -05:00
feat(parser): Ignore base64
For now, we hardcode a minimum length of 90 bytes to avoid ambiguity with math operations on variables (people generally put whitespace around operators anyway). Fixes #287
This commit is contained in:
parent
23b6ad5796
commit
2a1e6ca0f6
1 changed files with 64 additions and 0 deletions
|
@ -185,6 +185,7 @@ mod parser {
|
||||||
terminated(hash_literal, sep1),
|
terminated(hash_literal, sep1),
|
||||||
terminated(hex_literal, sep1),
|
terminated(hex_literal, sep1),
|
||||||
terminated(dec_literal, sep1),
|
terminated(dec_literal, sep1),
|
||||||
|
terminated(base64_literal, sep1),
|
||||||
)))(input)
|
)))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -270,6 +271,40 @@ mod parser {
|
||||||
take_while_m_n(SHA_1_MAX, SHA_256_MAX, is_lower_hex_digit)(input)
|
take_while_m_n(SHA_1_MAX, SHA_256_MAX, is_lower_hex_digit)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn base64_literal<T>(input: T) -> IResult<T, T>
|
||||||
|
where
|
||||||
|
T: nom::InputTakeAtPosition
|
||||||
|
+ nom::InputTake
|
||||||
|
+ nom::InputIter
|
||||||
|
+ nom::InputLength
|
||||||
|
+ nom::Offset
|
||||||
|
+ nom::Slice<std::ops::RangeTo<usize>>
|
||||||
|
+ nom::Slice<std::ops::RangeFrom<usize>>
|
||||||
|
+ std::fmt::Debug
|
||||||
|
+ Clone,
|
||||||
|
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
|
||||||
|
<T as nom::InputIter>::Item: AsChar + Copy,
|
||||||
|
{
|
||||||
|
let (padding, captured) = take_while1(is_base64_digit)(input.clone())?;
|
||||||
|
if captured.input_len() < 90 {
|
||||||
|
return Err(nom::Err::Error(nom::error::Error::new(
|
||||||
|
input,
|
||||||
|
nom::error::ErrorKind::LengthValue,
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
const CHUNK: usize = 4;
|
||||||
|
let padding_offset = input.offset(&padding);
|
||||||
|
let mut padding_len = CHUNK - padding_offset % CHUNK;
|
||||||
|
if padding_len == CHUNK {
|
||||||
|
padding_len = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
let (after, _) = take_while_m_n(padding_len, padding_len, is_base64_padding)(padding)?;
|
||||||
|
let after_offset = input.offset(&after);
|
||||||
|
Ok(input.take_split(after_offset))
|
||||||
|
}
|
||||||
|
|
||||||
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, I, E>
|
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, I, E>
|
||||||
where
|
where
|
||||||
I: nom::Offset + nom::InputTake + Clone + PartialEq + std::fmt::Debug,
|
I: nom::Offset + nom::InputTake + Clone + PartialEq + std::fmt::Debug,
|
||||||
|
@ -316,6 +351,20 @@ mod parser {
|
||||||
('a'..='f').contains(&c) || ('0'..='9').contains(&c)
|
('a'..='f').contains(&c) || ('0'..='9').contains(&c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_base64_digit(i: impl AsChar + Copy) -> bool {
|
||||||
|
let c = i.as_char();
|
||||||
|
('a'..='z').contains(&c)
|
||||||
|
|| ('A'..='Z').contains(&c)
|
||||||
|
|| ('0'..='9').contains(&c)
|
||||||
|
|| c == '+'
|
||||||
|
|| c == '/'
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_base64_padding(i: impl AsChar + Copy) -> bool {
|
||||||
|
let c = i.as_char();
|
||||||
|
c == '='
|
||||||
|
}
|
||||||
|
|
||||||
fn is_xid_continue(i: impl AsChar + Copy) -> bool {
|
fn is_xid_continue(i: impl AsChar + Copy) -> bool {
|
||||||
let c = i.as_char();
|
let c = i.as_char();
|
||||||
unicode_xid::UnicodeXID::is_xid_continue(c)
|
unicode_xid::UnicodeXID::is_xid_continue(c)
|
||||||
|
@ -735,6 +784,21 @@ mod test {
|
||||||
assert_eq!(expected, actual);
|
assert_eq!(expected, actual);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A long base64 blob between two words must be skipped entirely, leaving
/// only the surrounding words as identifiers.
#[test]
fn tokenize_ignore_base64() {
    let tokenizer = TokenizerBuilder::new().build();

    // "Good " (5 bytes) + 126 base64 digits + "==" + " " puts "Bye" at offset 134.
    let text = "Good Iy9+btvut+d92V+v84444ziIqJKHK879KJH59//X1Iy9+btvut+d92V+v84444ziIqJKHK879KJH59//X122Iy9+btvut+d92V+v84444ziIqJKHK879KJH59//X12== Bye";
    let want: Vec<Identifier> = vec![
        Identifier::new_unchecked("Good", Case::None, 0),
        Identifier::new_unchecked("Bye", Case::None, 134),
    ];

    // Byte-oriented and str-oriented parsing must agree.
    let from_bytes: Vec<_> = tokenizer.parse_bytes(text.as_bytes()).collect();
    assert_eq!(want, from_bytes);

    let from_str: Vec<_> = tokenizer.parse_str(text).collect();
    assert_eq!(want, from_str);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tokenize_leading_digits() {
|
fn tokenize_leading_digits() {
|
||||||
let parser = TokenizerBuilder::new().build();
|
let parser = TokenizerBuilder::new().build();
|
||||||
|
|
Loading…
Reference in a new issue