fix(parser): Better short base64 detection

Previously, we bailed out if the string was too short (<90) and there
weren't any non-alphanumeric base64 bytes (`/` or `+`) present.  What we
ignored were the padding bytes.

We now key off of the padding bytes to detect that a string is in fact
base64 encoded.  As in the other cases, there can be false positives, but
those strings should show up elsewhere or the compiler will fail.

This was called out in #485
This commit is contained in:
Ed Page 2022-05-10 13:57:54 -05:00
parent bd5048def5
commit fd5398316f

View file

@ -407,7 +407,16 @@ mod parser {
<T as nom::InputIter>::Item: AsChar + Copy,
{
let (padding, captured) = take_while1(is_base64_digit)(input.clone())?;
const CHUNK: usize = 4;
let padding_offset = input.offset(&padding);
let mut padding_len = CHUNK - padding_offset % CHUNK;
if padding_len == CHUNK {
padding_len = 0;
}
if captured.input_len() < 90
&& padding_len == 0
&& captured
.iter_elements()
.all(|c| !['/', '+'].contains(&c.as_char()))
@ -418,14 +427,8 @@ mod parser {
)));
}
const CHUNK: usize = 4;
let padding_offset = input.offset(&padding);
let mut padding_len = CHUNK - padding_offset % CHUNK;
if padding_len == CHUNK {
padding_len = 0;
}
let (after, _) = take_while_m_n(padding_len, padding_len, is_base64_padding)(padding)?;
let after_offset = input.offset(&after);
Ok(input.take_split(after_offset))
}
@ -1207,6 +1210,21 @@ mod test {
assert_eq!(expected, actual);
}
/// A short base64 value must be ignored when it ends in padding.
///
/// The digest below is shorter than 90 characters and contains neither `/`
/// nor `+`; its only base64 hint is the trailing `==`. Only the identifiers
/// around it (`integrity` and `sha512`) should be reported.
#[test]
fn tokenize_ignore_base64_case_3() {
    let parser = TokenizerBuilder::new().build();

    // The seven leading spaces matter: they put `integrity` at byte 8 and
    // `sha512` at byte 21, which are the offsets asserted below.
    let input = r#"       "integrity": "sha512-hCmlUAIlUiav8Xdqw3Io4LcpA1DOt7h3LSTAC4G6JGHFFaWzI6qvFt9oilvl8BmkbBRX1IhM90ZAmpk68zccQA==","#;
    let expected: Vec<Identifier> = vec![
        Identifier::new_unchecked("integrity", Case::None, 8),
        Identifier::new_unchecked("sha512", Case::None, 21),
    ];

    // Both entry points (bytes and str) must agree on the result.
    let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
    assert_eq!(expected, actual);
    let actual: Vec<_> = parser.parse_str(input).collect();
    assert_eq!(expected, actual);
}
#[test]
fn tokenize_ignore_email() {
let parser = TokenizerBuilder::new().build();