Mirror of https://github.com/crate-ci/typos.git, synced 2025-01-23 23:18:57 -05:00
fix(parser): Better short base64 detection
Previously, we bailed out if the string was too short (<90 bytes) and no non-alphanumeric base64 bytes ('/' or '+') were present. What we ignored were the padding bytes: we now also key off of '=' padding bytes to detect that a string is in fact base64 encoded. Like the other cases, there can be false positives, but those strings should show up elsewhere or the compiler will fail. This was called out in #485.
parent bd5048def5
commit fd5398316f

1 changed file with 25 additions and 7 deletions
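Below is a minimal standalone sketch of the padding arithmetic this change introduces (the expected_padding helper name is hypothetical; the real code computes the same value inline from nom input offsets):

    // A base64 stream is consumed in 4-byte chunks; a partial final chunk
    // must be completed with '=' bytes, so the expected padding length is
    // whatever rounds the digit count up to a multiple of 4.
    fn expected_padding(digit_len: usize) -> usize {
        const CHUNK: usize = 4;
        let mut padding_len = CHUNK - digit_len % CHUNK;
        if padding_len == CHUNK {
            padding_len = 0; // already chunk-aligned: no '=' expected
        }
        padding_len
    }

    fn main() {
        assert_eq!(expected_padding(88), 0); // aligned: no padding to key off
        assert_eq!(expected_padding(86), 2); // an 86-digit sha512 hash ends in "=="
        assert_eq!(expected_padding(87), 1);
    }

A short digit run with a nonzero expected padding length is now kept as a base64 candidate even when it contains no '/' or '+'.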
@@ -407,7 +407,16 @@ mod parser {
         <T as nom::InputIter>::Item: AsChar + Copy,
     {
         let (padding, captured) = take_while1(is_base64_digit)(input.clone())?;
+
+        const CHUNK: usize = 4;
+        let padding_offset = input.offset(&padding);
+        let mut padding_len = CHUNK - padding_offset % CHUNK;
+        if padding_len == CHUNK {
+            padding_len = 0;
+        }
+
         if captured.input_len() < 90
+            && padding_len == 0
             && captured
                 .iter_elements()
                 .all(|c| !['/', '+'].contains(&c.as_char()))
@@ -418,14 +427,8 @@ mod parser {
             )));
         }
 
-        const CHUNK: usize = 4;
-        let padding_offset = input.offset(&padding);
-        let mut padding_len = CHUNK - padding_offset % CHUNK;
-        if padding_len == CHUNK {
-            padding_len = 0;
-        }
-
         let (after, _) = take_while_m_n(padding_len, padding_len, is_base64_padding)(padding)?;
 
         let after_offset = input.offset(&after);
         Ok(input.take_split(after_offset))
     }
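The parser then demands exactly that many '=' bytes: take_while_m_n(padding_len, padding_len, is_base64_padding) sets both the minimum and maximum to padding_len, so the wrong amount of padding fails the whole base64 parse. A minimal sketch of that exact-length behavior (assuming nom 7; exactly_two_padding is a hypothetical stand-in for the padding_len == 2 case):

    use nom::bytes::complete::take_while_m_n;
    use nom::IResult;

    // min == max == 2: accept exactly two '=' bytes, as the parser does
    // after an 86-digit run.
    fn exactly_two_padding(input: &str) -> IResult<&str, &str> {
        take_while_m_n(2, 2, |c| c == '=')(input)
    }

    fn main() {
        assert_eq!(exactly_two_padding("==,"), Ok((",", "==")));
        assert!(exactly_two_padding("=,").is_err()); // one '=' is not enough
    }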
@@ -1207,6 +1210,21 @@ mod test {
         assert_eq!(expected, actual);
     }
 
+    #[test]
+    fn tokenize_ignore_base64_case_3() {
+        let parser = TokenizerBuilder::new().build();
+
+        let input = r#" "integrity": "sha512-hCmlUAIlUiav8Xdqw3Io4LcpA1DOt7h3LSTAC4G6JGHFFaWzI6qvFt9oilvl8BmkbBRX1IhM90ZAmpk68zccQA==","#;
+        let expected: Vec<Identifier> = vec![
+            Identifier::new_unchecked("integrity", Case::None, 8),
+            Identifier::new_unchecked("sha512", Case::None, 21),
+        ];
+        let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
+        assert_eq!(expected, actual);
+        let actual: Vec<_> = parser.parse_str(input).collect();
+        assert_eq!(expected, actual);
+    }
+
     #[test]
     fn tokenize_ignore_email() {
         let parser = TokenizerBuilder::new().build();
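The new test exercises exactly this case: the sha512 hash is shorter than 90 bytes and contains neither '/' nor '+', so the old condition rejected it as base64, while its trailing "==" now yields a nonzero padding_len and the tokenizer skips it, leaving only "integrity" and "sha512" as identifiers. To run just this test (the -p typos package filter is an assumption about the workspace layout):

    cargo test -p typos tokenize_ignore_base64_case_3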