From fd5398316fd152fb16261da1750f230d20e215f3 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Tue, 10 May 2022 13:57:54 -0500 Subject: [PATCH] fix(parser): Better short base64 detection Previously, we bailed out if the string was too short (<90) and no non-alphanumeric base64 bytes were present. What we ignored were the padding bytes. We now key off of the padding bytes to detect that a string is in fact base64 encoded. Like the other cases, there can be false positives, but those strings should show up elsewhere or the compiler will fail. This was called out in #485 --- crates/typos/src/tokens.rs | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 860d5f3..acc4a87 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -407,7 +407,16 @@ mod parser { ::Item: AsChar + Copy, { let (padding, captured) = take_while1(is_base64_digit)(input.clone())?; + + const CHUNK: usize = 4; + let padding_offset = input.offset(&padding); + let mut padding_len = CHUNK - padding_offset % CHUNK; + if padding_len == CHUNK { + padding_len = 0; + } + if captured.input_len() < 90 + && padding_len == 0 && captured .iter_elements() .all(|c| !['/', '+'].contains(&c.as_char())) @@ -418,14 +427,8 @@ mod parser { ))); } - const CHUNK: usize = 4; - let padding_offset = input.offset(&padding); - let mut padding_len = CHUNK - padding_offset % CHUNK; - if padding_len == CHUNK { - padding_len = 0; - } - let (after, _) = take_while_m_n(padding_len, padding_len, is_base64_padding)(padding)?; + let after_offset = input.offset(&after); Ok(input.take_split(after_offset)) } @@ -1207,6 +1210,21 @@ mod test { assert_eq!(expected, actual); } + #[test] + fn tokenize_ignore_base64_case_3() { + let parser = TokenizerBuilder::new().build(); + + let input = r#" "integrity": "sha512-hCmlUAIlUiav8Xdqw3Io4LcpA1DOt7h3LSTAC4G6JGHFFaWzI6qvFt9oilvl8BmkbBRX1IhM90ZAmpk68zccQA==","#; + let expected: Vec<Identifier> = vec![ 
+ Identifier::new_unchecked("integrity", Case::None, 8), + Identifier::new_unchecked("sha512", Case::None, 21), + ]; + let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); + assert_eq!(expected, actual); + let actual: Vec<_> = parser.parse_str(input).collect(); + assert_eq!(expected, actual); + } + #[test] fn tokenize_ignore_email() { let parser = TokenizerBuilder::new().build();