From ae7f3132306e65a32d52ec1b4b899aacad8f123b Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 30 Mar 2023 07:27:55 -0500 Subject: [PATCH] fix(cli): Actually decode UTF-16 Two problems - I thought we had a UTF-16 test but apparently we didn't - I didn't read enough fine print in the `encoding_rs` API These combined meant the last release completely broke UTF-16 support. --- crates/typos-cli/src/file.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/crates/typos-cli/src/file.rs b/crates/typos-cli/src/file.rs index 85574a1..17fdbf6 100644 --- a/crates/typos-cli/src/file.rs +++ b/crates/typos-cli/src/file.rs @@ -472,21 +472,27 @@ fn read_file( (buffer, content_type) }, content_inspector::ContentType::UTF_16LE => { - let mut decoded = String::new(); - let (r, _) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true); + // Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate + // so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in + // a buffer twice its size + let mut decoded = String::with_capacity(buffer.len() * 2); + let (r, written) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true); let decoded = match r { encoding_rs::DecoderResult::InputEmpty => Ok(decoded), - _ => Err("invalid UTF-16LE encoding"), + _ => Err(format!("invalid UTF-16LE encoding at byte {} in {}", written, path.display())), }; let buffer = report_result(decoded, reporter)?; (buffer.into_bytes(), content_type) } content_inspector::ContentType::UTF_16BE => { - let mut decoded = String::new(); - let (r, _) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true); + // Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate + // so to avoid `OutputFull` loops, we're going to 
assume any UTF-16 content can fit in + // a buffer twice its size + let mut decoded = String::with_capacity(buffer.len() * 2); + let (r, written) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true); let decoded = match r { encoding_rs::DecoderResult::InputEmpty => Ok(decoded), - _ => Err("invalid UTF-16BE encoding"), + _ => Err(format!("invalid UTF-16BE encoding at byte {} in {}", written, path.display())), }; let buffer = report_result(decoded, reporter)?; (buffer.into_bytes(), content_type)