fix(cli): Actually decode UTF-16

Two problems:
- I thought we had a UTF-16 test but apparently we didn't
- I didn't read enough fine print in the `encoding_rs` API

These combined meant the last release completely broke UTF-16 support.
This commit is contained in:
Ed Page 2023-03-30 07:27:55 -05:00
parent 144ee4d018
commit ae7f313230

View file

@ -472,21 +472,27 @@ fn read_file(
(buffer, content_type)
},
content_inspector::ContentType::UTF_16LE => {
let mut decoded = String::new();
let (r, _) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
// Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate,
// so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
// a buffer twice the size of the input
let mut decoded = String::with_capacity(buffer.len() * 2);
let (r, written) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
let decoded = match r {
encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
_ => Err("invalid UTF-16LE encoding"),
_ => Err(format!("invalid UTF-16LE encoding at byte {} in {}", written, path.display())),
};
let buffer = report_result(decoded, reporter)?;
(buffer.into_bytes(), content_type)
}
content_inspector::ContentType::UTF_16BE => {
let mut decoded = String::new();
let (r, _) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
// Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate,
// so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
// a buffer twice the size of the input
let mut decoded = String::with_capacity(buffer.len() * 2);
let (r, written) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
let decoded = match r {
encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
_ => Err("invalid UTF-16BE encoding"),
_ => Err(format!("invalid UTF-16BE encoding at byte {} in {}", written, path.display())),
};
let buffer = report_result(decoded, reporter)?;
(buffer.into_bytes(), content_type)