mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-22 15:42:23 -05:00
fix(cli): Actually decode UTF-16
Two problems: (1) I thought we had a UTF-16 test, but apparently we didn't; (2) I didn't read enough of the fine print in the `encoding_rs` API. Combined, these meant the last release completely broke UTF-16 support.
This commit is contained in:
parent
144ee4d018
commit
ae7f313230
1 changed files with 12 additions and 6 deletions
|
@ -472,21 +472,27 @@ fn read_file(
|
|||
(buffer, content_type)
|
||||
},
|
||||
content_inspector::ContentType::UTF_16LE => {
|
||||
let mut decoded = String::new();
|
||||
let (r, _) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
|
||||
// Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate
|
||||
// so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
|
||||
// a buffer twice its size
|
||||
let mut decoded = String::with_capacity(buffer.len() * 2);
|
||||
let (r, written) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
|
||||
let decoded = match r {
|
||||
encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
|
||||
_ => Err("invalid UTF-16LE encoding"),
|
||||
_ => Err(format!("invalid UTF-16LE encoding at byte {} in {}", written, path.display())),
|
||||
};
|
||||
let buffer = report_result(decoded, reporter)?;
|
||||
(buffer.into_bytes(), content_type)
|
||||
}
|
||||
content_inspector::ContentType::UTF_16BE => {
|
||||
let mut decoded = String::new();
|
||||
let (r, _) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
|
||||
// Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate
|
||||
// so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
|
||||
// a buffer twice its size
|
||||
let mut decoded = String::with_capacity(buffer.len() * 2);
|
||||
let (r, written) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
|
||||
let decoded = match r {
|
||||
encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
|
||||
_ => Err("invalid UTF-16BE encoding"),
|
||||
_ => Err(format!("invalid UTF-16BE encoding at byte {} in {}", written, path.display())),
|
||||
};
|
||||
let buffer = report_result(decoded, reporter)?;
|
||||
(buffer.into_bytes(), content_type)
|
||||
|
|
Loading…
Reference in a new issue