mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-22 00:51:11 -05:00
Merge pull request #706 from epage/utf16
fix(cli): Actually decode UTF-16
This commit is contained in:
commit
37d90c230e
1 changed file with 12 additions and 6 deletions
|
@ -472,21 +472,27 @@ fn read_file(
|
||||||
(buffer, content_type)
|
(buffer, content_type)
|
||||||
},
|
},
|
||||||
content_inspector::ContentType::UTF_16LE => {
|
content_inspector::ContentType::UTF_16LE => {
|
||||||
let mut decoded = String::new();
|
// Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate
|
||||||
let (r, _) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
|
// so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
|
||||||
|
// a buffer twice its size
|
||||||
|
let mut decoded = String::with_capacity(buffer.len() * 2);
|
||||||
|
let (r, written) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
|
||||||
let decoded = match r {
|
let decoded = match r {
|
||||||
encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
|
encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
|
||||||
_ => Err("invalid UTF-16LE encoding"),
|
_ => Err(format!("invalid UTF-16LE encoding at byte {} in {}", written, path.display())),
|
||||||
};
|
};
|
||||||
let buffer = report_result(decoded, reporter)?;
|
let buffer = report_result(decoded, reporter)?;
|
||||||
(buffer.into_bytes(), content_type)
|
(buffer.into_bytes(), content_type)
|
||||||
}
|
}
|
||||||
content_inspector::ContentType::UTF_16BE => {
|
content_inspector::ContentType::UTF_16BE => {
|
||||||
let mut decoded = String::new();
|
// Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate
|
||||||
let (r, _) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
|
// so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
|
||||||
|
// a buffer twice its size
|
||||||
|
let mut decoded = String::with_capacity(buffer.len() * 2);
|
||||||
|
let (r, written) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
|
||||||
let decoded = match r {
|
let decoded = match r {
|
||||||
encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
|
encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
|
||||||
_ => Err("invalid UTF-16BE encoding"),
|
_ => Err(format!("invalid UTF-16BE encoding at byte {} in {}", written, path.display())),
|
||||||
};
|
};
|
||||||
let buffer = report_result(decoded, reporter)?;
|
let buffer = report_result(decoded, reporter)?;
|
||||||
(buffer.into_bytes(), content_type)
|
(buffer.into_bytes(), content_type)
|
||||||
|
|
Loading…
Reference in a new issue