diff --git a/crates/typos-cli/src/file.rs b/crates/typos-cli/src/file.rs index 85574a1..17fdbf6 100644 --- a/crates/typos-cli/src/file.rs +++ b/crates/typos-cli/src/file.rs @@ -472,21 +472,27 @@ fn read_file( (buffer, content_type) }, content_inspector::ContentType::UTF_16LE => { - let mut decoded = String::new(); - let (r, _) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true); + // Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate + // so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in + // a buffer twice its size + let mut decoded = String::with_capacity(buffer.len() * 2); + let (r, written) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true); let decoded = match r { encoding_rs::DecoderResult::InputEmpty => Ok(decoded), - _ => Err("invalid UTF-16LE encoding"), + _ => Err(format!("invalid UTF-16LE encoding at byte {} in {}", written, path.display())), }; let buffer = report_result(decoded, reporter)?; (buffer.into_bytes(), content_type) } content_inspector::ContentType::UTF_16BE => { - let mut decoded = String::new(); - let (r, _) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true); + // Despite accepting a `String`, `decode_to_string_without_replacement` doesn't allocate + // so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in + // a buffer twice its size + let mut decoded = String::with_capacity(buffer.len() * 2); + let (r, written) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true); let decoded = match r { encoding_rs::DecoderResult::InputEmpty => Ok(decoded), - _ => Err("invalid UTF-16BE encoding"), + _ => Err(format!("invalid UTF-16BE encoding at byte {} in {}", written, path.display())), 
}; let buffer = report_result(decoded, reporter)?; (buffer.into_bytes(), content_type)