diff --git a/Cargo.lock b/Cargo.lock index 8c616b6..dafb92a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -578,69 +578,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" [[package]] -name = "encoding" -version = "0.2.33" +name = "encoding_rs" +version = "0.8.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec" +checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394" dependencies = [ - "encoding-index-japanese", - "encoding-index-korean", - "encoding-index-simpchinese", - "encoding-index-singlebyte", - "encoding-index-tradchinese", + "cfg-if", ] -[[package]] -name = "encoding-index-japanese" -version = "1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91" -dependencies = [ - "encoding_index_tests", -] - -[[package]] -name = "encoding-index-korean" -version = "1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81" -dependencies = [ - "encoding_index_tests", -] - -[[package]] -name = "encoding-index-simpchinese" -version = "1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7" -dependencies = [ - "encoding_index_tests", -] - -[[package]] -name = "encoding-index-singlebyte" -version = "1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a" -dependencies = [ - "encoding_index_tests", -] - -[[package]] -name = "encoding-index-tradchinese" -version = "1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18" -dependencies = [ - "encoding_index_tests", -] - -[[package]] -name = "encoding_index_tests" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" - [[package]] name = "enumflags2" version = "0.7.5" @@ -1675,7 +1620,7 @@ dependencies = [ "derive_more", "derive_setters", "difflib", - "encoding", + "encoding_rs", "env_logger", "globset", "human-panic", diff --git a/crates/typos-cli/Cargo.toml b/crates/typos-cli/Cargo.toml index 5cc2114..8388621 100644 --- a/crates/typos-cli/Cargo.toml +++ b/crates/typos-cli/Cargo.toml @@ -70,7 +70,6 @@ derive_more = "0.99.17" derive_setters = "0.1" itertools = "0.10" serde_json = "1.0" -encoding = "0.2" kstring = { version = "2.0.0", features = ["serde"] } typed-arena = "2.0.2" maplit = "1.0" @@ -82,6 +81,7 @@ anstyle = "0.3.5" anstream = "0.2.6" serde_regex = "1.1.0" regex = "1.7.3" +encoding_rs = "0.8.32" [dev-dependencies] assert_fs = "1.0" diff --git a/crates/typos-cli/src/file.rs b/crates/typos-cli/src/file.rs index cd6924e..85574a1 100644 --- a/crates/typos-cli/src/file.rs +++ b/crates/typos-cli/src/file.rs @@ -1,5 +1,4 @@ use bstr::ByteSlice; -use encoding::Encoding; use std::io::Read; use std::io::Write; @@ -473,11 +472,23 @@ fn read_file( (buffer, content_type) }, content_inspector::ContentType::UTF_16LE => { - let buffer = report_result(encoding::all::UTF_16LE.decode(&buffer, encoding::DecoderTrap::Strict), reporter)?; + let mut decoded = String::new(); + let (r, _) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true); + let decoded = match r { + encoding_rs::DecoderResult::InputEmpty => Ok(decoded), + _ => Err("invalid UTF-16LE encoding"), + }; + let buffer = report_result(decoded, reporter)?; (buffer.into_bytes(), content_type) } content_inspector::ContentType::UTF_16BE => { - let buffer = report_result(encoding::all::UTF_16BE.decode(&buffer, encoding::DecoderTrap::Strict), reporter)?; + let mut decoded = String::new(); + let (r, _) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true); + let decoded = match r { + encoding_rs::DecoderResult::InputEmpty => Ok(decoded), + _ => Err("invalid UTF-16BE encoding"), + }; + let buffer = report_result(decoded, reporter)?; (buffer.into_bytes(), content_type) }, }; @@ -505,10 +516,12 @@ fn write_file( // Error occurred, don't clear out the file return Ok(()); } - report_result( - encoding::all::UTF_16LE.encode(&buffer, encoding::EncoderTrap::Strict), - reporter, - )? + let (encoded, _, replaced) = encoding_rs::UTF_16LE.encode(&buffer); + assert!( + !replaced, + "Coming from UTF-8, UTF-16LE shouldn't do replacements" + ); + encoded.into_owned() } content_inspector::ContentType::UTF_16BE => { let buffer = report_result(String::from_utf8(buffer), reporter)?; @@ -516,10 +529,12 @@ fn write_file( // Error occurred, don't clear out the file return Ok(()); } - report_result( - encoding::all::UTF_16BE.encode(&buffer, encoding::EncoderTrap::Strict), - reporter, - )? + let (encoded, _, replaced) = encoding_rs::UTF_16BE.encode(&buffer); + assert!( + !replaced, + "Coming from UTF-8, UTF-16BE shouldn't do replacements" + ); + encoded.into_owned() } };