mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-22 15:42:23 -05:00
Merge pull request #703 from epage/enc
refactor: Switch out the UTF-16 encoding impl
This commit is contained in:
commit
e10e0d20da
3 changed files with 32 additions and 72 deletions
65
Cargo.lock
generated
65
Cargo.lock
generated
|
@ -578,69 +578,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
||||
|
||||
[[package]]
|
||||
name = "encoding"
|
||||
version = "0.2.33"
|
||||
name = "encoding_rs"
|
||||
version = "0.8.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
|
||||
checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394"
|
||||
dependencies = [
|
||||
"encoding-index-japanese",
|
||||
"encoding-index-korean",
|
||||
"encoding-index-simpchinese",
|
||||
"encoding-index-singlebyte",
|
||||
"encoding-index-tradchinese",
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-japanese"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-korean"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-simpchinese"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-singlebyte"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-tradchinese"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_index_tests"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
|
||||
|
||||
[[package]]
|
||||
name = "enumflags2"
|
||||
version = "0.7.5"
|
||||
|
@ -1675,7 +1620,7 @@ dependencies = [
|
|||
"derive_more",
|
||||
"derive_setters",
|
||||
"difflib",
|
||||
"encoding",
|
||||
"encoding_rs",
|
||||
"env_logger",
|
||||
"globset",
|
||||
"human-panic",
|
||||
|
|
|
@ -70,7 +70,6 @@ derive_more = "0.99.17"
|
|||
derive_setters = "0.1"
|
||||
itertools = "0.10"
|
||||
serde_json = "1.0"
|
||||
encoding = "0.2"
|
||||
kstring = { version = "2.0.0", features = ["serde"] }
|
||||
typed-arena = "2.0.2"
|
||||
maplit = "1.0"
|
||||
|
@ -82,6 +81,7 @@ anstyle = "0.3.5"
|
|||
anstream = "0.2.6"
|
||||
serde_regex = "1.1.0"
|
||||
regex = "1.7.3"
|
||||
encoding_rs = "0.8.32"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_fs = "1.0"
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
use bstr::ByteSlice;
|
||||
use encoding::Encoding;
|
||||
use std::io::Read;
|
||||
use std::io::Write;
|
||||
|
||||
|
@ -473,11 +472,23 @@ fn read_file(
|
|||
(buffer, content_type)
|
||||
},
|
||||
content_inspector::ContentType::UTF_16LE => {
|
||||
let buffer = report_result(encoding::all::UTF_16LE.decode(&buffer, encoding::DecoderTrap::Strict), reporter)?;
|
||||
let mut decoded = String::new();
|
||||
let (r, _) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
|
||||
let decoded = match r {
|
||||
encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
|
||||
_ => Err("invalid UTF-16LE encoding"),
|
||||
};
|
||||
let buffer = report_result(decoded, reporter)?;
|
||||
(buffer.into_bytes(), content_type)
|
||||
}
|
||||
content_inspector::ContentType::UTF_16BE => {
|
||||
let buffer = report_result(encoding::all::UTF_16BE.decode(&buffer, encoding::DecoderTrap::Strict), reporter)?;
|
||||
let mut decoded = String::new();
|
||||
let (r, _) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
|
||||
let decoded = match r {
|
||||
encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
|
||||
_ => Err("invalid UTF-16BE encoding"),
|
||||
};
|
||||
let buffer = report_result(decoded, reporter)?;
|
||||
(buffer.into_bytes(), content_type)
|
||||
},
|
||||
};
|
||||
|
@ -505,10 +516,12 @@ fn write_file(
|
|||
// Error occurred, don't clear out the file
|
||||
return Ok(());
|
||||
}
|
||||
report_result(
|
||||
encoding::all::UTF_16LE.encode(&buffer, encoding::EncoderTrap::Strict),
|
||||
reporter,
|
||||
)?
|
||||
let (encoded, _, replaced) = encoding_rs::UTF_16LE.encode(&buffer);
|
||||
assert!(
|
||||
!replaced,
|
||||
"Coming from UTF-8, UTF-16LE shouldn't do replacements"
|
||||
);
|
||||
encoded.into_owned()
|
||||
}
|
||||
content_inspector::ContentType::UTF_16BE => {
|
||||
let buffer = report_result(String::from_utf8(buffer), reporter)?;
|
||||
|
@ -516,10 +529,12 @@ fn write_file(
|
|||
// Error occurred, don't clear out the file
|
||||
return Ok(());
|
||||
}
|
||||
report_result(
|
||||
encoding::all::UTF_16BE.encode(&buffer, encoding::EncoderTrap::Strict),
|
||||
reporter,
|
||||
)?
|
||||
let (encoded, _, replaced) = encoding_rs::UTF_16BE.encode(&buffer);
|
||||
assert!(
|
||||
!replaced,
|
||||
"Coming from UTF-8, UTF-16BE shouldn't do replacements"
|
||||
);
|
||||
encoded.into_owned()
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue