Mirror of https://github.com/crate-ci/typos.git (synced 2024-11-22 00:51:11 -05:00)
Merge pull request #703 from epage/enc
refactor: Switch out the UTF-16 encoding impl
This commit is contained in:
commit e10e0d20da
3 changed files with 32 additions and 72 deletions
65
Cargo.lock
generated
65
Cargo.lock
generated
|
@ -578,69 +578,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "encoding"
|
name = "encoding_rs"
|
||||||
version = "0.2.33"
|
version = "0.8.32"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
|
checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"encoding-index-japanese",
|
"cfg-if",
|
||||||
"encoding-index-korean",
|
|
||||||
"encoding-index-simpchinese",
|
|
||||||
"encoding-index-singlebyte",
|
|
||||||
"encoding-index-tradchinese",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "encoding-index-japanese"
|
|
||||||
version = "1.20141219.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
|
|
||||||
dependencies = [
|
|
||||||
"encoding_index_tests",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "encoding-index-korean"
|
|
||||||
version = "1.20141219.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
|
|
||||||
dependencies = [
|
|
||||||
"encoding_index_tests",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "encoding-index-simpchinese"
|
|
||||||
version = "1.20141219.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
|
|
||||||
dependencies = [
|
|
||||||
"encoding_index_tests",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "encoding-index-singlebyte"
|
|
||||||
version = "1.20141219.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
|
|
||||||
dependencies = [
|
|
||||||
"encoding_index_tests",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "encoding-index-tradchinese"
|
|
||||||
version = "1.20141219.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
|
|
||||||
dependencies = [
|
|
||||||
"encoding_index_tests",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "encoding_index_tests"
|
|
||||||
version = "0.1.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "enumflags2"
|
name = "enumflags2"
|
||||||
version = "0.7.5"
|
version = "0.7.5"
|
||||||
|
@ -1675,7 +1620,7 @@ dependencies = [
|
||||||
"derive_more",
|
"derive_more",
|
||||||
"derive_setters",
|
"derive_setters",
|
||||||
"difflib",
|
"difflib",
|
||||||
"encoding",
|
"encoding_rs",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"globset",
|
"globset",
|
||||||
"human-panic",
|
"human-panic",
|
||||||
|
|
|
@ -70,7 +70,6 @@ derive_more = "0.99.17"
|
||||||
derive_setters = "0.1"
|
derive_setters = "0.1"
|
||||||
itertools = "0.10"
|
itertools = "0.10"
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
encoding = "0.2"
|
|
||||||
kstring = { version = "2.0.0", features = ["serde"] }
|
kstring = { version = "2.0.0", features = ["serde"] }
|
||||||
typed-arena = "2.0.2"
|
typed-arena = "2.0.2"
|
||||||
maplit = "1.0"
|
maplit = "1.0"
|
||||||
|
@ -82,6 +81,7 @@ anstyle = "0.3.5"
|
||||||
anstream = "0.2.6"
|
anstream = "0.2.6"
|
||||||
serde_regex = "1.1.0"
|
serde_regex = "1.1.0"
|
||||||
regex = "1.7.3"
|
regex = "1.7.3"
|
||||||
|
encoding_rs = "0.8.32"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
assert_fs = "1.0"
|
assert_fs = "1.0"
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
use bstr::ByteSlice;
|
use bstr::ByteSlice;
|
||||||
use encoding::Encoding;
|
|
||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
|
|
||||||
|
@ -473,11 +472,23 @@ fn read_file(
|
||||||
(buffer, content_type)
|
(buffer, content_type)
|
||||||
},
|
},
|
||||||
content_inspector::ContentType::UTF_16LE => {
|
content_inspector::ContentType::UTF_16LE => {
|
||||||
let buffer = report_result(encoding::all::UTF_16LE.decode(&buffer, encoding::DecoderTrap::Strict), reporter)?;
|
let mut decoded = String::new();
|
||||||
|
let (r, _) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
|
||||||
|
let decoded = match r {
|
||||||
|
encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
|
||||||
|
_ => Err("invalid UTF-16LE encoding"),
|
||||||
|
};
|
||||||
|
let buffer = report_result(decoded, reporter)?;
|
||||||
(buffer.into_bytes(), content_type)
|
(buffer.into_bytes(), content_type)
|
||||||
}
|
}
|
||||||
content_inspector::ContentType::UTF_16BE => {
|
content_inspector::ContentType::UTF_16BE => {
|
||||||
let buffer = report_result(encoding::all::UTF_16BE.decode(&buffer, encoding::DecoderTrap::Strict), reporter)?;
|
let mut decoded = String::new();
|
||||||
|
let (r, _) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
|
||||||
|
let decoded = match r {
|
||||||
|
encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
|
||||||
|
_ => Err("invalid UTF-16BE encoding"),
|
||||||
|
};
|
||||||
|
let buffer = report_result(decoded, reporter)?;
|
||||||
(buffer.into_bytes(), content_type)
|
(buffer.into_bytes(), content_type)
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
@ -505,10 +516,12 @@ fn write_file(
|
||||||
// Error occurred, don't clear out the file
|
// Error occurred, don't clear out the file
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
report_result(
|
let (encoded, _, replaced) = encoding_rs::UTF_16LE.encode(&buffer);
|
||||||
encoding::all::UTF_16LE.encode(&buffer, encoding::EncoderTrap::Strict),
|
assert!(
|
||||||
reporter,
|
!replaced,
|
||||||
)?
|
"Coming from UTF-8, UTF-16LE shouldn't do replacements"
|
||||||
|
);
|
||||||
|
encoded.into_owned()
|
||||||
}
|
}
|
||||||
content_inspector::ContentType::UTF_16BE => {
|
content_inspector::ContentType::UTF_16BE => {
|
||||||
let buffer = report_result(String::from_utf8(buffer), reporter)?;
|
let buffer = report_result(String::from_utf8(buffer), reporter)?;
|
||||||
|
@ -516,10 +529,12 @@ fn write_file(
|
||||||
// Error occurred, don't clear out the file
|
// Error occurred, don't clear out the file
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
report_result(
|
let (encoded, _, replaced) = encoding_rs::UTF_16BE.encode(&buffer);
|
||||||
encoding::all::UTF_16BE.encode(&buffer, encoding::EncoderTrap::Strict),
|
assert!(
|
||||||
reporter,
|
!replaced,
|
||||||
)?
|
"Coming from UTF-8, UTF-16BE shouldn't do replacements"
|
||||||
|
);
|
||||||
|
encoded.into_owned()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue