mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-21 16:41:01 -05:00
feat: Check and replace UTF-16 files
We don't have good detection for non-UTF encodings, and we don't have encoding support for UTF-32, so this change is limited to just UTF-16. Fixes #17
This commit is contained in:
parent
1c392c2606
commit
998fad4390
4 changed files with 138 additions and 26 deletions
65
Cargo.lock
generated
65
Cargo.lock
generated
|
@ -351,6 +351,70 @@ version = "1.6.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
||||
|
||||
[[package]]
|
||||
name = "encoding"
|
||||
version = "0.2.33"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
|
||||
dependencies = [
|
||||
"encoding-index-japanese",
|
||||
"encoding-index-korean",
|
||||
"encoding-index-simpchinese",
|
||||
"encoding-index-singlebyte",
|
||||
"encoding-index-tradchinese",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-japanese"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-korean"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-simpchinese"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-singlebyte"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-tradchinese"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_index_tests"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
|
||||
|
||||
[[package]]
|
||||
name = "enumflags2"
|
||||
version = "0.6.4"
|
||||
|
@ -1177,6 +1241,7 @@ dependencies = [
|
|||
"derive_more 0.99.11",
|
||||
"derive_setters",
|
||||
"difflib",
|
||||
"encoding",
|
||||
"env_logger 0.8.2",
|
||||
"human-panic",
|
||||
"ignore",
|
||||
|
|
|
@ -56,6 +56,7 @@ derive_more = "0.99.11"
|
|||
derive_setters = "0.1"
|
||||
itertools = "0.9"
|
||||
serde_json = "1.0"
|
||||
encoding = "0.2"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_fs = "1.0"
|
||||
|
|
|
@ -44,7 +44,7 @@ See also [benchmarks](../benchsuite/runs).
|
|||
| snake_case | Yes | No | ? | No | Yes |
|
||||
| Ignore Hex | Yes | No | ? | No | Yes |
|
||||
| C-Escapes | No ([#20][def-3]) | No | ? | No | Yes |
|
||||
| Encodings | UTF-8 ([#17][def-17]) | UTF-8 | ? | Auto | Auto |
|
||||
| Encodings | UTF-8 / UTF-16 | UTF-8 | ? | Auto | Auto |
|
||||
| Whole-project | Yes | Yes | Yes | Yes | No |
|
||||
| Ignores hidden | Yes | Yes | ? | Yes | No |
|
||||
| Respect gitignore | Yes | Yes | ? | No | No |
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use bstr::ByteSlice;
|
||||
use encoding::Encoding;
|
||||
|
||||
use crate::report;
|
||||
use typos::tokens;
|
||||
|
@ -208,7 +209,7 @@ impl FileChecker for FixTypos {
|
|||
}
|
||||
if !fixes.is_empty() {
|
||||
let buffer = fix_buffer(buffer, fixes.into_iter());
|
||||
write_file(path, content_type, &buffer, reporter)?;
|
||||
write_file(path, content_type, buffer, reporter)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -503,22 +504,30 @@ pub fn read_file(
|
|||
path: &std::path::Path,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(Vec<u8>, content_inspector::ContentType), std::io::Error> {
|
||||
let buffer = match std::fs::read(path) {
|
||||
Ok(buffer) => buffer,
|
||||
Err(err) => {
|
||||
let msg = report::Error::new(err.to_string());
|
||||
reporter.report(msg.into())?;
|
||||
Vec::new()
|
||||
}
|
||||
};
|
||||
let buffer = report_error(std::fs::read(path), reporter)?;
|
||||
|
||||
let mut content_type = content_inspector::inspect(&buffer);
|
||||
// HACK: We only support UTF-8 at the moment
|
||||
if content_type != content_inspector::ContentType::UTF_8_BOM
|
||||
&& content_type != content_inspector::ContentType::UTF_8
|
||||
{
|
||||
content_type = content_inspector::ContentType::BINARY;
|
||||
}
|
||||
let content_type = content_inspector::inspect(&buffer);
|
||||
|
||||
let (buffer, content_type) = match content_type {
|
||||
content_inspector::ContentType::BINARY |
|
||||
// HACK: We don't support UTF-32 yet
|
||||
content_inspector::ContentType::UTF_32LE |
|
||||
content_inspector::ContentType::UTF_32BE => {
|
||||
(buffer, content_inspector::ContentType::BINARY)
|
||||
},
|
||||
content_inspector::ContentType::UTF_8 |
|
||||
content_inspector::ContentType::UTF_8_BOM => {
|
||||
(buffer, content_type)
|
||||
},
|
||||
content_inspector::ContentType::UTF_16LE => {
|
||||
let buffer = report_error(encoding::all::UTF_16LE.decode(&buffer, encoding::DecoderTrap::Strict), reporter)?;
|
||||
(buffer.into_bytes(), content_type)
|
||||
}
|
||||
content_inspector::ContentType::UTF_16BE => {
|
||||
let buffer = report_error(encoding::all::UTF_16BE.decode(&buffer, encoding::DecoderTrap::Strict), reporter)?;
|
||||
(buffer.into_bytes(), content_type)
|
||||
},
|
||||
};
|
||||
|
||||
Ok((buffer, content_type))
|
||||
}
|
||||
|
@ -526,22 +535,59 @@ pub fn read_file(
|
|||
pub fn write_file(
|
||||
path: &std::path::Path,
|
||||
content_type: content_inspector::ContentType,
|
||||
buffer: &[u8],
|
||||
buffer: Vec<u8>,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<(), std::io::Error> {
|
||||
assert!(
|
||||
content_type == content_inspector::ContentType::UTF_8_BOM
|
||||
|| content_type == content_inspector::ContentType::UTF_8
|
||||
|| content_type == content_inspector::ContentType::BINARY
|
||||
);
|
||||
match std::fs::write(path, buffer) {
|
||||
Ok(()) => (),
|
||||
let buffer = match content_type {
|
||||
// HACK: We don't support UTF-32 yet
|
||||
content_inspector::ContentType::UTF_32LE | content_inspector::ContentType::UTF_32BE => {
|
||||
unreachable!("read_file should prevent these from being passed along");
|
||||
}
|
||||
content_inspector::ContentType::BINARY
|
||||
| content_inspector::ContentType::UTF_8
|
||||
| content_inspector::ContentType::UTF_8_BOM => buffer,
|
||||
content_inspector::ContentType::UTF_16LE => {
|
||||
let buffer = report_error(String::from_utf8(buffer), reporter)?;
|
||||
if buffer.is_empty() {
|
||||
// Error occurred, don't clear out the file
|
||||
return Ok(());
|
||||
}
|
||||
report_error(
|
||||
encoding::all::UTF_16LE.encode(&buffer, encoding::EncoderTrap::Strict),
|
||||
reporter,
|
||||
)?
|
||||
}
|
||||
content_inspector::ContentType::UTF_16BE => {
|
||||
let buffer = report_error(String::from_utf8(buffer), reporter)?;
|
||||
if buffer.is_empty() {
|
||||
// Error occurred, don't clear out the file
|
||||
return Ok(());
|
||||
}
|
||||
report_error(
|
||||
encoding::all::UTF_16BE.encode(&buffer, encoding::EncoderTrap::Strict),
|
||||
reporter,
|
||||
)?
|
||||
}
|
||||
};
|
||||
|
||||
report_error(std::fs::write(path, buffer), reporter)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn report_error<T: Default, E: ToString>(
|
||||
value: Result<T, E>,
|
||||
reporter: &dyn report::Report,
|
||||
) -> Result<T, std::io::Error> {
|
||||
let buffer = match value {
|
||||
Ok(value) => value,
|
||||
Err(err) => {
|
||||
let msg = report::Error::new(err.to_string());
|
||||
reporter.report(msg.into())?;
|
||||
Default::default()
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
struct AccumulateLineNum {
|
||||
|
|
Loading…
Reference in a new issue