mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-21 16:41:01 -05:00
perf(parser): Speed up UTF-8 validation
This commit is contained in:
parent
819702c82f
commit
6b92e345cc
3 changed files with 9 additions and 1 deletion
7
Cargo.lock
generated
7
Cargo.lock
generated
|
@@ -1318,6 +1318,12 @@ dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "simdutf8"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7f4f3d445e9015cf5e72cec4a3b3a84f8d54f34207afee609fd152de1c0212b1"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "siphasher"
|
name = "siphasher"
|
||||||
version = "0.3.3"
|
version = "0.3.3"
|
||||||
|
@@ -1496,6 +1502,7 @@ dependencies = [
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"regex",
|
"regex",
|
||||||
"serde",
|
"serde",
|
||||||
|
"simdutf8",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"unicode-segmentation",
|
"unicode-segmentation",
|
||||||
]
|
]
|
||||||
|
|
|
@@ -20,6 +20,7 @@ thiserror = "1.0"
|
||||||
regex = "1.3"
|
regex = "1.3"
|
||||||
once_cell = "1.2.0"
|
once_cell = "1.2.0"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
simdutf8 = "0.1.1"
|
||||||
itertools = "0.10"
|
itertools = "0.10"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
unicode-segmentation = "1.7.1"
|
unicode-segmentation = "1.7.1"
|
||||||
|
|
|
@@ -158,7 +158,7 @@ impl<'s> Iterator for Utf8Chunks<'s> {
|
||||||
if self.source.is_empty() {
|
if self.source.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
match std::str::from_utf8(self.source) {
|
match simdutf8::compat::from_utf8(self.source) {
|
||||||
Ok(valid) => {
|
Ok(valid) => {
|
||||||
self.source = b"";
|
self.source = b"";
|
||||||
return Some(valid);
|
return Some(valid);
|
||||||
|
|
Loading…
Reference in a new issue