perf(parser): Speed up UTF-8 validation

This commit is contained in:
Ed Page 2021-04-26 18:08:52 -05:00
parent 819702c82f
commit 6b92e345cc
3 changed files with 9 additions and 1 deletion

7
Cargo.lock generated
View file

@@ -1318,6 +1318,12 @@ dependencies = [
"serde",
]
[[package]]
name = "simdutf8"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f4f3d445e9015cf5e72cec4a3b3a84f8d54f34207afee609fd152de1c0212b1"
[[package]]
name = "siphasher"
version = "0.3.3"
@@ -1496,6 +1502,7 @@ dependencies = [
"once_cell",
"regex",
"serde",
"simdutf8",
"thiserror",
"unicode-segmentation",
]

View file

@@ -20,6 +20,7 @@ thiserror = "1.0"
regex = "1.3"
once_cell = "1.2.0"
serde = { version = "1.0", features = ["derive"] }
simdutf8 = "0.1.1"
itertools = "0.10"
log = "0.4"
unicode-segmentation = "1.7.1"

View file

@@ -158,7 +158,7 @@ impl<'s> Iterator for Utf8Chunks<'s> {
if self.source.is_empty() {
return None;
}
-match std::str::from_utf8(self.source) {
+match simdutf8::compat::from_utf8(self.source) {
Ok(valid) => {
self.source = b"";
return Some(valid);