perf(dictgen): Move unicode check up in trie

This commit is contained in:
Ed Page 2024-12-30 11:27:34 -06:00
parent 083871447d
commit 016ae0c7c6

View file

@ -32,21 +32,31 @@ pub struct DictTrie<V: 'static> {
} }
impl<V> DictTrie<V> { impl<V> DictTrie<V> {
#[inline]
pub fn find(&self, word: &'_ unicase::UniCase<&str>) -> Option<&'static V> { pub fn find(&self, word: &'_ unicase::UniCase<&str>) -> Option<&'static V> {
if word.is_ascii() {
if self.range.contains(&word.len()) { if self.range.contains(&word.len()) {
let bytes = word.as_bytes(); self.find_ascii(word.as_bytes())
} else {
None
}
} else {
self.unicode.find(word)
}
}
fn find_ascii(&self, word: &[u8]) -> Option<&'static V> {
let mut child = &self.root; let mut child = &self.root;
for i in 0..bytes.len() { for i in 0..word.len() {
match child.children { match child.children {
DictTrieChild::Nested(n) => { DictTrieChild::Nested(n) => {
let byte = bytes[i]; let byte = word[i];
let index = if byte.is_ascii_lowercase() { let index = if byte.is_ascii_lowercase() {
byte - b'a' byte - b'a'
} else if byte.is_ascii_uppercase() { } else if byte.is_ascii_uppercase() {
byte - b'A' byte - b'A'
} else { } else {
return self.unicode.find(word); return None;
}; };
debug_assert!(index < 26); debug_assert!(index < 26);
if let Some(next) = n[index as usize].as_ref() { if let Some(next) = n[index as usize].as_ref() {
@ -56,24 +66,16 @@ impl<V> DictTrie<V> {
} }
} }
DictTrieChild::Flat(t) => { DictTrieChild::Flat(t) => {
let remaining = &bytes[i..bytes.len()]; let remaining = &word[i..word.len()];
// Unsafe: Everything before has been proven to be ASCII, so this should be // Unsafe: Everything before has been proven to be ASCII, so this should be
// safe. // safe.
let remaining = unsafe { core::str::from_utf8_unchecked(remaining) }; let remaining = unsafe { core::str::from_utf8_unchecked(remaining) };
// Reuse the prior ascii check, rather than doing it again let remaining = unicase::UniCase::ascii(remaining);
let remaining = if word.is_ascii() {
unicase::UniCase::ascii(remaining)
} else {
unicase::UniCase::unicode(remaining)
};
return t.find(&remaining); return t.find(&remaining);
} }
} }
} }
child.value.as_ref() child.value.as_ref()
} else {
None
}
} }
} }