mirror of
https://github.com/crate-ci/typos.git
synced 2025-01-11 01:01:36 -05:00
perf(dictgen): Remove ascii/unicode branch from trie
This commit is contained in:
parent
e7ff9cfc01
commit
534e3c5f71
7 changed files with 160323 additions and 159956 deletions
|
@ -40,7 +40,11 @@ impl<'g> DictGen<'g> {
|
|||
}
|
||||
|
||||
pub fn ordered_map(self) -> crate::OrderedMapGen<'g> {
|
||||
crate::OrderedMapGen { gen: self }
|
||||
crate::OrderedMapGen {
|
||||
gen: self,
|
||||
unicode: true,
|
||||
unicase: true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn trie(self) -> crate::TrieGen<'g> {
|
||||
|
|
|
@ -113,3 +113,97 @@ impl<'b, 'a: 'b> phf_shared::PhfBorrow<InsensitiveStr<'b>> for InsensitiveStr<'a
|
|||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// `UniCase` look-alike that avoids const-fn so large tables don't OOM
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct InsensitiveAscii<'s>(pub &'s str);
|
||||
|
||||
impl<'s> InsensitiveAscii<'s> {
|
||||
pub fn convert(self) -> unicase::Ascii<&'s str> {
|
||||
unicase::Ascii::new(self.0)
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> &'s str {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn is_empty(self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
pub fn len(self) -> usize {
|
||||
self.0.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> From<unicase::Ascii<&'s str>> for InsensitiveAscii<'s> {
|
||||
fn from(other: unicase::Ascii<&'s str>) -> Self {
|
||||
Self(other.into_inner())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s2> PartialEq<InsensitiveAscii<'s2>> for InsensitiveAscii<'_> {
|
||||
#[inline]
|
||||
fn eq(&self, other: &InsensitiveAscii<'s2>) -> bool {
|
||||
self.convert() == other.convert()
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for InsensitiveAscii<'_> {}
|
||||
|
||||
impl PartialOrd for InsensitiveAscii<'_> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for InsensitiveAscii<'_> {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
self.convert().cmp(&other.convert())
|
||||
}
|
||||
}
|
||||
|
||||
impl core::hash::Hash for InsensitiveAscii<'_> {
|
||||
#[inline]
|
||||
fn hash<H: core::hash::Hasher>(&self, hasher: &mut H) {
|
||||
self.convert().hash(hasher);
|
||||
}
|
||||
}
|
||||
|
||||
impl core::fmt::Debug for InsensitiveAscii<'_> {
|
||||
#[inline]
|
||||
fn fmt(&self, fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
core::fmt::Debug::fmt(self.into_inner(), fmt)
|
||||
}
|
||||
}
|
||||
|
||||
impl core::fmt::Display for InsensitiveAscii<'_> {
|
||||
#[inline]
|
||||
fn fmt(&self, fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
core::fmt::Display::fmt(self.into_inner(), fmt)
|
||||
}
|
||||
}
|
||||
|
||||
/// The `phf` hash reuses this type's `core::hash::Hash` implementation.
#[cfg(feature = "map")]
impl phf_shared::PhfHash for InsensitiveAscii<'_> {
    #[inline]
    fn phf_hash<H: core::hash::Hasher>(&self, state: &mut H) {
        <Self as core::hash::Hash>::hash(self, state);
    }
}
|
||||
|
||||
/// Emits the key as source text of the form `dictgen::InsensitiveAscii(<str>)`,
/// where `<str>` is whatever the wrapped string's own `fmt_const` writes.
#[cfg(feature = "map")]
impl phf_shared::FmtConst for InsensitiveAscii<'_> {
    fn fmt_const(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let text = self.into_inner();
        f.write_str("dictgen::InsensitiveAscii(")?;
        text.fmt_const(f)?;
        f.write_str(")")?;
        Ok(())
    }
}
|
||||
|
||||
/// Lets a key with a longer lifetime `'a` be borrowed as a key with a shorter
/// lifetime `'b`; the reference is returned unchanged.
#[cfg(feature = "map")]
impl<'b, 'a: 'b> phf_shared::PhfBorrow<InsensitiveAscii<'b>> for InsensitiveAscii<'a> {
    fn borrow(&self) -> &InsensitiveAscii<'b> {
        let shortened: &InsensitiveAscii<'b> = self;
        shortened
    }
}
|
||||
|
|
|
@ -1,10 +1,22 @@
|
|||
/// Writer configuration for an ordered map: the shared generator settings plus
/// two flags that together select the key representation (see `key_type` and
/// `key_new` in the `impl` block).
#[cfg(feature = "codegen")]
|
||||
pub struct OrderedMapGen<'g> {
|
||||
/// Generator settings; `write` reads `name` and `value_type` from it.
pub(crate) gen: crate::DictGen<'g>,
|
||||
/// When `false`, keys are emitted as plain `&'static str`; when `true`, one
/// of the `dictgen::Insensitive*` key types is used.
pub(crate) unicase: bool,
|
||||
/// Only consulted when `unicase` is `true`: picks `dictgen::InsensitiveStr`
/// (`true`) or `dictgen::InsensitiveAscii` (`false`).
pub(crate) unicode: bool,
|
||||
}
|
||||
|
||||
#[cfg(feature = "codegen")]
|
||||
impl OrderedMapGen<'_> {
|
||||
/// Sets the `unicase` flag and returns the updated generator.
///
/// With `yes == false`, `key_type`/`key_new` emit plain `&'static str` keys.
/// With `yes == true`, they emit `dictgen::InsensitiveStr` or
/// `dictgen::InsensitiveAscii`, depending on the `unicode` flag.
///
/// The generator is taken and returned by value, so the result must be used;
/// dropping it discards the setting.
#[must_use]
pub fn unicase(mut self, yes: bool) -> Self {
    self.unicase = yes;
    self
}
|
||||
|
||||
/// Sets the `unicode` flag and returns the updated generator.
///
/// The flag only matters while `unicase` is `true`: with `yes == true`,
/// `key_type`/`key_new` emit `dictgen::InsensitiveStr` keys; with
/// `yes == false`, they emit `dictgen::InsensitiveAscii` keys.
///
/// The generator is taken and returned by value, so the result must be used;
/// dropping it discards the setting.
#[must_use]
pub fn unicode(mut self, yes: bool) -> Self {
    self.unicode = yes;
    self
}
|
||||
|
||||
pub fn write<'d, W: std::io::Write, V: std::fmt::Display>(
|
||||
&self,
|
||||
file: &mut W,
|
||||
|
@ -14,7 +26,7 @@ impl OrderedMapGen<'_> {
|
|||
data.sort_unstable_by_key(|v| unicase::UniCase::new(v.0));
|
||||
|
||||
let name = self.gen.name;
|
||||
let key_type = "dictgen::InsensitiveStr<'static>";
|
||||
let key_type = self.key_type();
|
||||
let value_type = self.gen.value_type;
|
||||
|
||||
let mut smallest = usize::MAX;
|
||||
|
@ -29,11 +41,7 @@ impl OrderedMapGen<'_> {
|
|||
smallest = std::cmp::min(smallest, key.len());
|
||||
largest = std::cmp::max(largest, key.len());
|
||||
|
||||
let key = if key.is_ascii() {
|
||||
format!("dictgen::InsensitiveStr::Ascii({key:?})")
|
||||
} else {
|
||||
format!("dictgen::InsensitiveStr::Unicode({key:?})")
|
||||
};
|
||||
let key = self.key_new(key);
|
||||
|
||||
writeln!(file, " {key},")?;
|
||||
}
|
||||
|
@ -51,6 +59,28 @@ impl OrderedMapGen<'_> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn key_type(&self) -> &'static str {
|
||||
match (self.unicase, self.unicode) {
|
||||
(true, true) => "dictgen::InsensitiveStr<'static>",
|
||||
(true, false) => "dictgen::InsensitiveAscii<'static>",
|
||||
(false, _) => "&'static str",
|
||||
}
|
||||
}
|
||||
|
||||
fn key_new(&self, key: &str) -> String {
|
||||
match (self.unicase, self.unicode) {
|
||||
(true, true) => {
|
||||
if key.is_ascii() {
|
||||
format!("dictgen::InsensitiveStr::Ascii({key:?})")
|
||||
} else {
|
||||
format!("dictgen::InsensitiveStr::Unicode({key:?})")
|
||||
}
|
||||
}
|
||||
(true, false) => format!("dictgen::InsensitiveAscii({key:?})"),
|
||||
(false, _) => format!("{key:?}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct OrderedMap<K: 'static, V: 'static> {
|
||||
|
@ -73,6 +103,20 @@ impl<V> OrderedMap<crate::InsensitiveStr<'_>, V> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<V> OrderedMap<crate::InsensitiveAscii<'_>, V> {
|
||||
#[inline]
|
||||
pub fn find(&self, word: &'_ unicase::Ascii<&str>) -> Option<&'static V> {
|
||||
if self.range.contains(&word.len()) {
|
||||
self.keys
|
||||
.binary_search_by_key(word, |key| key.convert())
|
||||
.map(|i| &self.values[i])
|
||||
.ok()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<V> OrderedMap<&str, V> {
|
||||
#[inline]
|
||||
pub fn find(&self, word: &'_ &str) -> Option<&'static V> {
|
||||
|
|
|
@ -75,7 +75,7 @@ impl<V> Trie<V> {
|
|||
// Unsafe: Everything before has been proven to be ASCII, so this should be
|
||||
// safe.
|
||||
let remaining = unsafe { core::str::from_utf8_unchecked(remaining) };
|
||||
let remaining = unicase::UniCase::ascii(remaining);
|
||||
let remaining = unicase::Ascii::new(remaining);
|
||||
return t.find(&remaining);
|
||||
}
|
||||
}
|
||||
|
@ -91,7 +91,7 @@ pub struct TrieNode<V: 'static> {
|
|||
|
||||
pub enum TrieChild<V: 'static> {
|
||||
Nested(&'static [Option<&'static TrieNode<V>>; 26]),
|
||||
Flat(&'static crate::OrderedMap<crate::InsensitiveStr<'static>, V>),
|
||||
Flat(&'static crate::OrderedMap<crate::InsensitiveAscii<'static>, V>),
|
||||
}
|
||||
|
||||
#[cfg(feature = "codegen")]
|
||||
|
@ -179,6 +179,7 @@ mod codegen {
|
|||
.name(&children_name)
|
||||
.value_type(value_type)
|
||||
.ordered_map()
|
||||
.unicode(false)
|
||||
.write(file, table_input)?;
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue