2021-07-01 09:52:36 -04:00
|
|
|
/// Emit Rust source for a `pub static` `dictgen::DictTable` holding `data`.
///
/// The entries are collected and sorted by their `unicase::UniCase` key, which
/// is the order `DictTable::find` relies on for its binary search. The output
/// consists of the `keys` slice, the parallel `values` slice, and a `range`
/// giving the smallest and largest key length in bytes (`0..=0` when there are
/// no entries).
///
/// * `file` - destination the generated source is written to.
/// * `name` - identifier used for the generated `static`.
/// * `value_type` - type expression spliced in as the table's value type.
/// * `data` - `(key, value)` pairs; each value is written with its `Display`
///   form, so it must render as a valid Rust expression of `value_type`.
///
/// # Errors
///
/// Returns the first `std::io::Error` reported by `file`.
#[cfg(feature = "codegen")]
pub fn generate_table<'d, W: std::io::Write, V: std::fmt::Display>(
    file: &mut W,
    name: &str,
    value_type: &str,
    data: impl Iterator<Item = (&'d str, V)>,
) -> Result<(), std::io::Error> {
    let mut entries = data.collect::<Vec<_>>();
    entries.sort_unstable_by_key(|entry| unicase::UniCase::new(entry.0));

    // Byte-length bounds of the keys; an empty table collapses to `0..=0`.
    let smallest = entries.iter().map(|(key, _)| key.len()).min().unwrap_or(0);
    let largest = entries.iter().map(|(key, _)| key.len()).max().unwrap_or(0);

    writeln!(
        file,
        "pub static {name}: dictgen::DictTable<{value_type}> = dictgen::DictTable {{"
    )?;

    writeln!(file, " keys: &[")?;
    for (key, _) in &entries {
        // ASCII-only keys get the `Ascii` variant, everything else `Unicode`.
        let variant = if key.is_ascii() { "Ascii" } else { "Unicode" };
        let rendered = format!("dictgen::InsensitiveStr::{variant}({key:?})");
        writeln!(file, " {rendered},")?;
    }
    writeln!(file, " ],")?;

    writeln!(file, " values: &[")?;
    for (_, value) in &entries {
        writeln!(file, " {value},")?;
    }
    writeln!(file, " ],")?;

    writeln!(file, " range: {smallest}..={largest},")?;
    writeln!(file, "}};")?;

    Ok(())
}
|
|
|
|
|
|
|
|
pub struct DictTable<V: 'static> {
|
2021-07-01 11:39:27 -04:00
|
|
|
pub keys: &'static [InsensitiveStr<'static>],
|
2021-06-30 12:14:45 -04:00
|
|
|
pub values: &'static [V],
|
2021-07-01 09:52:36 -04:00
|
|
|
pub range: core::ops::RangeInclusive<usize>,
|
2021-06-30 11:46:22 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
impl<V> DictTable<V> {
|
|
|
|
pub fn find(&self, word: &'_ unicase::UniCase<&str>) -> Option<&'static V> {
|
|
|
|
if self.range.contains(&word.len()) {
|
2021-06-30 12:14:45 -04:00
|
|
|
self.keys
|
|
|
|
.binary_search_by_key(word, |key| key.convert())
|
|
|
|
.map(|i| &self.values[i])
|
2021-06-30 11:46:22 -04:00
|
|
|
.ok()
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-30 12:14:45 -04:00
|
|
|
pub fn iter(&self) -> impl Iterator<Item = (unicase::UniCase<&'static str>, &'static V)> + '_ {
|
|
|
|
(0..self.keys.len()).map(move |i| (self.keys[i].convert(), &self.values[i]))
|
2021-06-30 11:46:22 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-04-26 22:14:01 -04:00
|
|
|
/// `UniCase` look-alike that avoids const-fn so large tables don't OOM
///
/// The variant records which `unicase::UniCase` constructor `convert` should
/// use for the wrapped string.
#[derive(Clone, Copy)]
pub enum InsensitiveStr<'s> {
    /// String converted with `UniCase::unicode`.
    Unicode(&'s str),
    /// String converted with `UniCase::ascii`.
    Ascii(&'s str),
}
|
|
|
|
|
2021-07-01 11:39:27 -04:00
|
|
|
impl<'s> InsensitiveStr<'s> {
|
|
|
|
pub fn convert(self) -> unicase::UniCase<&'s str> {
|
2021-06-30 11:46:22 -04:00
|
|
|
match self {
|
|
|
|
InsensitiveStr::Unicode(s) => unicase::UniCase::unicode(s),
|
|
|
|
InsensitiveStr::Ascii(s) => unicase::UniCase::ascii(s),
|
|
|
|
}
|
|
|
|
}
|
2021-07-01 11:39:27 -04:00
|
|
|
|
|
|
|
pub fn into_inner(self) -> &'s str {
|
|
|
|
match self {
|
|
|
|
InsensitiveStr::Unicode(s) | InsensitiveStr::Ascii(s) => s,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'s> From<unicase::UniCase<&'s str>> for InsensitiveStr<'s> {
    /// Wrap the string held by `other`, choosing `Ascii` when
    /// `other.is_ascii()` reports true and `Unicode` otherwise.
    fn from(other: unicase::UniCase<&'s str>) -> Self {
        let ascii = other.is_ascii();
        let text = other.into_inner();
        if ascii {
            Self::Ascii(text)
        } else {
            Self::Unicode(text)
        }
    }
}
|
|
|
|
|
|
|
|
impl PartialEq<InsensitiveStr<'_>> for InsensitiveStr<'_> {
    /// Two values are equal when their `UniCase` conversions are equal.
    #[inline]
    fn eq(&self, other: &InsensitiveStr<'_>) -> bool {
        let lhs = self.convert();
        let rhs = other.convert();
        lhs == rhs
    }
}
|
|
|
|
|
|
|
|
// Marker impl: equality is delegated to the `UniCase` conversions in `PartialEq`.
impl Eq for InsensitiveStr<'_> {}
|
|
|
|
|
|
|
|
impl<'s> core::hash::Hash for InsensitiveStr<'s> {
|
|
|
|
#[inline]
|
|
|
|
fn hash<H: core::hash::Hasher>(&self, hasher: &mut H) {
|
2024-04-26 22:14:01 -04:00
|
|
|
self.convert().hash(hasher);
|
2021-07-01 11:39:27 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'s> core::fmt::Debug for InsensitiveStr<'s> {
|
|
|
|
#[inline]
|
2024-04-26 22:14:01 -04:00
|
|
|
fn fmt(&self, fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
2021-07-01 11:39:27 -04:00
|
|
|
core::fmt::Debug::fmt(self.into_inner(), fmt)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'s> core::fmt::Display for InsensitiveStr<'s> {
|
|
|
|
#[inline]
|
2024-04-26 22:14:01 -04:00
|
|
|
fn fmt(&self, fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
2021-07-01 11:39:27 -04:00
|
|
|
core::fmt::Display::fmt(self.into_inner(), fmt)
|
|
|
|
}
|
2021-06-30 11:46:22 -04:00
|
|
|
}
|