Merge pull request #1198 from epage/generic

perf(dict)!: Switch to PHF Map
This commit is contained in:
Ed Page 2024-12-31 06:56:23 -06:00 committed by GitHub
commit 44cf2f8cf6
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: B5690EEEBB952194
20 changed files with 909056 additions and 704967 deletions

1
Cargo.lock generated
View file

@ -1509,6 +1509,7 @@ dependencies = [
"dictgen", "dictgen",
"divan", "divan",
"edit-distance", "edit-distance",
"heck",
"indexmap", "indexmap",
"itertools 0.13.0", "itertools 0.13.0",
"phf", "phf",

View file

@ -1,6 +1,7 @@
// This file is @generated crates/codespell-dict/tests/codegen.rs // This file is @generated crates/codespell-dict/tests/codegen.rs
pub static WORD_DICTIONARY: dictgen::OrderedMap<&[&str]> = dictgen::OrderedMap { pub static WORD_DICTIONARY: dictgen::OrderedMap<dictgen::InsensitiveStr<'static>, &[&str]> =
dictgen::OrderedMap {
keys: &[ keys: &[
dictgen::InsensitiveStr::Ascii("1nd"), dictgen::InsensitiveStr::Ascii("1nd"),
dictgen::InsensitiveStr::Ascii("2rd"), dictgen::InsensitiveStr::Ascii("2rd"),
@ -84256,4 +84257,4 @@ pub static WORD_DICTIONARY: dictgen::OrderedMap<&[&str]> = dictgen::OrderedMap {
&["contains"], &["contains"],
], ],
range: 2..=34, range: 2..=34,
}; };

View file

@ -36,11 +36,19 @@ impl<'g> DictGen<'g> {
#[cfg(feature = "map")] #[cfg(feature = "map")]
pub fn map(self) -> crate::MapGen<'g> { pub fn map(self) -> crate::MapGen<'g> {
crate::MapGen { gen: self } crate::MapGen {
gen: self,
unicode: true,
unicase: true,
}
} }
pub fn ordered_map(self) -> crate::OrderedMapGen<'g> { pub fn ordered_map(self) -> crate::OrderedMapGen<'g> {
crate::OrderedMapGen { gen: self } crate::OrderedMapGen {
gen: self,
unicode: true,
unicase: true,
}
} }
pub fn trie(self) -> crate::TrieGen<'g> { pub fn trie(self) -> crate::TrieGen<'g> {
@ -49,6 +57,10 @@ impl<'g> DictGen<'g> {
limit: 64, limit: 64,
} }
} }
/// Build a generator for a case-sensitive, `match`-based lookup table.
pub fn r#match(self) -> crate::MatchGen<'g> {
crate::MatchGen { gen: self }
}
} }
impl Default for DictGen<'static> { impl Default for DictGen<'static> {

View file

@ -51,6 +51,18 @@ impl<'s2> PartialEq<InsensitiveStr<'s2>> for InsensitiveStr<'_> {
impl Eq for InsensitiveStr<'_> {} impl Eq for InsensitiveStr<'_> {}
/// Ordering is case-insensitive: both sides are compared through the
/// folded form produced by `convert()`.
impl Ord for InsensitiveStr<'_> {
    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
        let lhs = self.convert();
        lhs.cmp(&rhs.convert())
    }
}

/// A total order exists, so `partial_cmp` simply defers to [`Ord::cmp`].
impl PartialOrd for InsensitiveStr<'_> {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(rhs))
    }
}
impl core::hash::Hash for InsensitiveStr<'_> { impl core::hash::Hash for InsensitiveStr<'_> {
#[inline] #[inline]
fn hash<H: core::hash::Hasher>(&self, hasher: &mut H) { fn hash<H: core::hash::Hasher>(&self, hasher: &mut H) {
@ -101,3 +113,97 @@ impl<'b, 'a: 'b> phf_shared::PhfBorrow<InsensitiveStr<'b>> for InsensitiveStr<'a
self self
} }
} }
/// `UniCase` look-alike that avoids const-fn so large tables don't OOM
#[derive(Copy, Clone)]
pub struct InsensitiveAscii<'s>(pub &'s str);

impl<'s> InsensitiveAscii<'s> {
    /// View the string through `unicase::Ascii` for case-insensitive operations.
    pub fn convert(self) -> unicase::Ascii<&'s str> {
        unicase::Ascii::new(self.0)
    }

    /// Recover the raw borrowed string.
    pub fn into_inner(self) -> &'s str {
        self.0
    }

    pub fn is_empty(self) -> bool {
        self.0.is_empty()
    }

    /// Byte length of the wrapped string.
    pub fn len(self) -> usize {
        self.0.len()
    }
}

impl<'s> From<unicase::Ascii<&'s str>> for InsensitiveAscii<'s> {
    fn from(other: unicase::Ascii<&'s str>) -> Self {
        Self(other.into_inner())
    }
}

// Equality, ordering, and hashing all go through `convert()` so they agree
// with `unicase::Ascii` semantics (ASCII case folded).
impl<'s2> PartialEq<InsensitiveAscii<'s2>> for InsensitiveAscii<'_> {
    #[inline]
    fn eq(&self, rhs: &InsensitiveAscii<'s2>) -> bool {
        self.convert().eq(&rhs.convert())
    }
}

impl Eq for InsensitiveAscii<'_> {}

impl Ord for InsensitiveAscii<'_> {
    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
        let lhs = self.convert();
        lhs.cmp(&rhs.convert())
    }
}

impl PartialOrd for InsensitiveAscii<'_> {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(rhs))
    }
}

impl core::hash::Hash for InsensitiveAscii<'_> {
    #[inline]
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        self.convert().hash(state);
    }
}

impl core::fmt::Debug for InsensitiveAscii<'_> {
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        core::fmt::Debug::fmt(&self.0, f)
    }
}

impl core::fmt::Display for InsensitiveAscii<'_> {
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        core::fmt::Display::fmt(&self.0, f)
    }
}

#[cfg(feature = "map")]
impl phf_shared::PhfHash for InsensitiveAscii<'_> {
    #[inline]
    fn phf_hash<H: core::hash::Hasher>(&self, state: &mut H) {
        core::hash::Hash::hash(self, state);
    }
}

#[cfg(feature = "map")]
impl phf_shared::FmtConst for InsensitiveAscii<'_> {
    /// Emit the Rust expression that reconstructs this key in generated code.
    fn fmt_const(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("dictgen::InsensitiveAscii(")?;
        self.0.fmt_const(f)?;
        f.write_str(")")
    }
}

#[cfg(feature = "map")]
impl<'b, 'a: 'b> phf_shared::PhfBorrow<InsensitiveAscii<'b>> for InsensitiveAscii<'a> {
    // Identity borrow: the shorter-lifetime view is the same value.
    fn borrow(&self) -> &InsensitiveAscii<'b> {
        self
    }
}

View file

@ -7,6 +7,8 @@ mod gen;
mod insensitive; mod insensitive;
#[cfg(feature = "map")] #[cfg(feature = "map")]
mod map; mod map;
#[cfg(feature = "codegen")]
mod r#match;
mod ordered_map; mod ordered_map;
mod trie; mod trie;
@ -16,4 +18,6 @@ pub use insensitive::*;
#[cfg(feature = "map")] #[cfg(feature = "map")]
pub use map::*; pub use map::*;
pub use ordered_map::*; pub use ordered_map::*;
#[cfg(feature = "codegen")]
pub use r#match::*;
pub use trie::*; pub use trie::*;

View file

@ -1,27 +1,57 @@
#[cfg(feature = "codegen")] #[cfg(feature = "codegen")]
pub struct MapGen<'g> { pub struct MapGen<'g> {
pub(crate) gen: crate::DictGen<'g>, pub(crate) gen: crate::DictGen<'g>,
pub(crate) unicase: bool,
pub(crate) unicode: bool,
} }
#[cfg(feature = "codegen")] #[cfg(feature = "codegen")]
impl MapGen<'_> { impl MapGen<'_> {
pub fn write<'d, W: std::io::Write, V: std::fmt::Display>( pub fn unicase(mut self, yes: bool) -> Self {
self.unicase = yes;
self
}
pub fn unicode(mut self, yes: bool) -> Self {
self.unicode = yes;
self
}
pub fn write<W: std::io::Write, V: std::fmt::Display>(
&self, &self,
file: &mut W, file: &mut W,
data: impl Iterator<Item = (&'d str, V)>, data: impl Iterator<Item = (impl AsRef<str>, V)>,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
let mut data: Vec<_> = data.collect(); let mut data: Vec<_> = data.collect();
data.sort_unstable_by_key(|v| unicase::UniCase::new(v.0)); data.sort_unstable_by_key(|v| unicase::UniCase::new(v.0.as_ref().to_owned()));
let name = self.gen.name; let name = self.gen.name;
let key_type = self.key_type();
let value_type = self.gen.value_type; let value_type = self.gen.value_type;
let mut smallest = usize::MAX; let mut smallest = usize::MAX;
let mut largest = usize::MIN; let mut largest = usize::MIN;
for (key, _) in data.iter() {
let key = key.as_ref();
smallest = std::cmp::min(smallest, key.len());
largest = std::cmp::max(largest, key.len());
}
if largest == 0 {
smallest = 0;
}
writeln!(
file,
"pub static {name}: dictgen::Map<{key_type}, {value_type}> = dictgen::Map {{"
)?;
match (self.unicase, self.unicode) {
(true, true) => {
let mut builder = phf_codegen::Map::new(); let mut builder = phf_codegen::Map::new();
let data = data let data = data
.iter() .iter()
.map(|(key, value)| { .map(|(key, value)| {
let key = key.as_ref();
( (
if key.is_ascii() { if key.is_ascii() {
crate::InsensitiveStr::Ascii(key) crate::InsensitiveStr::Ascii(key)
@ -33,33 +63,58 @@ impl MapGen<'_> {
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
for (key, value) in data.iter() { for (key, value) in data.iter() {
smallest = std::cmp::min(smallest, key.len());
largest = std::cmp::max(largest, key.len());
builder.entry(key, value.as_str()); builder.entry(key, value.as_str());
} }
let builder = builder.build(); let builder = builder.build();
if largest == 0 { writeln!(file, " map: {builder},")?;
smallest = 0; }
(true, false) => {
let mut builder = phf_codegen::Map::new();
let data = data
.iter()
.map(|(key, value)| (crate::InsensitiveAscii(key.as_ref()), value.to_string()))
.collect::<Vec<_>>();
for (key, value) in data.iter() {
builder.entry(key, value.as_str());
}
let builder = builder.build();
writeln!(file, " map: {builder},")?;
}
(false, _) => {
let mut builder = phf_codegen::Map::new();
let data = data
.iter()
.map(|(key, value)| (key, value.to_string()))
.collect::<Vec<_>>();
for (key, value) in data.iter() {
builder.entry(key.as_ref(), value.as_str());
}
let builder = builder.build();
writeln!(file, " map: {builder},")?;
}
} }
writeln!(
file,
"pub static {name}: dictgen::Map<{value_type}> = dictgen::Map {{"
)?;
writeln!(file, " map: {builder},")?;
writeln!(file, " range: {smallest}..={largest},")?; writeln!(file, " range: {smallest}..={largest},")?;
writeln!(file, "}};")?; writeln!(file, "}};")?;
Ok(()) Ok(())
} }
/// Rust type of the generated map's key, derived from the configured
/// case-sensitivity and unicode handling.
fn key_type(&self) -> &'static str {
    if !self.unicase {
        "&'static str"
    } else if self.unicode {
        "dictgen::InsensitiveStr<'static>"
    } else {
        "dictgen::InsensitiveAscii<'static>"
    }
}
} }
pub struct Map<V: 'static> { pub struct Map<K: 'static, V: 'static> {
pub map: phf::Map<crate::InsensitiveStr<'static>, V>, pub map: phf::Map<K, V>,
pub range: std::ops::RangeInclusive<usize>, pub range: std::ops::RangeInclusive<usize>,
} }
impl<V> Map<V> { impl<V> Map<crate::InsensitiveStr<'_>, V> {
#[inline] #[inline]
pub fn find(&self, word: &'_ unicase::UniCase<&str>) -> Option<&V> { pub fn find(&self, word: &'_ unicase::UniCase<&str>) -> Option<&V> {
if self.range.contains(&word.len()) { if self.range.contains(&word.len()) {
@ -69,3 +124,25 @@ impl<V> Map<V> {
} }
} }
} }
/// Lookup for ASCII-case-insensitive keyed maps.
impl<V> Map<crate::InsensitiveAscii<'_>, V> {
    #[inline]
    pub fn find(&self, word: &'_ unicase::Ascii<&str>) -> Option<&V> {
        // Length pre-filter: anything outside the stored key-length range
        // cannot be present, so skip hashing entirely.
        if !self.range.contains(&word.len()) {
            return None;
        }
        self.map.get(&(*word).into())
    }
}

/// Lookup for case-sensitive, plain-`&str` keyed maps.
impl<V> Map<&str, V> {
    #[inline]
    pub fn find(&self, word: &'_ &str) -> Option<&V> {
        if !self.range.contains(&word.len()) {
            return None;
        }
        self.map.get(word)
    }
}

View file

@ -0,0 +1,37 @@
#[cfg(feature = "codegen")]
pub struct MatchGen<'g> {
pub(crate) gen: crate::DictGen<'g>,
}
#[cfg(feature = "codegen")]
impl MatchGen<'_> {
pub fn write<W: std::io::Write, V: std::fmt::Display>(
&self,
file: &mut W,
data: impl Iterator<Item = (impl AsRef<str>, V)>,
) -> Result<(), std::io::Error> {
let mut data: Vec<_> = data.collect();
data.sort_unstable_by_key(|v| unicase::UniCase::new(v.0.as_ref().to_owned()));
let name = self.gen.name;
let value_type = self.gen.value_type;
writeln!(file, "pub struct {name};")?;
writeln!(file, "impl {name} {{")?;
writeln!(
file,
" pub fn find(&self, word: &&str) -> Option<&'static {value_type}> {{"
)?;
writeln!(file, " match *word {{")?;
for (key, value) in data.iter() {
let key = key.as_ref();
writeln!(file, " {key:?} => Some(&{value}.as_slice()),")?;
}
writeln!(file, " _ => None,")?;
writeln!(file, " }}")?;
writeln!(file, " }}")?;
writeln!(file, "}}")?;
Ok(())
}
}

View file

@ -1,19 +1,32 @@
#[cfg(feature = "codegen")] #[cfg(feature = "codegen")]
pub struct OrderedMapGen<'g> { pub struct OrderedMapGen<'g> {
pub(crate) gen: crate::DictGen<'g>, pub(crate) gen: crate::DictGen<'g>,
pub(crate) unicase: bool,
pub(crate) unicode: bool,
} }
#[cfg(feature = "codegen")] #[cfg(feature = "codegen")]
impl OrderedMapGen<'_> { impl OrderedMapGen<'_> {
pub fn write<'d, W: std::io::Write, V: std::fmt::Display>( pub fn unicase(mut self, yes: bool) -> Self {
self.unicase = yes;
self
}
pub fn unicode(mut self, yes: bool) -> Self {
self.unicode = yes;
self
}
pub fn write<W: std::io::Write, V: std::fmt::Display>(
&self, &self,
file: &mut W, file: &mut W,
data: impl Iterator<Item = (&'d str, V)>, data: impl Iterator<Item = (impl AsRef<str>, V)>,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
let mut data: Vec<_> = data.collect(); let mut data: Vec<_> = data.collect();
data.sort_unstable_by_key(|v| unicase::UniCase::new(v.0)); data.sort_unstable_by_key(|v| unicase::UniCase::new(v.0.as_ref().to_owned()));
let name = self.gen.name; let name = self.gen.name;
let key_type = self.key_type();
let value_type = self.gen.value_type; let value_type = self.gen.value_type;
let mut smallest = usize::MAX; let mut smallest = usize::MAX;
@ -21,18 +34,15 @@ impl OrderedMapGen<'_> {
writeln!( writeln!(
file, file,
"pub static {name}: dictgen::OrderedMap<{value_type}> = dictgen::OrderedMap {{" "pub static {name}: dictgen::OrderedMap<{key_type}, {value_type}> = dictgen::OrderedMap {{"
)?; )?;
writeln!(file, " keys: &[")?; writeln!(file, " keys: &[")?;
for (key, _value) in data.iter() { for (key, _value) in data.iter() {
let key = key.as_ref();
smallest = std::cmp::min(smallest, key.len()); smallest = std::cmp::min(smallest, key.len());
largest = std::cmp::max(largest, key.len()); largest = std::cmp::max(largest, key.len());
let key = if key.is_ascii() { let key = self.key_new(key);
format!("dictgen::InsensitiveStr::Ascii({key:?})")
} else {
format!("dictgen::InsensitiveStr::Unicode({key:?})")
};
writeln!(file, " {key},")?; writeln!(file, " {key},")?;
} }
@ -50,15 +60,37 @@ impl OrderedMapGen<'_> {
Ok(()) Ok(())
} }
/// Rust type of the generated map's key, derived from the configured
/// case-sensitivity and unicode handling.
fn key_type(&self) -> &'static str {
    if !self.unicase {
        "&'static str"
    } else if self.unicode {
        "dictgen::InsensitiveStr<'static>"
    } else {
        "dictgen::InsensitiveAscii<'static>"
    }
}
/// Render the constructor expression for one key; must stay in sync with
/// the type reported by `key_type`.
fn key_new(&self, key: &str) -> String {
    if !self.unicase {
        format!("{key:?}")
    } else if !self.unicode {
        format!("dictgen::InsensitiveAscii({key:?})")
    } else if key.is_ascii() {
        // ASCII keys get the cheaper variant; non-ASCII fall back to Unicode.
        format!("dictgen::InsensitiveStr::Ascii({key:?})")
    } else {
        format!("dictgen::InsensitiveStr::Unicode({key:?})")
    }
}
} }
pub struct OrderedMap<V: 'static> { pub struct OrderedMap<K: 'static, V: 'static> {
pub keys: &'static [crate::InsensitiveStr<'static>], pub keys: &'static [K],
pub values: &'static [V], pub values: &'static [V],
pub range: core::ops::RangeInclusive<usize>, pub range: core::ops::RangeInclusive<usize>,
} }
impl<V> OrderedMap<V> { impl<V> OrderedMap<crate::InsensitiveStr<'_>, V> {
#[inline] #[inline]
pub fn find(&self, word: &'_ unicase::UniCase<&str>) -> Option<&'static V> { pub fn find(&self, word: &'_ unicase::UniCase<&str>) -> Option<&'static V> {
if self.range.contains(&word.len()) { if self.range.contains(&word.len()) {
@ -71,3 +103,28 @@ impl<V> OrderedMap<V> {
} }
} }
} }
/// Binary-search lookup for ASCII-case-insensitive keys.
impl<V> OrderedMap<crate::InsensitiveAscii<'_>, V> {
    #[inline]
    pub fn find(&self, word: &'_ unicase::Ascii<&str>) -> Option<&'static V> {
        // Length pre-filter: outside the stored key-length range the word
        // cannot be present, so skip the search entirely.
        if !self.range.contains(&word.len()) {
            return None;
        }
        let idx = self
            .keys
            .binary_search_by_key(word, |key| key.convert())
            .ok()?;
        Some(&self.values[idx])
    }
}

/// Binary-search lookup for case-sensitive, plain-`&str` keys.
impl<V> OrderedMap<&str, V> {
    #[inline]
    pub fn find(&self, word: &'_ &str) -> Option<&'static V> {
        if !self.range.contains(&word.len()) {
            return None;
        }
        let idx = self.keys.binary_search(word).ok()?;
        Some(&self.values[idx])
    }
}

View file

@ -27,7 +27,7 @@ impl TrieGen<'_> {
pub struct Trie<V: 'static> { pub struct Trie<V: 'static> {
pub root: &'static TrieNode<V>, pub root: &'static TrieNode<V>,
pub unicode: &'static crate::OrderedMap<V>, pub unicode: &'static crate::OrderedMap<crate::InsensitiveStr<'static>, V>,
pub range: core::ops::RangeInclusive<usize>, pub range: core::ops::RangeInclusive<usize>,
} }
@ -75,7 +75,7 @@ impl<V> Trie<V> {
// Unsafe: Everything before has been proven to be ASCII, so this should be // Unsafe: Everything before has been proven to be ASCII, so this should be
// safe. // safe.
let remaining = unsafe { core::str::from_utf8_unchecked(remaining) }; let remaining = unsafe { core::str::from_utf8_unchecked(remaining) };
let remaining = unicase::UniCase::ascii(remaining); let remaining = unicase::Ascii::new(remaining);
return t.find(&remaining); return t.find(&remaining);
} }
} }
@ -91,7 +91,7 @@ pub struct TrieNode<V: 'static> {
pub enum TrieChild<V: 'static> { pub enum TrieChild<V: 'static> {
Nested(&'static [Option<&'static TrieNode<V>>; 26]), Nested(&'static [Option<&'static TrieNode<V>>; 26]),
Flat(&'static crate::OrderedMap<V>), Flat(&'static crate::OrderedMap<crate::InsensitiveAscii<'static>, V>),
} }
#[cfg(feature = "codegen")] #[cfg(feature = "codegen")]
@ -179,6 +179,7 @@ mod codegen {
.name(&children_name) .name(&children_name)
.value_type(value_type) .value_type(value_type)
.ordered_map() .ordered_map()
.unicode(false)
.write(file, table_input)?; .write(file, table_input)?;
} }
} }

View file

@ -1,6 +1,7 @@
// This file is @generated by crates/misspell-dict/tests/codegen.rs // This file is @generated by crates/misspell-dict/tests/codegen.rs
pub static MAIN_DICTIONARY: dictgen::OrderedMap<&[&str]> = dictgen::OrderedMap { pub static MAIN_DICTIONARY: dictgen::OrderedMap<dictgen::InsensitiveStr<'static>, &[&str]> =
dictgen::OrderedMap {
keys: &[ keys: &[
dictgen::InsensitiveStr::Ascii("abandenment"), dictgen::InsensitiveStr::Ascii("abandenment"),
dictgen::InsensitiveStr::Ascii("abandining"), dictgen::InsensitiveStr::Ascii("abandining"),
@ -56100,8 +56101,9 @@ pub static MAIN_DICTIONARY: dictgen::OrderedMap<&[&str]> = dictgen::OrderedMap {
&["zionists"], &["zionists"],
], ],
range: 3..=19, range: 3..=19,
}; };
pub static AMERICAN_DICTIONARY: dictgen::OrderedMap<&[&str]> = dictgen::OrderedMap { pub static AMERICAN_DICTIONARY: dictgen::OrderedMap<dictgen::InsensitiveStr<'static>, &[&str]> =
dictgen::OrderedMap {
keys: &[ keys: &[
dictgen::InsensitiveStr::Ascii("accessorise"), dictgen::InsensitiveStr::Ascii("accessorise"),
dictgen::InsensitiveStr::Ascii("accessorised"), dictgen::InsensitiveStr::Ascii("accessorised"),
@ -59345,8 +59347,9 @@ pub static AMERICAN_DICTIONARY: dictgen::OrderedMap<&[&str]> = dictgen::OrderedM
&["yogurts"], &["yogurts"],
], ],
range: 4..=20, range: 4..=20,
}; };
pub static BRITISH_DICTIONARY: dictgen::OrderedMap<&[&str]> = dictgen::OrderedMap { pub static BRITISH_DICTIONARY: dictgen::OrderedMap<dictgen::InsensitiveStr<'static>, &[&str]> =
dictgen::OrderedMap {
keys: &[ keys: &[
dictgen::InsensitiveStr::Ascii("accessorize"), dictgen::InsensitiveStr::Ascii("accessorize"),
dictgen::InsensitiveStr::Ascii("accessorized"), dictgen::InsensitiveStr::Ascii("accessorized"),
@ -62306,4 +62309,4 @@ pub static BRITISH_DICTIONARY: dictgen::OrderedMap<&[&str]> = dictgen::OrderedMa
&["yodelling"], &["yodelling"],
], ],
range: 4..=20, range: 4..=20,
}; };

View file

@ -16,7 +16,8 @@ all-features = true
rustdoc-args = ["--cfg", "docsrs", "--generate-link-to-definition"] rustdoc-args = ["--cfg", "docsrs", "--generate-link-to-definition"]
[dependencies] [dependencies]
dictgen = { version = "^0.2", path = "../dictgen" } phf = "0.11.2"
dictgen = { version = "^0.2", path = "../dictgen", features = ["map"] }
[dev-dependencies] [dev-dependencies]
csv = "1.3" csv = "1.3"
@ -29,7 +30,7 @@ varcon = { version = "^1.0", path = "../varcon" }
snapbox = "0.6.5" snapbox = "0.6.5"
indexmap = "2.2.6" indexmap = "2.2.6"
divan = "0.1.16" divan = "0.1.16"
phf = "0.11.2" heck = "0.5.0"
[lints] [lints]
workspace = true workspace = true

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,7 @@
#![allow(clippy::wildcard_imports)] #![allow(clippy::wildcard_imports)]
#![allow(dead_code)]
mod cased_map_codegen;
mod map_codegen; mod map_codegen;
mod ordered_map_codegen; mod ordered_map_codegen;
mod trie_codegen; mod trie_codegen;
@ -9,6 +11,11 @@ mod miss {
const MISS: &str = "finalizes"; const MISS: &str = "finalizes";
#[divan::bench(args = [MISS])]
fn cased_map(word: &str) -> Option<&'static &[&str]> {
cased_map_codegen::WORD_ASCII_LOWER.find(&word)
}
#[divan::bench(args = [unicase::UniCase::new(MISS)])] #[divan::bench(args = [unicase::UniCase::new(MISS)])]
fn map(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> { fn map(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
map_codegen::WORD.find(&word) map_codegen::WORD.find(&word)
@ -30,6 +37,11 @@ mod hit {
const HIT: &str = "finallizes"; const HIT: &str = "finallizes";
#[divan::bench(args = [HIT])]
fn cased_map(word: &str) -> Option<&'static &[&str]> {
cased_map_codegen::WORD_ASCII_LOWER.find(&word)
}
#[divan::bench(args = [unicase::UniCase::new(HIT)])] #[divan::bench(args = [unicase::UniCase::new(HIT)])]
fn map(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> { fn map(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
map_codegen::WORD.find(&word) map_codegen::WORD.find(&word)

View file

@ -2,7 +2,7 @@
#![allow(clippy::unreadable_literal)] #![allow(clippy::unreadable_literal)]
#![allow(unreachable_pub)] #![allow(unreachable_pub)]
pub static WORD: dictgen::Map<&[&str]> = dictgen::Map { pub static WORD: dictgen::Map<dictgen::InsensitiveStr<'static>, &[&str]> = dictgen::Map {
map: ::phf::Map { map: ::phf::Map {
key: 12913932095322966823, key: 12913932095322966823,
disps: &[ disps: &[

View file

@ -2,7 +2,8 @@
#![allow(clippy::unreadable_literal)] #![allow(clippy::unreadable_literal)]
#![allow(unreachable_pub)] #![allow(unreachable_pub)]
pub static WORD: dictgen::OrderedMap<&[&str]> = dictgen::OrderedMap { pub static WORD: dictgen::OrderedMap<dictgen::InsensitiveStr<'static>, &[&str]> =
dictgen::OrderedMap {
keys: &[ keys: &[
dictgen::InsensitiveStr::Ascii("aaccess"), dictgen::InsensitiveStr::Ascii("aaccess"),
dictgen::InsensitiveStr::Ascii("aaccessibility"), dictgen::InsensitiveStr::Ascii("aaccessibility"),
@ -138034,4 +138035,4 @@ pub static WORD: dictgen::OrderedMap<&[&str]> = dictgen::OrderedMap {
&["contains"], &["contains"],
], ],
range: 2..=34, range: 2..=34,
}; };

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -20,6 +20,15 @@ fn codegen() {
snapbox::file!["../benches/benches/map_codegen.rs"].raw() snapbox::file!["../benches/benches/map_codegen.rs"].raw()
); );
let mut cased_map_content = vec![];
generate_cased_map(&mut cased_map_content, "WORD", DICT);
let cased_map_content = String::from_utf8(cased_map_content).unwrap();
let cased_map_content = codegenrs::rustfmt(&cased_map_content, None).unwrap();
snapbox::assert_data_eq!(
&cased_map_content,
snapbox::file!["../benches/benches/cased_map_codegen.rs"].raw()
);
let mut ordered_map_content = vec![]; let mut ordered_map_content = vec![];
generate_ordered_map(&mut ordered_map_content, "WORD", DICT); generate_ordered_map(&mut ordered_map_content, "WORD", DICT);
let ordered_map_content = String::from_utf8(ordered_map_content).unwrap(); let ordered_map_content = String::from_utf8(ordered_map_content).unwrap();
@ -29,10 +38,7 @@ fn codegen() {
snapbox::file!["../benches/benches/ordered_map_codegen.rs"].raw() snapbox::file!["../benches/benches/ordered_map_codegen.rs"].raw()
); );
snapbox::assert_data_eq!( snapbox::assert_data_eq!(&map_content, snapbox::file!["../src/word_codegen.rs"].raw());
&trie_content,
snapbox::file!["../src/word_codegen.rs"].raw()
);
} }
fn generate_trie<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) { fn generate_trie<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
@ -72,6 +78,111 @@ fn generate_trie<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
.unwrap(); .unwrap();
} }
fn generate_cased_map<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
writeln!(
file,
"// This file is @generated by {}",
file!().replace('\\', "/")
)
.unwrap();
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
writeln!(file, "#![allow(unreachable_pub)]",).unwrap();
writeln!(file).unwrap();
let records: Vec<_> = csv::ReaderBuilder::new()
.has_headers(false)
.flexible(true)
.from_reader(dict)
.records()
.map(|r| r.unwrap())
.collect();
dictgen::DictGen::new()
.name(&format!("{name}_ASCII_LOWER"))
.value_type("&[&str]")
.map()
.unicase(false)
.write(
file,
records
.iter()
.filter(|r| r.iter().next().unwrap().is_ascii())
.map(|record| {
let mut record_fields = record.iter();
let key = record_fields.next().unwrap();
let value = format!(
"&[{}]",
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
);
(key, value)
}),
)
.unwrap();
dictgen::DictGen::new()
.name(&format!("{name}_ASCII_UPPER"))
.value_type("&[&str]")
.map()
.unicase(false)
.write(
file,
records
.iter()
.filter(|r| r.iter().next().unwrap().is_ascii())
.map(|record| {
use heck::ToShoutySnakeCase;
let mut record_fields = record.iter();
let key = record_fields.next().unwrap().to_shouty_snake_case();
let value = format!(
"&[{}]",
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
);
(key, value)
}),
)
.unwrap();
dictgen::DictGen::new()
.name(&format!("{name}_ASCII_TITLE"))
.value_type("&[&str]")
.map()
.unicase(false)
.write(
file,
records
.iter()
.filter(|r| r.iter().next().unwrap().is_ascii())
.map(|record| {
use heck::ToTitleCase;
let mut record_fields = record.iter();
let key = record_fields.next().unwrap().to_title_case();
let value = format!(
"&[{}]",
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
);
(key, value)
}),
)
.unwrap();
dictgen::DictGen::new()
.name(&format!("{name}_UNICODE"))
.value_type("&[&str]")
.ordered_map()
.write(
file,
records
.iter()
.filter(|r| !r.iter().next().unwrap().is_ascii())
.map(|record| {
let mut record_fields = record.iter();
let key = record_fields.next().unwrap();
let value = format!(
"&[{}]",
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
);
(key, value)
}),
)
.unwrap();
}
fn generate_map<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) { fn generate_map<W: std::io::Write>(file: &mut W, name: &str, dict: &[u8]) {
writeln!( writeln!(
file, file,

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,7 @@
// This file is @generated by crates/wikipedia-dict/tests/codegen.rs // This file is @generated by crates/wikipedia-dict/tests/codegen.rs
pub static WORD_DICTIONARY: dictgen::OrderedMap<&[&str]> = dictgen::OrderedMap { pub static WORD_DICTIONARY: dictgen::OrderedMap<dictgen::InsensitiveStr<'static>, &[&str]> =
dictgen::OrderedMap {
keys: &[ keys: &[
dictgen::InsensitiveStr::Ascii("abandonned"), dictgen::InsensitiveStr::Ascii("abandonned"),
dictgen::InsensitiveStr::Ascii("abbout"), dictgen::InsensitiveStr::Ascii("abbout"),
@ -8588,4 +8589,4 @@ pub static WORD_DICTIONARY: dictgen::OrderedMap<&[&str]> = dictgen::OrderedMap {
&["zebra"], &["zebra"],
], ],
range: 3..=19, range: 3..=19,
}; };