feat(dict): varcon dict

2024-11-22 00:51:11 -05:00 · 2020-04-07 19:50:06 -05:00 · 2020-04-07 19:50:06 -05:00 · 7f983992bd
commit 7f983992bd
parent 814ff82aff
15 changed files with 484863 additions and 3 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -24,6 +24,15 @@ version = "1.0.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9a60d744a80c30fcb657dfe2c1b22bcb3e814c1a1e3674f32bf5820b570fbff"
 [[package]]
 name = "arrayvec"
 version = "0.4.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
 dependencies = [
 "nodrop",
 ]
 [[package]]
 name = "assert_fs"
 version = "1.0.0"
@ -81,9 +90,9 @@ checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
 [[package]]
 name = "cfg-if"
-version = "0.1.10"
+version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
+checksum = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
 [[package]]
 name = "clap"
@ -276,6 +285,26 @@ version = "1.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
 [[package]]
 name = "enumflags2"
 version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "83c8d82922337cd23a15f88b70d8e4ef5f11da38dd7cdb55e84dd5de99695da0"
 dependencies = [
 "enumflags2_derive",
 ]
 [[package]]
 name = "enumflags2_derive"
 version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "946ee94e3dbf58fdd324f9ce245c7b238d46a66f00e86a020b71996349e46cce"
 dependencies = [
 "proc-macro2 1.0.12",
 "quote 1.0.4",
 "syn 1.0.19",
 ]
 [[package]]
 name = "env_logger"
 version = "0.7.1"
@ -411,6 +440,20 @@ version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 [[package]]
 name = "lexical-core"
 version = "0.6.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f86d66d380c9c5a685aaac7a11818bdfa1f733198dfd9ec09c70b762cd12ad6f"
 dependencies = [
 "arrayvec",
 "bitflags",
 "cfg-if",
 "rustc_version",
 "ryu",
 "static_assertions",
 ]
 [[package]]
 name = "libc"
 version = "0.2.69"
@ -460,6 +503,23 @@ dependencies = [
 "unicase",
 ]
 [[package]]
 name = "nodrop"
 version = "0.1.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
 [[package]]
 name = "nom"
 version = "5.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b471253da97532da4b61552249c521e01e736071f71c1a4f7ebbfbf0a06aad6"
 dependencies = [
 "lexical-core",
 "memchr",
 "version_check",
 ]
 [[package]]
 name = "normalize-line-endings"
 version = "0.3.0"
@ -786,6 +846,12 @@ version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
 [[package]]
 name = "static_assertions"
 version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3"
 [[package]]
 name = "strsim"
 version = "0.8.0"
@ -1025,6 +1091,29 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
 [[package]]
 name = "varcon-codegen"
 version = "1.0.2"
 dependencies = [
 "codegenrs",
 "structopt",
 "varcon-parser",
 ]
 [[package]]
 name = "varcon-dict"
 version = "0.2.1"
 dependencies = [
 "enumflags2",
 ]
 [[package]]
 name = "varcon-parser"
 version = "1.0.0"
 dependencies = [
 "nom",
 ]
 [[package]]
 name = "vec_map"
 version = "0.8.2"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -5,6 +5,7 @@ members = [
    "crates/codespell-dict", "crates/codespell-dict/codegen",
    "crates/misspell-dict", "crates/misspell-dict/codegen",
    "crates/wikipedia-dict", "crates/wikipedia-dict/codegen",
    "crates/varcon", "crates/varcon/codegen", "crates/varcon-parser",
 ]
 [package]
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@ -39,6 +39,9 @@ stages:
    - script: |
        cargo run --package wikipedia-codegen -- --output crates/wikipedia-dict/src/dict_codegen.rs --check
      displayName: Verify wikipedia-dict
    - script: |
        cargo run --package varcon-codegen -- --output crates/varcon/src/codegen.rs --check
      displayName: Verify varcon-dict
 - stage: committed
  displayName: Lint History
  dependsOn: []
--- a/crates/typos-dict/codegen/src/main.rs
+++ b/crates/typos-dict/codegen/src/main.rs
@ -1,6 +1,6 @@
 use structopt::StructOpt;
-pub const DICT: &[u8] = include_bytes!("../../assets/words.csv");
+const DICT: &[u8] = include_bytes!("../../assets/words.csv");
 fn generate<W: std::io::Write>(file: &mut W) {
    writeln!(
--- a/crates/varcon-parser/Cargo.toml
+++ b/crates/varcon-parser/Cargo.toml
@ -0,0 +1,17 @@
 [package]
 name = "varcon-parser"
 version = "1.0.0"
 authors = ["Ed Page <eopage@gmail.com>"]
 description = "Parse varcon.txt file"
 repository = "https://github.com/crate-ci/typos"
 # Relative to this manifest (crates/varcon-parser/): the workspace root is two levels up.
 readme = "../../README.md"
 categories = ["text-processing"]
 license = "MIT"
 edition = "2018"
 [badges]
 azure-devops = { project = "crate-ci", pipeline = "typos" }
 codecov = { repository = "crate-ci/typos" }
 [dependencies]
 nom = "5.1.1"
--- a/crates/varcon-parser/src/lib.rs
+++ b/crates/varcon-parser/src/lib.rs
@ -0,0 +1,107 @@
 mod parser;
 pub use parser::ClusterIter;
 /// A group of entries from the varcon data, with its optional header line and trailing notes.
 #[derive(Clone, PartialEq, Eq, Debug)]
 pub struct Cluster {
    /// Text of the `#` header line (without the leading `#` and spaces), if the cluster has one.
    pub header: Option<String>,
    /// The entry lines of the cluster, in file order.
    pub entries: Vec<Entry>,
    /// Text of each `##` note line (without the leading `##` and spaces), in file order.
    pub notes: Vec<String>,
 }
 impl Cluster {
    pub fn infer(&mut self) {
        for entry in self.entries.iter_mut() {
            entry.infer();
        }
    }
 }
 /// One line of a cluster: a set of spelling variants plus optional annotations.
 #[derive(Clone, PartialEq, Eq, Debug)]
 pub struct Entry {
    /// The spellings listed on the line (separated by `/` in the data file).
    pub variants: Vec<Variant>,
    /// Part-of-speech marker from the `|` description section, if present.
    pub pos: Option<Pos>,
    /// `true` when the description section contains the `(-)` marker.
    pub archaic: bool,
    /// `true` when the description section contains the `--` marker.
    pub note: bool,
    /// Free text of the description section, if any.
    pub description: Option<String>,
    /// Text of the trailing `#` comment, if any.
    pub comment: Option<String>,
 }
 impl Entry {
    /// Fills in spelling categories that the entry leaves implicit.
    ///
    /// Each rule copies the types of one category to another category, but only when no
    /// variant of this entry already carries the target category. Rules run in the listed
    /// order, so types added by an earlier rule are visible to later ones.
    pub fn infer(&mut self) {
        // (category that must be present, category derived from it)
        let rules = [
            (Category::BritishIse, Category::BritishIze),
            (Category::BritishIze, Category::Canadian),
            (Category::BritishIse, Category::Australian),
        ];
        for &(required, missing) in rules.iter() {
            imply(&mut self.variants, required, missing);
        }
    }
 }
 fn imply(variants: &mut Vec<Variant>, required: Category, missing: Category) {
    let missing_exists = variants
        .iter()
        .any(|v| v.types.iter().any(|t| t.category == missing));
    if missing_exists {
        return;
    }
    for variant in variants.iter_mut() {
        let types: Vec<_> = variant
            .types
            .iter()
            .filter(|t| t.category == required)
            .cloned()
            .map(|mut t| {
                t.category = missing;
                t
            })
            .collect();
        variant.types.extend(types);
    }
 }
 /// One spelling of a word together with the tags saying where that spelling is used.
 #[derive(Clone, PartialEq, Eq, Debug)]
 pub struct Variant {
    /// The tags attached to this spelling (the part before the `:` in the data file).
    pub types: Vec<Type>,
    /// The spelling itself.
    pub word: String,
 }
 /// A single tag on a variant: a spelling category with an optional variant indicator.
 #[derive(Clone, PartialEq, Eq, Debug)]
 pub struct Type {
    /// Spelling category the tag refers to.
    pub category: Category,
    /// Variant indicator following the category letter, if any.
    pub tag: Option<Tag>,
    /// Number following the tag, if any (e.g. the `1` in `Av1`).
    // NOTE(review): the meaning of this number is not visible in this file — confirm against
    // the varcon README.
    pub num: Option<usize>,
 }
 /// Spelling category of a tag, identified by a single letter in the data file.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum Category {
    /// `A`: American.
    American,
    /// `B`: British "ise" spelling.
    BritishIse,
    /// `Z`: British "ize" spelling or OED preferred spelling.
    BritishIze,
    /// `C`: Canadian.
    Canadian,
    /// `D`: Australian.
    Australian,
    /// `_`: Other.
    Other,
 }
 /// Variant indicator that may follow a category letter.
 ///
 /// The derived ordering follows declaration order, from `Eq` to `Improper`.
 #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
 pub enum Tag {
    /// `.`: equal.
    Eq,
    /// `v`: variant.
    Variant,
    /// `V`: seldom used variant.
    Seldom,
    /// `-`: possible variant.
    Possible,
    /// `x`: improper variant.
    Improper,
 }
 /// Part-of-speech marker that can appear in an entry's description section.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum Pos {
    /// Written `<N>` in the data file.
    Noun,
    /// Written `<V>` in the data file.
    Verb,
    /// Written `<Adj>` in the data file.
    Adjective,
    /// Written `<Adv>` in the data file.
    Adverb,
 }
--- a/crates/varcon-parser/src/parser.rs
+++ b/crates/varcon-parser/src/parser.rs
@ -0,0 +1,568 @@
 use nom::IResult;
 use nom::InputTakeAtPosition;
 use crate::*;
 /// Iterator that yields [`Cluster`]s parsed one after another from a string.
 #[derive(Clone, PartialEq, Eq, Debug)]
 pub struct ClusterIter<'i> {
    /// Portion of the input that has not been parsed yet.
    input: &'i str,
 }
 impl<'i> ClusterIter<'i> {
    /// Creates an iterator positioned at the start of `input`.
    pub fn new(input: &'i str) -> Self {
        ClusterIter { input }
    }
 }
 impl<'i> Iterator for ClusterIter<'i> {
    type Item = Cluster;
    /// Skips leading whitespace, parses the next cluster and advances past it.
    ///
    /// Any parse failure (including an incomplete parse) ends the iteration by returning
    /// `None`; the stored input is left unchanged in that case.
    fn next(&mut self) -> Option<Cluster> {
        match Cluster::parse(self.input.trim_start()) {
            Ok((rest, cluster)) => {
                self.input = rest;
                Some(cluster)
            }
            Err(_) => None,
        }
    }
 }
 /// Tests for [`ClusterIter`]: how many clusters are yielded for a given input.
 #[cfg(test)]
 mod test_cluster_iter {
    use super::*;
    // One header followed by three entry lines is expected to yield exactly one cluster.
    #[test]
    fn test_single() {
        let iter = ClusterIter::new(
            "# acknowledgment <verified> (level 35)
 A Cv: acknowledgment / Av B C: acknowledgement
 A Cv: acknowledgments / Av B C: acknowledgements
 A Cv: acknowledgment's / Av B C: acknowledgement's
 ",
        );
        let all: Vec<_> = iter.collect();
        assert_eq!(all.len(), 1);
    }
    // The same cluster repeated twice is expected to yield two clusters.
    #[test]
    fn test_multiple() {
        let iter = ClusterIter::new(
            "# acknowledgment <verified> (level 35)
 A Cv: acknowledgment / Av B C: acknowledgement
 A Cv: acknowledgments / Av B C: acknowledgements
 A Cv: acknowledgment's / Av B C: acknowledgement's
 # acknowledgment <verified> (level 35)
 A Cv: acknowledgment / Av B C: acknowledgement
 A Cv: acknowledgments / Av B C: acknowledgements
 A Cv: acknowledgment's / Av B C: acknowledgement's
 ",
        );
        let all: Vec<_> = iter.collect();
        assert_eq!(all.len(), 2);
    }
 }
 impl Cluster {
    /// Parses one cluster from the start of `input`.
    ///
    /// A cluster is an optional `#` header line, one or more entry lines (each ended by a
    /// line ending), and then any number of `##` note lines. On success the unparsed
    /// remainder is returned together with the cluster.
    pub fn parse(input: &str) -> IResult<&str, Self> {
        use nom::bytes::streaming::tag;
        use nom::character::streaming::{line_ending, not_line_ending, space0};
        use nom::combinator::opt;
        use nom::multi::{many0, many1};
        use nom::sequence::{delimited, pair, terminated, tuple};

        // `# <text>` up to and including the line ending; yields only `<text>`.
        let header_line = delimited(pair(tag("#"), space0), not_line_ending, line_ending);
        // `## <text>` up to and including the line ending; yields only `<text>`.
        let note_line = delimited(pair(tag("##"), space0), not_line_ending, line_ending);
        let entry_line = terminated(Entry::parse, line_ending);

        let (input, (header, entries, notes)) =
            tuple((opt(header_line), many1(entry_line), many0(note_line)))(input)?;

        let cluster = Self {
            header: header.map(str::to_owned),
            entries,
            notes: notes.into_iter().map(str::to_owned).collect(),
        };
        Ok((input, cluster))
    }
 }
 /// Tests for [`Cluster::parse`].
 #[cfg(test)]
 mod test_cluster {
    use super::*;
    // Header plus three entries and no notes; a single newline is expected to remain unparsed.
    #[test]
    fn test_basic() {
        let (input, actual) = Cluster::parse(
            "# acknowledgment <verified> (level 35)
 A Cv: acknowledgment / Av B C: acknowledgement
 A Cv: acknowledgments / Av B C: acknowledgements
 A Cv: acknowledgment's / Av B C: acknowledgement's
 ",
        )
        .unwrap();
        assert_eq!(input, "\n");
        assert_eq!(
            actual.header,
            Some("acknowledgment <verified> (level 35)".to_owned())
        );
        assert_eq!(actual.entries.len(), 3);
        assert_eq!(actual.notes.len(), 0);
    }
    // Same shape as above, followed by two `##` note lines that must be collected as notes.
    #[test]
    fn test_notes() {
        let (input, actual) = Cluster::parse(
            "# coloration <verified> (level 50)
 A B C: coloration / B. Cv: colouration
 A B C: colorations / B. Cv: colourations
 A B C: coloration's / B. Cv: colouration's
 ## OED has coloration as the preferred spelling and discolouration as a
 ## variant for British Engl or some reason
 ",
        )
        .unwrap();
        assert_eq!(input, "\n");
        assert_eq!(
            actual.header,
            Some("coloration <verified> (level 50)".to_owned())
        );
        assert_eq!(actual.entries.len(), 3);
        assert_eq!(actual.notes.len(), 2);
    }
 }
 impl Entry {
    pub fn parse(input: &str) -> IResult<&str, Self> {
        let var_sep = nom::sequence::tuple((
            nom::character::streaming::space0,
            nom::bytes::streaming::tag("/"),
            nom::character::streaming::space0,
        ));
        let (input, variants) =
            nom::multi::separated_nonempty_list(var_sep, Variant::parse)(input)?;
        let desc_sep = nom::sequence::tuple((
            nom::character::streaming::space0,
            nom::bytes::streaming::tag("|"),
        ));
        let (input, description) =
            nom::combinator::opt(nom::sequence::tuple((desc_sep, Self::parse_description)))(input)?;
        let comment_sep = nom::sequence::tuple((
            nom::character::streaming::space0,
            nom::bytes::streaming::tag("#"),
        ));
        let (input, comment) = nom::combinator::opt(nom::sequence::tuple((
            comment_sep,
            nom::character::streaming::space1,
            nom::character::streaming::not_line_ending,
        )))(input)?;
        let mut e = match description {
            Some((_, description)) => description,
            None => Self {
                variants: Vec::new(),
                pos: None,
                archaic: false,
                note: false,
                description: None,
                comment: None,
            },
        };
        e.variants = variants;
        e.comment = comment.map(|c| c.2.to_owned());
        Ok((input, e))
    }
    fn parse_description(input: &str) -> IResult<&str, Self> {
        let (input, (pos, archaic, note, description)) = nom::sequence::tuple((
            nom::combinator::opt(nom::sequence::tuple((
                nom::character::streaming::space1,
                Pos::parse,
            ))),
            nom::combinator::opt(nom::sequence::tuple((
                nom::character::streaming::space1,
                nom::bytes::streaming::tag("(-)"),
            ))),
            nom::combinator::opt(nom::sequence::tuple((
                nom::character::streaming::space1,
                nom::bytes::streaming::tag("--"),
            ))),
            nom::combinator::opt(nom::sequence::tuple((
                nom::character::streaming::space1,
                nom::bytes::streaming::take_till(|c| c == '\n' || c == '\r' || c == '#'),
            ))),
        ))(input)?;
        let variants = Vec::new();
        let pos = pos.map(|(_, p)| p);
        let archaic = archaic.is_some();
        let note = note.is_some();
        let description = description.map(|(_, d)| d.to_owned());
        let e = Self {
            variants,
            pos,
            archaic,
            note,
            description,
            comment: None,
        };
        Ok((input, e))
    }
 }
 /// Tests for [`Entry::parse`]. Every input ends with a newline, which is expected to be left
 /// unconsumed by the entry parser.
 #[cfg(test)]
 mod test_entry {
    use super::*;
    // Two variants and nothing else: all optional fields stay at their defaults.
    #[test]
    fn test_variant_only() {
        let (input, actual) =
            Entry::parse("A Cv: acknowledgment's / Av B C: acknowledgement's\n").unwrap();
        assert_eq!(input, "\n");
        assert_eq!(actual.variants.len(), 2);
        assert_eq!(actual.pos, None);
        assert_eq!(actual.archaic, false);
        assert_eq!(actual.note, false);
        assert_eq!(actual.description, None);
    }
    // Free text after `|` ends up in `description`.
    #[test]
    fn test_description() {
        let (input, actual) = Entry::parse("A C: prize / B: prise | otherwise\n").unwrap();
        assert_eq!(input, "\n");
        assert_eq!(actual.variants.len(), 2);
        assert_eq!(actual.pos, None);
        assert_eq!(actual.archaic, false);
        assert_eq!(actual.note, false);
        assert_eq!(actual.description, Some("otherwise".to_owned()));
    }
    // A `<N>` marker after `|` sets `pos` and leaves `description` empty.
    #[test]
    fn test_pos() {
        let (input, actual) = Entry::parse("A B C: practice / AV Cv: practise | <N>\n").unwrap();
        assert_eq!(input, "\n");
        assert_eq!(actual.variants.len(), 2);
        assert_eq!(actual.pos, Some(Pos::Noun));
        assert_eq!(actual.archaic, false);
        assert_eq!(actual.note, false);
        assert_eq!(actual.description, None);
    }
    // A `(-)` marker after `|` sets `archaic`; the text after it is the description.
    #[test]
    fn test_archaic() {
        let (input, actual) = Entry::parse("A: bark / Av B: barque | (-) ship\n").unwrap();
        assert_eq!(input, "\n");
        assert_eq!(actual.variants.len(), 2);
        assert_eq!(actual.pos, None);
        assert_eq!(actual.archaic, true);
        assert_eq!(actual.note, false);
        assert_eq!(actual.description, Some("ship".to_owned()));
    }
    // A `--` marker after `|` sets `note`; the text after it is the description.
    #[test]
    fn test_note() {
        let (input, actual) = Entry::parse("_: cabbies | -- plural\n").unwrap();
        assert_eq!(input, "\n");
        assert_eq!(actual.variants.len(), 1);
        assert_eq!(actual.pos, None);
        assert_eq!(actual.archaic, false);
        assert_eq!(actual.note, true);
        assert_eq!(actual.description, Some("plural".to_owned()));
    }
    // A trailing `# ...` without a description section is stored in `comment`.
    #[test]
    fn test_trailing_comment() {
        let (input, actual) = Entry::parse(
            "A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n",
        )
        .unwrap();
        assert_eq!(input, "\n");
        assert_eq!(actual.variants.len(), 2);
        assert_eq!(actual.pos, None);
        assert_eq!(actual.archaic, false);
        assert_eq!(actual.note, false);
        assert_eq!(actual.description, None);
        assert_eq!(
            actual.comment,
            Some("ODE: archaic, M-W: 'or' but can find little evidence of use".to_owned())
        );
    }
 }
 impl Variant {
    /// Parses `<type> <type> ...: <word>`, i.e. one or more space-separated types, a `:`
    /// with optional spaces after it, and the word itself.
    pub fn parse(input: &str) -> IResult<&str, Self> {
        use nom::bytes::streaming::tag;
        use nom::character::streaming::{space0, space1};
        use nom::combinator::map;
        use nom::multi::separated_nonempty_list;
        use nom::sequence::{pair, separated_pair};

        let type_list = separated_nonempty_list(space1, Type::parse);
        let colon = pair(tag(":"), space0);
        map(separated_pair(type_list, colon, word), |(types, word)| {
            Self { types, word }
        })(input)
    }
 }
 /// Parses a non-empty run of characters up to the next ASCII whitespace character.
 ///
 /// Underscores in the matched text are replaced by spaces in the returned `String`.
 /// An empty match is reported with `ErrorKind::Alpha`.
 fn word(input: &str) -> IResult<&str, String> {
    input
        .split_at_position1(
            |item| item.is_ascii_whitespace(),
            nom::error::ErrorKind::Alpha,
        )
        // `str::replace` already allocates the result, so no intermediate owned copy is needed.
        .map(|(rest, raw)| (rest, raw.replace('_', " ")))
 }
 /// Tests for [`Variant::parse`].
 #[cfg(test)]
 mod test_variant {
    use super::*;
    // Two types before the colon; the space after the word is left unconsumed.
    #[test]
    fn test_valid() {
        // The input ends with whitespace on purpose: per the original author's note, input that
        // stops right after a token causes an incomplete parse.
        let (input, actual) = Variant::parse("A Cv: acknowledgment ").unwrap();
        assert_eq!(input, " ");
        assert_eq!(
            actual.types,
            vec![
                Type {
                    category: Category::American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: Category::Canadian,
                    tag: Some(Tag::Variant),
                    num: None,
                }
            ]
        );
        assert_eq!(actual.word, "acknowledgment");
    }
    // Only the first variant is consumed; the ` / ...` remainder is returned untouched.
    #[test]
    fn test_extra() {
        let (input, actual) =
            Variant::parse("A Cv: acknowledgment's / Av B C: acknowledgement's").unwrap();
        assert_eq!(input, " / Av B C: acknowledgement's");
        assert_eq!(
            actual.types,
            vec![
                Type {
                    category: Category::American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: Category::Canadian,
                    tag: Some(Tag::Variant),
                    num: None,
                }
            ]
        );
        assert_eq!(actual.word, "acknowledgment's");
    }
    // Underscores inside the word are turned into spaces.
    #[test]
    fn test_underscore() {
        let (input, actual) = Variant::parse("_: air_gun\n").unwrap();
        assert_eq!(input, "\n");
        assert_eq!(
            actual.types,
            vec![Type {
                category: Category::Other,
                tag: None,
                num: None,
            },]
        );
        assert_eq!(actual.word, "air gun");
    }
 }
 impl Type {
    /// Parses a category letter, then an optional tag character, then an optional number.
    ///
    /// A digit run that does not fit in `usize` is not consumed and `num` is left as `None`
    /// (instead of panicking), so the surrounding parser reports an ordinary parse error at
    /// the digits.
    pub fn parse(input: &str) -> IResult<&str, Type> {
        let (input, category) = Category::parse(input)?;
        let (input, tag) = nom::combinator::opt(Tag::parse)(input)?;
        // `digit1` guarantees digits but not range, so the conversion itself can still fail.
        let (input, num) = nom::combinator::opt(nom::combinator::map_res(
            nom::character::streaming::digit1,
            |digits: &str| digits.parse::<usize>(),
        ))(input)?;
        let t = Type { category, tag, num };
        Ok((input, t))
    }
 }
 /// Tests for [`Type::parse`].
 #[cfg(test)]
 mod test_type {
    use super::*;
    // A bare category and a category with a tag, each followed by a space.
    #[test]
    fn test_valid() {
        // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
        // cases.
        let (input, actual) = Type::parse("A ").unwrap();
        assert_eq!(input, " ");
        assert_eq!(actual.category, Category::American);
        assert_eq!(actual.tag, None);
        assert_eq!(actual.num, None);
        let (input, actual) = Type::parse("Bv ").unwrap();
        assert_eq!(input, " ");
        assert_eq!(actual.category, Category::BritishIse);
        assert_eq!(actual.tag, Some(Tag::Variant));
        assert_eq!(actual.num, None);
    }
    // Text after the type is returned as the unparsed remainder.
    #[test]
    fn test_extra() {
        let (input, actual) = Type::parse("Z foobar").unwrap();
        assert_eq!(input, " foobar");
        assert_eq!(actual.category, Category::BritishIze);
        assert_eq!(actual.tag, None);
        assert_eq!(actual.num, None);
        let (input, actual) = Type::parse("C- foobar").unwrap();
        assert_eq!(input, " foobar");
        assert_eq!(actual.category, Category::Canadian);
        assert_eq!(actual.tag, Some(Tag::Possible));
        assert_eq!(actual.num, None);
    }
    // Digits after the tag are parsed into `num`.
    #[test]
    fn test_num() {
        let (input, actual) = Type::parse("Av1 ").unwrap();
        assert_eq!(input, " ");
        assert_eq!(actual.category, Category::American);
        assert_eq!(actual.tag, Some(Tag::Variant));
        assert_eq!(actual.num, Some(1));
    }
 }
 impl Category {
    /// Parses a single category letter (`A`, `B`, `Z`, `C`, `D` or `_`).
    pub fn parse(input: &str) -> IResult<&str, Category> {
        nom::character::streaming::one_of("ABZCD_")(input).map(|(rest, symbol)| {
            let category = match symbol {
                'A' => Category::American,
                'B' => Category::BritishIse,
                'Z' => Category::BritishIze,
                'C' => Category::Canadian,
                'D' => Category::Australian,
                '_' => Category::Other,
                // `one_of` only ever yields a character from the list above.
                _ => unreachable!("parser won't select this option"),
            };
            (rest, category)
        })
    }
 }
 /// Tests for [`Category::parse`].
 #[cfg(test)]
 mod test_category {
    use super::*;
    // A lone category letter is consumed completely.
    #[test]
    fn test_valid() {
        let (input, actual) = Category::parse("A").unwrap();
        assert_eq!(input, "");
        assert_eq!(actual, Category::American);
    }
    // Only the first character is consumed; the rest is returned.
    #[test]
    fn test_extra() {
        let (input, actual) = Category::parse("_ foobar").unwrap();
        assert_eq!(input, " foobar");
        assert_eq!(actual, Category::Other);
    }
 }
 impl Tag {
    /// Parses a single variant-indicator character (`.`, `v`, `V`, `-` or `x`).
    pub fn parse(input: &str) -> IResult<&str, Tag> {
        nom::character::streaming::one_of(".vV-x")(input).map(|(rest, symbol)| {
            let tag = match symbol {
                '.' => Tag::Eq,
                'v' => Tag::Variant,
                'V' => Tag::Seldom,
                '-' => Tag::Possible,
                'x' => Tag::Improper,
                // `one_of` only ever yields a character from the list above.
                _ => unreachable!("parser won't select this option"),
            };
            (rest, tag)
        })
    }
 }
 /// Tests for [`Tag::parse`].
 #[cfg(test)]
 mod test_tag {
    use super::*;
    // A lone tag character is consumed completely.
    #[test]
    fn test_valid() {
        let (input, actual) = Tag::parse(".").unwrap();
        assert_eq!(input, "");
        assert_eq!(actual, Tag::Eq);
    }
    // Only the first character is consumed; the rest is returned.
    #[test]
    fn test_extra() {
        let (input, actual) = Tag::parse("x foobar").unwrap();
        assert_eq!(input, " foobar");
        assert_eq!(actual, Tag::Improper);
    }
 }
 impl Pos {
    /// Parses one part-of-speech marker: `<N>`, `<V>`, `<Adj>` or `<Adv>`.
    pub fn parse(input: &str) -> IResult<&str, Pos> {
        use nom::bytes::streaming::tag;
        // The individual parsers are bound to locals and referred to by name inside `alt!`.
        // NOTE(review): presumably because the macro gives call syntax a special meaning —
        // confirm against the nom macro documentation before inlining these.
        let noun = tag("<N>");
        let verb = tag("<V>");
        let adjective = tag("<Adj>");
        let adverb = tag("<Adv>");
        // Try each marker in turn and map the first match to its variant.
        nom::alt!(input,
            noun => {|_| Pos::Noun } |
            verb => {|_| Pos::Verb } |
            adjective => {|_| Pos::Adjective } |
            adverb => {|_| Pos::Adverb }
        )
    }
 }
 /// Tests for [`Pos::parse`].
 #[cfg(test)]
 mod test_pos {
    use super::*;
    // A lone marker is consumed completely.
    #[test]
    fn test_valid() {
        let (input, actual) = Pos::parse("<N>").unwrap();
        assert_eq!(input, "");
        assert_eq!(actual, Pos::Noun);
    }
    // Only the marker is consumed; the rest is returned.
    #[test]
    fn test_extra() {
        let (input, actual) = Pos::parse("<Adj> foobar").unwrap();
        assert_eq!(input, " foobar");
        assert_eq!(actual, Pos::Adjective);
    }
 }
--- a/crates/varcon/Cargo.toml
+++ b/crates/varcon/Cargo.toml
@ -0,0 +1,23 @@
 [package]
 name = "varcon-dict"
 version = "0.2.1"
 authors = ["Ed Page <eopage@gmail.com>"]
 description = "Source Code Spelling Correction"
 repository = "https://github.com/crate-ci/typos"
 readme = "../../README.md"
 categories = ["development-tools", "text-processing"]
 keywords = ["development", "spelling"]
 license = "MIT"
 edition = "2018"
 [badges]
 azure-devops = { project = "crate-ci", pipeline = "typos" }
 codecov = { repository = "crate-ci/typos" }
 [features]
 default = ["all"]
 all = ["flags"]
 flags = ["enumflags2"]
 [dependencies]
 enumflags2 = { version = "0.6", optional = true }
--- a/crates/varcon/assets/.gitattributes
+++ b/crates/varcon/assets/.gitattributes
@ -0,0 +1 @@
 * linguist-vendored
--- a/crates/varcon/assets/README
+++ b/crates/varcon/assets/README
@ -0,0 +1,481 @@
 Variant Conversion Info (VarCon)
 Version 2019.10.06
 Copyright 2000-2016 by Kevin Atkinson (kevina@gnu.org) and Benjamin
 Titze (btitze@protonmail.ch).
 This package contains information to convert between American,
 British, Canadian, and Australian spellings and vocabulary as well as
 other variant information.
 The latest version can be found at http://wordlist.aspell.net/.
 The main data file is varcon.txt.  It contains information on the
 preferred American, British, and Canadian spelling of a word as well
 as other variant information.
 Each line contains a mapping between the various spellings of a word.
 Words are tagged to indicate where the spelling is used, and each
 word/tag pair is separated with a " / ".  For example in the line:
  A Cv: acknowledgment / Av B C: acknowledgement
 "acknowledgment" and "acknowledgement" are two spellings of the same
 word and "A", "Cv", "B", etc are the tags.  Tags are separated by
 spaces and the group of tags is separated from the word with a ": ".
 Here, "acknowledgment" is the preferred American spelling (as
 indicated by the "A") of the word, and "acknowledgement" is the
 preferred Canadian and British spelling ("B" and "C").  However the
 American spelling is sometimes used in Canada (as indicated by "Cv",
 where the lowercase "v" indicates a variant form) and the British
 spelling is sometimes used in America (as indicated by the "Av").
 More generally each tag consists of a spelling category (for example
 "A") followed possibly by a variant indicator.  The spelling
 categories are as follows:
  A: American
  B: British "ise" spelling
  Z: British "ize" spelling or OED preferred Spelling
  C: Canadian
  D: Australian
  _: Other (Variant info based on American dictionaries, never used
            with any of the above).
 and the variants tags are as follows:
  .: equal
  v: variant
  V: seldom used variant
  -: possible variant, should generally not be used
  x: improper variant (should not use)
 The "." or equal variant tags are reserved for special cases when
 there is little agreement between dictionaries or when I think the
 dictionary is wrong.  The "v" indicator is used for most words marked
 as variants in the dictionary.  However, some variants will be demoted
 to a "V".  For example, if the variant is marked as "also" by
 Merriam-Webster, or also if only some dictionaries acknowledge the
 existence of the variant.  "-" is used when the variant is generally not
 listed in the dictionary but I could find some evidence of its use, or
 when it is marked as an archaic spelling for the word.  The "x"
 is used when the spelling is almost generally considered a
 misspelling, and is only included for completeness.
 For Australian English "v" was used for variants that are widely used,
 but not preferred, and "V" for all "-or" (vs. "-our") variants and 
 variants considered "chiefly US".
 If there are no tags with the 'Z' spelling category on the line then
 'B' implies 'Z'.  Similarly if there are no 'C' tags then 'Z' implies
 'C'.  If there are no 'D' tags then 'B' implies 'D'.
 For ease of reading and maintaining the data file, each line is
 grouped in a cluster of closely related words.  Each cluster is
 uniquely identified by a headword, which is generally the American
 spelling of the word on the first line of the cluster.  Each cluster is
 started with a '#' and is followed by the headword with some
 additional information after it.  For example the cluster for
 acknowledgment is:
  # acknowledgment <verified> (level 35)
  A Cv: acknowledgment / Av B C: acknowledgement
  A Cv: acknowledgments / Av B C: acknowledgements
  A Cv: acknowledgment's / Av B C: acknowledgement's
 The "<verified>" tag will be explained later, and "(level 35)"
 indicates what level in SCOWL (see http://wordlist.sourceforge.net)
 the headword is found in.  The levels generally mean the following:
  <= 35: Very common word
  <= 70: Can be found in the dictionary
     80: Likely a valid word, can likely be found in an
         unabridged dictionary
   > 80: May not even be a legal word
 Sometimes the spelling of a word depends on the usage.  If so the word
 is listed more than once within a cluster, with any usage information
 being indicated after a " | ".  For example here is part of the cluster
 for prize:
  A B: prize | reward
  A B: prizes | reward
  A C: prize / B: prise | otherwise
  A C: prizes / B: prises | otherwise
 which indicates that the preferred spelling of prize is always with a
 "z" when meaning a reward, but otherwise is spelled with an "s" in
 British English.  In the example above a brief definition of the word
 is given, but often no such attempt is made, and the definition simply
 consists of a number, for example:
  A B: sake | :1
  A C: sake / Av B Cv: saki | :2
 Sometimes part-of-speech (POS) info is given to help distinguish which
 form is used.  For example:
  A B C: practice / AV Cv: practise | <N>
  A Cv: practice / AV B C: practise | <V>
 POS info is always given in the form "<POS>" and if a definition
 is also given the POS info is always first.  The POS tags used are as
 follows:
  <N>: Noun
  <V>: Verb
  <Adj>: Adjective
  <Adv>: Adverb
 A "(-)" before the definition indicates a rarely used or archaic form
 of a word, for example:
  A B: bark | :1
  A: bark / Av B: barque | (-) ship
 A "--" indicates a note rather than a definition.  This is generally
 used to indicate that the spelling of the plural form does not depend on
 the spelling of the root word, for example:
  _: cabby / _.: cabbie
  _: cabbies | -- plural
 Misc. notes on a particular form of a word are given after a "#" on
 the same line.  Misc. notes for the cluster are given at the end of
 the cluster and are prefixed with "##", for example:
  # coloration <verified> (level 50)
  A B C: coloration / B. Cv: colouration
  A B C: colorations / B. Cv: colourations
  A B C: coloration's / B. Cv: colouration's
  ## OED has coloration as the preferred spelling and discolouration as a
  ## variant for British Engl or some reason
 In the notes ODE (not to be confused with OED) stands for Oxford
 Dictionary of English, "Ox" is used for any Oxford dictionary, and
 "M-W" for Merriam-Webster.
 Earlier versions of varcon contained numerous errors.  With version
 5.0 massive effort has been made to correct many of these errors.
 Clusters that have undergone some form of verification (and likely
 correction) are marked with "<verified>".  As of version 5.0, most
 clusters with headwords in common usage (SCOWL level 35 and
 below) should now be checked, as well as many others.  No effort was
 made to check clusters with headwords in SCOWL level 80 and above;
 many of those entries are unlikely to be in the dictionary anyway.
 The file variant-also.tab contains additional mappings between various
 spellings of a word which are not yet in varcon.txt.  No attempt is
 made to distinguish the primary form of a word.  The file
 variant-infl.tab is like variant-also.tab except that it is created
 automatically from the AGID inflection database.  The file
 variant-wroot.tab is like variant-infl.tab except that it also
 includes the root form of the word.
 The file voc.tab is similar to varcon.txt but converts between
 vocabulary instead of spelling.  Unlike varcon.txt it is a simple tab
 separated file with the fields corresponding to the American, British,
 and Canadian words.  If more than one word is often used to describe
 the same thing the words are separated with commas.  The last column
 contains additional notes on when the word is used.  Unlike varcon.txt
 it is generally not suitable for automatic conversion.
 The "make-variant" Perl script will combine varcon.txt,
 variant-also.tab, and variant-infl.tab into one huge mapping and will
 output the result to "variant.tab".  If the "no-infl" option is given
 then variant-infl.tab will not be included.
 The "split" script will split out the information in varcon.txt into
 several word lists named as follows:
  <spelling>[-v<variant level>][-uncommon].lst
 where <spelling> is one of: american, british, british_z, canadian,
 common, or other.  "common" is used for words which appear in
 varcon.txt, yet are used in all versions of english, such as "prize",
 and "other" is used for the "_" spelling category.  The mapping from
 the variant indicators in varcon.txt to the numeric variant level is
 as follows:
  v => 0
  V => 1
  - => 2
 "-uncommon" is used for forms marked with "(-)" as already described.
 The "translate" Perl script will translate a text file from one
 spelling to another. Its usage is:
 translate <options> [<translation array>] <from> <to>
 <options> is any of
  -?,-h,--help this screen
  -m,--mark     mark words where the translation is questionable
  -i,--include  include words where the translation is questionable
 <translation array> is the file name of the translation array,
                    defaults to "abbc.tab".
 <from> and <to> are one of: american, british, british_z, or canadian.
 british-ise and british-ize can also be used.
 Text is read in from standard input and is outputted to standard out.
 Words are marked with a '?' before and after the questionable word
 when the option is enabled.
 The file varcon.pm contains some library routines for parsing
 varcon.txt and is used by many of the scripts above.
 If you discover any errors in these mappings or have suggestions for
 additions please file a bug report at
 https://github.com/kevina/wordlist/issues, or alternatively email me
 directly at kevina@gnu.org, but I will likely tell you to file a bug
 report so that I don't forget about it.
 SOURCE:
 These mappings were compiled from numerous sources.
 The abc.tab was originally created from the American and British word
 lists found in the Ispell distribution and the Canadian word list
 created by Garst R. Reese <reese@isn.net>:
  What I have discovered is that Canadian is a modification of British.
  Canadians use ize ization, izing izable like Americans, and gram instead
  of gramme. The one exception I found was practise. It does not go to
  practize.  Otherwise they use British spelling. So, what I am currently
  checking books with is a an edited version of British, where I have
  changed all occurrences of ise to ize, isab to izab, isation to ization,
  ising to izing, and gramme to gram except I allow programme, which is
  sometimes proper unless you are talking about a computer program. I did
  bunches of greps to be sure these substitutions would work as expected.
 Many other words have been added to abc.tab which were not in the
 original Ispell word lists.
 Many different web sources were consulted when creating the tables.  They
 include:
  The American-British British-American Dictionary
    http://www.peak.org/~jeremy/dictionary/dictionary.html
    American and British Spelling Differences
      http://www.peak.org/~jeremy/dictionary/spellcat.html
  Dave (VE7CNV)'s Truly Canadian Dictionary of Canadian Spelling
    http://www.luther.bc.ca/~dave7cnv/cdnspelling/cdnspelling.html
  Canadian Spelling Convention
    http://imej.wfu.edu/articles/1999/1/02/demo/tutorial/canas.html
  Cornerstone's Canadian English Page
    http://www.web.net/cornerstone/cdneng.htm
  Inter-Play Translation: British/Canadian/American Spelling
    http://www.inter-play.com/translation/spel-ukus.htm
  Inter-Play Translation: British/Canadian/American Vocabulary
    http://www.inter-play.com/translation/voc-ukus.htm
 As well as several online dictionaries:
  Merriam-Webster: http://www.m-w.com/
  American Heritage: http://www.bartleby.com/61/
  Cambridge (ESL): http://dictionary.cambridge.org/
 In version 5.0 a massive effort to correct the numerous errors in
 VarCon was done.  The primary sources used for verification were:
  Merriam-Webster: http://www.m-w.com/
  Free version of Oxford Dictionaries Online: 
    http://www.oxforddictionaries.com/
  Oxford dictionaries available via Oxford Reference Online
    (subscription service, http://www.oxfordreference.com/):
    The New Oxford American Dictionary (2nd edition, 2006)
      and sometimes: The Oxford American Dictionary of Current English (2002)
    The Concise Oxford English Dictionary (11th edition revised, 2008)
      and sometimes: The Oxford Dictionary of English (2nd edition revised, 2005)
    The Canadian Oxford Dictionary (2004)
 I also used Tysto UK vs US spelling list available at:
  http://www.tysto.com/articles05/q1/20050324uk-us.shtml
 to make sure I didn't leave out any information in VarCon, however any
 additions from his lists were verified using the dictionaries
 mentioned above as his lists contained numerous errors (such as
 including archaic spellings of words)
 I also made indirect use of Luke's Canadian, British and American
 Spelling page available at:
  http://www.lukemastin.com/testing/spelling/cgi-bin/database.cgi?database=spelling
 but only to perform some initial verification, in the end I rechecked
 his data using the dictionaries above.  (However, his data is, by far,
 more accurate than Tysto's)
 In Version 2016.11.20 Benjamin Titze added support for Australian English.
 The primary sources for this addition were:
  The Macquarie Dictionary: https://www.macquariedictionary.com.au/
  Style Manual: For Authors, Editors and Printers, 6th Edition. DCITA.
  University of Technology Sydney Publications Style Guide:
    http://www.gsu.uts.edu.au/publications/styleguide/spelling.html
  Style Manual, Department of Treasury and Finance, Tasmania:
    http://conference.tasa.org.au/wp-content/uploads/2015/03/Style-Manual.pdf
  Editor Australia - Style Guide: 
    http://www.editoraustralia.com/styleguide_spelling.html
  Webster in Australia (history of "our"/"or" spelling variants): 
    http://blogs.usyd.edu.au/elac/2008/01/webster_in_australia.html
 CHANGELOG:
 From 2017.08.24 to 2018.10.06
   - Added entries for: eukaryote, prokaryote, virtualization, volcanism
 From 2016.11.20 to 2017.08.24
   - Typo fixes thanks to Jakub Wilk
 From 2016.06.26 to 2016.11.20
   - New Australian spelling category thanks to the work of Benjamin
     Titze.
   - Various other fixes.
 From 2016.01.19 to 2016.06.26
   - Fix plural of "bus".
 From 2015.08.24 to 2016.01.19
   - Undo the effects of PERL_UNICODE in the translate script.
   - Other minor fixes and new entries.
 From 2014.02.15 to 2015.08.24 (Aug 24, 2015)
   - Added entry for Koran/Koranic.
   - Tweaked "adviser" cluster.
   - Fix formatting problems.
 From 2015.01.28 to 2014.02.15 (February 15, 2015)
   - Various new entries
 From 2014.11.17 to 2015.01.28 (January 28, 2015)
   - Minor adjustments to a few entries (analytic, amid)
   - Added entry for shareable
   - Remove a junk entry (ted/taed).
 From 2014.08.11 to 2014.11.17 (November 17, 2014)
   - Fix typos in README
   - Enhancement to VarCon translate script.  It will now, by default,
     filter clusters with a SCOWL level > 80.  This behavior can be
     controlled with the new "--thresh" option.
   - Remove a few junk entries.
 From Revision 5.1 to Version 2014.08.11 (August 8, 2014)
   - Various corrections.  Most of them minor.  Two notable exceptions:
       - Added an entry for furor as the correct British spelling is furore
       - Fixed racket entries as Canadians still use racquet even
         though it is a British English (at least according to the
         Oxford dictionaries)
   - Other minor changes.
 From Revision 5.0 to Revision 5.1 (January 6, 2010)
   - Corrected numerous errors after running various forms
     of verification on varcon.txt.
   - Reordered the clusters in varcon.txt so that they are
     mostly in alphabetic order based on the headword.
 From Revision 4.1 to Revision 5.0 (December 27, 2010)
  - Completely new format for the main table which, in addition to
    providing the preferred spelling of a word for various forms of
    English, also records variant and other information.  To reflect
    this change, the name of the file was renamed from abbc.tab to
    varcon.txt.
  - Massive effort to verify the variant information against
    authoritative sources (mainly Oxford dictionaries).  Most entries
    for common words (SCOWL level 35 and below) have been checked
    against at least a British and Canadian dictionary.
  - Added variant information for numerous other words, even when
    there is no difference between the various forms of English.
  - Other changes corresponding to the new format.
 From Revision 4 to Revision 4.1 (August 10, 2004)
  - Fixed various errors in abbc.tab
  - Removed clause 4 from the Ispell copyright with permission of Geoff
    Kuenning.
 From Revision 3 to Revision 4 (August 7, 2004)
  - Added a column to "abc.tab" for the British "ize" spelling and
    renamed the file to abbc.tab.
  - Added verb forms of prize/prise to abbc.tab, removed from
    variant-also.tab
 From Revision 2 to Revision 3 (January 2, 2003)
  - Added an option for not including variant-infl.tab for the
    make-variant perl script
  - Added the file variant-wroot.tab
  - Added a few entries given to me by Francis Bond and Edward Betts
 From Revision 1 to Revision 2 (January 27, 2001)
  - Removed all "B" markers because I could not find any evidence for
    them
  - Corrected a few Canadian entries, especially those with the "B"
    markers
  - Added some more entries by trying fixed changes (such as ize to
    ise) to words in SCOWL and hand-checking over the ones with semi-common
    words in them. 
  - Added variant-infl.tab
 COPYRIGHT:
 Copyright 2000-2018 by Kevin Atkinson
 Permission to use, copy, modify, distribute and sell this array, the
 associated software, and its documentation for any purpose is hereby
 granted without fee, provided that the above copyright notice appears
 in all copies and that both that copyright notice and this permission
 notice appear in supporting documentation. Kevin Atkinson makes no
 representations about the suitability of this array for any
 purpose. It is provided "as is" without express or implied warranty.
 Copyright 2016 by Benjamin Titze
 Permission to use, copy, modify, distribute and sell this array, the
 associated software, and its documentation for any purpose is hereby
 granted without fee, provided that the above copyright notice appears
 in all copies and that both that copyright notice and this permission
 notice appear in supporting documentation. Benjamin Titze makes no
 representations about the suitability of this array for any
 purpose. It is provided "as is" without express or implied warranty.
 Since the original words lists come from the Ispell distribution:
 Copyright 1993, Geoff Kuenning, Granada Hills, CA
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
 3. All modifications to the source code must be clearly marked as
   such.  Binary redistributions based on modified source code
   must be clearly marked as modified versions in the documentation
   and/or other materials provided with the distribution.
 (clause 4 removed with permission from Geoff Kuenning)
 5. The name of Geoff Kuenning may not be used to endorse or promote
   products derived from this software without specific prior
   written permission.
 THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 SUCH DAMAGE.
--- a/crates/varcon/assets/varcon.txt
+++ b/crates/varcon/assets/varcon.txt
--- a/crates/varcon/codegen/Cargo.toml
+++ b/crates/varcon/codegen/Cargo.toml
@ -0,0 +1,20 @@
 [package]
 name = "varcon-codegen"
 version = "1.0.2"
 authors = ["Ed Page <eopage@gmail.com>"]
 description = "Source Code Spelling Correction"
 repository = "https://github.com/crate-ci/typos"
 readme = "../../../README.md"
 categories = ["text-processing"]
 license = "MIT"
 edition = "2018"
 publish = false
 [badges]
 azure-devops = { project = "crate-ci", pipeline = "typos" }
 codecov = { repository = "crate-ci/typos" }
 [dependencies]
 codegenrs = "0.1"
 structopt = "0.3"
 varcon-parser = { version = "1.0", path = "../../varcon-parser" }
--- a/crates/varcon/codegen/src/main.rs
+++ b/crates/varcon/codegen/src/main.rs
@ -0,0 +1,99 @@
 use structopt::StructOpt;
 // Raw bytes of the vendored VarCon word list, embedded into the binary at compile time.
 const DICT: &[u8] = include_bytes!("../../assets/varcon.txt");
 /// Renders the embedded VarCon data as Rust source and writes it to `file`.
 ///
 /// The output consists of a header comment, a clippy `allow`, a `use crate::*;`
 /// line and a `VARCON` static slice containing one `Cluster` literal per
 /// cluster yielded by the parser.
 ///
 /// # Panics
 ///
 /// Panics if any write to `file` fails.
 fn generate<W: std::io::Write>(file: &mut W) {
    // Invalid UTF-8 sequences are replaced rather than rejected.
    let text = String::from_utf8_lossy(DICT);
    let parsed = varcon_parser::ClusterIter::new(&text);
    // Preamble of the generated file.
    writeln!(
        file,
        "// This file is code-genned by {}",
        env!("CARGO_PKG_NAME")
    )
    .unwrap();
    writeln!(file, "#![allow(clippy::unreadable_literal)]").unwrap();
    writeln!(file).unwrap();
    writeln!(file, "use crate::*;").unwrap();
    writeln!(file).unwrap();
    writeln!(file, "pub static VARCON: &'static [Cluster] = &[").unwrap();
    for mut group in parsed {
        // NOTE(review): `infer` is defined in varcon-parser; presumably it fills in
        // implied data before rendering — confirm there.
        group.infer();
        writeln!(file, "Cluster {{").unwrap();
        writeln!(file, "  header: {:?},", group.header).unwrap();
        writeln!(file, "  entries: &[").unwrap();
        for item in &group.entries {
            writeln!(file, "  Entry {{").unwrap();
            writeln!(file, "    variants: &[").unwrap();
            for spelling in &item.variants {
                writeln!(file, "      Variant {{").unwrap();
                writeln!(file, "        word: {:?},", spelling.word).unwrap();
                writeln!(file, "        types: &[").unwrap();
                for ty in &spelling.types {
                    write!(file, "          Type {{").unwrap();
                    write!(file, "category: Category::{:?}, ", ty.category).unwrap();
                    // A present tag needs its enum path spelled out; `None` can use
                    // the plain `Debug` rendering.
                    match ty.tag {
                        Some(tag) => write!(file, "tag: Some(Tag::{:?}), ", tag).unwrap(),
                        None => write!(file, "tag: {:?}, ", ty.tag).unwrap(),
                    }
                    write!(file, "num: {:?},", ty.num).unwrap();
                    writeln!(file, "}},").unwrap();
                }
                writeln!(file, "        ],").unwrap();
                writeln!(file, "      }},").unwrap();
            }
            writeln!(file, "  ],").unwrap();
            // Same treatment as the tag above: qualify the variant when present.
            match item.pos {
                Some(pos) => write!(file, "  pos: Some(Pos::{:?}),", pos).unwrap(),
                None => write!(file, "  pos: {:?},", item.pos).unwrap(),
            }
            writeln!(
                file,
                " archaic: {:?}, note: {:?},",
                item.archaic, item.note
            )
            .unwrap();
            writeln!(file, "  description: {:?},", item.description).unwrap();
            writeln!(file, "  comment: {:?},", item.comment).unwrap();
            writeln!(file, "  }},").unwrap();
        }
        writeln!(file, "  ],").unwrap();
        writeln!(file, "  notes: &[").unwrap();
        for remark in &group.notes {
            writeln!(file, "    {:?},", remark).unwrap();
        }
        writeln!(file, "  ],").unwrap();
        writeln!(file, "  }},").unwrap();
    }
    writeln!(file, "];").unwrap();
 }
 // Command-line options for the generator, built from the argument groups that
 // `codegenrs` provides. Plain `//` comments are used here on purpose: structopt
 // turns `///` doc comments into help text.
 #[derive(Debug, StructOpt)]
 #[structopt(rename_all = "kebab-case")]
 struct Options {
    // Used by `run` to write the generated source (`write_str`).
    #[structopt(flatten)]
    codegen: codegenrs::CodeGenArgs,
    // Used by `run` to reformat the generated source (`reformat`).
    #[structopt(flatten)]
    rustfmt: codegenrs::RustfmtArgs,
 }
 /// Generates the dictionary source, reformats it and writes it out.
 ///
 /// Returns the process exit code (always `0` on success).
 ///
 /// # Errors
 ///
 /// Returns an error if the generated bytes are not valid UTF-8, or if
 /// reformatting or writing the result fails.
 fn run() -> Result<i32, Box<dyn std::error::Error>> {
    let options = Options::from_args();
    // Render into memory first so the text can be reformatted before it is written.
    let mut content = vec![];
    generate(&mut content);
    let content = String::from_utf8(content)?;
    let content = options.rustfmt.reformat(&content)?;
    options.codegen.write_str(&content)?;
    Ok(0)
 }
 /// Entry point: runs the generator and exits with the code it returns.
 ///
 /// Panics if `run` reports an error.
 fn main() {
    std::process::exit(run().unwrap());
 }
--- a/crates/varcon/src/codegen.rs
+++ b/crates/varcon/src/codegen.rs
--- a/crates/varcon/src/lib.rs
+++ b/crates/varcon/src/lib.rs
@ -0,0 +1,75 @@
 mod codegen;
 pub use codegen::*;
 /// A group of closely related entries from the VarCon data.
 #[derive(Clone, PartialEq, Eq, Debug)]
 pub struct Cluster {
    /// Header text of the cluster, if any.
    /// NOTE(review): presumably the `# headword ...` line described in the VarCon
    /// README — confirm against the parser.
    pub header: Option<&'static str>,
    /// Entries that make up the cluster.
    pub entries: &'static [Entry],
    /// Notes attached to the cluster as a whole.
    /// NOTE(review): presumably the `##` lines described in the VarCon README — confirm.
    pub notes: &'static [&'static str],
 }
 /// One entry of a cluster: a set of spellings plus optional usage information.
 #[derive(Clone, PartialEq, Eq, Debug)]
 pub struct Entry {
    /// The spellings listed for this entry.
    pub variants: &'static [Variant],
    /// Part of speech the entry is restricted to, if any.
    pub pos: Option<Pos>,
    /// NOTE(review): presumably set for the README's `(-)` marker (rarely used or
    /// archaic form) — confirm against the parser.
    pub archaic: bool,
    /// NOTE(review): presumably set for the README's `--` marker (a note rather than
    /// a definition) — confirm against the parser.
    pub note: bool,
    /// Optional description text for the entry.
    pub description: Option<&'static str>,
    /// Optional comment text for the entry.
    /// NOTE(review): presumably the trailing `#` note on a line — confirm.
    pub comment: Option<&'static str>,
 }
 /// A single spelling together with the category/tag combinations that apply to it.
 #[derive(Clone, PartialEq, Eq, Debug)]
 pub struct Variant {
    /// Category/tag combinations attached to `word`.
    pub types: &'static [Type],
    /// The spelling itself.
    pub word: &'static str,
 }
 /// A spelling category with an optional variant tag.
 #[derive(Clone, PartialEq, Eq, Debug)]
 pub struct Type {
    /// Spelling category this type belongs to.
    pub category: Category,
    /// Variant indicator, if one is attached to the category.
    pub tag: Option<Tag>,
    /// NOTE(review): the meaning of `num` is not visible in this file — confirm
    /// against varcon-parser.
    pub num: Option<usize>,
 }
 /// Spelling category, following the category list in the VarCon README.
 ///
 /// Each variant has its own bit so values can be combined into a `BitFlags` set
 /// when the `flags` feature is enabled.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 #[cfg_attr(feature = "flags", derive(enumflags2::BitFlags))]
 #[repr(u8)]
 pub enum Category {
    /// American spelling.
    American = 0x01,
    /// British "ise" spelling.
    BritishIse = 0x02,
    /// British "ize" spelling.
    BritishIze = 0x04,
    /// Canadian spelling.
    Canadian = 0x08,
    /// Australian spelling.
    Australian = 0x10,
    /// Any other category.
    Other = 0x20,
 }
 /// Set of `Category` values; only available with the `flags` feature.
 #[cfg(feature = "flags")]
 pub type CategorySet = enumflags2::BitFlags<Category>;
 /// Variant indicator attached to a spelling category.
 ///
 /// Each variant has its own bit so values can be combined into a `BitFlags` set
 /// when the `flags` feature is enabled.
 #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
 #[cfg_attr(feature = "flags", derive(enumflags2::BitFlags))]
 #[repr(u8)]
 pub enum Tag {
    /// Equal variant (written `.` in the data).
    Eq = 0x01,
    /// Variant. NOTE(review): presumably `v` in the data — confirm against the parser.
    Variant = 0x02,
    /// Seldom-used variant. NOTE(review): presumably `V` in the data — confirm.
    Seldom = 0x04,
    /// Possible variant. NOTE(review): presumably `-` in the data — confirm.
    Possible = 0x08,
    /// Improper variant (written `x` in the data).
    Improper = 0x10,
 }
 /// Set of `Tag` values; only available with the `flags` feature.
 #[cfg(feature = "flags")]
 pub type TagSet = enumflags2::BitFlags<Tag>;
 /// Part of speech an entry applies to.
 ///
 /// Each variant has its own bit so values can be combined into a `BitFlags` set
 /// when the `flags` feature is enabled.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 #[cfg_attr(feature = "flags", derive(enumflags2::BitFlags))]
 #[repr(u8)]
 pub enum Pos {
    /// Noun (`<N>` in the data).
    Noun = 0x01,
    /// Verb (`<V>` in the data).
    Verb = 0x02,
    /// Adjective (`<Adj>` in the data).
    Adjective = 0x04,
    /// Adverb (`<Adv>` in the data).
    Adverb = 0x08,
 }
 /// Set of `Pos` values; only available with the `flags` feature.
 #[cfg(feature = "flags")]
 pub type PosSet = enumflags2::BitFlags<Pos>;