Merge pull request #63 from epage/dict

Prepare for dict cleanup
2025-01-09 00:04:49 -05:00 · 2019-10-29 08:19:17 -06:00 · 2019-10-29 08:19:17 -06:00 · 2684b9b228
commit 2684b9b228
parent 03fa6f8b8a 2e95e5e1f6
39 changed files with 165303 additions and 35642 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -1 +0,0 @@
-typos-dict/assets/* linguist-vendored
--- a/Cargo.lock
+++ b/Cargo.lock
@ -135,6 +135,27 @@ dependencies = [
 "structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

+[[package]]
+name = "codespell-codegen"
+version = "0.1.1"
+dependencies = [
+ "codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
+ "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "codespell-dict"
+version = "0.1.1"
+dependencies = [
+ "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "crossbeam-channel"
 version = "0.3.9"
@ -343,6 +364,28 @@ dependencies = [
 "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

+[[package]]
+name = "misspell-codegen"
+version = "0.1.1"
+dependencies = [
+ "codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
+ "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "misspell-dict"
+version = "0.1.1"
+dependencies = [
+ "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "normalize-line-endings"
 version = "0.2.2"
@ -765,11 +808,13 @@ dependencies = [
 "failure 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
 "ignore 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
 "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)",
 "structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
 "toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
 "typos 0.1.1",
 "typos-dict 0.1.1",
+ "unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [[package]]
@ -790,7 +835,6 @@ version = "0.1.1"
 dependencies = [
 "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
 "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "typos 0.1.1",
 "unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

@ -855,6 +899,27 @@ name = "wasi"
 version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"

+[[package]]
+name = "wikipedia-codegen"
+version = "0.1.1"
+dependencies = [
+ "codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
+ "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "wikipedia-dict"
+version = "0.1.1"
+dependencies = [
+ "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "winapi"
 version = "0.3.8"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,5 +1,11 @@
 [workspace]
-members = ["codegen", "typos", "typos-dict"]
+members = [
+    "typos",
+    "dict/typos", "dict/typos/codegen",
+    "dict/codespell", "dict/codespell/codegen",
+    "dict/misspell", "dict/misspell/codegen",
+    "dict/wikipedia", "dict/wikipedia/codegen",
+]

 [package]
 name = "typos-cli"
@ -24,7 +30,9 @@ codecov = { repository = "crate-ci/typos" }

 [dependencies]
 typos = { version = "0.1", path = "typos" }
-typos-dict = { version = "0.1", path = "typos-dict" }
+typos-dict = { version = "0.1", path = "dict/typos" }
+phf = { version = "0.8", features = ["unicase"] }
+unicase = "2.5"
 failure = "0.1"
 structopt = "0.3"
 clap = "2"
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@ -25,8 +25,17 @@ stages:
    steps:
    - template: azure/install-rust.yml@templates
    - script: |
-        cargo run --package typos-codegen -- --input typos-dict/assets/words.csv --output typos-dict/src/dict_codegen.rs --check
-      displayName: Verify Code-gen
+        cargo run --package typos-codegen -- --output dict/typos/src/dict_codegen.rs --check
+      displayName: Verify typos-dict
+    - script: |
+        cargo run --package codespell-codegen -- --output dict/codespell/src/dict_codegen.rs --check
+      displayName: Verify codespell-dict
+    - script: |
+        cargo run --package misspell-codegen -- --output dict/misspell/src/dict_codegen.rs --check
+      displayName: Verify misspell-dict
+    - script: |
+        cargo run --package wikipedia-codegen -- --output dict/wikipedia/src/dict_codegen.rs --check
+      displayName: Verify wikipedia-dict
 - stage: committed
  displayName: Lint History
  dependsOn: []
--- a/dict/codespell/Cargo.toml
+++ b/dict/codespell/Cargo.toml
@ -0,0 +1,21 @@
+[package]
+name = "codespell-dict"
+version = "0.1.1"
+authors = ["Ed Page <eopage@gmail.com>"]
+description = "Source Code Spelling Correction"
+repository = "https://github.com/crate-ci/typos"
+readme = "README.md"
+categories = ["development-tools", "text-processing"]
+keywords = ["development", "spelling"]
+license = "CC-BY-SA-3.0"
+edition = "2018"
+publish = false
+
+[badges]
+azure-devops = { project = "crate-ci", pipeline = "typos" }
+codecov = { repository = "crate-ci/typos" }
+
+[dependencies]
+phf = { version = "0.8", features = ["unicase"] }
+unicase = "2.5"
+log = "0.4"
--- a/dict/codespell/README.md
+++ b/dict/codespell/README.md
@ -0,0 +1,5 @@
+Origin: [codespell](https://github.com/codespell-project/codespell)
+
+# License
+
+dictionary.txt is a derived work of English Wikipedia and is released under the Creative Commons Attribution-Share-Alike License 3.0 http://creativecommons.org/licenses/by-sa/3.0/
--- a/dict/codespell/assets/dictionary.txt
+++ b/dict/codespell/assets/dictionary.txt
--- a/dict/codespell/codegen/Cargo.toml
+++ b/dict/codespell/codegen/Cargo.toml
@ -0,0 +1,23 @@
+[package]
+name = "codespell-codegen"
+version = "0.1.1"
+authors = ["Ed Page <eopage@gmail.com>"]
+description = "Source Code Spelling Correction"
+repository = "https://github.com/crate-ci/typos"
+readme = "../README.md"
+categories = ["text-processing"]
+license = "MIT"
+edition = "2018"
+publish = false
+
+[badges]
+azure-devops = { project = "crate-ci", pipeline = "typos" }
+codecov = { repository = "crate-ci/typos" }
+
+[dependencies]
+phf = { version = "0.8", features = ["unicase"] }
+phf_codegen = "0.8"
+unicase = "2.5"
+itertools = "0.8"
+codegenrs = "0.1"
+structopt = "0.3"
--- a/dict/codespell/codegen/src/main.rs
+++ b/dict/codespell/codegen/src/main.rs
@ -0,0 +1,80 @@
+use structopt::StructOpt;
+
+pub const DICT: &str = include_str!("../../assets/dictionary.txt");
+
+/// Parses the bundled dictionary text: one `typo->fix[, fix...]` entry per line.
+///
+/// Yields each typo together with its trimmed, non-empty corrections. Blank
+/// lines are skipped instead of aborting the whole run.
+///
+/// # Panics
+///
+/// Panics, naming the 1-based line number, when a non-blank line has no `->`.
+fn parse_dict(raw: &str) -> impl Iterator<Item = (&str, Vec<&str>)> {
+    raw.lines()
+        .enumerate()
+        .filter(|(_, line)| !line.trim().is_empty())
+        .map(|(index, line)| {
+            let mut parts = line.splitn(2, "->");
+            let typo = parts
+                .next()
+                .expect("`splitn` always yields at least one item")
+                .trim();
+            let corrections = parts
+                .next()
+                .unwrap_or_else(|| {
+                    panic!(
+                        "dictionary line {}: expected `typo->corrections`, found {:?}",
+                        index + 1,
+                        line
+                    )
+                })
+                .split(',')
+                // A trailing comma leaves an empty segment behind; drop it.
+                .map(str::trim)
+                .filter(|c| !c.is_empty())
+                .collect();
+            (typo, corrections)
+        })
+}
+
+/// Writes the generated `dict_codegen.rs` source for the bundled dictionary to `file`.
+///
+/// The output is a short header followed by a single `WORD_DICTIONARY` phf map
+/// whose values are slices of corrections. Panics if any write fails.
+fn generate<W: std::io::Write>(file: &mut W) {
+    let banner = format!("// This file is code-genned by {}", env!("CARGO_PKG_NAME"));
+    let header = [
+        banner.as_str(),
+        "#![allow(clippy::unreadable_literal)]",
+        "",
+        "use unicase::UniCase;",
+    ];
+    for line in &header {
+        writeln!(file, "{}", line).unwrap();
+    }
+
+    writeln!(
+        file,
+        "{}",
+        "pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = "
+    )
+    .unwrap();
+
+    let mut map = phf_codegen::Map::new();
+    for (typo, corrections) in parse_dict(DICT) {
+        // Debug formatting quotes and escapes each correction so it is a valid literal.
+        let quoted: Vec<String> = corrections.iter().map(|c| format!("{:?}", c)).collect();
+        let slice_literal = format!("&[{}]", quoted.join(", "));
+        map.entry(unicase::UniCase::new(typo), &slice_literal);
+    }
+    writeln!(file, "{}", map.build()).unwrap();
+    writeln!(file, ";").unwrap();
+}
+
+#[derive(Debug, StructOpt)] // Command-line options; `run` builds this via `Options::from_args()`.
+#[structopt(rename_all = "kebab-case")]
+struct Options {
+    #[structopt(flatten)]
+    codegen: codegenrs::CodeGenArgs, // `run` calls `write_str` on this to emit the generated source.
+    #[structopt(flatten)]
+    rustmft: codegenrs::RustfmtArgs, // `run` calls `reformat` on this. NOTE(review): name looks like a transposition of `rustfmt` — confirm before renaming.
+}
+
+/// Generates the dictionary source, reformats it, and hands it to the codegen writer.
+///
+/// Returns the process exit code (always `0` on success); UTF-8 conversion,
+/// reformatting, and writing errors are propagated to the caller.
+fn run() -> Result<i32, Box<dyn std::error::Error>> {
+    let options = Options::from_args();
+
+    let mut buffer = Vec::new();
+    generate(&mut buffer);
+
+    let generated = String::from_utf8(buffer)?;
+    let formatted = options.rustmft.reformat(&generated)?;
+    options.codegen.write_str(&formatted)?;
+
+    Ok(0)
+}
+
+/// Entry point: exits with the status code returned by `run`.
+///
+/// Panics if `run` reports an error.
+fn main() {
+    std::process::exit(run().unwrap());
+}
--- a/dict/codespell/src/dict_codegen.rs
+++ b/dict/codespell/src/dict_codegen.rs
--- a/dict/codespell/src/lib.rs
+++ b/dict/codespell/src/lib.rs
@ -0,0 +1,3 @@
+mod dict_codegen;
+
+pub use crate::dict_codegen::*;
--- a/dict/misspell/Cargo.toml
+++ b/dict/misspell/Cargo.toml
@ -0,0 +1,21 @@
+[package]
+name = "misspell-dict"
+version = "0.1.1"
+authors = ["Ed Page <eopage@gmail.com>"]
+description = "Source Code Spelling Correction"
+repository = "https://github.com/crate-ci/typos"
+readme = "README.md"
+categories = ["development-tools", "text-processing"]
+keywords = ["development", "spelling"]
+license = "MIT"
+edition = "2018"
+publish = false
+
+[badges]
+azure-devops = { project = "crate-ci", pipeline = "typos" }
+codecov = { repository = "crate-ci/typos" }
+
+[dependencies]
+phf = { version = "0.8", features = ["unicase"] }
+unicase = "2.5"
+log = "0.4"
--- a/dict/misspell/README.md
+++ b/dict/misspell/README.md
@ -0,0 +1 @@
+Origin: [misspell](https://github.com/client9/misspell)
--- a/dict/misspell/assets/.gitattributes
+++ b/dict/misspell/assets/.gitattributes
@ -0,0 +1 @@
+* linguist-vendored
--- a/dict/misspell/assets/words.go
+++ b/dict/misspell/assets/words.go
--- a/dict/misspell/codegen/Cargo.toml
+++ b/dict/misspell/codegen/Cargo.toml
@ -0,0 +1,24 @@
+[package]
+name = "misspell-codegen"
+version = "0.1.1"
+authors = ["Ed Page <eopage@gmail.com>"]
+description = "Source Code Spelling Correction"
+repository = "https://github.com/crate-ci/typos"
+readme = "../README.md"
+categories = ["text-processing"]
+license = "MIT"
+edition = "2018"
+publish = false
+
+[badges]
+azure-devops = { project = "crate-ci", pipeline = "typos" }
+codecov = { repository = "crate-ci/typos" }
+
+[dependencies]
+phf = { version = "0.8", features = ["unicase"] }
+phf_codegen = "0.8"
+unicase = "2.5"
+itertools = "0.8"
+codegenrs = "0.1"
+structopt = "0.3"
+regex = "1"
--- a/dict/misspell/codegen/src/main.rs
+++ b/dict/misspell/codegen/src/main.rs
@ -0,0 +1,148 @@
+use std::collections::HashMap;
+
+use structopt::StructOpt;
+
+pub const DICT: &str = include_str!("../../assets/words.go");
+
+struct Words<'s> { // Typo -> corrections tables filled by `parse_dict`; all strings borrow from the parsed input.
+    main: HashMap<&'s str, Vec<&'s str>>, // entries read after a line containing `DictMain`
+    american: HashMap<&'s str, Vec<&'s str>>, // entries read after a line containing `DictAmerican`
+    british: HashMap<&'s str, Vec<&'s str>>, // entries read after a line containing `DictBritish`
+}
+
+/// Extracts the main, American, and British typo tables from the Go source in `raw`.
+///
+/// A line mentioning `DictMain`, `DictAmerican`, or `DictBritish` selects the
+/// table that following `"typo", "fix",` lines are stored in; a line containing
+/// `}` ends the current table. Anything after `//` on a line is ignored.
+/// Unrecognised lines are reported on stderr and skipped.
+///
+/// # Panics
+///
+/// Panics if any mapping line appears outside of the three known tables.
+fn parse_dict(raw: &str) -> Words {
+    let mapping = regex::Regex::new(r#"^"(.*)", "(.*)",$"#).unwrap();
+
+    // Catches mappings that show up outside of a recognised table.
+    let mut stray = HashMap::new();
+    let mut main = HashMap::new();
+    let mut american = HashMap::new();
+    let mut british = HashMap::new();
+
+    let mut target = &mut stray;
+    for raw_line in raw.lines() {
+        let line = raw_line.splitn(2, "//").next().unwrap().trim();
+        if line.is_empty() || line.starts_with("package") {
+            continue;
+        }
+        if line.contains("DictMain") {
+            target = &mut main;
+            continue;
+        }
+        if line.contains("DictAmerican") {
+            target = &mut american;
+            continue;
+        }
+        if line.contains("DictBritish") {
+            target = &mut british;
+            continue;
+        }
+        if line.contains('}') {
+            target = &mut stray;
+            continue;
+        }
+
+        match mapping.captures(line) {
+            Some(found) => {
+                let typo = found.get(1).unwrap().as_str();
+                let correction = found.get(2).unwrap().as_str();
+                target.insert(typo, vec![correction]);
+            }
+            None => eprintln!("Unknown line: {}", line),
+        }
+    }
+
+    if !stray.is_empty() {
+        panic!("Failed parsing; found extra words: {:#?}", stray);
+    }
+
+    Words {
+        main,
+        american,
+        british,
+    }
+}
+
+/// Writes one `pub static <name>: phf::Map<...> = ...;` item built from `dict` to `file`.
+///
+/// Panics if any write fails.
+fn write_dictionary<W: std::io::Write>(
+    file: &mut W,
+    name: &str,
+    dict: &HashMap<&str, Vec<&str>>,
+) {
+    writeln!(
+        file,
+        "pub static {}: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
+        name
+    )
+    .unwrap();
+    let mut builder = phf_codegen::Map::new();
+    for (typo, corrections) in dict {
+        // Debug formatting quotes and escapes each correction so it is a valid literal.
+        let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
+        let value = format!("&[{}]", value);
+        builder.entry(unicase::UniCase::new(*typo), &value);
+    }
+    writeln!(file, "{}", builder.build()).unwrap();
+    writeln!(file, ";").unwrap();
+}
+
+/// Writes the generated `dict_codegen.rs` source for the misspell word lists to `file`.
+///
+/// The output is a short header followed by the `MAIN_DICTIONARY`,
+/// `AMERICAN_DICTIONARY`, and `BRITISH_DICTIONARY` phf maps, separated by
+/// blank lines. Panics if any write fails or if `parse_dict` rejects the input.
+fn generate<W: std::io::Write>(file: &mut W) {
+    writeln!(
+        file,
+        "// This file is code-genned by {}",
+        env!("CARGO_PKG_NAME")
+    )
+    .unwrap();
+    writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
+    writeln!(file).unwrap();
+    writeln!(file, "use unicase::UniCase;").unwrap();
+
+    let Words {
+        main,
+        american,
+        british,
+    } = parse_dict(DICT);
+
+    write_dictionary(file, "MAIN_DICTIONARY", &main);
+    writeln!(file).unwrap();
+    write_dictionary(file, "AMERICAN_DICTIONARY", &american);
+    writeln!(file).unwrap();
+    write_dictionary(file, "BRITISH_DICTIONARY", &british);
+}
+
+#[derive(Debug, StructOpt)] // Command-line options; `run` builds this via `Options::from_args()`.
+#[structopt(rename_all = "kebab-case")]
+struct Options {
+    #[structopt(flatten)]
+    codegen: codegenrs::CodeGenArgs, // `run` calls `write_str` on this to emit the generated source.
+    #[structopt(flatten)]
+    rustmft: codegenrs::RustfmtArgs, // `run` calls `reformat` on this. NOTE(review): name looks like a transposition of `rustfmt` — confirm before renaming.
+}
+
+/// Generates the dictionary source, reformats it, and hands it to the codegen writer.
+///
+/// Returns the process exit code (always `0` on success); UTF-8 conversion,
+/// reformatting, and writing errors are propagated to the caller.
+fn run() -> Result<i32, Box<dyn std::error::Error>> {
+    let options = Options::from_args();
+
+    let mut buffer = Vec::new();
+    generate(&mut buffer);
+
+    let generated = String::from_utf8(buffer)?;
+    let formatted = options.rustmft.reformat(&generated)?;
+    options.codegen.write_str(&formatted)?;
+
+    Ok(0)
+}
+
+/// Entry point: exits with the status code returned by `run`.
+///
+/// Panics if `run` reports an error.
+fn main() {
+    std::process::exit(run().unwrap());
+}
--- a/dict/misspell/src/dict_codegen.rs
+++ b/dict/misspell/src/dict_codegen.rs
--- a/dict/misspell/src/lib.rs
+++ b/dict/misspell/src/lib.rs
@ -0,0 +1,3 @@
+mod dict_codegen;
+
+pub use crate::dict_codegen::*;
--- a/dict/typos/Cargo.toml
+++ b/dict/typos/Cargo.toml
@ -4,7 +4,7 @@ version = "0.1.1"
 authors = ["Ed Page <eopage@gmail.com>"]
 description = "Source Code Spelling Correction"
 repository = "https://github.com/crate-ci/typos"
-readme = "../README.md"
+readme = "../../README.md"
 categories = ["development-tools", "text-processing"]
 keywords = ["development", "spelling"]
 license = "MIT"
@ -15,7 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
 codecov = { repository = "crate-ci/typos" }

 [dependencies]
-typos = { version = "0.1", path = "../typos" }
 phf = { version = "0.8", features = ["unicase"] }
 unicase = "2.5"
 log = "0.4"
--- a/dict/typos/assets/.gitattributes
+++ b/dict/typos/assets/.gitattributes
@ -0,0 +1 @@
+* linguist-vendored
--- a/dict/typos/assets/main.go
+++ b/dict/typos/assets/main.go
--- a/dict/typos/assets/words.csv
+++ b/dict/typos/assets/words.csv
--- a/dict/typos/assets/words.go
+++ b/dict/typos/assets/words.go
--- a/dict/typos/codegen/Cargo.toml
+++ b/dict/typos/codegen/Cargo.toml
@ -4,7 +4,7 @@ version = "1.0.1"
 authors = ["Ed Page <eopage@gmail.com>"]
 description = "Source Code Spelling Correction"
 repository = "https://github.com/crate-ci/typos"
-readme = "../README.md"
+readme = "../../../README.md"
 categories = ["text-processing"]
 license = "MIT"
 edition = "2018"
--- a/dict/typos/codegen/src/main.rs
+++ b/dict/typos/codegen/src/main.rs
@ -1,22 +1,25 @@
 use structopt::StructOpt;

-fn generate<W: std::io::Write>(input: &[u8], file: &mut W) {
+pub const DICT: &[u8] = include_bytes!("../../assets/words.csv");
+
+fn generate<W: std::io::Write>(file: &mut W) {
    writeln!(
        file,
        "// This file is code-genned by {}",
        env!("CARGO_PKG_NAME")
    )
    .unwrap();
+    writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
    writeln!(file).unwrap();
    writeln!(file, "use unicase::UniCase;").unwrap();

    writeln!(
        file,
-        "pub(crate) static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
+        "pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
    )
    .unwrap();
    let mut builder = phf_codegen::Map::new();
-    let records: Vec<_> = csv::Reader::from_reader(input)
+    let records: Vec<_> = csv::Reader::from_reader(DICT)
        .records()
        .map(|r| r.unwrap())
        .collect();
@ -32,8 +35,6 @@ fn generate<W: std::io::Write>(input: &[u8], file: &mut W) {
 #[derive(Debug, StructOpt)]
 #[structopt(rename_all = "kebab-case")]
 struct Options {
-    #[structopt(long, parse(from_os_str))]
-    input: std::path::PathBuf,
    #[structopt(flatten)]
    codegen: codegenrs::CodeGenArgs,
    #[structopt(flatten)]
@ -43,12 +44,8 @@ struct Options {
 fn run() -> Result<i32, Box<dyn std::error::Error>> {
    let options = Options::from_args();

-    let content = {
-        let mut content = vec![];
-        let input = std::fs::read(&options.input)?;
-        generate(&input, &mut content);
-        content
-    };
+    let mut content = vec![];
+    generate(&mut content);

    let content = String::from_utf8(content)?;
    let content = options.rustmft.reformat(&content)?;
--- a/dict/typos/src/dict_codegen.rs
+++ b/dict/typos/src/dict_codegen.rs
--- a/dict/typos/src/lib.rs
+++ b/dict/typos/src/lib.rs
@ -0,0 +1,3 @@
+mod dict_codegen;
+
+pub use crate::dict_codegen::*;
--- a/dict/wikipedia/Cargo.toml
+++ b/dict/wikipedia/Cargo.toml
@ -0,0 +1,21 @@
+[package]
+name = "wikipedia-dict"
+version = "0.1.1"
+authors = ["Ed Page <eopage@gmail.com>"]
+description = "Source Code Spelling Correction"
+repository = "https://github.com/crate-ci/typos"
+readme = "README.md"
+categories = ["development-tools", "text-processing"]
+keywords = ["development", "spelling"]
+license = "CC-BY-SA-3.0"
+edition = "2018"
+publish = false
+
+[badges]
+azure-devops = { project = "crate-ci", pipeline = "typos" }
+codecov = { repository = "crate-ci/typos" }
+
+[dependencies]
+phf = { version = "0.8", features = ["unicase"] }
+unicase = "2.5"
+log = "0.4"
--- a/dict/wikipedia/README.md
+++ b/dict/wikipedia/README.md
@ -0,0 +1,5 @@
+Origin: [Wikipedia:Lists of common misspellings/For machines](https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines)
+
+# License
+
+Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply. 
--- a/dict/wikipedia/assets/dictionary.txt
+++ b/dict/wikipedia/assets/dictionary.txt
--- a/dict/wikipedia/codegen/Cargo.toml
+++ b/dict/wikipedia/codegen/Cargo.toml
@ -0,0 +1,23 @@
+[package]
+name = "wikipedia-codegen"
+version = "0.1.1"
+authors = ["Ed Page <eopage@gmail.com>"]
+description = "Source Code Spelling Correction"
+repository = "https://github.com/crate-ci/typos"
+readme = "../README.md"
+categories = ["text-processing"]
+license = "MIT"
+edition = "2018"
+publish = false
+
+[badges]
+azure-devops = { project = "crate-ci", pipeline = "typos" }
+codecov = { repository = "crate-ci/typos" }
+
+[dependencies]
+phf = { version = "0.8", features = ["unicase"] }
+phf_codegen = "0.8"
+unicase = "2.5"
+itertools = "0.8"
+codegenrs = "0.1"
+structopt = "0.3"
--- a/dict/wikipedia/codegen/src/main.rs
+++ b/dict/wikipedia/codegen/src/main.rs
@ -0,0 +1,80 @@
+use structopt::StructOpt;
+
+pub const DICT: &str = include_str!("../../assets/dictionary.txt");
+
+/// Parses the bundled dictionary text: one `typo->fix[, fix...]` entry per line.
+///
+/// Yields each typo together with its trimmed, non-empty corrections. Blank
+/// lines are skipped instead of aborting the whole run.
+///
+/// # Panics
+///
+/// Panics, naming the 1-based line number, when a non-blank line has no `->`.
+fn parse_dict(raw: &str) -> impl Iterator<Item = (&str, Vec<&str>)> {
+    raw.lines()
+        .enumerate()
+        .filter(|(_, line)| !line.trim().is_empty())
+        .map(|(index, line)| {
+            let mut parts = line.splitn(2, "->");
+            let typo = parts
+                .next()
+                .expect("`splitn` always yields at least one item")
+                .trim();
+            let corrections = parts
+                .next()
+                .unwrap_or_else(|| {
+                    panic!(
+                        "dictionary line {}: expected `typo->corrections`, found {:?}",
+                        index + 1,
+                        line
+                    )
+                })
+                .split(',')
+                // A trailing comma leaves an empty segment behind; drop it.
+                .map(str::trim)
+                .filter(|c| !c.is_empty())
+                .collect();
+            (typo, corrections)
+        })
+}
+
+/// Writes the generated `dict_codegen.rs` source for the bundled dictionary to `file`.
+///
+/// The output is a short header followed by a single `WORD_DICTIONARY` phf map
+/// whose values are slices of corrections. Panics if any write fails.
+fn generate<W: std::io::Write>(file: &mut W) {
+    let banner = format!("// This file is code-genned by {}", env!("CARGO_PKG_NAME"));
+    let header = [
+        banner.as_str(),
+        "#![allow(clippy::unreadable_literal)]",
+        "",
+        "use unicase::UniCase;",
+    ];
+    for line in &header {
+        writeln!(file, "{}", line).unwrap();
+    }
+
+    writeln!(
+        file,
+        "{}",
+        "pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = "
+    )
+    .unwrap();
+
+    let mut map = phf_codegen::Map::new();
+    for (typo, corrections) in parse_dict(DICT) {
+        // Debug formatting quotes and escapes each correction so it is a valid literal.
+        let quoted: Vec<String> = corrections.iter().map(|c| format!("{:?}", c)).collect();
+        let slice_literal = format!("&[{}]", quoted.join(", "));
+        map.entry(unicase::UniCase::new(typo), &slice_literal);
+    }
+    writeln!(file, "{}", map.build()).unwrap();
+    writeln!(file, ";").unwrap();
+}
+
+#[derive(Debug, StructOpt)] // Command-line options; `run` builds this via `Options::from_args()`.
+#[structopt(rename_all = "kebab-case")]
+struct Options {
+    #[structopt(flatten)]
+    codegen: codegenrs::CodeGenArgs, // `run` calls `write_str` on this to emit the generated source.
+    #[structopt(flatten)]
+    rustmft: codegenrs::RustfmtArgs, // `run` calls `reformat` on this. NOTE(review): name looks like a transposition of `rustfmt` — confirm before renaming.
+}
+
+/// Generates the dictionary source, reformats it, and hands it to the codegen writer.
+///
+/// Returns the process exit code (always `0` on success); UTF-8 conversion,
+/// reformatting, and writing errors are propagated to the caller.
+fn run() -> Result<i32, Box<dyn std::error::Error>> {
+    let options = Options::from_args();
+
+    let mut buffer = Vec::new();
+    generate(&mut buffer);
+
+    let generated = String::from_utf8(buffer)?;
+    let formatted = options.rustmft.reformat(&generated)?;
+    options.codegen.write_str(&formatted)?;
+
+    Ok(0)
+}
+
+/// Entry point: exits with the status code returned by `run`.
+///
+/// Panics if `run` reports an error.
+fn main() {
+    std::process::exit(run().unwrap());
+}
--- a/dict/wikipedia/src/dict_codegen.rs
+++ b/dict/wikipedia/src/dict_codegen.rs
--- a/dict/wikipedia/src/lib.rs
+++ b/dict/wikipedia/src/lib.rs
@ -0,0 +1,3 @@
+mod dict_codegen;
+
+pub use crate::dict_codegen::*;
--- a/typos-dict/src/dict.rs
+++ b/typos-dict/src/dict.rs
@ -20,8 +20,7 @@ impl BuiltIn {
    }

    pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
-        map_lookup(&crate::dict_codegen::WORD_DICTIONARY, word.token())
-            .map(|s| case_correct(s, word.case()))
+        map_lookup(&typos_dict::WORD_DICTIONARY, word.token()).map(|s| case_correct(s, word.case()))
    }
 }

--- a/src/main.rs
+++ b/src/main.rs
@ -7,6 +7,7 @@ use std::io::Write;
 use structopt::StructOpt;

 mod config;
+mod dict;

 arg_enum! {
    #[derive(Debug, Copy, Clone, PartialEq, Eq)]
@ -318,7 +319,7 @@ fn run() -> Result<i32, failure::Error> {
        config.default.update(&args.overrides);
        let config = config;

-        let dictionary = typos_dict::BuiltIn::new();
+        let dictionary = crate::dict::BuiltIn::new();

        let parser = typos::tokens::ParserBuilder::new()
            .ignore_hex(config.default.ignore_hex())
--- a/typos-dict/src/dict_codegen.rs
+++ b/typos-dict/src/dict_codegen.rs
--- a/typos-dict/src/lib.rs
+++ b/typos-dict/src/lib.rs
@ -1,4 +0,0 @@
-mod dict;
-mod dict_codegen;
-
-pub use crate::dict::*;
				`@ -0,0 +1 @@`
				`Origin: [misspell](https://github.com/client9/misspell)`