feat: Expose client9/misspell's dict to Rust

This commit is contained in:
Ed Page 2019-10-28 13:33:48 -06:00
parent ce1ef2ca30
commit 3daafd1ea7
11 changed files with 68854 additions and 0 deletions

22
Cargo.lock generated
View file

@ -364,6 +364,28 @@ dependencies = [
"libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "misspell-codegen"
version = "0.1.1"
dependencies = [
"codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "misspell-dict"
version = "0.1.1"
dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "normalize-line-endings" name = "normalize-line-endings"
version = "0.2.2" version = "0.2.2"

View file

@ -3,6 +3,7 @@ members = [
"typos", "typos",
"dict/typos", "dict/typos/codegen", "dict/typos", "dict/typos/codegen",
"dict/codespell", "dict/codespell/codegen", "dict/codespell", "dict/codespell/codegen",
"dict/misspell", "dict/misspell/codegen",
] ]
[package] [package]

View file

@ -30,6 +30,9 @@ stages:
- script: | - script: |
cargo run --package codespell-codegen -- --output dict/codespell/src/dict_codegen.rs --check cargo run --package codespell-codegen -- --output dict/codespell/src/dict_codegen.rs --check
displayName: Verify codespell-dict displayName: Verify codespell-dict
- script: |
cargo run --package misspell-codegen -- --output dict/misspell/src/dict_codegen.rs --check
displayName: Verify misspell-dict
- stage: committed - stage: committed
displayName: Lint History displayName: Lint History
dependsOn: [] dependsOn: []

20
dict/misspell/Cargo.toml Normal file
View file

@ -0,0 +1,20 @@
# Manifest for the dictionary crate whose word lists originate from
# client9/misspell (see README.md in this directory).
[package]
name = "misspell-dict"
version = "0.1.1"
authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction"
repository = "https://github.com/crate-ci/typos"
readme = "README.md"
categories = ["development-tools", "text-processing"]
keywords = ["development", "spelling"]
license = "MIT"
edition = "2018"
[badges]
azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
# The generated tables are `phf::Map`s keyed by `unicase::UniCase<&'static str>`,
# which is why the `unicase` feature of `phf` is enabled.
phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5"
# NOTE(review): nothing in src/lib.rs visibly uses `log`; confirm the generated
# module needs it before keeping this dependency.
log = "0.4"

1
dict/misspell/README.md Normal file
View file

@ -0,0 +1 @@
Origin: [misspell](https://github.com/client9/misspell)

1
dict/misspell/assets/.gitattributes vendored Normal file
View file

@ -0,0 +1 @@
* linguist-vendored

31158
dict/misspell/assets/words.go vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,24 @@
# Manifest for the code generator that turns the vendored misspell word list
# (assets/words.go) into the Rust source of the `misspell-dict` crate.
[package]
name = "misspell-codegen"
version = "0.1.1"
authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction"
repository = "https://github.com/crate-ci/typos"
# Shares the README of the parent `misspell-dict` crate.
readme = "../README.md"
categories = ["text-processing"]
license = "MIT"
edition = "2018"
# Internal tooling only; never uploaded to a registry.
publish = false
[badges]
azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
# NOTE(review): `phf` is not referenced directly in main.rs; presumably it is
# listed to enable the `unicase` feature for `phf_codegen` - confirm.
phf = { version = "0.8", features = ["unicase"] }
# Builds the source text of each perfect-hash map.
phf_codegen = "0.8"
# Case-insensitive keys for the generated maps.
unicase = "2.5"
# `itertools::join` is used to render each list of corrections.
itertools = "0.8"
# Supplies the command-line argument structs flattened into `Options`.
codegenrs = "0.1"
structopt = "0.3"
# Matches the `"typo", "correction",` lines of words.go.
regex = "1"

View file

@ -0,0 +1,141 @@
use std::collections::HashMap;
use structopt::StructOpt;
/// Text of the vendored Go source file `assets/words.go`, embedded into the
/// binary at compile time by `include_str!`. It is the only input to
/// `parse_dict`.
pub const DICT: &str = include_str!("../../assets/words.go");
/// Split the text of misspell's `words.go` into its three word tables.
///
/// The input is scanned line by line. Everything from the first `//` on a line
/// is discarded and the remainder is trimmed. Blank lines and the `package`
/// line are ignored. A line mentioning `DictMain`, `DictAmerican` or
/// `DictBritish` opens the corresponding table, and a line containing `}`
/// closes whichever table is open. Every other line is expected to look like
/// `"typo", "correction",`; lines that do not are reported on stderr and
/// skipped.
///
/// Returns `(main, american, british)`, each mapping a typo to a one-element
/// list holding its correction. All strings borrow from `raw`.
///
/// # Panics
///
/// Panics if any `"typo", "correction",` line appears outside of the three
/// tables.
fn parse_dict(
    raw: &str,
) -> (
    HashMap<&str, Vec<&str>>,
    HashMap<&str, Vec<&str>>,
    HashMap<&str, Vec<&str>>,
) {
    /// Which table the scanner is currently inside.
    enum Section {
        Outside,
        Main,
        American,
        British,
    }

    let entry_pattern = regex::Regex::new(r#"^"(.*)", "(.*)",$"#).unwrap();

    // Entries found outside of any table land in `stray` and are fatal below.
    let mut stray = HashMap::new();
    let mut main = HashMap::new();
    let mut american = HashMap::new();
    let mut british = HashMap::new();

    let mut section = Section::Outside;
    for text in raw.lines() {
        // Drop a trailing `// ...` comment, then surrounding whitespace.
        let text = text.splitn(2, "//").next().unwrap().trim();

        if text.is_empty() || text.starts_with("package") {
            continue;
        }
        if text.contains("DictMain") {
            section = Section::Main;
            continue;
        }
        if text.contains("DictAmerican") {
            section = Section::American;
            continue;
        }
        if text.contains("DictBritish") {
            section = Section::British;
            continue;
        }
        if text.contains("}") {
            section = Section::Outside;
            continue;
        }

        match entry_pattern.captures(text) {
            Some(found) => {
                let table = match section {
                    Section::Outside => &mut stray,
                    Section::Main => &mut main,
                    Section::American => &mut american,
                    Section::British => &mut british,
                };
                let typo = found.get(1).unwrap().as_str();
                let correction = found.get(2).unwrap().as_str();
                table.insert(typo, vec![correction]);
            }
            None => eprintln!("Unknown line: {}", text),
        }
    }

    if !stray.is_empty() {
        panic!("Failed parsing; found extra words: {:#?}", stray);
    }

    (main, american, british)
}
fn generate<W: std::io::Write>(file: &mut W) {
writeln!(
file,
"// This file is code-genned by {}",
env!("CARGO_PKG_NAME")
)
.unwrap();
writeln!(file).unwrap();
writeln!(file, "use unicase::UniCase;").unwrap();
let (main, american, british) = parse_dict(DICT);
writeln!(
file,
"pub static MAIN_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
)
.unwrap();
let mut builder = phf_codegen::Map::new();
for (typo, corrections) in main {
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
let value = format!("&[{}]", value);
builder.entry(unicase::UniCase::new(typo), &value);
}
let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap();
writeln!(file, ";").unwrap();
writeln!(file, "").unwrap();
writeln!(
file,
"pub static AMERICAN_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
)
.unwrap();
let mut builder = phf_codegen::Map::new();
for (typo, corrections) in american {
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
let value = format!("&[{}]", value);
builder.entry(unicase::UniCase::new(typo), &value);
}
let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap();
writeln!(file, ";").unwrap();
writeln!(file, "").unwrap();
writeln!(
file,
"pub static BRITISH_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
)
.unwrap();
let mut builder = phf_codegen::Map::new();
for (typo, corrections) in british {
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
let value = format!("&[{}]", value);
builder.entry(unicase::UniCase::new(typo), &value);
}
let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap();
writeln!(file, ";").unwrap();
writeln!(file, "").unwrap();
}
// Command-line options of the code generator, parsed via `Options::from_args()`.
// Plain `//` comments are used on purpose: structopt feeds `///` doc comments
// into its help generation, so adding them here would not be a comment-only change.
#[derive(Debug, StructOpt)]
#[structopt(rename_all = "kebab-case")]
struct Options {
// Output-related arguments; `run` hands the final text to `codegen.write_str`.
#[structopt(flatten)]
codegen: codegenrs::CodeGenArgs,
// Formatting-related arguments; `run` passes the generated text through
// `rustmft.reformat`.
// NOTE(review): `rustmft` looks like a typo of `rustfmt`; renaming it also
// requires updating `run`.
#[structopt(flatten)]
rustmft: codegenrs::RustfmtArgs,
}
/// Parse the command line, generate the dictionary source in memory, reformat
/// it, and hand it to the output options.
///
/// Returns the process exit code, which is `0` whenever every step succeeds.
///
/// # Errors
///
/// Fails if the generated bytes are not valid UTF-8, or if reformatting or
/// writing the result reports an error.
fn run() -> Result<i32, Box<dyn std::error::Error>> {
    let args = Options::from_args();

    // Generate into memory first so the text can be post-processed as a whole.
    let mut buffer = Vec::new();
    generate(&mut buffer);

    let source = String::from_utf8(buffer)?;
    let formatted = args.rustmft.reformat(&source)?;
    args.codegen.write_str(&formatted)?;

    Ok(0)
}
/// Program entry point: exits with the code returned by `run`, panicking if
/// `run` returns an error.
fn main() {
    std::process::exit(run().unwrap());
}

File diff suppressed because it is too large Load diff

3
dict/misspell/src/lib.rs Normal file
View file

@ -0,0 +1,3 @@
// Generated dictionary tables. CI runs `misspell-codegen` with
// `--output dict/misspell/src/dict_codegen.rs --check` against this file, so
// change the generator rather than editing the module by hand.
mod dict_codegen;
// Re-export everything from the generated module at the crate root.
pub use crate::dict_codegen::*;