Merge pull request #63 from epage/dict

Prepare for dict cleanup
This commit is contained in:
Ed Page 2019-10-29 08:19:17 -06:00 committed by GitHub
commit 2684b9b228
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
39 changed files with 165303 additions and 35642 deletions

1
.gitattributes vendored
View file

@ -1 +0,0 @@
typos-dict/assets/* linguist-vendored

67
Cargo.lock generated
View file

@ -135,6 +135,27 @@ dependencies = [
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "codespell-codegen"
version = "0.1.1"
dependencies = [
"codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "codespell-dict"
version = "0.1.1"
dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "crossbeam-channel" name = "crossbeam-channel"
version = "0.3.9" version = "0.3.9"
@ -343,6 +364,28 @@ dependencies = [
"libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "misspell-codegen"
version = "0.1.1"
dependencies = [
"codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "misspell-dict"
version = "0.1.1"
dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "normalize-line-endings" name = "normalize-line-endings"
version = "0.2.2" version = "0.2.2"
@ -765,11 +808,13 @@ dependencies = [
"failure 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "failure 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"ignore 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", "ignore 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", "toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
"typos 0.1.1", "typos 0.1.1",
"typos-dict 0.1.1", "typos-dict 0.1.1",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]] [[package]]
@ -790,7 +835,6 @@ version = "0.1.1"
dependencies = [ dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"typos 0.1.1",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@ -855,6 +899,27 @@ name = "wasi"
version = "0.7.0" version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "wikipedia-codegen"
version = "0.1.1"
dependencies = [
"codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "wikipedia-dict"
version = "0.1.1"
dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "winapi" name = "winapi"
version = "0.3.8" version = "0.3.8"

View file

@ -1,5 +1,11 @@
[workspace] [workspace]
members = ["codegen", "typos", "typos-dict"] members = [
"typos",
"dict/typos", "dict/typos/codegen",
"dict/codespell", "dict/codespell/codegen",
"dict/misspell", "dict/misspell/codegen",
"dict/wikipedia", "dict/wikipedia/codegen",
]
[package] [package]
name = "typos-cli" name = "typos-cli"
@ -24,7 +30,9 @@ codecov = { repository = "crate-ci/typos" }
[dependencies] [dependencies]
typos = { version = "0.1", path = "typos" } typos = { version = "0.1", path = "typos" }
typos-dict = { version = "0.1", path = "typos-dict" } typos-dict = { version = "0.1", path = "dict/typos" }
phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5"
failure = "0.1" failure = "0.1"
structopt = "0.3" structopt = "0.3"
clap = "2" clap = "2"

View file

@ -25,8 +25,17 @@ stages:
steps: steps:
- template: azure/install-rust.yml@templates - template: azure/install-rust.yml@templates
- script: | - script: |
cargo run --package typos-codegen -- --input typos-dict/assets/words.csv --output typos-dict/src/dict_codegen.rs --check cargo run --package typos-codegen -- --output dict/typos/src/dict_codegen.rs --check
displayName: Verify Code-gen displayName: Verify typos-dict
- script: |
cargo run --package codespell-codegen -- --output dict/codespell/src/dict_codegen.rs --check
displayName: Verify codespell-dict
- script: |
cargo run --package misspell-codegen -- --output dict/misspell/src/dict_codegen.rs --check
displayName: Verify misspell-dict
- script: |
cargo run --package wikipedia-codegen -- --output dict/wikipedia/src/dict_codegen.rs --check
displayName: Verify wikipedia-dict
- stage: committed - stage: committed
displayName: Lint History displayName: Lint History
dependsOn: [] dependsOn: []

21
dict/codespell/Cargo.toml Normal file
View file

@ -0,0 +1,21 @@
[package]
name = "codespell-dict"
version = "0.1.1"
authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction"
repository = "https://github.com/crate-ci/typos"
readme = "README.md"
categories = ["development-tools", "text-processing"]
keywords = ["development", "spelling"]
license = "CC-BY-SA-3.0"
edition = "2018"
publish = false
[badges]
azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5"
log = "0.4"

5
dict/codespell/README.md Normal file
View file

@ -0,0 +1,5 @@
Origin: [codespell](https://github.com/codespell-project/codespell)
# License
dictionary.txt is a derived work of English Wikipedia and is released under the Creative Commons Attribution-Share-Alike License 3.0 http://creativecommons.org/licenses/by-sa/3.0/

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,23 @@
[package]
name = "codespell-codegen"
version = "0.1.1"
authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction"
repository = "https://github.com/crate-ci/typos"
readme = "../README.md"
categories = ["text-processing"]
license = "MIT"
edition = "2018"
publish = false
[badges]
azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
phf = { version = "0.8", features = ["unicase"] }
phf_codegen = "0.8"
unicase = "2.5"
itertools = "0.8"
codegenrs = "0.1"
structopt = "0.3"

View file

@ -0,0 +1,80 @@
use structopt::StructOpt;
pub const DICT: &str = include_str!("../../assets/dictionary.txt");
/// Parses a `typo->correction, correction, ...` dictionary, one entry per line.
///
/// Each yielded item is the trimmed typo together with its trimmed, non-empty
/// corrections in the order they appear on the line. Blank lines are skipped.
///
/// # Panics
///
/// The returned iterator panics, naming the offending line, when it reaches a
/// non-blank line that has no `->` separator.
fn parse_dict(raw: &str) -> impl Iterator<Item = (&str, Vec<&str>)> {
    raw.lines()
        // Tolerate blank lines (e.g. a trailing one) instead of panicking on them.
        .filter(|line| !line.trim().is_empty())
        .map(|line| {
            let mut parts = line.splitn(2, "->");
            let typo = parts
                .next()
                .expect("`splitn` always yields at least one item")
                .trim();
            let corrections = parts
                .next()
                .unwrap_or_else(|| panic!("missing `->` separator in dictionary line: {:?}", line))
                .split(',')
                .map(str::trim)
                .filter(|correction| !correction.is_empty())
                .collect();
            (typo, corrections)
        })
}
/// Emits the Rust source for the dictionary into `file`.
///
/// The output is a header comment naming this crate, a clippy `allow`
/// attribute, a `use unicase::UniCase;` line, and a `WORD_DICTIONARY` static
/// whose initializer is a `phf_codegen`-built map from every typo in `DICT`
/// to a slice literal of its corrections.
///
/// # Panics
///
/// Panics if any write to `file` fails.
fn generate<W: std::io::Write>(file: &mut W) {
    let generator = env!("CARGO_PKG_NAME");
    writeln!(file, "// This file is code-genned by {}", generator).unwrap();
    writeln!(file, "#![allow(clippy::unreadable_literal)]").unwrap();
    writeln!(file).unwrap();
    writeln!(file, "use unicase::UniCase;").unwrap();

    writeln!(
        file,
        "pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
    )
    .unwrap();

    let mut map = phf_codegen::Map::new();
    for (typo, corrections) in parse_dict(DICT) {
        // The map value is source text: `{:?}` quotes and escapes each correction.
        let quoted = corrections.iter().map(|correction| format!("{:?}", correction));
        let slice_literal = format!("&[{}]", itertools::join(quoted, ", "));
        map.entry(unicase::UniCase::new(typo), &slice_literal);
    }

    writeln!(file, "{}", map.build()).unwrap();
    writeln!(file, ";").unwrap();
}
// Command-line options for this generator, assembled from two `codegenrs` argument groups.
// Plain `//` comments are used here because `structopt` turns `///` doc comments into help text.
#[derive(Debug, StructOpt)]
#[structopt(rename_all = "kebab-case")]
struct Options {
// `run` calls `write_str` on this group to emit the generated source.
#[structopt(flatten)]
codegen: codegenrs::CodeGenArgs,
// `run` calls `reformat` on this group before the source is written.
// NOTE(review): `rustmft` looks like a transposition of `rustfmt`; renaming would also touch `run`.
#[structopt(flatten)]
rustmft: codegenrs::RustfmtArgs,
}
/// Generates the dictionary source, reformats it, and hands it to the `codegenrs` writer.
///
/// Returns `Ok(0)`, which `main` uses as the process exit code.
///
/// # Errors
///
/// Propagates failures from UTF-8 validation of the generated bytes, from
/// reformatting, and from writing the result.
fn run() -> Result<i32, Box<dyn std::error::Error>> {
    let options = Options::from_args();

    // Generate into memory first so the text can be reformatted before it is written.
    let mut generated = Vec::new();
    generate(&mut generated);

    let source = String::from_utf8(generated)?;
    let formatted = options.rustmft.reformat(&source)?;
    options.codegen.write_str(&formatted)?;

    Ok(0)
}
/// Runs the generator and exits with the status code it reports.
///
/// A failure from `run` is printed to stderr and mapped to exit status 1
/// rather than unwrapped, so the caller sees the error message itself instead
/// of a panic report.
fn main() {
    let code = match run() {
        Ok(code) => code,
        Err(err) => {
            eprintln!("{}", err);
            1
        }
    };
    std::process::exit(code);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,3 @@
mod dict_codegen;
pub use crate::dict_codegen::*;

21
dict/misspell/Cargo.toml Normal file
View file

@ -0,0 +1,21 @@
[package]
name = "misspell-dict"
version = "0.1.1"
authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction"
repository = "https://github.com/crate-ci/typos"
readme = "README.md"
categories = ["development-tools", "text-processing"]
keywords = ["development", "spelling"]
license = "MIT"
edition = "2018"
publish = false
[badges]
azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5"
log = "0.4"

1
dict/misspell/README.md Normal file
View file

@ -0,0 +1 @@
Origin: [misspell](https://github.com/client9/misspell)

1
dict/misspell/assets/.gitattributes vendored Normal file
View file

@ -0,0 +1 @@
* linguist-vendored

31158
dict/misspell/assets/words.go vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,24 @@
[package]
name = "misspell-codegen"
version = "0.1.1"
authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction"
repository = "https://github.com/crate-ci/typos"
readme = "../README.md"
categories = ["text-processing"]
license = "MIT"
edition = "2018"
publish = false
[badges]
azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
phf = { version = "0.8", features = ["unicase"] }
phf_codegen = "0.8"
unicase = "2.5"
itertools = "0.8"
codegenrs = "0.1"
structopt = "0.3"
regex = "1"

View file

@ -0,0 +1,148 @@
use std::collections::HashMap;
use structopt::StructOpt;
pub const DICT: &str = include_str!("../../assets/words.go");
/// Word lists extracted by `parse_dict`, one map per dictionary section.
///
/// Each map goes from a misspelling to its corrections; all strings borrow
/// from the parsed text (`'s`).
struct Words<'s> {
/// Entries read while the `DictMain` section is selected.
main: HashMap<&'s str, Vec<&'s str>>,
/// Entries read while the `DictAmerican` section is selected.
american: HashMap<&'s str, Vec<&'s str>>,
/// Entries read while the `DictBritish` section is selected.
british: HashMap<&'s str, Vec<&'s str>>,
}
/// Extracts the word lists from the text of misspell's `words.go`.
///
/// Anything after `//` on a line is dropped and the rest is trimmed. Blank
/// lines and `package` lines are skipped. A line mentioning `DictMain`,
/// `DictAmerican` or `DictBritish` selects the map that following entries go
/// into, and a line containing `}` deselects it. Every other line is expected
/// to look like `"typo", "correction",`; lines that do not are reported on
/// stderr and otherwise ignored.
///
/// # Panics
///
/// Panics if any entry was found while no dictionary was selected.
fn parse_dict(raw: &str) -> Words {
    // Catch-all for entries that show up outside the three known dictionaries.
    let mut stray = HashMap::new();
    let mut main = HashMap::new();
    let mut american = HashMap::new();
    let mut british = HashMap::new();

    let entry = regex::Regex::new(r#"^"(.*)", "(.*)",$"#).unwrap();

    let mut target = &mut stray;
    for raw_line in raw.lines() {
        let line = raw_line.splitn(2, "//").next().unwrap().trim();
        if line.is_empty() || line.starts_with("package") {
            continue;
        }
        if line.contains("DictMain") {
            target = &mut main;
            continue;
        }
        if line.contains("DictAmerican") {
            target = &mut american;
            continue;
        }
        if line.contains("DictBritish") {
            target = &mut british;
            continue;
        }
        if line.contains('}') {
            target = &mut stray;
            continue;
        }

        match entry.captures(line) {
            Some(found) => {
                let typo = found.get(1).unwrap().as_str();
                let correction = found.get(2).unwrap().as_str();
                target.insert(typo, vec![correction]);
            }
            None => eprintln!("Unknown line: {}", line),
        }
    }

    if !stray.is_empty() {
        panic!("Failed parsing; found extra words: {:#?}", stray);
    }

    Words {
        main,
        american,
        british,
    }
}
fn generate<W: std::io::Write>(file: &mut W) {
writeln!(
file,
"// This file is code-genned by {}",
env!("CARGO_PKG_NAME")
)
.unwrap();
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
writeln!(file).unwrap();
writeln!(file, "use unicase::UniCase;").unwrap();
let Words {
main,
american,
british,
} = parse_dict(DICT);
writeln!(
file,
"pub static MAIN_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
)
.unwrap();
let mut builder = phf_codegen::Map::new();
for (typo, corrections) in main {
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
let value = format!("&[{}]", value);
builder.entry(unicase::UniCase::new(typo), &value);
}
let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap();
writeln!(file, ";").unwrap();
writeln!(file).unwrap();
writeln!(
file,
"pub static AMERICAN_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
)
.unwrap();
let mut builder = phf_codegen::Map::new();
for (typo, corrections) in american {
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
let value = format!("&[{}]", value);
builder.entry(unicase::UniCase::new(typo), &value);
}
let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap();
writeln!(file, ";").unwrap();
writeln!(file).unwrap();
writeln!(
file,
"pub static BRITISH_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
)
.unwrap();
let mut builder = phf_codegen::Map::new();
for (typo, corrections) in british {
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
let value = format!("&[{}]", value);
builder.entry(unicase::UniCase::new(typo), &value);
}
let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap();
writeln!(file, ";").unwrap();
}
// Command-line options for this generator, assembled from two `codegenrs` argument groups.
// Plain `//` comments are used here because `structopt` turns `///` doc comments into help text.
#[derive(Debug, StructOpt)]
#[structopt(rename_all = "kebab-case")]
struct Options {
// `run` calls `write_str` on this group to emit the generated source.
#[structopt(flatten)]
codegen: codegenrs::CodeGenArgs,
// `run` calls `reformat` on this group before the source is written.
// NOTE(review): `rustmft` looks like a transposition of `rustfmt`; renaming would also touch `run`.
#[structopt(flatten)]
rustmft: codegenrs::RustfmtArgs,
}
/// Generates the dictionary source, reformats it, and hands it to the `codegenrs` writer.
///
/// Returns `Ok(0)`, which `main` uses as the process exit code.
///
/// # Errors
///
/// Propagates failures from UTF-8 validation of the generated bytes, from
/// reformatting, and from writing the result.
fn run() -> Result<i32, Box<dyn std::error::Error>> {
    let options = Options::from_args();

    // Generate into memory first so the text can be reformatted before it is written.
    let mut generated = Vec::new();
    generate(&mut generated);

    let source = String::from_utf8(generated)?;
    let formatted = options.rustmft.reformat(&source)?;
    options.codegen.write_str(&formatted)?;

    Ok(0)
}
/// Runs the generator and exits with the status code it reports.
///
/// A failure from `run` is printed to stderr and mapped to exit status 1
/// rather than unwrapped, so the caller sees the error message itself instead
/// of a panic report.
fn main() {
    let code = match run() {
        Ok(code) => code,
        Err(err) => {
            eprintln!("{}", err);
            1
        }
    };
    std::process::exit(code);
}

File diff suppressed because it is too large Load diff

3
dict/misspell/src/lib.rs Normal file
View file

@ -0,0 +1,3 @@
mod dict_codegen;
pub use crate::dict_codegen::*;

View file

@ -4,7 +4,7 @@ version = "0.1.1"
authors = ["Ed Page <eopage@gmail.com>"] authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction" description = "Source Code Spelling Correction"
repository = "https://github.com/crate-ci/typos" repository = "https://github.com/crate-ci/typos"
readme = "../README.md" readme = "../../README.md"
categories = ["development-tools", "text-processing"] categories = ["development-tools", "text-processing"]
keywords = ["development", "spelling"] keywords = ["development", "spelling"]
license = "MIT" license = "MIT"
@ -15,7 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" } codecov = { repository = "crate-ci/typos" }
[dependencies] [dependencies]
typos = { version = "0.1", path = "../typos" }
phf = { version = "0.8", features = ["unicase"] } phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5" unicase = "2.5"
log = "0.4" log = "0.4"

1
dict/typos/assets/.gitattributes vendored Normal file
View file

@ -0,0 +1 @@
* linguist-vendored

View file

Can't render this file because it is too large.

View file

@ -4,7 +4,7 @@ version = "1.0.1"
authors = ["Ed Page <eopage@gmail.com>"] authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction" description = "Source Code Spelling Correction"
repository = "https://github.com/crate-ci/typos" repository = "https://github.com/crate-ci/typos"
readme = "../README.md" readme = "../../../README.md"
categories = ["text-processing"] categories = ["text-processing"]
license = "MIT" license = "MIT"
edition = "2018" edition = "2018"

View file

@ -1,22 +1,25 @@
use structopt::StructOpt; use structopt::StructOpt;
fn generate<W: std::io::Write>(input: &[u8], file: &mut W) { pub const DICT: &[u8] = include_bytes!("../../assets/words.csv");
fn generate<W: std::io::Write>(file: &mut W) {
writeln!( writeln!(
file, file,
"// This file is code-genned by {}", "// This file is code-genned by {}",
env!("CARGO_PKG_NAME") env!("CARGO_PKG_NAME")
) )
.unwrap(); .unwrap();
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
writeln!(file).unwrap(); writeln!(file).unwrap();
writeln!(file, "use unicase::UniCase;").unwrap(); writeln!(file, "use unicase::UniCase;").unwrap();
writeln!( writeln!(
file, file,
"pub(crate) static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = " "pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
) )
.unwrap(); .unwrap();
let mut builder = phf_codegen::Map::new(); let mut builder = phf_codegen::Map::new();
let records: Vec<_> = csv::Reader::from_reader(input) let records: Vec<_> = csv::Reader::from_reader(DICT)
.records() .records()
.map(|r| r.unwrap()) .map(|r| r.unwrap())
.collect(); .collect();
@ -32,8 +35,6 @@ fn generate<W: std::io::Write>(input: &[u8], file: &mut W) {
#[derive(Debug, StructOpt)] #[derive(Debug, StructOpt)]
#[structopt(rename_all = "kebab-case")] #[structopt(rename_all = "kebab-case")]
struct Options { struct Options {
#[structopt(long, parse(from_os_str))]
input: std::path::PathBuf,
#[structopt(flatten)] #[structopt(flatten)]
codegen: codegenrs::CodeGenArgs, codegen: codegenrs::CodeGenArgs,
#[structopt(flatten)] #[structopt(flatten)]
@ -43,12 +44,8 @@ struct Options {
fn run() -> Result<i32, Box<dyn std::error::Error>> { fn run() -> Result<i32, Box<dyn std::error::Error>> {
let options = Options::from_args(); let options = Options::from_args();
let content = {
let mut content = vec![]; let mut content = vec![];
let input = std::fs::read(&options.input)?; generate(&mut content);
generate(&input, &mut content);
content
};
let content = String::from_utf8(content)?; let content = String::from_utf8(content)?;
let content = options.rustmft.reformat(&content)?; let content = options.rustmft.reformat(&content)?;

35615
dict/typos/src/dict_codegen.rs Normal file

File diff suppressed because it is too large Load diff

3
dict/typos/src/lib.rs Normal file
View file

@ -0,0 +1,3 @@
mod dict_codegen;
pub use crate::dict_codegen::*;

21
dict/wikipedia/Cargo.toml Normal file
View file

@ -0,0 +1,21 @@
[package]
name = "wikipedia-dict"
version = "0.1.1"
authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction"
repository = "https://github.com/crate-ci/typos"
readme = "README.md"
categories = ["development-tools", "text-processing"]
keywords = ["development", "spelling"]
license = "CC-BY-SA-3.0"
edition = "2018"
publish = false
[badges]
azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5"
log = "0.4"

5
dict/wikipedia/README.md Normal file
View file

@ -0,0 +1,5 @@
Origin: [Wikipedia:Lists of common misspellings/For machines](https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines)
# License
Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,23 @@
[package]
name = "wikipedia-codegen"
version = "0.1.1"
authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction"
repository = "https://github.com/crate-ci/typos"
readme = "../README.md"
categories = ["text-processing"]
license = "MIT"
edition = "2018"
publish = false
[badges]
azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
phf = { version = "0.8", features = ["unicase"] }
phf_codegen = "0.8"
unicase = "2.5"
itertools = "0.8"
codegenrs = "0.1"
structopt = "0.3"

View file

@ -0,0 +1,80 @@
use structopt::StructOpt;
pub const DICT: &str = include_str!("../../assets/dictionary.txt");
/// Parses a `typo->correction, correction, ...` dictionary, one entry per line.
///
/// Each yielded item is the trimmed typo together with its trimmed, non-empty
/// corrections in the order they appear on the line. Blank lines are skipped.
///
/// # Panics
///
/// The returned iterator panics, naming the offending line, when it reaches a
/// non-blank line that has no `->` separator.
fn parse_dict(raw: &str) -> impl Iterator<Item = (&str, Vec<&str>)> {
    raw.lines()
        // Tolerate blank lines (e.g. a trailing one) instead of panicking on them.
        .filter(|line| !line.trim().is_empty())
        .map(|line| {
            let mut parts = line.splitn(2, "->");
            let typo = parts
                .next()
                .expect("`splitn` always yields at least one item")
                .trim();
            let corrections = parts
                .next()
                .unwrap_or_else(|| panic!("missing `->` separator in dictionary line: {:?}", line))
                .split(',')
                .map(str::trim)
                .filter(|correction| !correction.is_empty())
                .collect();
            (typo, corrections)
        })
}
/// Emits the Rust source for the dictionary into `file`.
///
/// The output is a header comment naming this crate, a clippy `allow`
/// attribute, a `use unicase::UniCase;` line, and a `WORD_DICTIONARY` static
/// whose initializer is a `phf_codegen`-built map from every typo in `DICT`
/// to a slice literal of its corrections.
///
/// # Panics
///
/// Panics if any write to `file` fails.
fn generate<W: std::io::Write>(file: &mut W) {
    let generator = env!("CARGO_PKG_NAME");
    writeln!(file, "// This file is code-genned by {}", generator).unwrap();
    writeln!(file, "#![allow(clippy::unreadable_literal)]").unwrap();
    writeln!(file).unwrap();
    writeln!(file, "use unicase::UniCase;").unwrap();

    writeln!(
        file,
        "pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
    )
    .unwrap();

    let mut map = phf_codegen::Map::new();
    for (typo, corrections) in parse_dict(DICT) {
        // The map value is source text: `{:?}` quotes and escapes each correction.
        let quoted = corrections.iter().map(|correction| format!("{:?}", correction));
        let slice_literal = format!("&[{}]", itertools::join(quoted, ", "));
        map.entry(unicase::UniCase::new(typo), &slice_literal);
    }

    writeln!(file, "{}", map.build()).unwrap();
    writeln!(file, ";").unwrap();
}
// Command-line options for this generator, assembled from two `codegenrs` argument groups.
// Plain `//` comments are used here because `structopt` turns `///` doc comments into help text.
#[derive(Debug, StructOpt)]
#[structopt(rename_all = "kebab-case")]
struct Options {
// `run` calls `write_str` on this group to emit the generated source.
#[structopt(flatten)]
codegen: codegenrs::CodeGenArgs,
// `run` calls `reformat` on this group before the source is written.
// NOTE(review): `rustmft` looks like a transposition of `rustfmt`; renaming would also touch `run`.
#[structopt(flatten)]
rustmft: codegenrs::RustfmtArgs,
}
/// Generates the dictionary source, reformats it, and hands it to the `codegenrs` writer.
///
/// Returns `Ok(0)`, which `main` uses as the process exit code.
///
/// # Errors
///
/// Propagates failures from UTF-8 validation of the generated bytes, from
/// reformatting, and from writing the result.
fn run() -> Result<i32, Box<dyn std::error::Error>> {
    let options = Options::from_args();

    // Generate into memory first so the text can be reformatted before it is written.
    let mut generated = Vec::new();
    generate(&mut generated);

    let source = String::from_utf8(generated)?;
    let formatted = options.rustmft.reformat(&source)?;
    options.codegen.write_str(&formatted)?;

    Ok(0)
}
/// Runs the generator and exits with the status code it reports.
///
/// A failure from `run` is printed to stderr and mapped to exit status 1
/// rather than unwrapped, so the caller sees the error message itself instead
/// of a panic report.
fn main() {
    let code = match run() {
        Ok(code) => code,
        Err(err) => {
            eprintln!("{}", err);
            1
        }
    };
    std::process::exit(code);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,3 @@
mod dict_codegen;
pub use crate::dict_codegen::*;

View file

@ -20,8 +20,7 @@ impl BuiltIn {
} }
pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> { pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
map_lookup(&crate::dict_codegen::WORD_DICTIONARY, word.token()) map_lookup(&typos_dict::WORD_DICTIONARY, word.token()).map(|s| case_correct(s, word.case()))
.map(|s| case_correct(s, word.case()))
} }
} }

View file

@ -7,6 +7,7 @@ use std::io::Write;
use structopt::StructOpt; use structopt::StructOpt;
mod config; mod config;
mod dict;
arg_enum! { arg_enum! {
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Copy, Clone, PartialEq, Eq)]
@ -318,7 +319,7 @@ fn run() -> Result<i32, failure::Error> {
config.default.update(&args.overrides); config.default.update(&args.overrides);
let config = config; let config = config;
let dictionary = typos_dict::BuiltIn::new(); let dictionary = crate::dict::BuiltIn::new();
let parser = typos::tokens::ParserBuilder::new() let parser = typos::tokens::ParserBuilder::new()
.ignore_hex(config.default.ignore_hex()) .ignore_hex(config.default.ignore_hex())

File diff suppressed because it is too large Load diff

View file

@ -1,4 +0,0 @@
mod dict;
mod dict_codegen;
pub use crate::dict::*;