mirror of
https://github.com/crate-ci/typos.git
synced 2025-01-08 15:54:47 -05:00
feat: Expose wikipedia's dict to Rust
This commit is contained in:
parent
3daafd1ea7
commit
0f06e602cb
10 changed files with 9666 additions and 0 deletions
21
Cargo.lock
generated
21
Cargo.lock
generated
|
@ -899,6 +899,27 @@ name = "wasi"
|
|||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "wikipedia-codegen"
|
||||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wikipedia-dict"
|
||||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.8"
|
||||
|
|
|
@ -4,6 +4,7 @@ members = [
|
|||
"dict/typos", "dict/typos/codegen",
|
||||
"dict/codespell", "dict/codespell/codegen",
|
||||
"dict/misspell", "dict/misspell/codegen",
|
||||
"dict/wikipedia", "dict/wikipedia/codegen",
|
||||
]
|
||||
|
||||
[package]
|
||||
|
|
|
@ -33,6 +33,9 @@ stages:
|
|||
- script: |
|
||||
cargo run --package misspell-codegen -- --output dict/misspell/src/dict_codegen.rs --check
|
||||
displayName: Verify misspell-dict
|
||||
- script: |
|
||||
cargo run --package wikipedia-codegen -- --output dict/wikipedia/src/dict_codegen.rs --check
|
||||
displayName: Verify wikipedia-dict
|
||||
- stage: committed
|
||||
displayName: Lint History
|
||||
dependsOn: []
|
||||
|
|
20
dict/wikipedia/Cargo.toml
Normal file
20
dict/wikipedia/Cargo.toml
Normal file
|
@ -0,0 +1,20 @@
|
|||
[package]
|
||||
name = "wikipedia-dict"
|
||||
version = "0.1.1"
|
||||
authors = ["Ed Page <eopage@gmail.com>"]
|
||||
description = "Source Code Spelling Correction"
|
||||
repository = "https://github.com/crate-ci/typos"
|
||||
readme = "README.md"
|
||||
categories = ["development-tools", "text-processing"]
|
||||
keywords = ["development", "spelling"]
|
||||
license = "CC-BY-SA-3.0"
|
||||
edition = "2018"
|
||||
|
||||
[badges]
|
||||
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||
codecov = { repository = "crate-ci/typos" }
|
||||
|
||||
[dependencies]
|
||||
phf = { version = "0.8", features = ["unicase"] }
|
||||
unicase = "2.5"
|
||||
log = "0.4"
|
5
dict/wikipedia/README.md
Normal file
5
dict/wikipedia/README.md
Normal file
|
@ -0,0 +1,5 @@
|
|||
Origin: [Wikipedia:Lists of common misspellings/For machines](https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines)
|
||||
|
||||
# License
|
||||
|
||||
Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply.
|
4282
dict/wikipedia/assets/dictionary.txt
Normal file
4282
dict/wikipedia/assets/dictionary.txt
Normal file
File diff suppressed because it is too large
Load diff
23
dict/wikipedia/codegen/Cargo.toml
Normal file
23
dict/wikipedia/codegen/Cargo.toml
Normal file
|
@ -0,0 +1,23 @@
|
|||
[package]
|
||||
name = "wikipedia-codegen"
|
||||
version = "0.1.1"
|
||||
authors = ["Ed Page <eopage@gmail.com>"]
|
||||
description = "Source Code Spelling Correction"
|
||||
repository = "https://github.com/crate-ci/typos"
|
||||
readme = "../README.md"
|
||||
categories = ["text-processing"]
|
||||
license = "MIT"
|
||||
edition = "2018"
|
||||
publish = false
|
||||
|
||||
[badges]
|
||||
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||
codecov = { repository = "crate-ci/typos" }
|
||||
|
||||
[dependencies]
|
||||
phf = { version = "0.8", features = ["unicase"] }
|
||||
phf_codegen = "0.8"
|
||||
unicase = "2.5"
|
||||
itertools = "0.8"
|
||||
codegenrs = "0.1"
|
||||
structopt = "0.3"
|
80
dict/wikipedia/codegen/src/main.rs
Normal file
80
dict/wikipedia/codegen/src/main.rs
Normal file
|
@ -0,0 +1,80 @@
|
|||
use structopt::StructOpt;
|
||||
|
||||
/// Raw text of `../../assets/dictionary.txt` (path relative to this source file),
/// embedded into the binary at compile time by `include_str!`.
///
/// `parse_dict` reads it line by line, splitting each line on `->` and the
/// right-hand side on `,`.
pub const DICT: &str = include_str!("../../assets/dictionary.txt");
|
||||
|
||||
/// Parses the raw misspelling list into `(typo, corrections)` pairs.
///
/// Every non-blank line is expected to look like `typo->fix1, fix2`. Only the first
/// `->` on a line acts as the separator; the text after it is split on commas, each
/// correction is trimmed, and empty corrections are dropped. The typo itself is
/// trimmed as well.
///
/// Blank or whitespace-only lines are skipped instead of aborting the whole run.
///
/// # Panics
///
/// The returned iterator panics when it reaches a non-blank line that contains no
/// `->` separator; the panic message quotes the offending line.
fn parse_dict(raw: &str) -> impl Iterator<Item = (&str, Vec<&str>)> {
    raw.lines()
        .filter(|line| !line.trim().is_empty())
        .map(|line| {
            let mut parts = line.splitn(2, "->");
            // `splitn` always yields at least one item, so the fallback is never used.
            let typo = parts.next().unwrap_or("").trim();
            let corrections = parts
                .next()
                .unwrap_or_else(|| panic!("dictionary line is missing `->`: {:?}", line))
                .split(',')
                .map(str::trim)
                .filter(|c| !c.is_empty())
                .collect();
            (typo, corrections)
        })
}
|
||||
|
||||
fn generate<W: std::io::Write>(file: &mut W) {
|
||||
writeln!(
|
||||
file,
|
||||
"// This file is code-genned by {}",
|
||||
env!("CARGO_PKG_NAME")
|
||||
)
|
||||
.unwrap();
|
||||
writeln!(file).unwrap();
|
||||
writeln!(file, "use unicase::UniCase;").unwrap();
|
||||
|
||||
let dict = parse_dict(DICT);
|
||||
|
||||
writeln!(
|
||||
file,
|
||||
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
|
||||
)
|
||||
.unwrap();
|
||||
let mut builder = phf_codegen::Map::new();
|
||||
for (typo, corrections) in dict {
|
||||
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
|
||||
let value = format!("&[{}]", value);
|
||||
builder.entry(unicase::UniCase::new(typo), &value);
|
||||
}
|
||||
let codegenned = builder.build();
|
||||
writeln!(file, "{}", codegenned).unwrap();
|
||||
writeln!(file, ";").unwrap();
|
||||
writeln!(file, "").unwrap();
|
||||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
#[structopt(rename_all = "kebab-case")]
|
||||
struct Options {
|
||||
#[structopt(flatten)]
|
||||
codegen: codegenrs::CodeGenArgs,
|
||||
#[structopt(flatten)]
|
||||
rustmft: codegenrs::RustfmtArgs,
|
||||
}
|
||||
|
||||
fn run() -> Result<i32, Box<dyn std::error::Error>> {
|
||||
let options = Options::from_args();
|
||||
|
||||
let mut content = vec![];
|
||||
generate(&mut content);
|
||||
|
||||
let content = String::from_utf8(content)?;
|
||||
let content = options.rustmft.reformat(&content)?;
|
||||
options.codegen.write_str(&content)?;
|
||||
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let code = run().unwrap();
|
||||
std::process::exit(code);
|
||||
}
|
5228
dict/wikipedia/src/dict_codegen.rs
Normal file
5228
dict/wikipedia/src/dict_codegen.rs
Normal file
File diff suppressed because it is too large
Load diff
3
dict/wikipedia/src/lib.rs
Normal file
3
dict/wikipedia/src/lib.rs
Normal file
|
@ -0,0 +1,3 @@
|
|||
mod dict_codegen;
|
||||
|
||||
pub use crate::dict_codegen::*;
|
Loading…
Reference in a new issue