mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-22 09:01:04 -05:00
commit
2684b9b228
39 changed files with 165303 additions and 35642 deletions
1
.gitattributes
vendored
1
.gitattributes
vendored
|
@ -1 +0,0 @@
|
||||||
typos-dict/assets/* linguist-vendored
|
|
67
Cargo.lock
generated
67
Cargo.lock
generated
|
@ -135,6 +135,27 @@ dependencies = [
|
||||||
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "codespell-codegen"
|
||||||
|
version = "0.1.1"
|
||||||
|
dependencies = [
|
||||||
|
"codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "codespell-dict"
|
||||||
|
version = "0.1.1"
|
||||||
|
dependencies = [
|
||||||
|
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crossbeam-channel"
|
name = "crossbeam-channel"
|
||||||
version = "0.3.9"
|
version = "0.3.9"
|
||||||
|
@ -343,6 +364,28 @@ dependencies = [
|
||||||
"libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)",
|
"libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "misspell-codegen"
|
||||||
|
version = "0.1.1"
|
||||||
|
dependencies = [
|
||||||
|
"codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "misspell-dict"
|
||||||
|
version = "0.1.1"
|
||||||
|
dependencies = [
|
||||||
|
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "normalize-line-endings"
|
name = "normalize-line-endings"
|
||||||
version = "0.2.2"
|
version = "0.2.2"
|
||||||
|
@ -765,11 +808,13 @@ dependencies = [
|
||||||
"failure 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
"failure 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"ignore 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
"ignore 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)",
|
"serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
"toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"typos 0.1.1",
|
"typos 0.1.1",
|
||||||
"typos-dict 0.1.1",
|
"typos-dict 0.1.1",
|
||||||
|
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -790,7 +835,6 @@ version = "0.1.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"typos 0.1.1",
|
|
||||||
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -855,6 +899,27 @@ name = "wasi"
|
||||||
version = "0.7.0"
|
version = "0.7.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wikipedia-codegen"
|
||||||
|
version = "0.1.1"
|
||||||
|
dependencies = [
|
||||||
|
"codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wikipedia-dict"
|
||||||
|
version = "0.1.1"
|
||||||
|
dependencies = [
|
||||||
|
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi"
|
name = "winapi"
|
||||||
version = "0.3.8"
|
version = "0.3.8"
|
||||||
|
|
12
Cargo.toml
12
Cargo.toml
|
@ -1,5 +1,11 @@
|
||||||
[workspace]
|
[workspace]
|
||||||
members = ["codegen", "typos", "typos-dict"]
|
members = [
|
||||||
|
"typos",
|
||||||
|
"dict/typos", "dict/typos/codegen",
|
||||||
|
"dict/codespell", "dict/codespell/codegen",
|
||||||
|
"dict/misspell", "dict/misspell/codegen",
|
||||||
|
"dict/wikipedia", "dict/wikipedia/codegen",
|
||||||
|
]
|
||||||
|
|
||||||
[package]
|
[package]
|
||||||
name = "typos-cli"
|
name = "typos-cli"
|
||||||
|
@ -24,7 +30,9 @@ codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
typos = { version = "0.1", path = "typos" }
|
typos = { version = "0.1", path = "typos" }
|
||||||
typos-dict = { version = "0.1", path = "typos-dict" }
|
typos-dict = { version = "0.1", path = "dict/typos" }
|
||||||
|
phf = { version = "0.8", features = ["unicase"] }
|
||||||
|
unicase = "2.5"
|
||||||
failure = "0.1"
|
failure = "0.1"
|
||||||
structopt = "0.3"
|
structopt = "0.3"
|
||||||
clap = "2"
|
clap = "2"
|
||||||
|
|
|
@ -25,8 +25,17 @@ stages:
|
||||||
steps:
|
steps:
|
||||||
- template: azure/install-rust.yml@templates
|
- template: azure/install-rust.yml@templates
|
||||||
- script: |
|
- script: |
|
||||||
cargo run --package typos-codegen -- --input typos-dict/assets/words.csv --output typos-dict/src/dict_codegen.rs --check
|
cargo run --package typos-codegen -- --output dict/typos/src/dict_codegen.rs --check
|
||||||
displayName: Verify Code-gen
|
displayName: Verify typos-dict
|
||||||
|
- script: |
|
||||||
|
cargo run --package codespell-codegen -- --output dict/codespell/src/dict_codegen.rs --check
|
||||||
|
displayName: Verify codespell-dict
|
||||||
|
- script: |
|
||||||
|
cargo run --package misspell-codegen -- --output dict/misspell/src/dict_codegen.rs --check
|
||||||
|
displayName: Verify misspell-dict
|
||||||
|
- script: |
|
||||||
|
cargo run --package wikipedia-codegen -- --output dict/wikipedia/src/dict_codegen.rs --check
|
||||||
|
displayName: Verify wikipedia-dict
|
||||||
- stage: committed
|
- stage: committed
|
||||||
displayName: Lint History
|
displayName: Lint History
|
||||||
dependsOn: []
|
dependsOn: []
|
||||||
|
|
21
dict/codespell/Cargo.toml
Normal file
21
dict/codespell/Cargo.toml
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
[package]
|
||||||
|
name = "codespell-dict"
|
||||||
|
version = "0.1.1"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Source Code Spelling Correction"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "README.md"
|
||||||
|
categories = ["development-tools", "text-processing"]
|
||||||
|
keywords = ["development", "spelling"]
|
||||||
|
license = "CC-BY-SA-3.0"
|
||||||
|
edition = "2018"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
phf = { version = "0.8", features = ["unicase"] }
|
||||||
|
unicase = "2.5"
|
||||||
|
log = "0.4"
|
5
dict/codespell/README.md
Normal file
5
dict/codespell/README.md
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
Origin: [codespell](https://github.com/codespell-project/codespell)
|
||||||
|
|
||||||
|
# License
|
||||||
|
|
||||||
|
`dictionary.txt` is a derived work of English Wikipedia and is released under the [Creative Commons Attribution-Share-Alike License 3.0](http://creativecommons.org/licenses/by-sa/3.0/).
|
22786
dict/codespell/assets/dictionary.txt
Normal file
22786
dict/codespell/assets/dictionary.txt
Normal file
File diff suppressed because it is too large
Load diff
23
dict/codespell/codegen/Cargo.toml
Normal file
23
dict/codespell/codegen/Cargo.toml
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
[package]
|
||||||
|
name = "codespell-codegen"
|
||||||
|
version = "0.1.1"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Source Code Spelling Correction"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "../README.md"
|
||||||
|
categories = ["text-processing"]
|
||||||
|
license = "MIT"
|
||||||
|
edition = "2018"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
phf = { version = "0.8", features = ["unicase"] }
|
||||||
|
phf_codegen = "0.8"
|
||||||
|
unicase = "2.5"
|
||||||
|
itertools = "0.8"
|
||||||
|
codegenrs = "0.1"
|
||||||
|
structopt = "0.3"
|
80
dict/codespell/codegen/src/main.rs
Normal file
80
dict/codespell/codegen/src/main.rs
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
use structopt::StructOpt;
|
||||||
|
|
||||||
|
/// Raw dictionary text, embedded at compile time from `../../assets/dictionary.txt`
/// (resolved relative to this source file).
pub const DICT: &str = include_str!("../../assets/dictionary.txt");
|
||||||
|
|
||||||
|
/// Parses a `typo->correction[, correction...]` dictionary, one entry per line.
///
/// Each yielded item is the trimmed typo paired with its trimmed corrections.
/// Empty correction fields (for example after a trailing comma) are dropped,
/// and blank lines are skipped instead of aborting the whole run.
///
/// # Panics
///
/// The returned iterator panics when it reaches a non-blank line that has no
/// `->` separator; the message names the offending line.
fn parse_dict(raw: &str) -> impl Iterator<Item = (&str, Vec<&str>)> {
    raw.lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .map(|line| {
            let mut parts = line.splitn(2, "->");
            let typo = parts
                .next()
                .expect("`splitn` always yields at least one item")
                .trim();
            let corrections = parts
                .next()
                .unwrap_or_else(|| panic!("Malformed dictionary line (missing `->`): {:?}", line))
                .split(',')
                .map(str::trim)
                .filter(|c| !c.is_empty())
                .collect();
            (typo, corrections)
        })
}
|
||||||
|
|
||||||
|
fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"// This file is code-genned by {}",
|
||||||
|
env!("CARGO_PKG_NAME")
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
writeln!(file, "use unicase::UniCase;").unwrap();
|
||||||
|
|
||||||
|
let dict = parse_dict(DICT);
|
||||||
|
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let mut builder = phf_codegen::Map::new();
|
||||||
|
for (typo, corrections) in dict {
|
||||||
|
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
|
||||||
|
let value = format!("&[{}]", value);
|
||||||
|
builder.entry(unicase::UniCase::new(typo), &value);
|
||||||
|
}
|
||||||
|
let codegenned = builder.build();
|
||||||
|
writeln!(file, "{}", codegenned).unwrap();
|
||||||
|
writeln!(file, ";").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, StructOpt)]
|
||||||
|
#[structopt(rename_all = "kebab-case")]
|
||||||
|
struct Options {
|
||||||
|
#[structopt(flatten)]
|
||||||
|
codegen: codegenrs::CodeGenArgs,
|
||||||
|
#[structopt(flatten)]
|
||||||
|
rustmft: codegenrs::RustfmtArgs,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run() -> Result<i32, Box<dyn std::error::Error>> {
|
||||||
|
let options = Options::from_args();
|
||||||
|
|
||||||
|
let mut content = vec![];
|
||||||
|
generate(&mut content);
|
||||||
|
|
||||||
|
let content = String::from_utf8(content)?;
|
||||||
|
let content = options.rustmft.reformat(&content)?;
|
||||||
|
options.codegen.write_str(&content)?;
|
||||||
|
|
||||||
|
Ok(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let code = run().unwrap();
|
||||||
|
std::process::exit(code);
|
||||||
|
}
|
28186
dict/codespell/src/dict_codegen.rs
Normal file
28186
dict/codespell/src/dict_codegen.rs
Normal file
File diff suppressed because it is too large
Load diff
3
dict/codespell/src/lib.rs
Normal file
3
dict/codespell/src/lib.rs
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
mod dict_codegen;
|
||||||
|
|
||||||
|
pub use crate::dict_codegen::*;
|
21
dict/misspell/Cargo.toml
Normal file
21
dict/misspell/Cargo.toml
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
[package]
|
||||||
|
name = "misspell-dict"
|
||||||
|
version = "0.1.1"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Source Code Spelling Correction"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "README.md"
|
||||||
|
categories = ["development-tools", "text-processing"]
|
||||||
|
keywords = ["development", "spelling"]
|
||||||
|
license = "MIT"
|
||||||
|
edition = "2018"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
phf = { version = "0.8", features = ["unicase"] }
|
||||||
|
unicase = "2.5"
|
||||||
|
log = "0.4"
|
1
dict/misspell/README.md
Normal file
1
dict/misspell/README.md
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Origin: [misspell](https://github.com/client9/misspell)
|
1
dict/misspell/assets/.gitattributes
vendored
Normal file
1
dict/misspell/assets/.gitattributes
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
* linguist-vendored
|
31158
dict/misspell/assets/words.go
vendored
Normal file
31158
dict/misspell/assets/words.go
vendored
Normal file
File diff suppressed because it is too large
Load diff
24
dict/misspell/codegen/Cargo.toml
Normal file
24
dict/misspell/codegen/Cargo.toml
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
[package]
|
||||||
|
name = "misspell-codegen"
|
||||||
|
version = "0.1.1"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Source Code Spelling Correction"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "../README.md"
|
||||||
|
categories = ["text-processing"]
|
||||||
|
license = "MIT"
|
||||||
|
edition = "2018"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
phf = { version = "0.8", features = ["unicase"] }
|
||||||
|
phf_codegen = "0.8"
|
||||||
|
unicase = "2.5"
|
||||||
|
itertools = "0.8"
|
||||||
|
codegenrs = "0.1"
|
||||||
|
structopt = "0.3"
|
||||||
|
regex = "1"
|
148
dict/misspell/codegen/src/main.rs
Normal file
148
dict/misspell/codegen/src/main.rs
Normal file
|
@ -0,0 +1,148 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use structopt::StructOpt;
|
||||||
|
|
||||||
|
/// Raw text of the vendored Go word list, embedded at compile time from
/// `../../assets/words.go` (resolved relative to this source file).
pub const DICT: &str = include_str!("../../assets/words.go");
|
||||||
|
|
||||||
|
/// The word lists produced by `parse_dict`, one map per dictionary section.
///
/// Every map goes from the first quoted string on an entry line to a list
/// holding the second quoted string; `generate` treats these as a typo and its
/// corrections.
#[derive(Debug)]
struct Words<'s> {
    /// Entries that follow a line mentioning `DictMain`.
    main: HashMap<&'s str, Vec<&'s str>>,
    /// Entries that follow a line mentioning `DictAmerican`.
    american: HashMap<&'s str, Vec<&'s str>>,
    /// Entries that follow a line mentioning `DictBritish`.
    british: HashMap<&'s str, Vec<&'s str>>,
}
|
||||||
|
|
||||||
|
fn parse_dict(raw: &str) -> Words {
|
||||||
|
let mut bad = HashMap::new();
|
||||||
|
let mut main = HashMap::new();
|
||||||
|
let mut american = HashMap::new();
|
||||||
|
let mut british = HashMap::new();
|
||||||
|
|
||||||
|
let mapping = regex::Regex::new(r#"^"(.*)", "(.*)",$"#).unwrap();
|
||||||
|
|
||||||
|
let mut current = &mut bad;
|
||||||
|
for line in raw.lines() {
|
||||||
|
let line = line.splitn(2, "//").next().unwrap().trim();
|
||||||
|
if line.is_empty() || line.starts_with("package") {
|
||||||
|
continue;
|
||||||
|
} else if line.contains("DictMain") {
|
||||||
|
current = &mut main;
|
||||||
|
} else if line.contains("DictAmerican") {
|
||||||
|
current = &mut american;
|
||||||
|
} else if line.contains("DictBritish") {
|
||||||
|
current = &mut british;
|
||||||
|
} else if line.contains('}') {
|
||||||
|
current = &mut bad;
|
||||||
|
} else {
|
||||||
|
let captures = mapping.captures(line);
|
||||||
|
if let Some(captures) = captures {
|
||||||
|
current.insert(
|
||||||
|
captures.get(1).unwrap().as_str(),
|
||||||
|
vec![captures.get(2).unwrap().as_str()],
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
eprintln!("Unknown line: {}", line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !bad.is_empty() {
|
||||||
|
panic!("Failed parsing; found extra words: {:#?}", bad);
|
||||||
|
}
|
||||||
|
|
||||||
|
Words {
|
||||||
|
main,
|
||||||
|
american,
|
||||||
|
british,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"// This file is code-genned by {}",
|
||||||
|
env!("CARGO_PKG_NAME")
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
writeln!(file, "use unicase::UniCase;").unwrap();
|
||||||
|
|
||||||
|
let Words {
|
||||||
|
main,
|
||||||
|
american,
|
||||||
|
british,
|
||||||
|
} = parse_dict(DICT);
|
||||||
|
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"pub static MAIN_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let mut builder = phf_codegen::Map::new();
|
||||||
|
for (typo, corrections) in main {
|
||||||
|
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
|
||||||
|
let value = format!("&[{}]", value);
|
||||||
|
builder.entry(unicase::UniCase::new(typo), &value);
|
||||||
|
}
|
||||||
|
let codegenned = builder.build();
|
||||||
|
writeln!(file, "{}", codegenned).unwrap();
|
||||||
|
writeln!(file, ";").unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"pub static AMERICAN_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let mut builder = phf_codegen::Map::new();
|
||||||
|
for (typo, corrections) in american {
|
||||||
|
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
|
||||||
|
let value = format!("&[{}]", value);
|
||||||
|
builder.entry(unicase::UniCase::new(typo), &value);
|
||||||
|
}
|
||||||
|
let codegenned = builder.build();
|
||||||
|
writeln!(file, "{}", codegenned).unwrap();
|
||||||
|
writeln!(file, ";").unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"pub static BRITISH_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let mut builder = phf_codegen::Map::new();
|
||||||
|
for (typo, corrections) in british {
|
||||||
|
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
|
||||||
|
let value = format!("&[{}]", value);
|
||||||
|
builder.entry(unicase::UniCase::new(typo), &value);
|
||||||
|
}
|
||||||
|
let codegenned = builder.build();
|
||||||
|
writeln!(file, "{}", codegenned).unwrap();
|
||||||
|
writeln!(file, ";").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, StructOpt)]
|
||||||
|
#[structopt(rename_all = "kebab-case")]
|
||||||
|
struct Options {
|
||||||
|
#[structopt(flatten)]
|
||||||
|
codegen: codegenrs::CodeGenArgs,
|
||||||
|
#[structopt(flatten)]
|
||||||
|
rustmft: codegenrs::RustfmtArgs,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run() -> Result<i32, Box<dyn std::error::Error>> {
|
||||||
|
let options = Options::from_args();
|
||||||
|
|
||||||
|
let mut content = vec![];
|
||||||
|
generate(&mut content);
|
||||||
|
|
||||||
|
let content = String::from_utf8(content)?;
|
||||||
|
let content = options.rustmft.reformat(&content)?;
|
||||||
|
options.codegen.write_str(&content)?;
|
||||||
|
|
||||||
|
Ok(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let code = run().unwrap();
|
||||||
|
std::process::exit(code);
|
||||||
|
}
|
37481
dict/misspell/src/dict_codegen.rs
Normal file
37481
dict/misspell/src/dict_codegen.rs
Normal file
File diff suppressed because it is too large
Load diff
3
dict/misspell/src/lib.rs
Normal file
3
dict/misspell/src/lib.rs
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
mod dict_codegen;
|
||||||
|
|
||||||
|
pub use crate::dict_codegen::*;
|
|
@ -4,7 +4,7 @@ version = "0.1.1"
|
||||||
authors = ["Ed Page <eopage@gmail.com>"]
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
description = "Source Code Spelling Correction"
|
description = "Source Code Spelling Correction"
|
||||||
repository = "https://github.com/crate-ci/typos"
|
repository = "https://github.com/crate-ci/typos"
|
||||||
readme = "../README.md"
|
readme = "../../README.md"
|
||||||
categories = ["development-tools", "text-processing"]
|
categories = ["development-tools", "text-processing"]
|
||||||
keywords = ["development", "spelling"]
|
keywords = ["development", "spelling"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
@ -15,7 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
codecov = { repository = "crate-ci/typos" }
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
typos = { version = "0.1", path = "../typos" }
|
|
||||||
phf = { version = "0.8", features = ["unicase"] }
|
phf = { version = "0.8", features = ["unicase"] }
|
||||||
unicase = "2.5"
|
unicase = "2.5"
|
||||||
log = "0.4"
|
log = "0.4"
|
1
dict/typos/assets/.gitattributes
vendored
Normal file
1
dict/typos/assets/.gitattributes
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
* linguist-vendored
|
Can't render this file because it is too large.
|
|
@ -4,7 +4,7 @@ version = "1.0.1"
|
||||||
authors = ["Ed Page <eopage@gmail.com>"]
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
description = "Source Code Spelling Correction"
|
description = "Source Code Spelling Correction"
|
||||||
repository = "https://github.com/crate-ci/typos"
|
repository = "https://github.com/crate-ci/typos"
|
||||||
readme = "../README.md"
|
readme = "../../../README.md"
|
||||||
categories = ["text-processing"]
|
categories = ["text-processing"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
edition = "2018"
|
edition = "2018"
|
|
@ -1,22 +1,25 @@
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
fn generate<W: std::io::Write>(input: &[u8], file: &mut W) {
|
pub const DICT: &[u8] = include_bytes!("../../assets/words.csv");
|
||||||
|
|
||||||
|
fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
writeln!(
|
writeln!(
|
||||||
file,
|
file,
|
||||||
"// This file is code-genned by {}",
|
"// This file is code-genned by {}",
|
||||||
env!("CARGO_PKG_NAME")
|
env!("CARGO_PKG_NAME")
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
||||||
writeln!(file).unwrap();
|
writeln!(file).unwrap();
|
||||||
writeln!(file, "use unicase::UniCase;").unwrap();
|
writeln!(file, "use unicase::UniCase;").unwrap();
|
||||||
|
|
||||||
writeln!(
|
writeln!(
|
||||||
file,
|
file,
|
||||||
"pub(crate) static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
|
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let mut builder = phf_codegen::Map::new();
|
let mut builder = phf_codegen::Map::new();
|
||||||
let records: Vec<_> = csv::Reader::from_reader(input)
|
let records: Vec<_> = csv::Reader::from_reader(DICT)
|
||||||
.records()
|
.records()
|
||||||
.map(|r| r.unwrap())
|
.map(|r| r.unwrap())
|
||||||
.collect();
|
.collect();
|
||||||
|
@ -32,8 +35,6 @@ fn generate<W: std::io::Write>(input: &[u8], file: &mut W) {
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
#[structopt(rename_all = "kebab-case")]
|
#[structopt(rename_all = "kebab-case")]
|
||||||
struct Options {
|
struct Options {
|
||||||
#[structopt(long, parse(from_os_str))]
|
|
||||||
input: std::path::PathBuf,
|
|
||||||
#[structopt(flatten)]
|
#[structopt(flatten)]
|
||||||
codegen: codegenrs::CodeGenArgs,
|
codegen: codegenrs::CodeGenArgs,
|
||||||
#[structopt(flatten)]
|
#[structopt(flatten)]
|
||||||
|
@ -43,12 +44,8 @@ struct Options {
|
||||||
fn run() -> Result<i32, Box<dyn std::error::Error>> {
|
fn run() -> Result<i32, Box<dyn std::error::Error>> {
|
||||||
let options = Options::from_args();
|
let options = Options::from_args();
|
||||||
|
|
||||||
let content = {
|
|
||||||
let mut content = vec![];
|
let mut content = vec![];
|
||||||
let input = std::fs::read(&options.input)?;
|
generate(&mut content);
|
||||||
generate(&input, &mut content);
|
|
||||||
content
|
|
||||||
};
|
|
||||||
|
|
||||||
let content = String::from_utf8(content)?;
|
let content = String::from_utf8(content)?;
|
||||||
let content = options.rustmft.reformat(&content)?;
|
let content = options.rustmft.reformat(&content)?;
|
35615
dict/typos/src/dict_codegen.rs
Normal file
35615
dict/typos/src/dict_codegen.rs
Normal file
File diff suppressed because it is too large
Load diff
3
dict/typos/src/lib.rs
Normal file
3
dict/typos/src/lib.rs
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
mod dict_codegen;
|
||||||
|
|
||||||
|
pub use crate::dict_codegen::*;
|
21
dict/wikipedia/Cargo.toml
Normal file
21
dict/wikipedia/Cargo.toml
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
[package]
|
||||||
|
name = "wikipedia-dict"
|
||||||
|
version = "0.1.1"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Source Code Spelling Correction"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "README.md"
|
||||||
|
categories = ["development-tools", "text-processing"]
|
||||||
|
keywords = ["development", "spelling"]
|
||||||
|
license = "CC-BY-SA-3.0"
|
||||||
|
edition = "2018"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
phf = { version = "0.8", features = ["unicase"] }
|
||||||
|
unicase = "2.5"
|
||||||
|
log = "0.4"
|
5
dict/wikipedia/README.md
Normal file
5
dict/wikipedia/README.md
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
Origin: [Wikipedia:Lists of common misspellings/For machines](https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines)
|
||||||
|
|
||||||
|
# License
|
||||||
|
|
||||||
|
Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply.
|
4282
dict/wikipedia/assets/dictionary.txt
Normal file
4282
dict/wikipedia/assets/dictionary.txt
Normal file
File diff suppressed because it is too large
Load diff
23
dict/wikipedia/codegen/Cargo.toml
Normal file
23
dict/wikipedia/codegen/Cargo.toml
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
[package]
|
||||||
|
name = "wikipedia-codegen"
|
||||||
|
version = "0.1.1"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Source Code Spelling Correction"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "../README.md"
|
||||||
|
categories = ["text-processing"]
|
||||||
|
license = "MIT"
|
||||||
|
edition = "2018"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
phf = { version = "0.8", features = ["unicase"] }
|
||||||
|
phf_codegen = "0.8"
|
||||||
|
unicase = "2.5"
|
||||||
|
itertools = "0.8"
|
||||||
|
codegenrs = "0.1"
|
||||||
|
structopt = "0.3"
|
80
dict/wikipedia/codegen/src/main.rs
Normal file
80
dict/wikipedia/codegen/src/main.rs
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
use structopt::StructOpt;
|
||||||
|
|
||||||
|
/// Raw dictionary text, embedded at compile time from `../../assets/dictionary.txt`
/// (resolved relative to this source file).
pub const DICT: &str = include_str!("../../assets/dictionary.txt");
|
||||||
|
|
||||||
|
/// Parses a `typo->correction[, correction...]` dictionary, one entry per line.
///
/// Each yielded item is the trimmed typo paired with its trimmed corrections.
/// Empty correction fields (for example after a trailing comma) are dropped,
/// and blank lines are skipped instead of aborting the whole run.
///
/// # Panics
///
/// The returned iterator panics when it reaches a non-blank line that has no
/// `->` separator; the message names the offending line.
fn parse_dict(raw: &str) -> impl Iterator<Item = (&str, Vec<&str>)> {
    raw.lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .map(|line| {
            let mut parts = line.splitn(2, "->");
            let typo = parts
                .next()
                .expect("`splitn` always yields at least one item")
                .trim();
            let corrections = parts
                .next()
                .unwrap_or_else(|| panic!("Malformed dictionary line (missing `->`): {:?}", line))
                .split(',')
                .map(str::trim)
                .filter(|c| !c.is_empty())
                .collect();
            (typo, corrections)
        })
}
|
||||||
|
|
||||||
|
fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"// This file is code-genned by {}",
|
||||||
|
env!("CARGO_PKG_NAME")
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
writeln!(file, "use unicase::UniCase;").unwrap();
|
||||||
|
|
||||||
|
let dict = parse_dict(DICT);
|
||||||
|
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let mut builder = phf_codegen::Map::new();
|
||||||
|
for (typo, corrections) in dict {
|
||||||
|
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
|
||||||
|
let value = format!("&[{}]", value);
|
||||||
|
builder.entry(unicase::UniCase::new(typo), &value);
|
||||||
|
}
|
||||||
|
let codegenned = builder.build();
|
||||||
|
writeln!(file, "{}", codegenned).unwrap();
|
||||||
|
writeln!(file, ";").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, StructOpt)]
|
||||||
|
#[structopt(rename_all = "kebab-case")]
|
||||||
|
struct Options {
|
||||||
|
#[structopt(flatten)]
|
||||||
|
codegen: codegenrs::CodeGenArgs,
|
||||||
|
#[structopt(flatten)]
|
||||||
|
rustmft: codegenrs::RustfmtArgs,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run() -> Result<i32, Box<dyn std::error::Error>> {
|
||||||
|
let options = Options::from_args();
|
||||||
|
|
||||||
|
let mut content = vec![];
|
||||||
|
generate(&mut content);
|
||||||
|
|
||||||
|
let content = String::from_utf8(content)?;
|
||||||
|
let content = options.rustmft.reformat(&content)?;
|
||||||
|
options.codegen.write_str(&content)?;
|
||||||
|
|
||||||
|
Ok(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let code = run().unwrap();
|
||||||
|
std::process::exit(code);
|
||||||
|
}
|
5229
dict/wikipedia/src/dict_codegen.rs
Normal file
5229
dict/wikipedia/src/dict_codegen.rs
Normal file
File diff suppressed because it is too large
Load diff
3
dict/wikipedia/src/lib.rs
Normal file
3
dict/wikipedia/src/lib.rs
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
mod dict_codegen;
|
||||||
|
|
||||||
|
pub use crate::dict_codegen::*;
|
|
@ -20,8 +20,7 @@ impl BuiltIn {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
|
pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
|
||||||
map_lookup(&crate::dict_codegen::WORD_DICTIONARY, word.token())
|
map_lookup(&typos_dict::WORD_DICTIONARY, word.token()).map(|s| case_correct(s, word.case()))
|
||||||
.map(|s| case_correct(s, word.case()))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,7 @@ use std::io::Write;
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
mod config;
|
mod config;
|
||||||
|
mod dict;
|
||||||
|
|
||||||
arg_enum! {
|
arg_enum! {
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||||
|
@ -318,7 +319,7 @@ fn run() -> Result<i32, failure::Error> {
|
||||||
config.default.update(&args.overrides);
|
config.default.update(&args.overrides);
|
||||||
let config = config;
|
let config = config;
|
||||||
|
|
||||||
let dictionary = typos_dict::BuiltIn::new();
|
let dictionary = crate::dict::BuiltIn::new();
|
||||||
|
|
||||||
let parser = typos::tokens::ParserBuilder::new()
|
let parser = typos::tokens::ParserBuilder::new()
|
||||||
.ignore_hex(config.default.ignore_hex())
|
.ignore_hex(config.default.ignore_hex())
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,4 +0,0 @@
|
||||||
mod dict;
|
|
||||||
mod dict_codegen;
|
|
||||||
|
|
||||||
pub use crate::dict::*;
|
|
Loading…
Reference in a new issue