mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-21 16:41:01 -05:00
feat: Expose codespell's dict to Rust
This commit is contained in:
parent
8f428b8fec
commit
1cbdb3a77a
11 changed files with 51234 additions and 2 deletions
23
Cargo.lock
generated
23
Cargo.lock
generated
|
@ -135,6 +135,29 @@ dependencies = [
|
||||||
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "codespell-codegen"
|
||||||
|
version = "0.1.1"
|
||||||
|
dependencies = [
|
||||||
|
"codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"codespell-dict 0.1.1",
|
||||||
|
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "codespell-dict"
|
||||||
|
version = "0.1.1"
|
||||||
|
dependencies = [
|
||||||
|
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"typos 0.1.1",
|
||||||
|
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crossbeam-channel"
|
name = "crossbeam-channel"
|
||||||
version = "0.3.9"
|
version = "0.3.9"
|
||||||
|
|
|
@ -1,5 +1,9 @@
|
||||||
[workspace]
|
[workspace]
|
||||||
members = ["typos", "dict/typos", "dict/typos/codegen"]
|
members = [
|
||||||
|
"typos",
|
||||||
|
"dict/typos", "dict/typos/codegen",
|
||||||
|
"dict/codespell", "dict/codespell/codegen",
|
||||||
|
]
|
||||||
|
|
||||||
[package]
|
[package]
|
||||||
name = "typos-cli"
|
name = "typos-cli"
|
||||||
|
|
|
@ -26,7 +26,10 @@ stages:
|
||||||
- template: azure/install-rust.yml@templates
|
- template: azure/install-rust.yml@templates
|
||||||
- script: |
|
- script: |
|
||||||
cargo run --package typos-codegen -- --input dict/typos/assets/words.csv --output dict/typos/src/dict_codegen.rs --check
|
cargo run --package typos-codegen -- --input dict/typos/assets/words.csv --output dict/typos/src/dict_codegen.rs --check
|
||||||
displayName: Verify Code-gen
|
displayName: Verify typos-dict
|
||||||
|
- script: |
|
||||||
|
cargo run --package codespell-codegen -- --output dict/codespell/src/dict_codegen.rs --check
|
||||||
|
displayName: Verify codespell-dict
|
||||||
- stage: committed
|
- stage: committed
|
||||||
displayName: Lint History
|
displayName: Lint History
|
||||||
dependsOn: []
|
dependsOn: []
|
||||||
|
|
21
dict/codespell/Cargo.toml
Normal file
21
dict/codespell/Cargo.toml
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
[package]
|
||||||
|
name = "codespell-dict"
|
||||||
|
version = "0.1.1"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Source Code Spelling Correction"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "README.md"
|
||||||
|
categories = ["development-tools", "text-processing"]
|
||||||
|
keywords = ["development", "spelling"]
|
||||||
|
license = "CC-BY-SA-3.0"
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
typos = { version = "0.1", path = "../../typos" }
|
||||||
|
phf = { version = "0.8", features = ["unicase"] }
|
||||||
|
unicase = "2.5"
|
||||||
|
log = "0.4"
|
5
dict/codespell/README.md
Normal file
5
dict/codespell/README.md
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
Origin: [codespell](https://github.com/codespell-project/codespell)
|
||||||
|
|
||||||
|
# License
|
||||||
|
|
||||||
|
`dictionary.txt` is a derived work of English Wikipedia and is released under the [Creative Commons Attribution-ShareAlike 3.0 License](http://creativecommons.org/licenses/by-sa/3.0/).
|
22786
dict/codespell/assets/dictionary.txt
Normal file
22786
dict/codespell/assets/dictionary.txt
Normal file
File diff suppressed because it is too large
Load diff
23
dict/codespell/codegen/Cargo.toml
Normal file
23
dict/codespell/codegen/Cargo.toml
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
[package]
|
||||||
|
name = "codespell-codegen"
|
||||||
|
version = "0.1.1"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Source Code Spelling Correction"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "../README.md"
|
||||||
|
categories = ["text-processing"]
|
||||||
|
license = "MIT"
|
||||||
|
edition = "2018"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
phf = { version = "0.8", features = ["unicase"] }
|
||||||
|
phf_codegen = "0.8"
|
||||||
|
unicase = "2.5"
|
||||||
|
itertools = "0.8"
|
||||||
|
codegenrs = "0.1"
|
||||||
|
structopt = "0.3"
|
80
dict/codespell/codegen/src/main.rs
Normal file
80
dict/codespell/codegen/src/main.rs
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
use structopt::StructOpt;
|
||||||
|
|
||||||
|
pub const DICT: &str = include_str!("../../assets/dictionary.txt");
|
||||||
|
|
||||||
|
/// Parses codespell's `dictionary.txt` format into `(typo, corrections)` pairs.
///
/// Each entry has the form `typo->correction[, correction...]`. Whitespace around the typo
/// and around every correction is trimmed, and empty corrections (such as the one produced
/// by a trailing comma) are dropped. Blank lines are skipped.
///
/// # Panics
///
/// The returned iterator panics when it reaches a non-blank line without a `->` separator,
/// reporting the offending line.
fn parse_dict(raw: &str) -> impl Iterator<Item = (&str, Vec<&str>)> {
    raw.lines()
        // A stray blank line carries no entry; skip it instead of aborting the whole run.
        .filter(|line| !line.trim().is_empty())
        .map(|line| {
            let mut parts = line.splitn(2, "->");
            let typo = parts
                .next()
                .expect("`splitn` always yields at least one item")
                .trim();
            let corrections = parts
                .next()
                .unwrap_or_else(|| panic!("malformed dictionary entry (missing `->`): {:?}", line))
                .split(',')
                .map(str::trim)
                .filter(|correction| !correction.is_empty())
                .collect();
            (typo, corrections)
        })
}
|
||||||
|
|
||||||
|
fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"// This file is code-genned by {}",
|
||||||
|
env!("CARGO_PKG_NAME")
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
writeln!(file, "use unicase::UniCase;").unwrap();
|
||||||
|
|
||||||
|
let dict = parse_dict(DICT);
|
||||||
|
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"pub(crate) static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let mut builder = phf_codegen::Map::new();
|
||||||
|
for (typo, corrections) in dict {
|
||||||
|
let value = itertools::join(corrections.iter().map(|s| format!("{:?}", s)), ", ");
|
||||||
|
let value = format!("&[{}]", value);
|
||||||
|
builder.entry(unicase::UniCase::new(typo), &value);
|
||||||
|
}
|
||||||
|
let codegenned = builder.build();
|
||||||
|
writeln!(file, "{}", codegenned).unwrap();
|
||||||
|
writeln!(file, ";").unwrap();
|
||||||
|
writeln!(file, "").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, StructOpt)]
|
||||||
|
#[structopt(rename_all = "kebab-case")]
|
||||||
|
struct Options {
|
||||||
|
#[structopt(flatten)]
|
||||||
|
codegen: codegenrs::CodeGenArgs,
|
||||||
|
#[structopt(flatten)]
|
||||||
|
rustmft: codegenrs::RustfmtArgs,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run() -> Result<i32, Box<dyn std::error::Error>> {
|
||||||
|
let options = Options::from_args();
|
||||||
|
|
||||||
|
let mut content = vec![];
|
||||||
|
generate(&mut content);
|
||||||
|
|
||||||
|
let content = String::from_utf8(content)?;
|
||||||
|
let content = options.rustmft.reformat(&content)?;
|
||||||
|
options.codegen.write_str(&content)?;
|
||||||
|
|
||||||
|
Ok(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let code = run().unwrap();
|
||||||
|
std::process::exit(code);
|
||||||
|
}
|
98
dict/codespell/src/dict.rs
Normal file
98
dict/codespell/src/dict.rs
Normal file
|
@ -0,0 +1,98 @@
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use unicase::UniCase;
|
||||||
|
|
||||||
|
use typos::tokens::Case;
|
||||||
|
|
||||||
|
/// Dictionary that corrects words using the code-generated codespell word list.
///
/// The type holds no state of its own.
#[derive(Clone, Debug, Default)]
pub struct BuiltIn {}
|
||||||
|
|
||||||
|
impl BuiltIn {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn correct_ident<'s, 'w>(
|
||||||
|
&'s self,
|
||||||
|
_ident: typos::tokens::Identifier<'w>,
|
||||||
|
) -> Option<Cow<'s, str>> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
|
||||||
|
// HACK: Just assuming the first element is "good enough"
|
||||||
|
map_lookup(&crate::dict_codegen::WORD_DICTIONARY, word.token())
|
||||||
|
.map(|s| case_correct(s[0], word.case()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl typos::Dictionary for BuiltIn {
|
||||||
|
fn correct_ident<'s, 'w>(
|
||||||
|
&'s self,
|
||||||
|
ident: typos::tokens::Identifier<'w>,
|
||||||
|
) -> Option<Cow<'s, str>> {
|
||||||
|
BuiltIn::correct_ident(self, ident)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
|
||||||
|
BuiltIn::correct_word(self, word)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn map_lookup(
|
||||||
|
map: &'static phf::Map<UniCase<&'static str>, &'static [&'static str]>,
|
||||||
|
key: &str,
|
||||||
|
) -> Option<&'static [&'static str]> {
|
||||||
|
// This transmute should be safe as `get` will not store the reference with
|
||||||
|
// the expanded lifetime. This is due to `Borrow` being overly strict and
|
||||||
|
// can't have an impl for `&'static str` to `Borrow<&'a str>`.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// See https://github.com/rust-lang/rust/issues/28853#issuecomment-158735548
|
||||||
|
unsafe {
|
||||||
|
let key = ::std::mem::transmute::<_, &'static str>(key);
|
||||||
|
map.get(&UniCase::new(key)).cloned()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn case_correct(correction: &str, case: Case) -> Cow<'_, str> {
|
||||||
|
match case {
|
||||||
|
Case::Lower | Case::None => correction.into(),
|
||||||
|
Case::Title => {
|
||||||
|
let mut title = String::with_capacity(correction.as_bytes().len());
|
||||||
|
let mut char_indices = correction.char_indices();
|
||||||
|
if let Some((_, c)) = char_indices.next() {
|
||||||
|
title.extend(c.to_uppercase());
|
||||||
|
if let Some((i, _)) = char_indices.next() {
|
||||||
|
title.push_str(&correction[i..]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
title.into()
|
||||||
|
}
|
||||||
|
Case::Scream => correction
|
||||||
|
.chars()
|
||||||
|
.flat_map(|c| c.to_uppercase())
|
||||||
|
.collect::<String>()
|
||||||
|
.into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use super::*;

    /// Checks `case_correct` against one sample per `Case` variant, plus a mixed-case
    /// correction that must pass through untouched under `Case::None`.
    #[test]
    fn test_case_correct() {
        let table = [
            ("foo", Case::Lower, "foo"),
            ("foo", Case::None, "foo"),
            ("foo", Case::Title, "Foo"),
            ("foo", Case::Scream, "FOO"),
            ("fOo", Case::None, "fOo"),
        ];
        for &(input, case, want) in table.iter() {
            let got = case_correct(input, case);
            assert_eq!(want, got);
        }
    }
}
|
28185
dict/codespell/src/dict_codegen.rs
Normal file
28185
dict/codespell/src/dict_codegen.rs
Normal file
File diff suppressed because it is too large
Load diff
4
dict/codespell/src/lib.rs
Normal file
4
dict/codespell/src/lib.rs
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
mod dict;
|
||||||
|
mod dict_codegen;
|
||||||
|
|
||||||
|
pub use crate::dict::*;
|
Loading…
Reference in a new issue