refactor!: Move dict implementation into CLI

This commit is contained in:
Ed Page 2019-10-28 10:31:16 -06:00
parent 5de368ac9d
commit ce1ef2ca30
13 changed files with 35623 additions and 35726 deletions

6
Cargo.lock generated
View file

@ -140,7 +140,6 @@ name = "codespell-codegen"
version = "0.1.1" version = "0.1.1"
dependencies = [ dependencies = [
"codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", "codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"codespell-dict 0.1.1",
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -154,7 +153,6 @@ version = "0.1.1"
dependencies = [ dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"typos 0.1.1",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@ -788,11 +786,13 @@ dependencies = [
"failure 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "failure 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"ignore 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", "ignore 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", "toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
"typos 0.1.1", "typos 0.1.1",
"typos-dict 0.1.1", "typos-dict 0.1.1",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]] [[package]]
@ -804,7 +804,6 @@ dependencies = [
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"typos-dict 0.1.1",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@ -814,7 +813,6 @@ version = "0.1.1"
dependencies = [ dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"typos 0.1.1",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]

View file

@ -29,6 +29,8 @@ codecov = { repository = "crate-ci/typos" }
[dependencies] [dependencies]
typos = { version = "0.1", path = "typos" } typos = { version = "0.1", path = "typos" }
typos-dict = { version = "0.1", path = "dict/typos" } typos-dict = { version = "0.1", path = "dict/typos" }
phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5"
failure = "0.1" failure = "0.1"
structopt = "0.3" structopt = "0.3"
clap = "2" clap = "2"

View file

@ -15,7 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" } codecov = { repository = "crate-ci/typos" }
[dependencies] [dependencies]
typos = { version = "0.1", path = "../../typos" }
phf = { version = "0.8", features = ["unicase"] } phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5" unicase = "2.5"
log = "0.4" log = "0.4"

View file

@ -37,7 +37,7 @@ fn generate<W: std::io::Write>(file: &mut W) {
writeln!( writeln!(
file, file,
"pub(crate) static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ", "pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
) )
.unwrap(); .unwrap();
let mut builder = phf_codegen::Map::new(); let mut builder = phf_codegen::Map::new();

View file

@ -1,98 +0,0 @@
use std::borrow::Cow;
use unicase::UniCase;
use typos::tokens::Case;
/// Dictionary whose word corrections come from the code-generated
/// `WORD_DICTIONARY` table.
///
/// The struct has no fields; it exists so the lookup can be exposed through
/// methods and through the `typos::Dictionary` trait.
#[derive(Debug, Clone, Default)]
pub struct BuiltIn {}
impl BuiltIn {
    /// Creates a new built-in dictionary.
    pub fn new() -> Self {
        Self {}
    }

    /// Identifier-level correction.
    ///
    /// This dictionary only knows about individual words, so the identifier
    /// is ignored and `None` is always returned.
    pub fn correct_ident<'s, 'w>(
        &'s self,
        _ident: typos::tokens::Identifier<'w>,
    ) -> Option<Cow<'s, str>> {
        None
    }

    /// Looks `word` up in the generated `WORD_DICTIONARY` and, when an entry
    /// exists, returns its first listed correction re-cased to match the
    /// case reported by `word.case()`.
    ///
    /// Returns `None` when the word has no entry or the entry lists no
    /// corrections.
    pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
        // HACK: Just assuming the first element is "good enough".
        // `first()` is used instead of `[0]` so that an entry with an empty
        // correction list yields `None` rather than panicking.
        map_lookup(&crate::dict_codegen::WORD_DICTIONARY, word.token())
            .and_then(|corrections| corrections.first())
            .map(|&correction| case_correct(correction, word.case()))
    }
}
/// Exposes the inherent `BuiltIn` lookups through the `typos::Dictionary`
/// trait. Both methods simply forward to the inherent method of the same
/// name.
impl typos::Dictionary for BuiltIn {
    fn correct_ident<'s, 'w>(
        &'s self,
        ident: typos::tokens::Identifier<'w>,
    ) -> Option<Cow<'s, str>> {
        // Path syntax names the inherent method on the type explicitly.
        Self::correct_ident(self, ident)
    }

    fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
        // Path syntax names the inherent method on the type explicitly.
        Self::correct_word(self, word)
    }
}
/// Fetches the value stored under `key` in `map`, wrapping the key in
/// `UniCase` for the probe.
///
/// Returns `None` when the map has no entry for `key`.
fn map_lookup(
    map: &'static phf::Map<UniCase<&'static str>, &'static [&'static str]>,
    key: &str,
) -> Option<&'static [&'static str]> {
    // The map is keyed by `UniCase<&'static str>`, and `Borrow` is too strict
    // to offer an impl from `&'static str` to `Borrow<&'a str>`, so a
    // shorter-lived key cannot be used for the probe directly. The key's
    // lifetime is therefore stretched for the duration of the lookup.
    //
    // See https://github.com/rust-lang/rust/issues/28853#issuecomment-158735548
    //
    // SAFETY: this should be sound because `get` does not store the reference
    // with the expanded lifetime; it is only used for this one lookup.
    let probe = unsafe { ::std::mem::transmute::<_, &'static str>(key) };
    map.get(&UniCase::new(probe)).cloned()
}
/// Re-cases `correction` according to `case`.
///
/// * `Case::Lower` / `Case::None` — the correction is returned as-is,
///   borrowed, with no allocation.
/// * `Case::Title` — the first character is upper-cased and the remainder is
///   appended unchanged.
/// * `Case::Scream` — every character is upper-cased.
fn case_correct(correction: &str, case: Case) -> Cow<'_, str> {
    match case {
        Case::Lower | Case::None => Cow::Borrowed(correction),
        Case::Title => {
            let mut rest = correction.chars();
            let mut titled = String::with_capacity(correction.len());
            if let Some(initial) = rest.next() {
                // Upper-casing one char may expand to several chars.
                titled.extend(initial.to_uppercase());
                // Whatever the iterator has not consumed yet is the tail.
                titled.push_str(rest.as_str());
            }
            Cow::Owned(titled)
        }
        Case::Scream => Cow::Owned(correction.to_uppercase()),
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Runs `case_correct` over a small table of
    /// (correction, case, expected output) rows.
    #[test]
    fn test_case_correct() {
        let table = [
            ("foo", Case::Lower, "foo"),
            ("foo", Case::None, "foo"),
            ("foo", Case::Title, "Foo"),
            ("foo", Case::Scream, "FOO"),
            ("fOo", Case::None, "fOo"),
        ];
        for &(input, case, want) in table.iter() {
            let got = case_correct(input, case);
            assert_eq!(want, got);
        }
    }
}

View file

@ -1,7 +1,7 @@
// This file is code-genned by codespell-codegen // This file is code-genned by codespell-codegen
use unicase::UniCase; use unicase::UniCase;
pub(crate) static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> =
::phf::Map { ::phf::Map {
key: 3213172566270843353, key: 3213172566270843353,
disps: ::phf::Slice::Static(&[ disps: ::phf::Slice::Static(&[

View file

@ -1,4 +1,3 @@
mod dict;
mod dict_codegen; mod dict_codegen;
pub use crate::dict::*; pub use crate::dict_codegen::*;

View file

@ -15,7 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" } codecov = { repository = "crate-ci/typos" }
[dependencies] [dependencies]
typos = { version = "0.1", path = "../../typos" }
phf = { version = "0.8", features = ["unicase"] } phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5" unicase = "2.5"
log = "0.4" log = "0.4"

View file

@ -14,7 +14,7 @@ fn generate<W: std::io::Write>(file: &mut W) {
writeln!( writeln!(
file, file,
"pub(crate) static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = " "pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
) )
.unwrap(); .unwrap();
let mut builder = phf_codegen::Map::new(); let mut builder = phf_codegen::Map::new();

View file

@ -1,8 +1,7 @@
// This file is code-genned by typos-codegen // This file is code-genned by typos-codegen
use unicase::UniCase; use unicase::UniCase;
pub(crate) static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = ::phf::Map {
::phf::Map {
key: 3213172566270843353, key: 3213172566270843353,
disps: ::phf::Slice::Static(&[ disps: ::phf::Slice::Static(&[
(0, 0), (0, 0),
@ -35612,4 +35611,4 @@ pub(crate) static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'st
(UniCase::ascii("narcissisist"), "narcissist"), (UniCase::ascii("narcissisist"), "narcissist"),
(UniCase::ascii("decotations"), "decorations"), (UniCase::ascii("decotations"), "decorations"),
]), ]),
}; };

View file

@ -1,4 +1,3 @@
mod dict;
mod dict_codegen; mod dict_codegen;
pub use crate::dict::*; pub use crate::dict_codegen::*;

View file

@ -20,8 +20,7 @@ impl BuiltIn {
} }
pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> { pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
map_lookup(&crate::dict_codegen::WORD_DICTIONARY, word.token()) map_lookup(&typos_dict::WORD_DICTIONARY, word.token()).map(|s| case_correct(s, word.case()))
.map(|s| case_correct(s, word.case()))
} }
} }

View file

@ -7,6 +7,7 @@ use std::io::Write;
use structopt::StructOpt; use structopt::StructOpt;
mod config; mod config;
mod dict;
arg_enum! { arg_enum! {
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Copy, Clone, PartialEq, Eq)]
@ -318,7 +319,7 @@ fn run() -> Result<i32, failure::Error> {
config.default.update(&args.overrides); config.default.update(&args.overrides);
let config = config; let config = config;
let dictionary = typos_dict::BuiltIn::new(); let dictionary = crate::dict::BuiltIn::new();
let parser = typos::tokens::ParserBuilder::new() let parser = typos::tokens::ParserBuilder::new()
.ignore_hex(config.default.ignore_hex()) .ignore_hex(config.default.ignore_hex())