refactor!: Move dict implementation into CLI

This commit is contained in:
Ed Page 2019-10-28 10:31:16 -06:00
parent 5de368ac9d
commit ce1ef2ca30
13 changed files with 35623 additions and 35726 deletions

6
Cargo.lock generated
View file

@ -140,7 +140,6 @@ name = "codespell-codegen"
version = "0.1.1"
dependencies = [
"codegenrs 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"codespell-dict 0.1.1",
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -154,7 +153,6 @@ version = "0.1.1"
dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"typos 0.1.1",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -788,11 +786,13 @@ dependencies = [
"failure 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"ignore 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
"typos 0.1.1",
"typos-dict 0.1.1",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -804,7 +804,6 @@ dependencies = [
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_codegen 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"typos-dict 0.1.1",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -814,7 +813,6 @@ version = "0.1.1"
dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"typos 0.1.1",
"unicase 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]

View file

@ -29,6 +29,8 @@ codecov = { repository = "crate-ci/typos" }
[dependencies]
typos = { version = "0.1", path = "typos" }
typos-dict = { version = "0.1", path = "dict/typos" }
phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5"
failure = "0.1"
structopt = "0.3"
clap = "2"

View file

@ -15,7 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
typos = { version = "0.1", path = "../../typos" }
phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5"
log = "0.4"

View file

@ -37,7 +37,7 @@ fn generate<W: std::io::Write>(file: &mut W) {
writeln!(
file,
"pub(crate) static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> = ",
)
.unwrap();
let mut builder = phf_codegen::Map::new();

View file

@ -1,98 +0,0 @@
use std::borrow::Cow;
use unicase::UniCase;
use typos::tokens::Case;
/// Dictionary backed by the word list compiled into this crate
/// (`crate::dict_codegen::WORD_DICTIONARY`).
///
/// The type carries no state; every lookup goes straight to the static map.
#[derive(Default)]
pub struct BuiltIn {}

impl BuiltIn {
    /// Creates the built-in dictionary.
    pub fn new() -> Self {
        Self {}
    }

    /// Identifier-level correction.
    ///
    /// Always returns `None`: this dictionary only holds word-level entries.
    pub fn correct_ident<'s, 'w>(
        &'s self,
        _ident: typos::tokens::Identifier<'w>,
    ) -> Option<Cow<'s, str>> {
        None
    }

    /// Looks `word` up in the compiled-in dictionary and, on a hit, returns
    /// the suggested correction re-cased to match `word.case()`.
    ///
    /// Returns `None` when the word is not in the dictionary or when its
    /// entry lists no corrections.
    pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
        // HACK: Just assuming the first element is "good enough"
        //
        // `first()` is used instead of `[0]` so that an entry with an empty
        // correction list yields `None` rather than panicking.
        map_lookup(&crate::dict_codegen::WORD_DICTIONARY, word.token())
            .and_then(|corrections| corrections.first())
            .map(|&first| case_correct(first, word.case()))
    }
}
// Trait adapter: exposes the inherent `BuiltIn` lookups through
// `typos::Dictionary`. Each method forwards to the inherent method of the
// same name.
impl typos::Dictionary for BuiltIn {
// Forwards to `BuiltIn::correct_ident`, written as an explicit path rather
// than as a method call on `self`.
fn correct_ident<'s, 'w>(
&'s self,
ident: typos::tokens::Identifier<'w>,
) -> Option<Cow<'s, str>> {
BuiltIn::correct_ident(self, ident)
}
// Forwards to `BuiltIn::correct_word`, again via an explicit path.
fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
BuiltIn::correct_word(self, word)
}
}
/// Looks `key` up in a static `phf` map keyed by `UniCase` strings.
///
/// Returns the stored `'static` slice of corrections on a hit, `None`
/// otherwise.
fn map_lookup(
    map: &'static phf::Map<UniCase<&'static str>, &'static [&'static str]>,
    key: &str,
) -> Option<&'static [&'static str]> {
    // SAFETY: This transmute should be safe as `get` will not store the
    // reference with the expanded lifetime. It is needed because `Borrow` is
    // overly strict and can't have an impl for `&'static str` to
    // `Borrow<&'a str>`.
    //
    // See https://github.com/rust-lang/rust/issues/28853#issuecomment-158735548
    let probe = UniCase::new(unsafe { ::std::mem::transmute::<&str, &'static str>(key) });
    map.get(&probe).cloned()
}
/// Re-cases `correction` to follow the casing style described by `case`.
///
/// * `Case::Lower` / `Case::None`: the correction is returned borrowed and
///   unchanged.
/// * `Case::Title`: the first character is upper-cased and the remainder is
///   copied verbatim.
/// * `Case::Scream`: every character is upper-cased.
fn case_correct(correction: &str, case: Case) -> Cow<'_, str> {
    match case {
        Case::Lower | Case::None => Cow::Borrowed(correction),
        Case::Title => {
            let mut titled = String::with_capacity(correction.len());
            let mut rest = correction.chars();
            if let Some(initial) = rest.next() {
                // Upper-casing one char may produce several (e.g. 'ß' -> "SS").
                titled.extend(initial.to_uppercase());
                // Whatever follows the first character is kept as-is.
                titled.push_str(rest.as_str());
            }
            Cow::Owned(titled)
        }
        Case::Scream => {
            let mut shouted = String::new();
            for ch in correction.chars() {
                shouted.extend(ch.to_uppercase());
            }
            Cow::Owned(shouted)
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Table-driven check of `case_correct` for each `Case` variant.
    #[test]
    fn test_case_correct() {
        // Each row is (correction, case to apply, expected result).
        let table: &[(&str, Case, &str)] = &[
            ("foo", Case::Lower, "foo"),
            ("foo", Case::None, "foo"),
            ("foo", Case::Title, "Foo"),
            ("foo", Case::Scream, "FOO"),
            ("fOo", Case::None, "fOo"),
        ];
        for &(input, case, want) in table {
            let got = case_correct(input, case);
            assert_eq!(want, got);
        }
    }
}

View file

@ -1,7 +1,7 @@
// This file is code-genned by codespell-codegen
use unicase::UniCase;
pub(crate) static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> =
pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &[&'static str]> =
::phf::Map {
key: 3213172566270843353,
disps: ::phf::Slice::Static(&[

View file

@ -1,4 +1,3 @@
mod dict;
mod dict_codegen;
pub use crate::dict::*;
pub use crate::dict_codegen::*;

View file

@ -15,7 +15,6 @@ azure-devops = { project = "crate-ci", pipeline = "typos" }
codecov = { repository = "crate-ci/typos" }
[dependencies]
typos = { version = "0.1", path = "../../typos" }
phf = { version = "0.8", features = ["unicase"] }
unicase = "2.5"
log = "0.4"

View file

@ -14,7 +14,7 @@ fn generate<W: std::io::Write>(file: &mut W) {
writeln!(
file,
"pub(crate) static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
)
.unwrap();
let mut builder = phf_codegen::Map::new();

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,3 @@
mod dict;
mod dict_codegen;
pub use crate::dict::*;
pub use crate::dict_codegen::*;

View file

@ -20,8 +20,7 @@ impl BuiltIn {
}
pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
map_lookup(&crate::dict_codegen::WORD_DICTIONARY, word.token())
.map(|s| case_correct(s, word.case()))
map_lookup(&typos_dict::WORD_DICTIONARY, word.token()).map(|s| case_correct(s, word.case()))
}
}

View file

@ -7,6 +7,7 @@ use std::io::Write;
use structopt::StructOpt;
mod config;
mod dict;
arg_enum! {
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
@ -318,7 +319,7 @@ fn run() -> Result<i32, failure::Error> {
config.default.update(&args.overrides);
let config = config;
let dictionary = typos_dict::BuiltIn::new();
let dictionary = crate::dict::BuiltIn::new();
let parser = typos::tokens::ParserBuilder::new()
.ignore_hex(config.default.ignore_hex())