refactor: Split out typos-dict

This commit is contained in:
Ed Page 2019-08-08 10:24:50 -05:00
parent 164ee9cb84
commit 1bdd1c928a
17 changed files with 173 additions and 127 deletions

36
Cargo.lock generated
View file

@ -78,6 +78,7 @@ dependencies = [
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-automata 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -158,16 +159,19 @@ dependencies = [
[[package]]
name = "csv"
version = "1.0.5"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"bstr 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"csv-core 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
"ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "csv-core"
version = "0.1.5"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
@ -697,6 +701,11 @@ name = "ryu"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "ryu"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "same-file"
version = "1.0.4"
@ -880,14 +889,11 @@ name = "typos"
version = "0.1.0"
dependencies = [
"bstr 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"csv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"derive_more 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)",
"failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)",
@ -910,6 +916,19 @@ dependencies = [
"structopt 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
"toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
"typos 0.1.0",
"typos-dict 0.1.0",
]
[[package]]
name = "typos-dict"
version = "0.1.0"
dependencies = [
"csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
"typos 0.1.0",
"unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -1033,8 +1052,8 @@ dependencies = [
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum crossbeam-channel 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "137bc235f622ffaa0428e3854e24acb53291fc0b3ff6fb2cb75a8be6fb02f06b"
"checksum crossbeam-utils 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "41ee4864f4797060e52044376f7d107429ce1fb43460021b126424b7180ee21a"
"checksum csv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "9fd1c44c58078cfbeaf11fbb3eac9ae5534c23004ed770cc4bfb48e658ae4f04"
"checksum csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa5cdef62f37e6ffe7d1f07a381bc0db32b7a3ff1cac0de56cb0d81e71f53d65"
"checksum csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "37519ccdfd73a75821cac9319d4fce15a81b9fcf75f951df5b9988aa3a0af87d"
"checksum csv-core 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9b5cadb6b25c77aeff80ba701712494213f4a8418fcda2ee11b6560c3ad0bf4c"
"checksum derive_more 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a141330240c921ec6d074a3e188a7c7ef95668bb95e7d44fa0e5778ec2a7afe"
"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
"checksum either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3be565ca5c557d7f59e7cfcf1844f9e3033650c929c6566f511e8005f205c1d0"
@ -1097,6 +1116,7 @@ dependencies = [
"checksum rustc-demangle 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "adacaae16d02b6ec37fdc7acfcddf365978de76d1983d3ee22afc260e1ca9619"
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
"checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7"
"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997"
"checksum same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8f20c4be53a8a1ff4c1f1b2bd14570d2f634628709752f0702ecdd2b3f9a5267"
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"

View file

@ -1,5 +1,5 @@
[workspace]
members = ["typos"]
members = ["typos", "typos-dict"]
[package]
name = "typos-cli"
@ -23,6 +23,7 @@ iterate_unstable = []
[dependencies]
typos = { version = "0.1", path = "typos" }
typos-dict = { version = "0.1", path = "typos-dict" }
failure = "0.1"
structopt = "0.2"
clap = "2"

View file

@ -4,12 +4,12 @@ extern crate test;
#[bench]
fn load_corrections(b: &mut test::Bencher) {
b.iter(|| typos::BuiltIn::new());
b.iter(|| typos_dict::BuiltIn::new());
}
#[bench]
fn correct_word_hit(b: &mut test::Bencher) {
let corrections = typos::BuiltIn::new();
let corrections = typos_dict::BuiltIn::new();
let input = typos::tokens::Word::new("successs", 0).unwrap();
assert_eq!(
corrections.correct_word(input),
@ -20,7 +20,7 @@ fn correct_word_hit(b: &mut test::Bencher) {
#[bench]
fn correct_word_miss(b: &mut test::Bencher) {
let corrections = typos::BuiltIn::new();
let corrections = typos_dict::BuiltIn::new();
let input = typos::tokens::Word::new("success", 0).unwrap();
assert_eq!(corrections.correct_word(input), None);
b.iter(|| corrections.correct_word(input));

View file

@ -28,4 +28,4 @@ fn main() {
}
";
pub const CORPUS: &str = include_str!("../typos/assets/words.csv");
pub const CORPUS: &str = include_str!("../typos-dict/assets/words.csv");

View file

@ -12,7 +12,7 @@ fn process_empty(b: &mut test::Bencher) {
let sample_path = temp.child("sample");
sample_path.write_str(data::EMPTY).unwrap();
let corrections = typos::BuiltIn::new();
let corrections = typos_dict::BuiltIn::new();
let parser = typos::tokens::Parser::new();
let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
@ -26,7 +26,7 @@ fn process_no_tokens(b: &mut test::Bencher) {
let sample_path = temp.child("sample");
sample_path.write_str(data::NO_TOKENS).unwrap();
let corrections = typos::BuiltIn::new();
let corrections = typos_dict::BuiltIn::new();
let parser = typos::tokens::Parser::new();
let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
@ -40,7 +40,7 @@ fn process_single_token(b: &mut test::Bencher) {
let sample_path = temp.child("sample");
sample_path.write_str(data::SINGLE_TOKEN).unwrap();
let corrections = typos::BuiltIn::new();
let corrections = typos_dict::BuiltIn::new();
let parser = typos::tokens::Parser::new();
let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
@ -54,7 +54,7 @@ fn process_sherlock(b: &mut test::Bencher) {
let sample_path = temp.child("sample");
sample_path.write_str(data::SHERLOCK).unwrap();
let corrections = typos::BuiltIn::new();
let corrections = typos_dict::BuiltIn::new();
let parser = typos::tokens::Parser::new();
let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
@ -68,7 +68,7 @@ fn process_code(b: &mut test::Bencher) {
let sample_path = temp.child("sample");
sample_path.write_str(data::CODE).unwrap();
let corrections = typos::BuiltIn::new();
let corrections = typos_dict::BuiltIn::new();
let parser = typos::tokens::Parser::new();
let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
@ -82,7 +82,7 @@ fn process_corpus(b: &mut test::Bencher) {
let sample_path = temp.child("sample");
sample_path.write_str(data::CORPUS).unwrap();
let corrections = typos::BuiltIn::new();
let corrections = typos_dict::BuiltIn::new();
let parser = typos::tokens::Parser::new();
let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));

View file

@ -310,7 +310,7 @@ fn run() -> Result<i32, failure::Error> {
config.default.update(&args.overrides);
let config = config;
let dictionary = typos::BuiltIn::new();
let dictionary = typos_dict::BuiltIn::new();
let parser = typos::tokens::ParserBuilder::new()
.ignore_hex(config.default.ignore_hex())

31
typos-dict/Cargo.toml Normal file
View file

@ -0,0 +1,31 @@
[package]
name = "typos-dict"
version = "0.1.0"
authors = ["Ed Page <eopage@gmail.com>"]
description = "Source Code Spelling Correction"
repository = "https://github.com/epage/typos"
documentation = "https://docs.rs/typos-dict"
readme = "README.md"
categories = ["development-tools", "text-processing"]
keywords = ["development", "spelling"]
license = "MIT"
edition = "2018"
[badges]
travis-ci = { repository = "epage/typos" }
appveyor = { repository = "epage/typos" }
[features]
# Support quickly iterating
iterate_unstable = []
[dependencies]
typos = { version = "0.1", path = "../typos" }
phf = { version = "0.7", features = ["unicase"] }
unicase = "1.1"
log = "0.4"
[build-dependencies]
phf_codegen = "0.7"
csv = "1.0"
unicase = "1.1"

View file

Can't render this file because it is too large.

97
typos-dict/src/dict.rs Normal file
View file

@ -0,0 +1,97 @@
use std::borrow::Cow;
use unicase::UniCase;
use typos::tokens::Case;
/// Dictionary whose corrections come from the table in `dict_codegen`.
///
/// The type has no fields; every lookup goes straight to `WORD_DICTIONARY`.
#[derive(Default)]
pub struct BuiltIn {}

impl BuiltIn {
    /// Creates the built-in dictionary.
    pub fn new() -> Self {
        BuiltIn {}
    }

    /// Identifier-level correction.
    ///
    /// The argument is ignored and `None` is always returned.
    pub fn correct_ident<'s, 'w>(
        &'s self,
        _ident: typos::tokens::Identifier<'w>,
    ) -> Option<Cow<'s, str>> {
        None
    }

    /// Word-level correction.
    ///
    /// Looks the word's token up in `WORD_DICTIONARY`; on a hit the stored
    /// correction is re-cased with `case_correct` using the word's own case.
    /// Returns `None` when the token is not in the table.
    pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
        let found = map_lookup(&crate::dict_codegen::WORD_DICTIONARY, word.token())?;
        Some(case_correct(found, word.case()))
    }
}
/// Trait adapter: both methods forward to the inherent methods of the same
/// name on `BuiltIn`.
impl typos::Dictionary for BuiltIn {
    fn correct_ident<'s, 'w>(
        &'s self,
        ident: typos::tokens::Identifier<'w>,
    ) -> Option<Cow<'s, str>> {
        // `Self::` paths pick the inherent method ahead of this trait method,
        // so this delegates rather than recursing.
        Self::correct_ident(self, ident)
    }

    fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
        // Same delegation as above, to the inherent `correct_word`.
        Self::correct_word(self, word)
    }
}
/// Looks `key` up in a static `phf` map whose keys are `UniCase` strings.
///
/// Returns a copy of the stored `&'static str` value, or `None` when the map
/// has no entry for `key`.
fn map_lookup(
    map: &'static phf::Map<UniCase<&'static str>, &'static str>,
    key: &str,
) -> Option<&'static str> {
    // `Borrow` is too strict to let a `UniCase<&'static str>` key be queried
    // with a shorter-lived `UniCase<&'a str>`, so the key has to be presented
    // with a `'static` lifetime.
    //
    // See https://github.com/rust-lang/rust/issues/28853#issuecomment-158735548
    //
    // SAFETY: the lifetime-extended reference is only handed to the `get` call
    // below, which should not store it, so it is not used after the real
    // borrow of `key` ends.
    let key = unsafe { ::std::mem::transmute::<_, &'static str>(key) };
    map.get(&UniCase(key)).cloned()
}
/// Re-cases `correction` according to `case`.
///
/// * `Case::Lower` / `Case::None`: the correction is returned borrowed, unchanged.
/// * `Case::Title`: the first character is uppercased, the rest is copied as-is.
/// * `Case::Scream`: every character is uppercased.
fn case_correct(correction: &str, case: Case) -> Cow<'_, str> {
    match case {
        Case::Lower | Case::None => Cow::Borrowed(correction),
        Case::Title => {
            let mut titled = String::with_capacity(correction.len());
            let mut rest = correction.chars();
            if let Some(first) = rest.next() {
                // Uppercasing one char may yield several chars, hence `extend`.
                titled.extend(first.to_uppercase());
                // Whatever follows the first character is appended verbatim.
                titled.push_str(rest.as_str());
            }
            Cow::Owned(titled)
        }
        Case::Scream => {
            let shouted: String = correction.chars().flat_map(char::to_uppercase).collect();
            Cow::Owned(shouted)
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Runs `case_correct` over one sample per `Case` variant plus a
    /// mixed-case input that must pass through untouched under `Case::None`.
    #[test]
    fn test_case_correct() {
        let table = [
            ("foo", Case::Lower, "foo"),
            ("foo", Case::None, "foo"),
            ("foo", Case::Title, "Foo"),
            ("foo", Case::Scream, "FOO"),
            ("fOo", Case::None, "fOo"),
        ];
        for &(correction, case, expected) in &table {
            let actual = case_correct(correction, case);
            assert_eq!(expected, actual);
        }
    }
}

4
typos-dict/src/lib.rs Normal file
View file

@ -0,0 +1,4 @@
mod dict;
mod dict_codegen;
pub use crate::dict::*;

View file

@ -15,13 +15,8 @@ edition = "2018"
travis-ci = { repository = "epage/typos" }
appveyor = { repository = "epage/typos" }
[features]
# Support quickly iterating
iterate_unstable = []
[dependencies]
failure = "0.1"
phf = { version = "0.7", features = ["unicase"] }
regex = "1.0"
lazy_static = "1.2.0"
serde = { version = "1.0", features = ["derive"] }
@ -32,8 +27,3 @@ bstr = "0.2"
log = "0.4"
unicode-segmentation = "1.3.0"
derive_more = "0.15.0"
[build-dependencies]
phf_codegen = "0.7"
csv = "1.0"
unicase = "1.1"

View file

@ -1,9 +1,5 @@
use std::borrow::Cow;
use unicase::UniCase;
use crate::tokens::Case;
pub trait Dictionary {
fn correct_ident<'s, 'w>(
&'s self,
@ -12,95 +8,3 @@ pub trait Dictionary {
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Cow<'s, str>>;
}
/// Dictionary whose corrections come from the table in `dict_codegen`.
///
/// The type has no fields; every lookup goes straight to `WORD_DICTIONARY`.
#[derive(Default)]
pub struct BuiltIn {}

impl BuiltIn {
    /// Creates the built-in dictionary.
    pub fn new() -> Self {
        BuiltIn {}
    }

    /// Identifier-level correction.
    ///
    /// The argument is ignored and `None` is always returned.
    pub fn correct_ident<'s, 'w>(
        &'s self,
        _ident: crate::tokens::Identifier<'w>,
    ) -> Option<Cow<'s, str>> {
        None
    }

    /// Word-level correction.
    ///
    /// Looks the word's token up in `WORD_DICTIONARY`; on a hit the stored
    /// correction is re-cased with `case_correct` using the word's own case.
    /// Returns `None` when the token is not in the table.
    pub fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Cow<'s, str>> {
        let found = map_lookup(&crate::dict_codegen::WORD_DICTIONARY, word.token())?;
        Some(case_correct(found, word.case()))
    }
}
/// Trait adapter: both methods forward to the inherent methods of the same
/// name on `BuiltIn`.
impl Dictionary for BuiltIn {
    fn correct_ident<'s, 'w>(
        &'s self,
        ident: crate::tokens::Identifier<'w>,
    ) -> Option<Cow<'s, str>> {
        // `Self::` paths pick the inherent method ahead of this trait method,
        // so this delegates rather than recursing.
        Self::correct_ident(self, ident)
    }

    fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Cow<'s, str>> {
        // Same delegation as above, to the inherent `correct_word`.
        Self::correct_word(self, word)
    }
}
/// Looks `key` up in a static `phf` map whose keys are `UniCase` strings.
///
/// Returns a copy of the stored `&'static str` value, or `None` when the map
/// has no entry for `key`.
fn map_lookup(
    map: &'static phf::Map<UniCase<&'static str>, &'static str>,
    key: &str,
) -> Option<&'static str> {
    // `Borrow` is too strict to let a `UniCase<&'static str>` key be queried
    // with a shorter-lived `UniCase<&'a str>`, so the key has to be presented
    // with a `'static` lifetime.
    //
    // See https://github.com/rust-lang/rust/issues/28853#issuecomment-158735548
    //
    // SAFETY: the lifetime-extended reference is only handed to the `get` call
    // below, which should not store it, so it is not used after the real
    // borrow of `key` ends.
    let key = unsafe { ::std::mem::transmute::<_, &'static str>(key) };
    map.get(&UniCase(key)).cloned()
}
/// Re-cases `correction` according to `case`.
///
/// * `Case::Lower` / `Case::None`: the correction is returned borrowed, unchanged.
/// * `Case::Title`: the first character is uppercased, the rest is copied as-is.
/// * `Case::Scream`: every character is uppercased.
fn case_correct(correction: &str, case: Case) -> Cow<'_, str> {
    match case {
        Case::Lower | Case::None => Cow::Borrowed(correction),
        Case::Title => {
            let mut titled = String::with_capacity(correction.len());
            let mut rest = correction.chars();
            if let Some(first) = rest.next() {
                // Uppercasing one char may yield several chars, hence `extend`.
                titled.extend(first.to_uppercase());
                // Whatever follows the first character is appended verbatim.
                titled.push_str(rest.as_str());
            }
            Cow::Owned(titled)
        }
        Case::Scream => {
            let shouted: String = correction.chars().flat_map(char::to_uppercase).collect();
            Cow::Owned(shouted)
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Runs `case_correct` over one sample per `Case` variant plus a
    /// mixed-case input that must pass through untouched under `Case::None`.
    #[test]
    fn test_case_correct() {
        let table = [
            ("foo", Case::Lower, "foo"),
            ("foo", Case::None, "foo"),
            ("foo", Case::Title, "Foo"),
            ("foo", Case::Scream, "FOO"),
            ("fOo", Case::None, "fOo"),
        ];
        for &(correction, case, expected) in &table {
            let actual = case_correct(correction, case);
            assert_eq!(expected, actual);
        }
    }
}

View file

@ -1,5 +1,4 @@
mod dict;
mod dict_codegen;
pub mod checks;
pub mod report;