From 1bdd1c928a491e4bca285e8f048f28f6dd20dcdd Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 8 Aug 2019 10:24:50 -0500 Subject: [PATCH] refactor: Split out typos-dict --- Cargo.lock | 36 +++++++-- Cargo.toml | 3 +- benches/corrections.rs | 6 +- benches/data.rs | 2 +- benches/file.rs | 12 +-- src/main.rs | 2 +- typos-dict/Cargo.toml | 31 ++++++++ {typos => typos-dict}/assets/main.go | 0 {typos => typos-dict}/assets/words.csv | 0 {typos => typos-dict}/assets/words.go | 0 {typos => typos-dict}/build.rs | 0 typos-dict/src/dict.rs | 97 +++++++++++++++++++++++ {typos => typos-dict}/src/dict_codegen.rs | 0 typos-dict/src/lib.rs | 4 + typos/Cargo.toml | 10 --- typos/src/dict.rs | 96 ---------------------- typos/src/lib.rs | 1 - 17 files changed, 173 insertions(+), 127 deletions(-) create mode 100644 typos-dict/Cargo.toml rename {typos => typos-dict}/assets/main.go (100%) rename {typos => typos-dict}/assets/words.csv (100%) rename {typos => typos-dict}/assets/words.go (100%) rename {typos => typos-dict}/build.rs (100%) create mode 100644 typos-dict/src/dict.rs rename {typos => typos-dict}/src/dict_codegen.rs (100%) create mode 100644 typos-dict/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 4e59da7..5bd16e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,6 +78,7 @@ dependencies = [ "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "regex-automata 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -158,16 +159,19 @@ dependencies = [ [[package]] name = "csv" -version = "1.0.5" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "bstr 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "csv-core 0.1.6 
(registry+https://github.com/rust-lang/crates.io-index)", + "itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "csv-core" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)", @@ -697,6 +701,11 @@ name = "ryu" version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "ryu" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "same-file" version = "1.0.4" @@ -880,14 +889,11 @@ name = "typos" version = "0.1.0" dependencies = [ "bstr 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "csv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", "derive_more 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", "failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", - "phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.85 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)", @@ -910,6 +916,19 @@ dependencies = [ "structopt 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", "toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", "typos 0.1.0", + "typos-dict 0.1.0", +] + +[[package]] +name = "typos-dict" 
+version = "0.1.0" +dependencies = [ + "csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", + "phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", + "typos 0.1.0", + "unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1033,8 +1052,8 @@ dependencies = [ "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" "checksum crossbeam-channel 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "137bc235f622ffaa0428e3854e24acb53291fc0b3ff6fb2cb75a8be6fb02f06b" "checksum crossbeam-utils 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "41ee4864f4797060e52044376f7d107429ce1fb43460021b126424b7180ee21a" -"checksum csv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "9fd1c44c58078cfbeaf11fbb3eac9ae5534c23004ed770cc4bfb48e658ae4f04" -"checksum csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa5cdef62f37e6ffe7d1f07a381bc0db32b7a3ff1cac0de56cb0d81e71f53d65" +"checksum csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "37519ccdfd73a75821cac9319d4fce15a81b9fcf75f951df5b9988aa3a0af87d" +"checksum csv-core 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9b5cadb6b25c77aeff80ba701712494213f4a8418fcda2ee11b6560c3ad0bf4c" "checksum derive_more 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a141330240c921ec6d074a3e188a7c7ef95668bb95e7d44fa0e5778ec2a7afe" "checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" "checksum either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3be565ca5c557d7f59e7cfcf1844f9e3033650c929c6566f511e8005f205c1d0" 
@@ -1097,6 +1116,7 @@ dependencies = [ "checksum rustc-demangle 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "adacaae16d02b6ec37fdc7acfcddf365978de76d1983d3ee22afc260e1ca9619" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" "checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" +"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997" "checksum same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8f20c4be53a8a1ff4c1f1b2bd14570d2f634628709752f0702ecdd2b3f9a5267" "checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" "checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" diff --git a/Cargo.toml b/Cargo.toml index 10cd3a0..428e0fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["typos"] +members = ["typos", "typos-dict"] [package] name = "typos-cli" @@ -23,6 +23,7 @@ iterate_unstable = [] [dependencies] typos = { version = "0.1", path = "typos" } +typos-dict = { version = "0.1", path = "typos-dict" } failure = "0.1" structopt = "0.2" clap = "2" diff --git a/benches/corrections.rs b/benches/corrections.rs index 8e0d751..40a4520 100644 --- a/benches/corrections.rs +++ b/benches/corrections.rs @@ -4,12 +4,12 @@ extern crate test; #[bench] fn load_corrections(b: &mut test::Bencher) { - b.iter(|| typos::BuiltIn::new()); + b.iter(|| typos_dict::BuiltIn::new()); } #[bench] fn correct_word_hit(b: &mut test::Bencher) { - let corrections = typos::BuiltIn::new(); + let corrections = typos_dict::BuiltIn::new(); let input = 
typos::tokens::Word::new("successs", 0).unwrap(); assert_eq!( corrections.correct_word(input), @@ -20,7 +20,7 @@ fn correct_word_hit(b: &mut test::Bencher) { #[bench] fn correct_word_miss(b: &mut test::Bencher) { - let corrections = typos::BuiltIn::new(); + let corrections = typos_dict::BuiltIn::new(); let input = typos::tokens::Word::new("success", 0).unwrap(); assert_eq!(corrections.correct_word(input), None); b.iter(|| corrections.correct_word(input)); diff --git a/benches/data.rs b/benches/data.rs index 4cc93f3..4252655 100644 --- a/benches/data.rs +++ b/benches/data.rs @@ -28,4 +28,4 @@ fn main() { } "; -pub const CORPUS: &str = include_str!("../typos/assets/words.csv"); +pub const CORPUS: &str = include_str!("../typos-dict/assets/words.csv"); diff --git a/benches/file.rs b/benches/file.rs index 27a7c31..c653004 100644 --- a/benches/file.rs +++ b/benches/file.rs @@ -12,7 +12,7 @@ fn process_empty(b: &mut test::Bencher) { let sample_path = temp.child("sample"); sample_path.write_str(data::EMPTY).unwrap(); - let corrections = typos::BuiltIn::new(); + let corrections = typos_dict::BuiltIn::new(); let parser = typos::tokens::Parser::new(); let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); @@ -26,7 +26,7 @@ fn process_no_tokens(b: &mut test::Bencher) { let sample_path = temp.child("sample"); sample_path.write_str(data::NO_TOKENS).unwrap(); - let corrections = typos::BuiltIn::new(); + let corrections = typos_dict::BuiltIn::new(); let parser = typos::tokens::Parser::new(); let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); @@ -40,7 +40,7 @@ fn process_single_token(b: &mut test::Bencher) { let sample_path = temp.child("sample"); sample_path.write_str(data::SINGLE_TOKEN).unwrap(); - let corrections = typos::BuiltIn::new(); + let corrections = 
typos_dict::BuiltIn::new(); let parser = typos::tokens::Parser::new(); let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); @@ -54,7 +54,7 @@ fn process_sherlock(b: &mut test::Bencher) { let sample_path = temp.child("sample"); sample_path.write_str(data::SHERLOCK).unwrap(); - let corrections = typos::BuiltIn::new(); + let corrections = typos_dict::BuiltIn::new(); let parser = typos::tokens::Parser::new(); let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); @@ -68,7 +68,7 @@ fn process_code(b: &mut test::Bencher) { let sample_path = temp.child("sample"); sample_path.write_str(data::CODE).unwrap(); - let corrections = typos::BuiltIn::new(); + let corrections = typos_dict::BuiltIn::new(); let parser = typos::tokens::Parser::new(); let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); @@ -82,7 +82,7 @@ fn process_corpus(b: &mut test::Bencher) { let sample_path = temp.child("sample"); sample_path.write_str(data::CORPUS).unwrap(); - let corrections = typos::BuiltIn::new(); + let corrections = typos_dict::BuiltIn::new(); let parser = typos::tokens::Parser::new(); let checks = typos::checks::CheckSettings::new().build(&corrections, &parser); b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent)); diff --git a/src/main.rs b/src/main.rs index 319858c..ef19fc6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -310,7 +310,7 @@ fn run() -> Result { config.default.update(&args.overrides); let config = config; - let dictionary = typos::BuiltIn::new(); + let dictionary = typos_dict::BuiltIn::new(); let parser = typos::tokens::ParserBuilder::new() .ignore_hex(config.default.ignore_hex()) diff --git a/typos-dict/Cargo.toml 
b/typos-dict/Cargo.toml new file mode 100644 index 0000000..97fd2d3 --- /dev/null +++ b/typos-dict/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "typos-dict" +version = "0.1.0" +authors = ["Ed Page "] +description = "Source Code Spelling Correction" +repository = "https://github.com/epage/typos" +documentation = "https://docs.rs/typos-dict" +readme = "README.md" +categories = ["development-tools", "text-processing"] +keywords = ["development", "spelling"] +license = "MIT" +edition = "2018" + +[badges] +travis-ci = { repository = "epage/typos" } +appveyor = { repository = "epage/typos" } + +[features] +# Support quickly iterating +iterate_unstable = [] + +[dependencies] +typos = { version = "0.1", path = "../typos" } +phf = { version = "0.7", features = ["unicase"] } +unicase = "1.1" +log = "0.4" + +[build-dependencies] +phf_codegen = "0.7" +csv = "1.0" +unicase = "1.1" diff --git a/typos/assets/main.go b/typos-dict/assets/main.go similarity index 100% rename from typos/assets/main.go rename to typos-dict/assets/main.go diff --git a/typos/assets/words.csv b/typos-dict/assets/words.csv similarity index 100% rename from typos/assets/words.csv rename to typos-dict/assets/words.csv diff --git a/typos/assets/words.go b/typos-dict/assets/words.go similarity index 100% rename from typos/assets/words.go rename to typos-dict/assets/words.go diff --git a/typos/build.rs b/typos-dict/build.rs similarity index 100% rename from typos/build.rs rename to typos-dict/build.rs diff --git a/typos-dict/src/dict.rs b/typos-dict/src/dict.rs new file mode 100644 index 0000000..ed91c68 --- /dev/null +++ b/typos-dict/src/dict.rs @@ -0,0 +1,97 @@ +use std::borrow::Cow; + +use unicase::UniCase; + +use typos::tokens::Case; + +#[derive(Default)] +pub struct BuiltIn {} + +impl BuiltIn { + pub fn new() -> Self { + Self {} + } + + pub fn correct_ident<'s, 'w>( + &'s self, + _ident: typos::tokens::Identifier<'w>, + ) -> Option<Cow<'s, str>> { + None + } + + pub fn correct_word<'s, 'w>(&'s self, word:
typos::tokens::Word<'w>) -> Option<Cow<'s, str>> { + map_lookup(&crate::dict_codegen::WORD_DICTIONARY, word.token()) + .map(|s| case_correct(s, word.case())) + } +} + +impl typos::Dictionary for BuiltIn { + fn correct_ident<'s, 'w>( + &'s self, + ident: typos::tokens::Identifier<'w>, + ) -> Option<Cow<'s, str>> { + BuiltIn::correct_ident(self, ident) + } + + fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> { + BuiltIn::correct_word(self, word) + } +} + +fn map_lookup( + map: &'static phf::Map<UniCase<&'static str>, &'static str>, + key: &str, +) -> Option<&'static str> { + // This transmute should be safe as `get` will not store the reference with + // the expanded lifetime. This is due to `Borrow` being overly strict and + // can't have an impl for `&'static str` to `Borrow<&'a str>`. + // + // + // See https://github.com/rust-lang/rust/issues/28853#issuecomment-158735548 + unsafe { + let key = ::std::mem::transmute::<_, &'static str>(key); + map.get(&UniCase(key)).cloned() + } +} + +fn case_correct(correction: &str, case: Case) -> Cow<'_, str> { + match case { + Case::Lower | Case::None => correction.into(), + Case::Title => { + let mut title = String::with_capacity(correction.as_bytes().len()); + let mut char_indices = correction.char_indices(); + if let Some((_, c)) = char_indices.next() { + title.extend(c.to_uppercase()); + if let Some((i, _)) = char_indices.next() { + title.push_str(&correction[i..]); + } + } + title.into() + } + Case::Scream => correction + .chars() + .flat_map(|c| c.to_uppercase()) + .collect::<String>() + .into(), + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_case_correct() { + let cases = [ + ("foo", Case::Lower, "foo"), + ("foo", Case::None, "foo"), + ("foo", Case::Title, "Foo"), + ("foo", Case::Scream, "FOO"), + ("fOo", Case::None, "fOo"), + ]; + for (correction, case, expected) in cases.iter() { + let actual = case_correct(correction, *case); + assert_eq!(*expected, actual); + } + } +} diff --git a/typos/src/dict_codegen.rs
b/typos-dict/src/dict_codegen.rs similarity index 100% rename from typos/src/dict_codegen.rs rename to typos-dict/src/dict_codegen.rs diff --git a/typos-dict/src/lib.rs b/typos-dict/src/lib.rs new file mode 100644 index 0000000..a36726f --- /dev/null +++ b/typos-dict/src/lib.rs @@ -0,0 +1,4 @@ +mod dict; +mod dict_codegen; + +pub use crate::dict::*; diff --git a/typos/Cargo.toml b/typos/Cargo.toml index 1588f0a..bfc23f3 100644 --- a/typos/Cargo.toml +++ b/typos/Cargo.toml @@ -15,13 +15,8 @@ edition = "2018" travis-ci = { repository = "epage/typos" } appveyor = { repository = "epage/typos" } -[features] -# Support quickly iterating -iterate_unstable = [] - [dependencies] failure = "0.1" -phf = { version = "0.7", features = ["unicase"] } regex = "1.0" lazy_static = "1.2.0" serde = { version = "1.0", features = ["derive"] } @@ -32,8 +27,3 @@ bstr = "0.2" log = "0.4" unicode-segmentation = "1.3.0" derive_more = "0.15.0" - -[build-dependencies] -phf_codegen = "0.7" -csv = "1.0" -unicase = "1.1" diff --git a/typos/src/dict.rs b/typos/src/dict.rs index 1ff7359..084cbc4 100644 --- a/typos/src/dict.rs +++ b/typos/src/dict.rs @@ -1,9 +1,5 @@ use std::borrow::Cow; -use unicase::UniCase; - -use crate::tokens::Case; - pub trait Dictionary { fn correct_ident<'s, 'w>( &'s self, @@ -12,95 +8,3 @@ pub trait Dictionary { fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Cow<'s, str>>; } - -#[derive(Default)] -pub struct BuiltIn {} - -impl BuiltIn { - pub fn new() -> Self { - Self {} - } - - pub fn correct_ident<'s, 'w>( - &'s self, - _ident: crate::tokens::Identifier<'w>, - ) -> Option<Cow<'s, str>> { - None - } - - pub fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Cow<'s, str>> { - map_lookup(&crate::dict_codegen::WORD_DICTIONARY, word.token()) - .map(|s| case_correct(s, word.case())) - } -} - -impl Dictionary for BuiltIn { - fn correct_ident<'s, 'w>( - &'s self, - ident: crate::tokens::Identifier<'w>, - ) -> Option<Cow<'s, str>> { - BuiltIn::correct_ident(self, ident) - } - - fn
correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Cow<'s, str>> { - BuiltIn::correct_word(self, word) - } -} - -fn map_lookup( - map: &'static phf::Map<UniCase<&'static str>, &'static str>, - key: &str, -) -> Option<&'static str> { - // This transmute should be safe as `get` will not store the reference with - // the expanded lifetime. This is due to `Borrow` being overly strict and - // can't have an impl for `&'static str` to `Borrow<&'a str>`. - // - // - // See https://github.com/rust-lang/rust/issues/28853#issuecomment-158735548 - unsafe { - let key = ::std::mem::transmute::<_, &'static str>(key); - map.get(&UniCase(key)).cloned() - } -} - -fn case_correct(correction: &str, case: Case) -> Cow<'_, str> { - match case { - Case::Lower | Case::None => correction.into(), - Case::Title => { - let mut title = String::with_capacity(correction.as_bytes().len()); - let mut char_indices = correction.char_indices(); - if let Some((_, c)) = char_indices.next() { - title.extend(c.to_uppercase()); - if let Some((i, _)) = char_indices.next() { - title.push_str(&correction[i..]); - } - } - title.into() - } - Case::Scream => correction - .chars() - .flat_map(|c| c.to_uppercase()) - .collect::<String>() - .into(), - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_case_correct() { - let cases = [ - ("foo", Case::Lower, "foo"), - ("foo", Case::None, "foo"), - ("foo", Case::Title, "Foo"), - ("foo", Case::Scream, "FOO"), - ("fOo", Case::None, "fOo"), - ]; - for (correction, case, expected) in cases.iter() { - let actual = case_correct(correction, *case); - assert_eq!(*expected, actual); - } - } -} diff --git a/typos/src/lib.rs b/typos/src/lib.rs index 2c3fd94..1cb77c9 100644 --- a/typos/src/lib.rs +++ b/typos/src/lib.rs @@ -1,5 +1,4 @@ mod dict; -mod dict_codegen; pub mod checks; pub mod report;