From 85ee5cfac96f572dd167979412616898c2cb0428 Mon Sep 17 00:00:00 2001
From: Ed Page
Date: Thu, 24 Jan 2019 08:24:20 -0700
Subject: [PATCH] fix(api): Split lib

---
 benches/corrections.rs |   6 +-
 benches/file.rs        |  24 +++----
 benches/tokenize.rs    |  12 ++--
 src/dict.rs            |  18 +++++
 src/identifier.rs      |  74 +++++++++++++++++++++
 src/lib.rs             | 146 ++++------------------------------------
 src/main.rs            |  12 ++--
 src/report.rs          |  40 +++++++++++
 8 files changed, 170 insertions(+), 162 deletions(-)
 create mode 100644 src/dict.rs
 create mode 100644 src/identifier.rs
 create mode 100644 src/report.rs

diff --git a/benches/corrections.rs b/benches/corrections.rs
index 4c81f6b..bade620 100644
--- a/benches/corrections.rs
+++ b/benches/corrections.rs
@@ -4,19 +4,19 @@ extern crate test;
 
 #[bench]
 fn load_corrections(b: &mut test::Bencher) {
-    b.iter(|| scorrect::Corrections::new());
+    b.iter(|| scorrect::Dictionary::new());
 }
 
 #[bench]
 fn correction(b: &mut test::Bencher) {
-    let corrections = scorrect::Corrections::new();
+    let corrections = scorrect::Dictionary::new();
     assert_eq!(corrections.correct_str("successs"), Some("successes"));
     b.iter(|| corrections.correct_str("successs"));
 }
 
 #[bench]
 fn no_correction(b: &mut test::Bencher) {
-    let corrections = scorrect::Corrections::new();
+    let corrections = scorrect::Dictionary::new();
     assert_eq!(corrections.correct_str("success"), None);
     b.iter(|| corrections.correct_str("success"));
 }
diff --git a/benches/file.rs b/benches/file.rs
index 3547ff8..3056489 100644
--- a/benches/file.rs
+++ b/benches/file.rs
@@ -12,8 +12,8 @@ fn process_empty(b: &mut test::Bencher) {
     let sample_path = temp.child("sample");
     sample_path.write_str(data::EMPTY).unwrap();
 
-    let corrections = scorrect::Corrections::new();
-    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+    let corrections = scorrect::Dictionary::new();
+    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
 
     temp.close().unwrap();
 }
@@ -24,8 +24,8 @@ fn process_no_tokens(b: &mut test::Bencher) {
     let sample_path = temp.child("sample");
     sample_path.write_str(data::NO_TOKENS).unwrap();
 
-    let corrections = scorrect::Corrections::new();
-    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+    let corrections = scorrect::Dictionary::new();
+    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
 
     temp.close().unwrap();
 }
@@ -36,8 +36,8 @@ fn process_single_token(b: &mut test::Bencher) {
     let sample_path = temp.child("sample");
     sample_path.write_str(data::SINGLE_TOKEN).unwrap();
 
-    let corrections = scorrect::Corrections::new();
-    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+    let corrections = scorrect::Dictionary::new();
+    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
 
     temp.close().unwrap();
 }
@@ -48,8 +48,8 @@ fn process_sherlock(b: &mut test::Bencher) {
     let sample_path = temp.child("sample");
     sample_path.write_str(data::SHERLOCK).unwrap();
 
-    let corrections = scorrect::Corrections::new();
-    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+    let corrections = scorrect::Dictionary::new();
+    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
 
     temp.close().unwrap();
 }
@@ -60,8 +60,8 @@ fn process_code(b: &mut test::Bencher) {
     let sample_path = temp.child("sample");
     sample_path.write_str(data::CODE).unwrap();
 
-    let corrections = scorrect::Corrections::new();
-    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+    let corrections = scorrect::Dictionary::new();
+    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
 
     temp.close().unwrap();
 }
@@ -72,8 +72,8 @@ fn process_corpus(b: &mut test::Bencher) {
     let sample_path = temp.child("sample");
     sample_path.write_str(data::CORPUS).unwrap();
 
-    let corrections = scorrect::Corrections::new();
-    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+    let corrections = scorrect::Dictionary::new();
+    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
 
     temp.close().unwrap();
 }
diff --git a/benches/tokenize.rs b/benches/tokenize.rs
index 2914a0a..aeca216 100644
--- a/benches/tokenize.rs
+++ b/benches/tokenize.rs
@@ -6,30 +6,30 @@ mod data;
 
 #[bench]
 fn tokenize_empty(b: &mut test::Bencher) {
-    b.iter(|| scorrect::tokenize(data::EMPTY.as_bytes()).collect::<Vec<_>>());
+    b.iter(|| scorrect::identifier::tokenize(data::EMPTY.as_bytes()).collect::<Vec<_>>());
 }
 
 #[bench]
 fn tokenize_no_tokens(b: &mut test::Bencher) {
-    b.iter(|| scorrect::tokenize(data::NO_TOKENS.as_bytes()).collect::<Vec<_>>());
+    b.iter(|| scorrect::identifier::tokenize(data::NO_TOKENS.as_bytes()).collect::<Vec<_>>());
 }
 
 #[bench]
 fn tokenize_single_token(b: &mut test::Bencher) {
-    b.iter(|| scorrect::tokenize(data::SINGLE_TOKEN.as_bytes()).collect::<Vec<_>>());
+    b.iter(|| scorrect::identifier::tokenize(data::SINGLE_TOKEN.as_bytes()).collect::<Vec<_>>());
 }
 
 #[bench]
 fn tokenize_sherlock(b: &mut test::Bencher) {
-    b.iter(|| scorrect::tokenize(data::SHERLOCK.as_bytes()).collect::<Vec<_>>());
+    b.iter(|| scorrect::identifier::tokenize(data::SHERLOCK.as_bytes()).collect::<Vec<_>>());
 }
 
 #[bench]
 fn tokenize_code(b: &mut test::Bencher) {
-    b.iter(|| scorrect::tokenize(data::CODE.as_bytes()).collect::<Vec<_>>());
+    b.iter(|| scorrect::identifier::tokenize(data::CODE.as_bytes()).collect::<Vec<_>>());
 }
 
 #[bench]
 fn tokenize_corpus(b: &mut test::Bencher) {
-    b.iter(|| scorrect::tokenize(data::CORPUS.as_bytes()).collect::<Vec<_>>());
+    b.iter(|| scorrect::identifier::tokenize(data::CORPUS.as_bytes()).collect::<Vec<_>>());
 }
diff --git a/src/dict.rs b/src/dict.rs
new file mode 100644
index 0000000..0248925
--- /dev/null
+++ b/src/dict.rs
@@ -0,0 +1,18 @@
+include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
+
+pub struct Dictionary {
+}
+
+impl Dictionary {
+    pub fn new() -> Self {
+        Dictionary { }
+    }
+
+    pub fn correct_str<'s>(&'s self, word: &str) -> Option<&'s str> {
+        DICTIONARY.get(word).map(|s| *s)
+    }
+
+    pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s [u8]> {
+        std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| s.as_bytes())
+    }
+}
diff --git a/src/identifier.rs b/src/identifier.rs
new file mode 100644
index 0000000..2bd4574
--- /dev/null
+++ b/src/identifier.rs
@@ -0,0 +1,74 @@
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Token<'t> {
+    pub token: &'t [u8],
+    pub offset: usize,
+}
+
+impl<'t> Token<'t> {
+    pub fn new(token: &'t [u8], offset: usize) -> Self {
+        Self {
+            token,
+            offset,
+        }
+    }
+}
+
+pub fn tokenize(content: &[u8]) -> impl Iterator<Item = Token<'_>> {
+    lazy_static::lazy_static! {
+        static ref SPLIT: regex::bytes::Regex = regex::bytes::Regex::new(r#"\b(\p{Alphabetic}|\d|_)+\b"#).unwrap();
+    }
+    SPLIT.find_iter(content).map(|m| Token::new(m.as_bytes(), m.start()))
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn tokenize_empty_is_empty() {
+        let input = b"";
+        let expected: Vec<Token> = vec![];
+        let actual: Vec<_> = tokenize(input).collect();
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn tokenize_word_is_word() {
+        let input = b"word";
+        let expected: Vec<Token> = vec![Token::new(b"word", 0)];
+        let actual: Vec<_> = tokenize(input).collect();
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn tokenize_space_separated_words() {
+        let input = b"A B";
+        let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
+        let actual: Vec<_> = tokenize(input).collect();
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn tokenize_dot_separated_words() {
+        let input = b"A.B";
+        let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
+        let actual: Vec<_> = tokenize(input).collect();
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn tokenize_namespace_separated_words() {
+        let input = b"A::B";
+        let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 3)];
+        let actual: Vec<_> = tokenize(input).collect();
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn tokenize_underscore_doesnt_separate() {
+        let input = b"A_B";
+        let expected: Vec<Token> = vec![Token::new(b"A_B", 0)];
+        let actual: Vec<_> = tokenize(input).collect();
+        assert_eq!(expected, actual);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 19408e8..457c3ef 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,60 +1,33 @@
 #[macro_use]
 extern crate serde_derive;
 
+mod dict;
+
+pub mod identifier;
+pub mod report;
+
+pub use crate::dict::*;
+
 use std::fs::File;
 use std::io::Read;
 
-include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct Token<'t> {
-    pub token: &'t [u8],
-    pub offset: usize,
-}
-
-impl<'t> Token<'t> {
-    pub fn new(token: &'t [u8], offset: usize) -> Self {
-        Self {
-            token,
-            offset,
-        }
-    }
-}
-
-pub fn tokenize(content: &[u8]) -> impl Iterator<Item = Token<'_>> {
-    lazy_static::lazy_static! {
-        static ref SPLIT: regex::bytes::Regex = regex::bytes::Regex::new(r#"\b(\p{Alphabetic}|\d|_)+\b"#).unwrap();
-    }
-    SPLIT.find_iter(content).map(|m| Token::new(m.as_bytes(), m.start()))
-}
-
-#[derive(Debug, Serialize)]
-pub struct Message<'m> {
-    path: &'m std::path::Path,
-    #[serde(skip)]
-    line: &'m [u8],
-    line_num: usize,
-    col_num: usize,
-    word: &'m str,
-    correction: &'m str,
-}
-
-pub fn process_file(path: &std::path::Path, dictionary: &Corrections, report: Report) -> Result<(), failure::Error> {
+pub fn process_file(path: &std::path::Path, dictionary: &Dictionary, report: report::Report) -> Result<(), failure::Error> {
     let mut buffer = Vec::new();
     File::open(path)?.read_to_end(&mut buffer)?;
     for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {
         let line_num = line_idx + 1;
-        for token in tokenize(line) {
+        for token in identifier::tokenize(line) {
             if let Some(word) = std::str::from_utf8(token.token).ok() {
                 if let Some(correction) = dictionary.correct_str(word) {
                     let col_num = token.offset;
-                    let msg = Message {
+                    let msg = report::Message {
                         path,
                         line,
                         line_num,
                         col_num,
                         word,
                         correction,
+                        non_exhaustive: (),
                     };
                     report(msg);
                 }
@@ -65,100 +38,3 @@ pub fn process_file(path: &std::path::Path, dictionary: &Corrections, report: Re
     Ok(())
 }
 
-pub type Report = fn(msg: Message);
-
-pub fn print_silent(_: Message) {
-}
-
-pub fn print_brief(msg: Message) {
-    println!("{}:{}:{}: {} -> {}", msg.path.display(), msg.line_num, msg.col_num, msg.word, msg.correction);
-}
-
-pub fn print_long(msg: Message) {
-    let line_num = msg.line_num.to_string();
-    let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect();
-
-    let hl_indent: String = itertools::repeat_n(" ", msg.col_num).collect();
-    let hl: String = itertools::repeat_n("^", msg.word.len()).collect();
-
-    println!("error: `{}` should be `{}`", msg.word, msg.correction);
-    println!("  --> {}:{}:{}", msg.path.display(), msg.line_num, msg.col_num);
-    println!("{} |", line_indent);
-    println!("{} | {}", msg.line_num, String::from_utf8_lossy(msg.line).trim_end());
-    println!("{} | {}{}", line_indent, hl_indent, hl);
-    println!("{} |", line_indent);
-}
-
-pub fn print_json(msg: Message) {
-    println!("{}", serde_json::to_string(&msg).unwrap());
-}
-
-pub struct Corrections {
-}
-
-impl Corrections {
-    pub fn new() -> Self {
-        Corrections { }
-    }
-
-    pub fn correct_str<'s>(&'s self, word: &str) -> Option<&'s str> {
-        DICTIONARY.get(word).map(|s| *s)
-    }
-
-    pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s [u8]> {
-        std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| s.as_bytes())
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::*;
-
-    #[test]
-    fn tokenize_empty_is_empty() {
-        let input = b"";
-        let expected: Vec<Token> = vec![];
-        let actual: Vec<_> = tokenize(input).collect();
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn tokenize_word_is_word() {
-        let input = b"word";
-        let expected: Vec<Token> = vec![Token::new(b"word", 0)];
-        let actual: Vec<_> = tokenize(input).collect();
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn tokenize_space_separated_words() {
-        let input = b"A B";
-        let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
-        let actual: Vec<_> = tokenize(input).collect();
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn tokenize_dot_separated_words() {
-        let input = b"A.B";
-        let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
-        let actual: Vec<_> = tokenize(input).collect();
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn tokenize_namespace_separated_words() {
-        let input = b"A::B";
-        let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 3)];
-        let actual: Vec<_> = tokenize(input).collect();
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn tokenize_underscore_doesnt_separate() {
-        let input = b"A_B";
-        let expected: Vec<Token> = vec![Token::new(b"A_B", 0)];
-        let actual: Vec<_> = tokenize(input).collect();
-        assert_eq!(expected, actual);
-    }
-}
diff --git a/src/main.rs b/src/main.rs
index ff28004..edd4485 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -15,12 +15,12 @@ arg_enum!{
 }
 
 impl Format {
-    fn report(self) -> scorrect::Report {
+    fn report(self) -> scorrect::report::Report {
         match self {
-            Format::Silent => scorrect::print_silent,
-            Format::Brief => scorrect::print_brief,
-            Format::Long => scorrect::print_long,
-            Format::Json => scorrect::print_json,
+            Format::Silent => scorrect::report::print_silent,
+            Format::Brief => scorrect::report::print_brief,
+            Format::Long => scorrect::report::print_long,
+            Format::Json => scorrect::report::print_json,
         }
     }
 }
@@ -63,7 +63,7 @@ impl Options {
 fn run() -> Result<(), failure::Error> {
     let options = Options::from_args().infer();
 
-    let dictionary = scorrect::Corrections::new();
+    let dictionary = scorrect::Dictionary::new();
 
     let first_path = &options.path.get(0).expect("arg parsing enforces at least one");
     let mut walk = ignore::WalkBuilder::new(first_path);
diff --git a/src/report.rs b/src/report.rs
new file mode 100644
index 0000000..3281be6
--- /dev/null
+++ b/src/report.rs
@@ -0,0 +1,40 @@
+#[derive(Debug, Serialize)]
+pub struct Message<'m> {
+    pub path: &'m std::path::Path,
+    #[serde(skip)]
+    pub line: &'m [u8],
+    pub line_num: usize,
+    pub col_num: usize,
+    pub word: &'m str,
+    pub correction: &'m str,
+    #[serde(skip)]
+    pub(crate) non_exhaustive: (),
+}
+
+pub type Report = fn(msg: Message);
+
+pub fn print_silent(_: Message) {
+}
+
+pub fn print_brief(msg: Message) {
+    println!("{}:{}:{}: {} -> {}", msg.path.display(), msg.line_num, msg.col_num, msg.word, msg.correction);
+}
+
+pub fn print_long(msg: Message) {
+    let line_num = msg.line_num.to_string();
+    let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect();
+
+    let hl_indent: String = itertools::repeat_n(" ", msg.col_num).collect();
+    let hl: String = itertools::repeat_n("^", msg.word.len()).collect();
+
+    println!("error: `{}` should be `{}`", msg.word, msg.correction);
+    println!("  --> {}:{}:{}", msg.path.display(), msg.line_num, msg.col_num);
+    println!("{} |", line_indent);
+    println!("{} | {}", msg.line_num, String::from_utf8_lossy(msg.line).trim_end());
+    println!("{} | {}{}", line_indent, hl_indent, hl);
+    println!("{} |", line_indent);
+}
+
+pub fn print_json(msg: Message) {
+    println!("{}", serde_json::to_string(&msg).unwrap());
+}
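
Usage note: taken together, the split leaves three public entry points: `scorrect::identifier::tokenize` for identifier extraction, `scorrect::Dictionary` (re-exported from the private `dict` module) for lookups, and `scorrect::report` for the output callbacks. A minimal sketch of a downstream caller after this patch; the `main` wrapper, the sample input, and the file path are illustrative, not part of the change:

    fn main() -> Result<(), failure::Error> {
        // Identifier extraction now lives in the public `identifier` module.
        for token in scorrect::identifier::tokenize(b"calc_successs") {
            println!("identifier at byte offset {}", token.offset);
        }

        // Dictionary lookup and reporting are wired together by `process_file`;
        // the reporter is any `scorrect::report::Report` fn, e.g. `print_brief`.
        let dictionary = scorrect::Dictionary::new();
        scorrect::process_file(
            std::path::Path::new("src/lib.rs"),
            &dictionary,
            scorrect::report::print_brief,
        )?;
        Ok(())
    }

The `non_exhaustive: ()` field added to `report::Message` is `pub(crate)`, so downstream code like the sketch above can read messages in a reporter but cannot construct them, leaving room to add fields without a breaking change.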