fix(api): Split lib

Ed Page 2019-01-24 08:24:20 -07:00
parent d8ca9f9d5a
commit 85ee5cfac9
8 changed files with 170 additions and 162 deletions
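
Summary: the library is split into three modules. dict holds the Dictionary type (formerly Corrections), identifier holds Token and tokenize, and report holds Message, the Report callback type, and the print_* reporters. The three benchmark files and the CLI entry point are updated to the new paths.

A sketch of the caller-facing change, reconstructed from the hunks below; the path argument is a placeholder:

use std::path::Path;

fn check(path: &Path) -> Result<(), failure::Error> {
    // Before this commit, everything lived at the crate root:
    //     let dictionary = scorrect::Corrections::new();
    //     scorrect::process_file(path, &dictionary, scorrect::print_brief)?;
    // After it, the dictionary type is renamed and re-exported at the root,
    // while the reporters move under scorrect::report.
    let dictionary = scorrect::Dictionary::new();
    scorrect::process_file(path, &dictionary, scorrect::report::print_brief)?;
    Ok(())
}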


@@ -4,19 +4,19 @@ extern crate test;
#[bench]
fn load_corrections(b: &mut test::Bencher) {
-b.iter(|| scorrect::Corrections::new());
+b.iter(|| scorrect::Dictionary::new());
}
#[bench]
fn correction(b: &mut test::Bencher) {
-let corrections = scorrect::Corrections::new();
+let corrections = scorrect::Dictionary::new();
assert_eq!(corrections.correct_str("successs"), Some("successes"));
b.iter(|| corrections.correct_str("successs"));
}
#[bench]
fn no_correction(b: &mut test::Bencher) {
-let corrections = scorrect::Corrections::new();
+let corrections = scorrect::Dictionary::new();
assert_eq!(corrections.correct_str("success"), None);
b.iter(|| corrections.correct_str("success"));
}


@@ -12,8 +12,8 @@ fn process_empty(b: &mut test::Bencher) {
let sample_path = temp.child("sample");
sample_path.write_str(data::EMPTY).unwrap();
-let corrections = scorrect::Corrections::new();
-b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+let corrections = scorrect::Dictionary::new();
+b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
temp.close().unwrap();
}
@@ -24,8 +24,8 @@ fn process_no_tokens(b: &mut test::Bencher) {
let sample_path = temp.child("sample");
sample_path.write_str(data::NO_TOKENS).unwrap();
-let corrections = scorrect::Corrections::new();
-b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+let corrections = scorrect::Dictionary::new();
+b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
temp.close().unwrap();
}
@@ -36,8 +36,8 @@ fn process_single_token(b: &mut test::Bencher) {
let sample_path = temp.child("sample");
sample_path.write_str(data::SINGLE_TOKEN).unwrap();
-let corrections = scorrect::Corrections::new();
-b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+let corrections = scorrect::Dictionary::new();
+b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
temp.close().unwrap();
}
@@ -48,8 +48,8 @@ fn process_sherlock(b: &mut test::Bencher) {
let sample_path = temp.child("sample");
sample_path.write_str(data::SHERLOCK).unwrap();
-let corrections = scorrect::Corrections::new();
-b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+let corrections = scorrect::Dictionary::new();
+b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
temp.close().unwrap();
}
@@ -60,8 +60,8 @@ fn process_code(b: &mut test::Bencher) {
let sample_path = temp.child("sample");
sample_path.write_str(data::CODE).unwrap();
-let corrections = scorrect::Corrections::new();
-b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+let corrections = scorrect::Dictionary::new();
+b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
temp.close().unwrap();
}
@@ -72,8 +72,8 @@ fn process_corpus(b: &mut test::Bencher) {
let sample_path = temp.child("sample");
sample_path.write_str(data::CORPUS).unwrap();
-let corrections = scorrect::Corrections::new();
-b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+let corrections = scorrect::Dictionary::new();
+b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
temp.close().unwrap();
}


@@ -6,30 +6,30 @@ mod data;
#[bench]
fn tokenize_empty(b: &mut test::Bencher) {
-b.iter(|| scorrect::tokenize(data::EMPTY.as_bytes()).collect::<Vec<_>>());
+b.iter(|| scorrect::identifier::tokenize(data::EMPTY.as_bytes()).collect::<Vec<_>>());
}
#[bench]
fn tokenize_no_tokens(b: &mut test::Bencher) {
-b.iter(|| scorrect::tokenize(data::NO_TOKENS.as_bytes()).collect::<Vec<_>>());
+b.iter(|| scorrect::identifier::tokenize(data::NO_TOKENS.as_bytes()).collect::<Vec<_>>());
}
#[bench]
fn tokenize_single_token(b: &mut test::Bencher) {
-b.iter(|| scorrect::tokenize(data::SINGLE_TOKEN.as_bytes()).collect::<Vec<_>>());
+b.iter(|| scorrect::identifier::tokenize(data::SINGLE_TOKEN.as_bytes()).collect::<Vec<_>>());
}
#[bench]
fn tokenize_sherlock(b: &mut test::Bencher) {
-b.iter(|| scorrect::tokenize(data::SHERLOCK.as_bytes()).collect::<Vec<_>>());
+b.iter(|| scorrect::identifier::tokenize(data::SHERLOCK.as_bytes()).collect::<Vec<_>>());
}
#[bench]
fn tokenize_code(b: &mut test::Bencher) {
-b.iter(|| scorrect::tokenize(data::CODE.as_bytes()).collect::<Vec<_>>());
+b.iter(|| scorrect::identifier::tokenize(data::CODE.as_bytes()).collect::<Vec<_>>());
}
#[bench]
fn tokenize_corpus(b: &mut test::Bencher) {
-b.iter(|| scorrect::tokenize(data::CORPUS.as_bytes()).collect::<Vec<_>>());
+b.iter(|| scorrect::identifier::tokenize(data::CORPUS.as_bytes()).collect::<Vec<_>>());
}

src/dict.rs (new file, 18 additions)

@@ -0,0 +1,18 @@
include!(concat!(env!("OUT_DIR"), "/codegen.rs"));

pub struct Dictionary {
}

impl Dictionary {
    pub fn new() -> Self {
        Dictionary { }
    }

    pub fn correct_str<'s>(&'s self, word: &str) -> Option<&'s str> {
        DICTIONARY.get(word).map(|s| *s)
    }

    pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s [u8]> {
        std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| s.as_bytes())
    }
}
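
A minimal usage sketch of the new Dictionary API; the expected corrections come straight from the benchmark assertions above, and the correct_bytes result follows from its implementation:

fn main() {
    let dict = scorrect::Dictionary::new();
    // A known misspelling resolves through the generated DICTIONARY map.
    assert_eq!(dict.correct_str("successs"), Some("successes"));
    // Correctly spelled words yield no correction.
    assert_eq!(dict.correct_str("success"), None);
    // The byte-slice variant validates UTF-8 before the lookup.
    assert_eq!(dict.correct_bytes(b"successs"), Some(&b"successes"[..]));
}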

src/identifier.rs (new file, 74 additions)

@@ -0,0 +1,74 @@
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token<'t> {
    pub token: &'t [u8],
    pub offset: usize,
}

impl<'t> Token<'t> {
    pub fn new(token: &'t [u8], offset: usize) -> Self {
        Self {
            token,
            offset,
        }
    }
}

pub fn tokenize(content: &[u8]) -> impl Iterator<Item=Token> {
    lazy_static::lazy_static! {
        static ref SPLIT: regex::bytes::Regex = regex::bytes::Regex::new(r#"\b(\p{Alphabetic}|\d|_)+\b"#).unwrap();
    }
    SPLIT.find_iter(content).map(|m| Token::new(m.as_bytes(), m.start()))
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn tokenize_empty_is_empty() {
        let input = b"";
        let expected: Vec<Token> = vec![];
        let actual: Vec<_> = tokenize(input).collect();
        assert_eq!(expected, actual);
    }

    #[test]
    fn tokenize_word_is_word() {
        let input = b"word";
        let expected: Vec<Token> = vec![Token::new(b"word", 0)];
        let actual: Vec<_> = tokenize(input).collect();
        assert_eq!(expected, actual);
    }

    #[test]
    fn tokenize_space_separated_words() {
        let input = b"A B";
        let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
        let actual: Vec<_> = tokenize(input).collect();
        assert_eq!(expected, actual);
    }

    #[test]
    fn tokenize_dot_separated_words() {
        let input = b"A.B";
        let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
        let actual: Vec<_> = tokenize(input).collect();
        assert_eq!(expected, actual);
    }

    #[test]
    fn tokenize_namespace_separated_words() {
        let input = b"A::B";
        let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 3)];
        let actual: Vec<_> = tokenize(input).collect();
        assert_eq!(expected, actual);
    }

    #[test]
    fn tokenize_underscore_doesnt_separate() {
        let input = b"A_B";
        let expected: Vec<Token> = vec![Token::new(b"A_B", 0)];
        let actual: Vec<_> = tokenize(input).collect();
        assert_eq!(expected, actual);
    }
}

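A usage sketch of the relocated tokenizer; the expected tokens and offsets follow from the unit tests above (:: separates identifiers, _ does not):

fn main() {
    let tokens: Vec<_> = scorrect::identifier::tokenize(b"foo::bar_baz").collect();
    assert_eq!(
        tokens,
        vec![
            scorrect::identifier::Token::new(b"foo", 0),
            scorrect::identifier::Token::new(b"bar_baz", 5),
        ]
    );
}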

@@ -1,60 +1,33 @@
#[macro_use]
extern crate serde_derive;
+mod dict;
+pub mod identifier;
+pub mod report;
+pub use crate::dict::*;
use std::fs::File;
use std::io::Read;
-include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct Token<'t> {
-pub token: &'t [u8],
-pub offset: usize,
-}
-impl<'t> Token<'t> {
-pub fn new(token: &'t [u8], offset: usize) -> Self {
-Self {
-token,
-offset,
-}
-}
-}
-pub fn tokenize(content: &[u8]) -> impl Iterator<Item=Token> {
-lazy_static::lazy_static! {
-static ref SPLIT: regex::bytes::Regex = regex::bytes::Regex::new(r#"\b(\p{Alphabetic}|\d|_)+\b"#).unwrap();
-}
-SPLIT.find_iter(content).map(|m| Token::new(m.as_bytes(), m.start()))
-}
-#[derive(Debug, Serialize)]
-pub struct Message<'m> {
-path: &'m std::path::Path,
-#[serde(skip)]
-line: &'m [u8],
-line_num: usize,
-col_num: usize,
-word: &'m str,
-correction: &'m str,
-}
-pub fn process_file(path: &std::path::Path, dictionary: &Corrections, report: Report) -> Result<(), failure::Error> {
+pub fn process_file(path: &std::path::Path, dictionary: &Dictionary, report: report::Report) -> Result<(), failure::Error> {
let mut buffer = Vec::new();
File::open(path)?.read_to_end(&mut buffer)?;
for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {
let line_num = line_idx + 1;
-for token in tokenize(line) {
+for token in identifier::tokenize(line) {
if let Some(word) = std::str::from_utf8(token.token).ok() {
if let Some(correction) = dictionary.correct_str(word) {
let col_num = token.offset;
-let msg = Message {
+let msg = report::Message {
path,
line,
line_num,
col_num,
word,
correction,
+non_exhaustive: (),
};
report(msg);
}
@@ -65,100 +38,3 @@ pub fn process_file(path: &std::path::Path, dictionary: &Corrections, report: Re
Ok(())
}
-pub type Report = fn(msg: Message);
-pub fn print_silent(_: Message) {
-}
-pub fn print_brief(msg: Message) {
-println!("{}:{}:{}: {} -> {}", msg.path.display(), msg.line_num, msg.col_num, msg.word, msg.correction);
-}
-pub fn print_long(msg: Message) {
-let line_num = msg.line_num.to_string();
-let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect();
-let hl_indent: String = itertools::repeat_n(" ", msg.col_num).collect();
-let hl: String = itertools::repeat_n("^", msg.word.len()).collect();
-println!("error: `{}` should be `{}`", msg.word, msg.correction);
-println!(" --> {}:{}:{}", msg.path.display(), msg.line_num, msg.col_num);
-println!("{} |", line_indent);
-println!("{} | {}", msg.line_num, String::from_utf8_lossy(msg.line).trim_end());
-println!("{} | {}{}", line_indent, hl_indent, hl);
-println!("{} |", line_indent);
-}
-pub fn print_json(msg: Message) {
-println!("{}", serde_json::to_string(&msg).unwrap());
-}
-pub struct Corrections {
-}
-impl Corrections {
-pub fn new() -> Self {
-Corrections { }
-}
-pub fn correct_str<'s>(&'s self, word: &str) -> Option<&'s str> {
-DICTIONARY.get(word).map(|s| *s)
-}
-pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s [u8]> {
-std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| s.as_bytes())
-}
-}
-#[cfg(test)]
-mod test {
-use super::*;
-#[test]
-fn tokenize_empty_is_empty() {
-let input = b"";
-let expected: Vec<Token> = vec![];
-let actual: Vec<_> = tokenize(input).collect();
-assert_eq!(expected, actual);
-}
-#[test]
-fn tokenize_word_is_word() {
-let input = b"word";
-let expected: Vec<Token> = vec![Token::new(b"word", 0)];
-let actual: Vec<_> = tokenize(input).collect();
-assert_eq!(expected, actual);
-}
-#[test]
-fn tokenize_space_separated_words() {
-let input = b"A B";
-let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
-let actual: Vec<_> = tokenize(input).collect();
-assert_eq!(expected, actual);
-}
-#[test]
-fn tokenize_dot_separated_words() {
-let input = b"A.B";
-let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
-let actual: Vec<_> = tokenize(input).collect();
-assert_eq!(expected, actual);
-}
-#[test]
-fn tokenize_namespace_separated_words() {
-let input = b"A::B";
-let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 3)];
-let actual: Vec<_> = tokenize(input).collect();
-assert_eq!(expected, actual);
-}
-#[test]
-fn tokenize_underscore_doesnt_separate() {
-let input = b"A_B";
-let expected: Vec<Token> = vec![Token::new(b"A_B", 0)];
-let actual: Vec<_> = tokenize(input).collect();
-assert_eq!(expected, actual);
-}
-}

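After this hunk, the library root reduces to module wiring, the dict re-export, and process_file; a reconstruction from the added and context lines above:

#[macro_use]
extern crate serde_derive;

mod dict;
pub mod identifier;
pub mod report;

pub use crate::dict::*;

use std::fs::File;
use std::io::Read;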

@@ -15,12 +15,12 @@ arg_enum!{
}
impl Format {
-fn report(self) -> scorrect::Report {
+fn report(self) -> scorrect::report::Report {
match self {
-Format::Silent => scorrect::print_silent,
-Format::Brief => scorrect::print_brief,
-Format::Long => scorrect::print_long,
-Format::Json => scorrect::print_json,
+Format::Silent => scorrect::report::print_silent,
+Format::Brief => scorrect::report::print_brief,
+Format::Long => scorrect::report::print_long,
+Format::Json => scorrect::report::print_json,
}
}
}
@@ -63,7 +63,7 @@ impl Options {
fn run() -> Result<(), failure::Error> {
let options = Options::from_args().infer();
-let dictionary = scorrect::Corrections::new();
+let dictionary = scorrect::Dictionary::new();
let first_path = &options.path.get(0).expect("arg parsing enforces at least one");
let mut walk = ignore::WalkBuilder::new(first_path);

src/report.rs (new file, 40 additions)

@@ -0,0 +1,40 @@
#[derive(Debug, Serialize)]
pub struct Message<'m> {
    pub path: &'m std::path::Path,
    #[serde(skip)]
    pub line: &'m [u8],
    pub line_num: usize,
    pub col_num: usize,
    pub word: &'m str,
    pub correction: &'m str,
    #[serde(skip)]
    pub(crate) non_exhaustive: (),
}

pub type Report = fn(msg: Message);

pub fn print_silent(_: Message) {
}

pub fn print_brief(msg: Message) {
    println!("{}:{}:{}: {} -> {}", msg.path.display(), msg.line_num, msg.col_num, msg.word, msg.correction);
}

pub fn print_long(msg: Message) {
    let line_num = msg.line_num.to_string();
    let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect();
    let hl_indent: String = itertools::repeat_n(" ", msg.col_num).collect();
    let hl: String = itertools::repeat_n("^", msg.word.len()).collect();

    println!("error: `{}` should be `{}`", msg.word, msg.correction);
    println!(" --> {}:{}:{}", msg.path.display(), msg.line_num, msg.col_num);
    println!("{} |", line_indent);
    println!("{} | {}", msg.line_num, String::from_utf8_lossy(msg.line).trim_end());
    println!("{} | {}{}", line_indent, hl_indent, hl);
    println!("{} |", line_indent);
}

pub fn print_json(msg: Message) {
    println!("{}", serde_json::to_string(&msg).unwrap());
}
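
Since report::Report is a plain fn pointer, a user-defined reporter drops in next to the built-in printers. A hypothetical example (the reporter name and path are illustrative); note that the pub(crate) non_exhaustive field only blocks construction outside the crate, so external reporters can still read Message's public fields:

fn print_word_only(msg: scorrect::report::Message) {
    // Only print the misspelled word itself.
    println!("{}", msg.word);
}

fn main() -> Result<(), failure::Error> {
    let dictionary = scorrect::Dictionary::new();
    scorrect::process_file(std::path::Path::new("src/lib.rs"), &dictionary, print_word_only)?;
    Ok(())
}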