fix(api): Split lib

2024-11-21 16:41:01 -05:00 · 2019-01-24 08:24:20 -07:00 · 2019-01-24 08:24:20 -07:00 · 85ee5cfac9
commit 85ee5cfac9
parent d8ca9f9d5a
8 changed files with 170 additions and 162 deletions
--- a/benches/corrections.rs
+++ b/benches/corrections.rs
@ -4,19 +4,19 @@ extern crate test;
 #[bench]
 fn load_corrections(b: &mut test::Bencher) {
-    b.iter(|| scorrect::Corrections::new());
+    b.iter(|| scorrect::Dictionary::new());
 }
 #[bench]
 fn correction(b: &mut test::Bencher) {
-    let corrections = scorrect::Corrections::new();
+    let corrections = scorrect::Dictionary::new();
    assert_eq!(corrections.correct_str("successs"), Some("successes"));
    b.iter(|| corrections.correct_str("successs"));
 }
 #[bench]
 fn no_correction(b: &mut test::Bencher) {
-    let corrections = scorrect::Corrections::new();
+    let corrections = scorrect::Dictionary::new();
    assert_eq!(corrections.correct_str("success"), None);
    b.iter(|| corrections.correct_str("success"));
 }
--- a/benches/file.rs
+++ b/benches/file.rs
@ -12,8 +12,8 @@ fn process_empty(b: &mut test::Bencher) {
    let sample_path = temp.child("sample");
    sample_path.write_str(data::EMPTY).unwrap();
-    let corrections = scorrect::Corrections::new();
+    let corrections = scorrect::Dictionary::new();
-    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
    temp.close().unwrap();
 }
@ -24,8 +24,8 @@ fn process_no_tokens(b: &mut test::Bencher) {
    let sample_path = temp.child("sample");
    sample_path.write_str(data::NO_TOKENS).unwrap();
-    let corrections = scorrect::Corrections::new();
+    let corrections = scorrect::Dictionary::new();
-    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
    temp.close().unwrap();
 }
@ -36,8 +36,8 @@ fn process_single_token(b: &mut test::Bencher) {
    let sample_path = temp.child("sample");
    sample_path.write_str(data::SINGLE_TOKEN).unwrap();
-    let corrections = scorrect::Corrections::new();
+    let corrections = scorrect::Dictionary::new();
-    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
    temp.close().unwrap();
 }
@ -48,8 +48,8 @@ fn process_sherlock(b: &mut test::Bencher) {
    let sample_path = temp.child("sample");
    sample_path.write_str(data::SHERLOCK).unwrap();
-    let corrections = scorrect::Corrections::new();
+    let corrections = scorrect::Dictionary::new();
-    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
    temp.close().unwrap();
 }
@ -60,8 +60,8 @@ fn process_code(b: &mut test::Bencher) {
    let sample_path = temp.child("sample");
    sample_path.write_str(data::CODE).unwrap();
-    let corrections = scorrect::Corrections::new();
+    let corrections = scorrect::Dictionary::new();
-    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
    temp.close().unwrap();
 }
@ -72,8 +72,8 @@ fn process_corpus(b: &mut test::Bencher) {
    let sample_path = temp.child("sample");
    sample_path.write_str(data::CORPUS).unwrap();
-    let corrections = scorrect::Corrections::new();
+    let corrections = scorrect::Dictionary::new();
-    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
+    b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
    temp.close().unwrap();
 }
--- a/benches/tokenize.rs
+++ b/benches/tokenize.rs
@ -6,30 +6,30 @@ mod data;
 #[bench]
 fn tokenize_empty(b: &mut test::Bencher) {
-    b.iter(|| scorrect::tokenize(data::EMPTY.as_bytes()).collect::<Vec<_>>());
+    b.iter(|| scorrect::identifier::tokenize(data::EMPTY.as_bytes()).collect::<Vec<_>>());
 }
 #[bench]
 fn tokenize_no_tokens(b: &mut test::Bencher) {
-    b.iter(|| scorrect::tokenize(data::NO_TOKENS.as_bytes()).collect::<Vec<_>>());
+    b.iter(|| scorrect::identifier::tokenize(data::NO_TOKENS.as_bytes()).collect::<Vec<_>>());
 }
 #[bench]
 fn tokenize_single_token(b: &mut test::Bencher) {
-    b.iter(|| scorrect::tokenize(data::SINGLE_TOKEN.as_bytes()).collect::<Vec<_>>());
+    b.iter(|| scorrect::identifier::tokenize(data::SINGLE_TOKEN.as_bytes()).collect::<Vec<_>>());
 }
 #[bench]
 fn tokenize_sherlock(b: &mut test::Bencher) {
-    b.iter(|| scorrect::tokenize(data::SHERLOCK.as_bytes()).collect::<Vec<_>>());
+    b.iter(|| scorrect::identifier::tokenize(data::SHERLOCK.as_bytes()).collect::<Vec<_>>());
 }
 #[bench]
 fn tokenize_code(b: &mut test::Bencher) {
-    b.iter(|| scorrect::tokenize(data::CODE.as_bytes()).collect::<Vec<_>>());
+    b.iter(|| scorrect::identifier::tokenize(data::CODE.as_bytes()).collect::<Vec<_>>());
 }
 #[bench]
 fn tokenize_corpus(b: &mut test::Bencher) {
-    b.iter(|| scorrect::tokenize(data::CORPUS.as_bytes()).collect::<Vec<_>>());
+    b.iter(|| scorrect::identifier::tokenize(data::CORPUS.as_bytes()).collect::<Vec<_>>());
 }
--- a/src/dict.rs
+++ b/src/dict.rs
@ -0,0 +1,18 @@
 include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
 pub struct Dictionary {
 }
 impl Dictionary {
    pub fn new() -> Self {
        Dictionary { }
    }
    pub fn correct_str<'s>(&'s self, word: &str) -> Option<&'s str> {
        DICTIONARY.get(word).map(|s| *s)
    }
    pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s [u8]> {
        std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| s.as_bytes())
    }
 }
--- a/src/identifier.rs
+++ b/src/identifier.rs
@ -0,0 +1,74 @@
 /// A slice of input bytes paired with the position it was found at.
 ///
 /// `tokenize` produces these with `offset` set to the byte index at which the
 /// match starts inside the buffer it was given.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Token<'t> {
    /// The token's bytes, borrowed from the tokenized input.
    pub token: &'t [u8],
    /// Where the token starts.
    pub offset: usize,
 }
 impl<'t> Token<'t> {
    /// Pairs `token` with `offset` without copying the bytes.
    pub fn new(token: &'t [u8], offset: usize) -> Self {
        Token { token, offset }
    }
 }
 pub fn tokenize(content: &[u8]) -> impl Iterator<Item=Token> {
    lazy_static::lazy_static! {
        static ref SPLIT: regex::bytes::Regex = regex::bytes::Regex::new(r#"\b(\p{Alphabetic}|\d|_)+\b"#).unwrap();
    }
    SPLIT.find_iter(content).map(|m| Token::new(m.as_bytes(), m.start()))
 }
 #[cfg(test)]
 mod test {
    use super::*;
    /// Tokenizes `input` and gathers the resulting tokens for comparison.
    fn collect_tokens(input: &[u8]) -> Vec<Token<'_>> {
        tokenize(input).collect()
    }
    // Empty input yields no tokens.
    #[test]
    fn tokenize_empty_is_empty() {
        let expected: Vec<Token> = Vec::new();
        assert_eq!(expected, collect_tokens(b""));
    }
    // A lone word comes back as a single token at offset 0.
    #[test]
    fn tokenize_word_is_word() {
        let expected = vec![Token::new(b"word", 0)];
        assert_eq!(expected, collect_tokens(b"word"));
    }
    // Whitespace splits tokens.
    #[test]
    fn tokenize_space_separated_words() {
        let expected = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
        assert_eq!(expected, collect_tokens(b"A B"));
    }
    // A dot splits tokens.
    #[test]
    fn tokenize_dot_separated_words() {
        let expected = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
        assert_eq!(expected, collect_tokens(b"A.B"));
    }
    // A `::` path separator splits tokens; the second starts after both colons.
    #[test]
    fn tokenize_namespace_separated_words() {
        let expected = vec![Token::new(b"A", 0), Token::new(b"B", 3)];
        assert_eq!(expected, collect_tokens(b"A::B"));
    }
    // Underscores are part of a token rather than a separator.
    #[test]
    fn tokenize_underscore_doesnt_separate() {
        let expected = vec![Token::new(b"A_B", 0)];
        assert_eq!(expected, collect_tokens(b"A_B"));
    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,60 +1,33 @@
 #[macro_use]
 extern crate serde_derive;
 mod dict;
 pub mod identifier;
 pub mod report;
 pub use crate::dict::*;
 use std::fs::File;
 use std::io::Read;
-include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
+pub fn process_file(path: &std::path::Path, dictionary: &Dictionary, report: report::Report) -> Result<(), failure::Error> {
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Token<'t> {
    pub token: &'t [u8],
    pub offset: usize,
 }
 impl<'t> Token<'t> {
    pub fn new(token: &'t [u8], offset: usize) -> Self {
        Self {
            token,
            offset,
        }
    }
 }
 pub fn tokenize(content: &[u8]) -> impl Iterator<Item=Token> {
    lazy_static::lazy_static! {
        static ref SPLIT: regex::bytes::Regex = regex::bytes::Regex::new(r#"\b(\p{Alphabetic}|\d|_)+\b"#).unwrap();
    }
    SPLIT.find_iter(content).map(|m| Token::new(m.as_bytes(), m.start()))
 }
 #[derive(Debug, Serialize)]
 pub struct Message<'m> {
    path: &'m std::path::Path,
    #[serde(skip)]
    line: &'m [u8],
    line_num: usize,
    col_num: usize,
    word: &'m str,
    correction: &'m str,
 }
 pub fn process_file(path: &std::path::Path, dictionary: &Corrections, report: Report) -> Result<(), failure::Error> {
    let mut buffer = Vec::new();
    File::open(path)?.read_to_end(&mut buffer)?;
    for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {
        let line_num = line_idx + 1;
-        for token in tokenize(line) {
+        for token in identifier::tokenize(line) {
            if let Some(word) = std::str::from_utf8(token.token).ok() {
                if let Some(correction) = dictionary.correct_str(word) {
                    let col_num = token.offset;
-                    let msg = Message {
+                    let msg = report::Message {
                        path,
                        line,
                        line_num,
                        col_num,
                        word,
                        correction,
                        non_exhaustive: (),
                    };
                    report(msg);
                }
@ -65,100 +38,3 @@ pub fn process_file(path: &std::path::Path, dictionary: &Corrections, report: Re
    Ok(())
 }
 pub type Report = fn(msg: Message);
 pub fn print_silent(_: Message) {
 }
 pub fn print_brief(msg: Message) {
    println!("{}:{}:{}: {} -> {}", msg.path.display(), msg.line_num, msg.col_num, msg.word, msg.correction);
 }
 pub fn print_long(msg: Message) {
    let line_num = msg.line_num.to_string();
    let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect();
    let hl_indent: String = itertools::repeat_n(" ", msg.col_num).collect();
    let hl: String = itertools::repeat_n("^", msg.word.len()).collect();
    println!("error: `{}` should be `{}`", msg.word, msg.correction);
    println!("  --> {}:{}:{}", msg.path.display(), msg.line_num, msg.col_num);
    println!("{} |", line_indent);
    println!("{} | {}", msg.line_num, String::from_utf8_lossy(msg.line).trim_end());
    println!("{} | {}{}", line_indent, hl_indent, hl);
    println!("{} |", line_indent);
 }
 pub fn print_json(msg: Message) {
    println!("{}", serde_json::to_string(&msg).unwrap());
 }
 pub struct Corrections {
 }
 impl Corrections {
    pub fn new() -> Self {
        Corrections { }
    }
    pub fn correct_str<'s>(&'s self, word: &str) -> Option<&'s str> {
        DICTIONARY.get(word).map(|s| *s)
    }
    pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s [u8]> {
        std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| s.as_bytes())
    }
 }
 #[cfg(test)]
 mod test {
    use super::*;
    #[test]
    fn tokenize_empty_is_empty() {
        let input = b"";
        let expected: Vec<Token> = vec![];
        let actual: Vec<_> = tokenize(input).collect();
        assert_eq!(expected, actual);
    }
    #[test]
    fn tokenize_word_is_word() {
        let input = b"word";
        let expected: Vec<Token> = vec![Token::new(b"word", 0)];
        let actual: Vec<_> = tokenize(input).collect();
        assert_eq!(expected, actual);
    }
    #[test]
    fn tokenize_space_separated_words() {
        let input = b"A B";
        let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
        let actual: Vec<_> = tokenize(input).collect();
        assert_eq!(expected, actual);
    }
    #[test]
    fn tokenize_dot_separated_words() {
        let input = b"A.B";
        let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
        let actual: Vec<_> = tokenize(input).collect();
        assert_eq!(expected, actual);
    }
    #[test]
    fn tokenize_namespace_separated_words() {
        let input = b"A::B";
        let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 3)];
        let actual: Vec<_> = tokenize(input).collect();
        assert_eq!(expected, actual);
    }
    #[test]
    fn tokenize_underscore_doesnt_separate() {
        let input = b"A_B";
        let expected: Vec<Token> = vec![Token::new(b"A_B", 0)];
        let actual: Vec<_> = tokenize(input).collect();
        assert_eq!(expected, actual);
    }
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -15,12 +15,12 @@ arg_enum!{
 }
 impl Format {
-    fn report(self) -> scorrect::Report {
+    fn report(self) -> scorrect::report::Report {
        match self {
-            Format::Silent => scorrect::print_silent,
+            Format::Silent => scorrect::report::print_silent,
-            Format::Brief => scorrect::print_brief,
+            Format::Brief => scorrect::report::print_brief,
-            Format::Long => scorrect::print_long,
+            Format::Long => scorrect::report::print_long,
-            Format::Json => scorrect::print_json,
+            Format::Json => scorrect::report::print_json,
        }
    }
 }
@ -63,7 +63,7 @@ impl Options {
 fn run() -> Result<(), failure::Error> {
    let options = Options::from_args().infer();
-    let dictionary = scorrect::Corrections::new();
+    let dictionary = scorrect::Dictionary::new();
    let first_path = &options.path.get(0).expect("arg parsing enforces at least one");
    let mut walk = ignore::WalkBuilder::new(first_path);
--- a/src/report.rs
+++ b/src/report.rs
@ -0,0 +1,40 @@
 /// A single finding handed to a `Report` callback: one word and its suggested correction.
 ///
 /// Derives `Serialize`; fields marked `#[serde(skip)]` are left out of the serialized form.
 #[derive(Debug, Serialize)]
 pub struct Message<'m> {
    /// Path of the file the word was found in.
    pub path: &'m std::path::Path,
    /// Raw bytes of the line containing the word (not serialized).
    #[serde(skip)]
    pub line: &'m [u8],
    /// Line number; `process_file` fills this with a 1-based value.
    pub line_num: usize,
    /// Column of the word; `process_file` fills this with the token's 0-based byte offset
    /// within `line`.
    pub col_num: usize,
    /// The word as it appears in the file.
    pub word: &'m str,
    /// The replacement suggested for `word`.
    pub correction: &'m str,
    /// Crate-private placeholder: code outside this crate cannot build a `Message` with a
    /// struct literal. Presumably this leaves room to add fields later without breaking
    /// downstream users.
    #[serde(skip)]
    pub(crate) non_exhaustive: (),
 }
 /// Signature of a reporting callback: takes one `Message` by value and returns nothing.
 ///
 /// `print_silent`, `print_brief`, `print_long`, and `print_json` all have this shape.
 pub type Report = fn(msg: Message);
 /// Reporter that discards every message and prints nothing.
 pub fn print_silent(_: Message) {}
 pub fn print_brief(msg: Message) {
    println!("{}:{}:{}: {} -> {}", msg.path.display(), msg.line_num, msg.col_num, msg.word, msg.correction);
 }
 pub fn print_long(msg: Message) {
    let line_num = msg.line_num.to_string();
    let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect();
    let hl_indent: String = itertools::repeat_n(" ", msg.col_num).collect();
    let hl: String = itertools::repeat_n("^", msg.word.len()).collect();
    println!("error: `{}` should be `{}`", msg.word, msg.correction);
    println!("  --> {}:{}:{}", msg.path.display(), msg.line_num, msg.col_num);
    println!("{} |", line_indent);
    println!("{} | {}", msg.line_num, String::from_utf8_lossy(msg.line).trim_end());
    println!("{} | {}{}", line_indent, hl_indent, hl);
    println!("{} |", line_indent);
 }
 /// Reporter that prints each message to stdout as one line of JSON.
 ///
 /// Serialization fails when `msg.path` is not valid UTF-8 (serde cannot encode such a path
 /// as a string). Since a `Report` callback cannot return an error, the failure is written
 /// to stderr rather than aborting the whole run with a panic.
 pub fn print_json(msg: Message) {
    match serde_json::to_string(&msg) {
        Ok(json) => println!("{}", json),
        Err(err) => eprintln!("error: could not serialize report for {}: {}", msg.path.display(), err),
    }
 }