typos/src/lib.rs

#[macro_use]
extern crate serde_derive;

use std::fs::File;
use std::io::Read;

include!(concat!(env!("OUT_DIR"), "/codegen.rs"));

/// A word-like slice of an input buffer together with the position it was found at.
///
/// Besides `Debug`, the type derives `Clone`, `PartialEq` and `Eq` so tokens can be
/// duplicated and compared directly (for example in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token<'t> {
    /// The matched bytes, borrowed from the tokenized input.
    pub token: &'t [u8],
    /// Byte offset of `token` from the start of the tokenized input.
    pub offset: usize,
}

pub fn tokenize(content: &[u8]) -> impl Iterator<Item=Token> {
    lazy_static::lazy_static! {
        static ref SPLIT: regex::bytes::Regex = regex::bytes::Regex::new(r#"\b\w+\b"#).unwrap();
    }
    SPLIT.find_iter(content).map(|m| {
        Token {
            token: m.as_bytes(),
            offset: m.start(),
        }
    })
}

/// A single spelling finding, handed to a `Report` callback by `process_file`.
///
/// Derives `Serialize`; every field except `line` is part of the serialized form.
#[derive(Debug, Serialize)]
pub struct Message<'m> {
    /// Path of the file the word was found in.
    path: &'m std::path::Path,
    /// Raw bytes of the line containing the word; left out of the serialized form.
    #[serde(skip)]
    line: &'m [u8],
    /// 1-based number of the line within the file.
    line_num: usize,
    /// Byte offset of the word from the start of its line (0-based).
    col_num: usize,
    /// The word as it appears in the file.
    word: &'m str,
    /// The replacement the dictionary suggests for `word`.
    correction: &'m str,
}

/// Scans the file at `path` for words the dictionary knows a correction for and passes
/// every finding to `report`.
///
/// The whole file is read into memory and walked line by line (split on `\n`). Each
/// token yielded by `tokenize` that is valid UTF-8 is looked up in `dictionary`; tokens
/// that are not valid UTF-8 or have no correction are skipped. In the resulting
/// `Message`, `line_num` is 1-based and `col_num` is the token's byte offset within
/// its line.
///
/// # Errors
///
/// Returns an error when the file cannot be opened or read.
pub fn process_file(path: &std::path::Path, dictionary: &Corrections, report: Report) -> Result<(), failure::Error> {
    let mut contents = Vec::new();
    File::open(path)?.read_to_end(&mut contents)?;

    let lines = grep_searcher::LineIter::new(b'\n', &contents);
    for (index, line) in lines.enumerate() {
        let line_num = index + 1;
        for token in tokenize(line) {
            // Only valid UTF-8 tokens can be looked up in the dictionary.
            let word = match std::str::from_utf8(token.token) {
                Ok(word) => word,
                Err(_) => continue,
            };
            let correction = match dictionary.correct_str(word) {
                Some(correction) => correction,
                None => continue,
            };
            report(Message {
                path,
                line,
                line_num,
                col_num: token.offset,
                word,
                correction,
            });
        }
    }

    Ok(())
}

/// Callback that receives each finding produced by `process_file`; the `print_*`
/// functions in this file all have this signature.
pub type Report = fn(msg: Message);

/// Reporter that drops every message without producing any output.
pub fn print_silent(_: Message) {}

pub fn print_brief(msg: Message) {
    println!("{}:{}:{}: {} -> {}", msg.path.display(), msg.line_num, msg.col_num, msg.word, msg.correction);
}

pub fn print_long(msg: Message) {
    let line_num = msg.line_num.to_string();
    let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect();

    let hl_indent: String = itertools::repeat_n(" ", msg.col_num).collect();
    let hl: String = itertools::repeat_n("^", msg.word.len()).collect();

    println!("error: `{}` should be `{}`", msg.word, msg.correction);
    println!("  --> {}:{}:{}", msg.path.display(), msg.line_num, msg.col_num);
    println!("{} |", line_indent);
    println!("{} | {}", msg.line_num, String::from_utf8_lossy(msg.line).trim_end());
    println!("{} | {}{}", line_indent, hl_indent, hl);
    println!("{} |", line_indent);
}

/// Reporter that prints a finding on stdout as one line of JSON.
///
/// Serialization can fail at runtime: serde refuses to serialize a path that is not
/// valid UTF-8. Such a failure is reported on stderr and the message is skipped,
/// instead of panicking and aborting the whole run.
pub fn print_json(msg: Message) {
    match serde_json::to_string(&msg) {
        Ok(json) => println!("{}", json),
        Err(err) => eprintln!(
            "error: could not serialize message for {}: {}",
            msg.path.display(),
            err
        ),
    }
}

/// Handle for looking up corrections of misspelled words.
///
/// The type has no fields. It derives `Default` (equivalent to `Corrections::new()`),
/// along with `Debug` and `Clone`.
#[derive(Debug, Clone, Default)]
pub struct Corrections {
}

impl Corrections {
    /// Creates a corrections lookup.
    pub fn new() -> Self {
        Self {}
    }

    /// Returns the correction stored in `DICTIONARY` for `word`, or `None` when the
    /// word has no entry.
    pub fn correct_str<'s>(&'s self, word: &str) -> Option<&'s str> {
        DICTIONARY.get(word).cloned()
    }

    /// Byte-slice counterpart of `correct_str`.
    ///
    /// Input that is not valid UTF-8 cannot match an entry and yields `None`.
    pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s [u8]> {
        let word = std::str::from_utf8(word).ok()?;
        self.correct_str(word).map(str::as_bytes)
    }
}
feat: Control over output format 2019-01-23 09:33:51 -05:00			`#[macro_use]`
			`extern crate serde_derive;`

Initial commit 2019-01-22 17:01:33 -05:00			`use std::fs::File;`
			`use std::io::Read;`

			`include!(concat!(env!("OUT_DIR"), "/codegen.rs"));`

feat: Control over output format 2019-01-23 09:33:51 -05:00			`#[derive(Debug)]`
Initial commit 2019-01-22 17:01:33 -05:00			`pub struct Token<'t> {`
			`pub token: &'t [u8],`
			`pub offset: usize,`
			`}`

			`pub fn tokenize(content: &[u8]) -> impl Iterator<Item=Token> {`
			`lazy_static::lazy_static! {`
			`static ref SPLIT: regex::bytes::Regex = regex::bytes::Regex::new(r#"\b\w+\b"#).unwrap();`
			`}`
			`SPLIT.find_iter(content).map(\|m\| {`
			`Token {`
			`token: m.as_bytes(),`
			`offset: m.start(),`
			`}`
			`})`
			`}`

feat: Control over output format 2019-01-23 09:33:51 -05:00			`#[derive(Debug, Serialize)]`
			`pub struct Message<'m> {`
			`path: &'m std::path::Path,`
			`#[serde(skip)]`
			`line: &'m [u8],`
			`line_num: usize,`
			`col_num: usize,`
			`word: &'m str,`
			`correction: &'m str,`
			`}`

			`pub fn process_file(path: &std::path::Path, dictionary: &Corrections, report: Report) -> Result<(), failure::Error> {`
Initial commit 2019-01-22 17:01:33 -05:00			`let mut buffer = Vec::new();`
			`File::open(path)?.read_to_end(&mut buffer)?;`
			`for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {`
			`let line_num = line_idx + 1;`
			`for token in tokenize(line) {`
			`if let Some(word) = std::str::from_utf8(token.token).ok() {`
			`if let Some(correction) = dictionary.correct_str(word) {`
feat: Control over output format 2019-01-23 09:33:51 -05:00			`let col_num = token.offset;`
			`let msg = Message {`
			`path,`
			`line,`
			`line_num,`
			`col_num,`
			`word,`
			`correction,`
			`};`
			`report(msg);`
Initial commit 2019-01-22 17:01:33 -05:00			`}`
			`}`
			`}`
			`}`

			`Ok(())`
			`}`

feat: Control over output format 2019-01-23 09:33:51 -05:00			`pub type Report = fn(msg: Message);`

			`pub fn print_silent(_: Message) {`
			`}`

			`pub fn print_brief(msg: Message) {`
			`println!("{}:{}:{}: {} -> {}", msg.path.display(), msg.line_num, msg.col_num, msg.word, msg.correction);`
			`}`

			`pub fn print_long(msg: Message) {`
			`let line_num = msg.line_num.to_string();`
			`let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect();`

			`let hl_indent: String = itertools::repeat_n(" ", msg.col_num).collect();`
			`let hl: String = itertools::repeat_n("^", msg.word.len()).collect();`

			println!("error: `{}` should be `{}`", msg.word, msg.correction);
			`println!(" --> {}:{}:{}", msg.path.display(), msg.line_num, msg.col_num);`
			`println!("{} \|", line_indent);`
			`println!("{} \| {}", msg.line_num, String::from_utf8_lossy(msg.line).trim_end());`
			`println!("{} \| {}{}", line_indent, hl_indent, hl);`
			`println!("{} \|", line_indent);`
			`}`

			`pub fn print_json(msg: Message) {`
			`println!("{}", serde_json::to_string(&msg).unwrap());`
			`}`

Initial commit 2019-01-22 17:01:33 -05:00			`pub struct Corrections {`
			`}`

			`impl Corrections {`
			`pub fn new() -> Self {`
			`Corrections { }`
			`}`

fix(api): Clarify lifetimes 2019-01-22 18:59:49 -05:00			`pub fn correct_str<'s>(&'s self, word: &str) -> Option<&'s str> {`
Initial commit 2019-01-22 17:01:33 -05:00			`DICTIONARY.get(word).map(\|s\| *s)`
			`}`

fix(api): Clarify lifetimes 2019-01-22 18:59:49 -05:00			`pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s [u8]> {`
Initial commit 2019-01-22 17:01:33 -05:00			`std::str::from_utf8(word).ok().and_then(\|word\| DICTIONARY.get(word)).map(\|s\| s.as_bytes())`
			`}`
			`}`