typos/src/lib.rs

#[macro_use]
extern crate serde_derive;

mod dict;
mod dict_codegen;

pub mod report;
pub mod tokens;

pub use crate::dict::*;

use std::fs::File;
use std::io::Read;

/// Spell-checks a single file and hands every typo found to `report`.
///
/// The file at `path` is read fully into memory and scanned line by line (split on `\n`).
/// Each identifier parsed from a line is first checked against `dictionary` as a whole;
/// afterwards every word obtained by splitting the identifier is checked individually.
/// Each correction is passed to `report` as a `report::Message` carrying the path, the raw
/// line, the 1-based line number and the offset reported by the offending token.
///
/// When `ignore_hex` is `true`, identifiers that look like hex literals (see `is_hex`) are
/// skipped entirely, so that something like `0xBEAF` is not "corrected".
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read.
pub fn process_file(
    path: &std::path::Path,
    dictionary: &Dictionary,
    ignore_hex: bool,
    report: report::Report,
) -> Result<(), failure::Error> {
    let mut buffer = Vec::new();
    File::open(path)?.read_to_end(&mut buffer)?;
    for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {
        // Line numbers shown to the user start at 1.
        let line_num = line_idx + 1;
        for ident in tokens::Identifier::parse(line) {
            // Only skip hex-looking identifiers when the caller asked for them to be ignored.
            // (The guard used to be negated, which disabled the skip exactly when it was
            // requested.)
            if ignore_hex && is_hex(ident.token()) {
                continue;
            }
            // First pass: the identifier as a whole.
            if let Some(correction) = dictionary.correct_ident(ident) {
                let col_num = ident.offset();
                let msg = report::Message {
                    path,
                    line,
                    line_num,
                    col_num,
                    typo: ident.token(),
                    correction,
                    non_exhaustive: (),
                };
                report(msg);
            }
            // Second pass: each word the identifier is composed of.
            for word in ident.split() {
                if let Some(correction) = dictionary.correct_word(word) {
                    let col_num = word.offset();
                    let msg = report::Message {
                        path,
                        line,
                        line_num,
                        col_num,
                        typo: word.token(),
                        correction,
                        non_exhaustive: (),
                    };
                    report(msg);
                }
            }
        }
    }

    Ok(())
}

/// Returns `true` if `ident` has the shape of a hex literal.
///
/// Accepts exactly what the pattern `^0[xX][0-9a-fA-F_']+$` accepts: a `0x` or `0X`
/// prefix followed by at least one character, each of which is an ASCII hex digit or one
/// of the digit separators below. The whole string must match; trailing characters
/// (including a newline) make it a non-match.
///
/// The shape is fixed and ASCII-only, so a plain byte scan is used instead of a lazily
/// compiled regular expression.
fn is_hex(ident: &str) -> bool {
    let bytes = ident.as_bytes();
    // Need the two-byte prefix plus at least one digit/separator.
    if bytes.len() < 3 || bytes[0] != b'0' || (bytes[1] != b'x' && bytes[1] != b'X') {
        return false;
    }
    bytes[2..].iter().all(|&b| {
        // `_`: number literal separator in Rust and other languages
        // `'`: number literal separator in C++
        b.is_ascii_hexdigit() || b == b'_' || b == b'\''
    })
}
feat: Control over output format 2019-01-23 09:33:51 -05:00			`#[macro_use]`
			`extern crate serde_derive;`

fix(api): Split lib 2019-01-24 10:24:20 -05:00			`mod dict;`
chore(CI): Fighting clippy 2019-06-14 16:53:34 -04:00			`mod dict_codegen;`
Initial commit 2019-01-22 17:01:33 -05:00
fix(api): Split lib 2019-01-24 10:24:20 -05:00			`pub mod report;`
refactor: Rename module 2019-04-16 22:16:31 -04:00			`pub mod tokens;`
test: Basic tokenization testing 2019-01-23 09:44:01 -05:00
fix(api): Split lib 2019-01-24 10:24:20 -05:00			`pub use crate::dict::*;`
Initial commit 2019-01-22 17:01:33 -05:00
fix(api): Split lib 2019-01-24 10:24:20 -05:00			`use std::fs::File;`
			`use std::io::Read;`
feat: Control over output format 2019-01-23 09:33:51 -05:00
chore: Run cargo fmt 2019-06-14 08:43:21 -04:00			`pub fn process_file(`
			`path: &std::path::Path,`
			`dictionary: &Dictionary,`
feat(parser): Ignore hex literals Trying to avoid accidentally correcting something that looks like a word inside a hex number, like `0xBEAF`. Fixes #19 2019-07-13 21:24:27 -04:00			`ignore_hex: bool,`
chore: Run cargo fmt 2019-06-14 08:43:21 -04:00			`report: report::Report,`
			`) -> Result<(), failure::Error> {`
Initial commit 2019-01-22 17:01:33 -05:00			`let mut buffer = Vec::new();`
			`File::open(path)?.read_to_end(&mut buffer)?;`
			`for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {`
			`let line_num = line_idx + 1;`
refactor: Rename Symbol to Identifier This is more descriptive 2019-06-22 13:57:23 -04:00			`for ident in tokens::Identifier::parse(line) {`
feat(parser): Ignore hex literals Trying to avoid accidentally correcting something that looks like a word inside a hex number, like `0xBEAF`. Fixes #19 2019-07-13 21:24:27 -04:00			`if !ignore_hex && is_hex(ident.token()) {`
			`continue;`
			`}`
refactor: Rename Symbol to Identifier This is more descriptive 2019-06-22 13:57:23 -04:00			`if let Some(correction) = dictionary.correct_ident(ident) {`
			`let col_num = ident.offset();`
fix: Improve the quality of symbols being reported 2019-06-14 17:57:41 -04:00			`let msg = report::Message {`
			`path,`
			`line,`
			`line_num,`
			`col_num,`
refactor(report): Rename source field 2019-06-23 00:01:27 -04:00			`typo: ident.token(),`
fix: Improve the quality of symbols being reported 2019-06-14 17:57:41 -04:00			`correction,`
			`non_exhaustive: (),`
			`};`
			`report(msg);`
Initial commit 2019-01-22 17:01:33 -05:00			`}`
refactor: Rename Symbol to Identifier This is more descriptive 2019-06-22 13:57:23 -04:00			`for word in ident.split() {`
feat(parse): Process words composing symbols 2019-06-16 00:21:40 -04:00			`if let Some(correction) = dictionary.correct_word(word) {`
			`let col_num = word.offset();`
			`let msg = report::Message {`
			`path,`
			`line,`
			`line_num,`
			`col_num,`
refactor(report): Rename source field 2019-06-23 00:01:27 -04:00			`typo: word.token(),`
feat(parse): Process words composing symbols 2019-06-16 00:21:40 -04:00			`correction,`
			`non_exhaustive: (),`
			`};`
			`report(msg);`
			`}`
			`}`
Initial commit 2019-01-22 17:01:33 -05:00			`}`
			`}`

			`Ok(())`
			`}`
feat(parser): Ignore hex literals Trying to avoid accidentally correcting something that looks like a word inside a hex number, like `0xBEAF`. Fixes #19 2019-07-13 21:24:27 -04:00
			`fn is_hex(ident: &str) -> bool {`
			`lazy_static::lazy_static! {`
			// `_`: number literal separator in Rust and other languages
feat(parser): Support C++ hex literal separators 2019-07-13 21:28:33 -04:00			// `'`: number literal separator in C++
			`static ref HEX: regex::Regex = regex::Regex::new(r#"^0[xX][0-9a-fA-F_']+$"#).unwrap();`
feat(parser): Ignore hex literals Trying to avoid accidentally correcting something that looks like a word inside a hex number, like `0xBEAF`. Fixes #19 2019-07-13 21:24:27 -04:00			`}`
			`HEX.is_match(ident)`
			`}`