typos/src/lib.rs

114 lines
3.4 KiB
Rust
Raw Normal View History

2019-01-23 09:33:51 -05:00
#[macro_use]
extern crate serde_derive;
2019-01-24 10:24:20 -05:00
mod dict;
2019-06-14 16:53:34 -04:00
mod dict_codegen;
2019-01-22 17:01:33 -05:00
2019-01-24 10:24:20 -05:00
pub mod report;
2019-04-16 22:16:31 -04:00
pub mod tokens;
2019-01-23 09:44:01 -05:00
2019-01-24 10:24:20 -05:00
pub use crate::dict::*;
2019-01-22 17:01:33 -05:00
2019-01-24 10:24:20 -05:00
use std::fs::File;
use std::io::Read;
2019-01-23 09:33:51 -05:00
use bstr::ByteSlice;
2019-06-14 08:43:21 -04:00
pub fn process_file(
path: &std::path::Path,
dictionary: &Dictionary,
2019-07-18 22:20:45 -04:00
check_filenames: bool,
ignore_hex: bool,
2019-07-13 22:14:06 -04:00
binary: bool,
2019-06-14 08:43:21 -04:00
report: report::Report,
) -> Result<(), failure::Error> {
2019-07-18 22:20:45 -04:00
if check_filenames {
for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
for ident in tokens::Identifier::parse(part) {
if !ignore_hex && is_hex(ident.token()) {
continue;
}
if let Some(correction) = dictionary.correct_ident(ident) {
let msg = report::FilenameCorrection {
path,
typo: ident.token(),
correction,
non_exhaustive: (),
};
report(msg.into());
}
for word in ident.split() {
if let Some(correction) = dictionary.correct_word(word) {
let msg = report::FilenameCorrection {
path,
typo: word.token(),
correction,
non_exhaustive: (),
};
report(msg.into());
}
}
}
}
}
2019-01-22 17:01:33 -05:00
let mut buffer = Vec::new();
File::open(path)?.read_to_end(&mut buffer)?;
2019-07-13 22:14:06 -04:00
if !binary && buffer.find_byte(b'\0').is_some() {
let msg = report::BinaryFile {
path,
non_exhaustive: (),
};
report(msg.into());
2019-07-13 22:14:06 -04:00
return Ok(());
}
for (line_idx, line) in buffer.lines().enumerate() {
2019-01-22 17:01:33 -05:00
let line_num = line_idx + 1;
2019-07-16 21:38:54 -04:00
for ident in tokens::Identifier::parse_bytes(line) {
if !ignore_hex && is_hex(ident.token()) {
continue;
}
if let Some(correction) = dictionary.correct_ident(ident) {
let col_num = ident.offset();
let msg = report::Correction {
path,
line,
line_num,
col_num,
2019-06-23 00:01:27 -04:00
typo: ident.token(),
correction,
non_exhaustive: (),
};
report(msg.into());
2019-01-22 17:01:33 -05:00
}
for word in ident.split() {
if let Some(correction) = dictionary.correct_word(word) {
let col_num = word.offset();
let msg = report::Correction {
path,
line,
line_num,
col_num,
2019-06-23 00:01:27 -04:00
typo: word.token(),
correction,
non_exhaustive: (),
};
report(msg.into());
}
}
2019-01-22 17:01:33 -05:00
}
}
Ok(())
}
/// Returns `true` when `ident` is shaped like a hexadecimal literal: a `0x` or
/// `0X` prefix followed by at least one character, all of which are hex digits
/// or digit separators.
fn is_hex(ident: &str) -> bool {
    if !(ident.starts_with("0x") || ident.starts_with("0X")) {
        return false;
    }

    // The prefix is two ASCII bytes, so slicing the byte view at 2 is safe.
    let digits = &ident.as_bytes()[2..];
    if digits.is_empty() {
        return false;
    }

    digits.iter().all(|&byte| {
        // `_`: number literal separator in Rust and other languages
        // `'`: number literal separator in C++
        byte.is_ascii_hexdigit() || byte == b'_' || byte == b'\''
    })
}