2019-01-23 09:33:51 -05:00
|
|
|
#[macro_use]
|
|
|
|
extern crate serde_derive;
|
|
|
|
|
2019-01-24 10:24:20 -05:00
|
|
|
mod dict;
|
2019-06-14 16:53:34 -04:00
|
|
|
mod dict_codegen;
|
2019-01-22 17:01:33 -05:00
|
|
|
|
2019-01-24 10:24:20 -05:00
|
|
|
pub mod report;
|
2019-04-16 22:16:31 -04:00
|
|
|
pub mod tokens;
|
2019-01-23 09:44:01 -05:00
|
|
|
|
2019-01-24 10:24:20 -05:00
|
|
|
pub use crate::dict::*;
|
2019-01-22 17:01:33 -05:00
|
|
|
|
2019-01-24 10:24:20 -05:00
|
|
|
use std::fs::File;
|
|
|
|
use std::io::Read;
|
2019-01-23 09:33:51 -05:00
|
|
|
|
2019-07-13 21:52:24 -04:00
|
|
|
use bstr::ByteSlice;
|
|
|
|
|
2019-06-14 08:43:21 -04:00
|
|
|
pub fn process_file(
|
|
|
|
path: &std::path::Path,
|
|
|
|
dictionary: &Dictionary,
|
2019-07-18 22:20:45 -04:00
|
|
|
check_filenames: bool,
|
2019-07-13 21:24:27 -04:00
|
|
|
ignore_hex: bool,
|
2019-07-13 22:14:06 -04:00
|
|
|
binary: bool,
|
2019-06-14 08:43:21 -04:00
|
|
|
report: report::Report,
|
|
|
|
) -> Result<(), failure::Error> {
|
2019-07-18 22:20:45 -04:00
|
|
|
if check_filenames {
|
|
|
|
for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
|
|
|
|
for ident in tokens::Identifier::parse(part) {
|
|
|
|
if !ignore_hex && is_hex(ident.token()) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if let Some(correction) = dictionary.correct_ident(ident) {
|
|
|
|
let msg = report::FilenameCorrection {
|
|
|
|
path,
|
|
|
|
typo: ident.token(),
|
|
|
|
correction,
|
|
|
|
non_exhaustive: (),
|
|
|
|
};
|
|
|
|
report(msg.into());
|
|
|
|
}
|
|
|
|
for word in ident.split() {
|
|
|
|
if let Some(correction) = dictionary.correct_word(word) {
|
|
|
|
let msg = report::FilenameCorrection {
|
|
|
|
path,
|
|
|
|
typo: word.token(),
|
|
|
|
correction,
|
|
|
|
non_exhaustive: (),
|
|
|
|
};
|
|
|
|
report(msg.into());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-01-22 17:01:33 -05:00
|
|
|
let mut buffer = Vec::new();
|
|
|
|
File::open(path)?.read_to_end(&mut buffer)?;
|
2019-07-13 22:14:06 -04:00
|
|
|
if !binary && buffer.find_byte(b'\0').is_some() {
|
2019-07-16 21:16:54 -04:00
|
|
|
let msg = report::BinaryFile {
|
|
|
|
path,
|
|
|
|
non_exhaustive: (),
|
|
|
|
};
|
|
|
|
report(msg.into());
|
2019-07-13 22:14:06 -04:00
|
|
|
return Ok(());
|
|
|
|
}
|
2019-07-13 21:52:24 -04:00
|
|
|
|
|
|
|
for (line_idx, line) in buffer.lines().enumerate() {
|
2019-01-22 17:01:33 -05:00
|
|
|
let line_num = line_idx + 1;
|
2019-07-16 21:38:54 -04:00
|
|
|
for ident in tokens::Identifier::parse_bytes(line) {
|
2019-07-13 21:24:27 -04:00
|
|
|
if !ignore_hex && is_hex(ident.token()) {
|
|
|
|
continue;
|
|
|
|
}
|
2019-06-22 13:57:23 -04:00
|
|
|
if let Some(correction) = dictionary.correct_ident(ident) {
|
|
|
|
let col_num = ident.offset();
|
2019-07-16 21:16:54 -04:00
|
|
|
let msg = report::Correction {
|
2019-06-14 17:57:41 -04:00
|
|
|
path,
|
|
|
|
line,
|
|
|
|
line_num,
|
|
|
|
col_num,
|
2019-06-23 00:01:27 -04:00
|
|
|
typo: ident.token(),
|
2019-06-14 17:57:41 -04:00
|
|
|
correction,
|
|
|
|
non_exhaustive: (),
|
|
|
|
};
|
2019-07-16 21:16:54 -04:00
|
|
|
report(msg.into());
|
2019-01-22 17:01:33 -05:00
|
|
|
}
|
2019-06-22 13:57:23 -04:00
|
|
|
for word in ident.split() {
|
2019-06-16 00:21:40 -04:00
|
|
|
if let Some(correction) = dictionary.correct_word(word) {
|
|
|
|
let col_num = word.offset();
|
2019-07-16 21:16:54 -04:00
|
|
|
let msg = report::Correction {
|
2019-06-16 00:21:40 -04:00
|
|
|
path,
|
|
|
|
line,
|
|
|
|
line_num,
|
|
|
|
col_num,
|
2019-06-23 00:01:27 -04:00
|
|
|
typo: word.token(),
|
2019-06-16 00:21:40 -04:00
|
|
|
correction,
|
|
|
|
non_exhaustive: (),
|
|
|
|
};
|
2019-07-16 21:16:54 -04:00
|
|
|
report(msg.into());
|
2019-06-16 00:21:40 -04:00
|
|
|
}
|
|
|
|
}
|
2019-01-22 17:01:33 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
2019-07-13 21:24:27 -04:00
|
|
|
|
|
|
|
/// Returns `true` if `ident` looks like a hexadecimal literal.
///
/// The accepted form is a `0x` or `0X` prefix followed by one or more
/// characters, each of which is an ASCII hex digit or a digit separator:
///
/// * `_`: number literal separator in Rust and other languages
/// * `'`: number literal separator in C++
///
/// This accepts the same strings as the pattern `^0[xX][0-9a-fA-F_']+$`,
/// but is implemented as a plain byte scan because it runs once per
/// identifier and needs no lazily initialized global state.
fn is_hex(ident: &str) -> bool {
    if !(ident.starts_with("0x") || ident.starts_with("0X")) {
        return false;
    }

    // Work on bytes: the prefix is two ASCII bytes, and any non-ASCII byte in
    // the remainder fails the check below, so no char-boundary issue arises.
    let digits = &ident.as_bytes()[2..];

    // At least one digit/separator must follow the prefix.
    !digits.is_empty()
        && digits
            .iter()
            .all(|&b| b.is_ascii_hexdigit() || b == b'_' || b == b'\'')
}
|