typos/src/lib.rs

116 lines
3.6 KiB
Rust

#[macro_use]
extern crate serde_derive;
mod dict;
mod dict_codegen;
pub mod report;
pub mod tokens;
pub use crate::dict::*;
use std::fs::File;
use std::io::Read;
use bstr::ByteSlice;
/// Spell-checks a path and, optionally, the contents of the file it points to.
///
/// Every finding is passed to `report`; nothing is returned besides success or failure.
///
/// * `path` - the file to check. Its components are checked when `check_filenames` is set;
///   components that are not valid UTF-8 are skipped.
/// * `dictionary` - source of corrections for whole identifiers and for individual words.
/// * `check_filenames` - check the identifiers found in each component of `path`.
/// * `check_files` - read the file and check the identifiers on each line.
/// * `ignore_hex` - when `true`, identifiers that look like hex literals (see `is_hex`) are
///   skipped instead of being spell-checked.
/// * `binary` - when `false`, a file containing a NUL byte is reported as a
///   `report::BinaryFile` and its contents are not checked; when `true`, it is checked anyway.
/// * `report` - callback that receives each message.
///
/// Reported line numbers are 1-based. Column numbers are whatever the token's `offset()`
/// returns for that line.
///
/// # Errors
///
/// Returns an error if `check_files` is set and the file cannot be opened or read.
pub fn process_file(
    path: &std::path::Path,
    dictionary: &Dictionary,
    check_filenames: bool,
    check_files: bool,
    ignore_hex: bool,
    binary: bool,
    report: report::Report,
) -> Result<(), failure::Error> {
    if check_filenames {
        for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
            for ident in tokens::Identifier::parse(part) {
                // Hex literals are not words; skip them only when the caller asked for it.
                if ignore_hex && is_hex(ident.token()) {
                    continue;
                }
                // First try the identifier as a whole...
                if let Some(correction) = dictionary.correct_ident(ident) {
                    let msg = report::FilenameCorrection {
                        path,
                        typo: ident.token(),
                        correction,
                        non_exhaustive: (),
                    };
                    report(msg.into());
                }
                // ...then each word the identifier splits into.
                for word in ident.split() {
                    if let Some(correction) = dictionary.correct_word(word) {
                        let msg = report::FilenameCorrection {
                            path,
                            typo: word.token(),
                            correction,
                            non_exhaustive: (),
                        };
                        report(msg.into());
                    }
                }
            }
        }
    }

    if check_files {
        let mut buffer = Vec::new();
        File::open(path)?.read_to_end(&mut buffer)?;

        // A NUL byte is used as the marker for binary content.
        if !binary && buffer.find_byte(b'\0').is_some() {
            let msg = report::BinaryFile {
                path,
                non_exhaustive: (),
            };
            report(msg.into());
            return Ok(());
        }

        for (line_idx, line) in buffer.lines().enumerate() {
            let line_num = line_idx + 1;
            for ident in tokens::Identifier::parse_bytes(line) {
                // Hex literals are not words; skip them only when the caller asked for it.
                if ignore_hex && is_hex(ident.token()) {
                    continue;
                }
                // First try the identifier as a whole...
                if let Some(correction) = dictionary.correct_ident(ident) {
                    let col_num = ident.offset();
                    let msg = report::Correction {
                        path,
                        line,
                        line_num,
                        col_num,
                        typo: ident.token(),
                        correction,
                        non_exhaustive: (),
                    };
                    report(msg.into());
                }
                // ...then each word the identifier splits into.
                for word in ident.split() {
                    if let Some(correction) = dictionary.correct_word(word) {
                        let col_num = word.offset();
                        let msg = report::Correction {
                            path,
                            line,
                            line_num,
                            col_num,
                            typo: word.token(),
                            correction,
                            non_exhaustive: (),
                        };
                        report(msg.into());
                    }
                }
            }
        }
    }

    Ok(())
}
/// Returns `true` when `ident` looks like a hexadecimal literal.
///
/// A match is a `0x` or `0X` prefix followed by one or more ASCII hex digits or digit
/// separators, with nothing else before or after. A bare prefix (`"0x"`) does not match.
fn is_hex(ident: &str) -> bool {
    if !(ident.starts_with("0x") || ident.starts_with("0X")) {
        return false;
    }
    // The prefix is exactly two ASCII bytes, so everything from byte 2 on is the digit part.
    let digits = &ident.as_bytes()[2..];
    // `_`: number literal separator in Rust and other languages
    // `'`: number literal separator in C++
    !digits.is_empty()
        && digits
            .iter()
            .all(|&b| b.is_ascii_hexdigit() || b == b'_' || b == b'\'')
}