mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-22 00:51:11 -05:00
Merge pull request #32 from epage/hex
feat(parser): Ignore hex literals
This commit is contained in:
commit
9a3aef7212
5 changed files with 43 additions and 4 deletions
|
@ -17,6 +17,7 @@ fn process_empty(b: &mut test::Bencher) {
|
||||||
typos::process_file(
|
typos::process_file(
|
||||||
sample_path.path(),
|
sample_path.path(),
|
||||||
&corrections,
|
&corrections,
|
||||||
|
true,
|
||||||
typos::report::print_silent,
|
typos::report::print_silent,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
@ -35,6 +36,7 @@ fn process_no_tokens(b: &mut test::Bencher) {
|
||||||
typos::process_file(
|
typos::process_file(
|
||||||
sample_path.path(),
|
sample_path.path(),
|
||||||
&corrections,
|
&corrections,
|
||||||
|
true,
|
||||||
typos::report::print_silent,
|
typos::report::print_silent,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
@ -53,6 +55,7 @@ fn process_single_token(b: &mut test::Bencher) {
|
||||||
typos::process_file(
|
typos::process_file(
|
||||||
sample_path.path(),
|
sample_path.path(),
|
||||||
&corrections,
|
&corrections,
|
||||||
|
true,
|
||||||
typos::report::print_silent,
|
typos::report::print_silent,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
@ -71,6 +74,7 @@ fn process_sherlock(b: &mut test::Bencher) {
|
||||||
typos::process_file(
|
typos::process_file(
|
||||||
sample_path.path(),
|
sample_path.path(),
|
||||||
&corrections,
|
&corrections,
|
||||||
|
true,
|
||||||
typos::report::print_silent,
|
typos::report::print_silent,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
@ -89,6 +93,7 @@ fn process_code(b: &mut test::Bencher) {
|
||||||
typos::process_file(
|
typos::process_file(
|
||||||
sample_path.path(),
|
sample_path.path(),
|
||||||
&corrections,
|
&corrections,
|
||||||
|
true,
|
||||||
typos::report::print_silent,
|
typos::report::print_silent,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
@ -107,6 +112,7 @@ fn process_corpus(b: &mut test::Bencher) {
|
||||||
typos::process_file(
|
typos::process_file(
|
||||||
sample_path.path(),
|
sample_path.path(),
|
||||||
&corrections,
|
&corrections,
|
||||||
|
true,
|
||||||
typos::report::print_silent,
|
typos::report::print_silent,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
|
|
@ -40,7 +40,7 @@ Whitelist: A confidence rating is given for how close a word is to one in the wh
|
||||||
| Per-Lang Dict | No ([#14][def-14]) | No | ? | No | Yes |
|
| Per-Lang Dict | No ([#14][def-14]) | No | ? | No | Yes |
|
||||||
| CamelCase | Yes | No | ? | No | Yes |
|
| CamelCase | Yes | No | ? | No | Yes |
|
||||||
| snake_case | Yes | No | ? | No | Yes |
|
| snake_case | Yes | No | ? | No | Yes |
|
||||||
| Ignore Hex | No ([#19][def-19]) | No | ? | No | Yes |
|
| Ignore Hex | Yes | No | ? | No | Yes |
|
||||||
| C-Escapes | No ([#20][def-3]) | No | ? | No | Yes |
|
| C-Escapes | No ([#20][def-3]) | No | ? | No | Yes |
|
||||||
| Encodings | UTF-8 ([#17][def-17]) | UTF-8 | ? | Auto | Auto |
|
| Encodings | UTF-8 ([#17][def-17]) | UTF-8 | ? | Auto | Auto |
|
||||||
| Whole-project | Yes | Yes | Yes | Yes | No |
|
| Whole-project | Yes | Yes | Yes | Yes | No |
|
||||||
|
@ -59,6 +59,5 @@ Whitelist: A confidence rating is given for how close a word is to one in the wh
|
||||||
[def-14]: https://github.com/epage/typos/issues/14
|
[def-14]: https://github.com/epage/typos/issues/14
|
||||||
[def-17]: https://github.com/epage/typos/issues/17
|
[def-17]: https://github.com/epage/typos/issues/17
|
||||||
[def-18]: https://github.com/epage/typos/issues/18
|
[def-18]: https://github.com/epage/typos/issues/18
|
||||||
[def-19]: https://github.com/epage/typos/issues/19
|
|
||||||
[def-24]: https://github.com/epage/typos/issues/24
|
[def-24]: https://github.com/epage/typos/issues/24
|
||||||
[def-3]: https://github.com/epage/typos/issues/3
|
[def-3]: https://github.com/epage/typos/issues/3
|
||||||
|
|
13
src/lib.rs
13
src/lib.rs
|
@ -15,6 +15,7 @@ use std::io::Read;
|
||||||
pub fn process_file(
|
pub fn process_file(
|
||||||
path: &std::path::Path,
|
path: &std::path::Path,
|
||||||
dictionary: &Dictionary,
|
dictionary: &Dictionary,
|
||||||
|
ignore_hex: bool,
|
||||||
report: report::Report,
|
report: report::Report,
|
||||||
) -> Result<(), failure::Error> {
|
) -> Result<(), failure::Error> {
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
|
@ -22,6 +23,9 @@ pub fn process_file(
|
||||||
for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {
|
for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {
|
||||||
let line_num = line_idx + 1;
|
let line_num = line_idx + 1;
|
||||||
for ident in tokens::Identifier::parse(line) {
|
for ident in tokens::Identifier::parse(line) {
|
||||||
|
// Hex literals are not words: when the caller asked to ignore them, skip the
// identifier here so it is never handed to the dictionary for correction.
if ignore_hex && is_hex(ident.token()) {
    continue;
}
|
||||||
if let Some(correction) = dictionary.correct_ident(ident) {
|
if let Some(correction) = dictionary.correct_ident(ident) {
|
||||||
let col_num = ident.offset();
|
let col_num = ident.offset();
|
||||||
let msg = report::Message {
|
let msg = report::Message {
|
||||||
|
@ -55,3 +59,12 @@ pub fn process_file(
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports whether `ident` is spelled like a hexadecimal literal.
///
/// A match is `0x` or `0X` followed by at least one character drawn from the
/// hex digits and the digit separators `_` (number literal separator in Rust
/// and other languages) and `'` (number literal separator in C++). The whole
/// string must match; a bare `0x` is rejected.
fn is_hex(ident: &str) -> bool {
    // A fixed prefix plus a character-class scan needs no regex, so nothing
    // has to be compiled or cached for this check.
    let mut chars = ident.chars();
    if chars.next() != Some('0') {
        return false;
    }
    match chars.next() {
        Some('x') | Some('X') => {}
        _ => return false,
    }

    // Everything after the prefix must be a non-empty run of digits/separators.
    let digits = chars.as_str();
    !digits.is_empty()
        && digits
            .chars()
            .all(|c| c.is_ascii_hexdigit() || c == '_' || c == '\'')
}
|
||||||
|
|
23
src/main.rs
23
src/main.rs
|
@ -38,6 +38,12 @@ struct Options {
|
||||||
/// Paths to check
|
/// Paths to check
|
||||||
path: Vec<std::path::PathBuf>,
|
path: Vec<std::path::PathBuf>,
|
||||||
|
|
||||||
|
#[structopt(long, raw(overrides_with = r#""hex""#))]
|
||||||
|
/// Don't try to detect that an identifier looks like hex
|
||||||
|
no_hex: bool,
|
||||||
|
#[structopt(long, raw(overrides_with = r#""no-hex""#), raw(hidden = "true"))]
|
||||||
|
hex: bool,
|
||||||
|
|
||||||
#[structopt(
|
#[structopt(
|
||||||
long = "format",
|
long = "format",
|
||||||
raw(possible_values = "&Format::variants()", case_insensitive = "true"),
|
raw(possible_values = "&Format::variants()", case_insensitive = "true"),
|
||||||
|
@ -103,6 +109,15 @@ impl Options {
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn ignore_hex(&self) -> Option<bool> {
|
||||||
|
match (self.no_hex, self.hex) {
|
||||||
|
(true, false) => Some(false),
|
||||||
|
(false, true) => Some(true),
|
||||||
|
(false, false) => None,
|
||||||
|
(_, _) => unreachable!("StructOpt should make this impossible"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn ignore_hidden(&self) -> Option<bool> {
|
pub fn ignore_hidden(&self) -> Option<bool> {
|
||||||
match (self.hidden, self.no_hidden) {
|
match (self.hidden, self.no_hidden) {
|
||||||
(true, false) => Some(false),
|
(true, false) => Some(false),
|
||||||
|
@ -167,6 +182,7 @@ fn run() -> Result<(), failure::Error> {
|
||||||
let options = Options::from_args().infer();
|
let options = Options::from_args().infer();
|
||||||
|
|
||||||
let dictionary = typos::Dictionary::new();
|
let dictionary = typos::Dictionary::new();
|
||||||
|
let ignore_hex = options.ignore_hex().unwrap_or(true);
|
||||||
|
|
||||||
let first_path = &options
|
let first_path = &options
|
||||||
.path
|
.path
|
||||||
|
@ -187,7 +203,12 @@ fn run() -> Result<(), failure::Error> {
|
||||||
for entry in walk.build() {
|
for entry in walk.build() {
|
||||||
let entry = entry?;
|
let entry = entry?;
|
||||||
if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
|
if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
|
||||||
typos::process_file(entry.path(), &dictionary, options.format.report())?;
|
typos::process_file(
|
||||||
|
entry.path(),
|
||||||
|
&dictionary,
|
||||||
|
ignore_hex,
|
||||||
|
options.format.report(),
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@ impl<'t> Identifier<'t> {
|
||||||
lazy_static::lazy_static! {
|
lazy_static::lazy_static! {
|
||||||
// Getting false positives for this lint
|
// Getting false positives for this lint
|
||||||
#[allow(clippy::invalid_regex)]
|
#[allow(clippy::invalid_regex)]
|
||||||
static ref SPLIT: regex::bytes::Regex = regex::bytes::Regex::new(r#"\b(\p{Alphabetic}|\d|_)+\b"#).unwrap();
|
static ref SPLIT: regex::bytes::Regex = regex::bytes::Regex::new(r#"\b(\p{Alphabetic}|\d|_|')+\b"#).unwrap();
|
||||||
}
|
}
|
||||||
SPLIT.find_iter(content).filter_map(|m| {
|
SPLIT.find_iter(content).filter_map(|m| {
|
||||||
let s = std::str::from_utf8(m.as_bytes()).ok();
|
let s = std::str::from_utf8(m.as_bytes()).ok();
|
||||||
|
|
Loading…
Reference in a new issue