diff --git a/typos/src/tokens.rs b/typos/src/tokens.rs
index acc1a36..883f1a5 100644
--- a/typos/src/tokens.rs
+++ b/typos/src/tokens.rs
@@ -50,7 +50,7 @@ impl ParserBuilder {
         Parser {
             words_str,
             words_bytes,
-            ignore_hex: self.ignore_hex,
+            ignore_hex: self.ignore_hex && self.include_digits,
         }
     }
 }
@@ -78,23 +78,33 @@ impl Parser {
     }
 
     pub fn parse<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
-        let ignore_hex = self.ignore_hex;
         self.words_str
             .find_iter(content)
-            .filter(move |m| !ignore_hex || !is_hex(m.as_str().as_bytes()))
+            .filter(move |m| self.accept(m.as_str().as_bytes()))
             .map(|m| Identifier::new_unchecked(m.as_str(), m.start()))
     }
 
     pub fn parse_bytes<'c>(&'c self, content: &'c [u8]) -> impl Iterator<Item = Identifier<'c>> {
-        let ignore_hex = self.ignore_hex;
         self.words_bytes
             .find_iter(content)
-            .filter(move |m| !ignore_hex || !is_hex(m.as_bytes()))
+            .filter(move |m| self.accept(m.as_bytes()))
             .filter_map(|m| {
                 let s = std::str::from_utf8(m.as_bytes()).ok();
                 s.map(|s| Identifier::new_unchecked(s, m.start()))
             })
     }
+
+    fn accept(&self, contents: &[u8]) -> bool {
+        if is_number(contents) {
+            return false;
+        };
+
+        if self.ignore_hex {
+            return !is_hex(contents);
+        }
+
+        true
+    }
 }
 
 impl Default for Parser {
@@ -103,6 +113,15 @@ impl Default for Parser {
     }
 }
 
+fn is_number(ident: &[u8]) -> bool {
+    lazy_static::lazy_static! {
+        // `_`: number literal separator in Rust and other languages
+        // `'`: number literal separator in C++
+        static ref DIGITS: regex::bytes::Regex = regex::bytes::Regex::new(r#"^[0-9_']+$"#).unwrap();
+    }
+    DIGITS.is_match(ident)
+}
+
 fn is_hex(ident: &[u8]) -> bool {
     lazy_static::lazy_static! {
         // `_`: number literal separator in Rust and other languages