From a00831c847b7efd81be520ea9b5d02f70555351f Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 31 Oct 2019 16:02:41 -0600 Subject: [PATCH 1/2] fix: Ignore numbers as identifiers --- typos/src/tokens.rs | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/typos/src/tokens.rs b/typos/src/tokens.rs index acc1a36..fa2c755 100644 --- a/typos/src/tokens.rs +++ b/typos/src/tokens.rs @@ -78,23 +78,33 @@ impl Parser { } pub fn parse<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> { - let ignore_hex = self.ignore_hex; self.words_str .find_iter(content) - .filter(move |m| !ignore_hex || !is_hex(m.as_str().as_bytes())) + .filter(move |m| self.accept(m.as_str().as_bytes())) .map(|m| Identifier::new_unchecked(m.as_str(), m.start())) } pub fn parse_bytes<'c>(&'c self, content: &'c [u8]) -> impl Iterator<Item = Identifier<'c>> { - let ignore_hex = self.ignore_hex; self.words_bytes .find_iter(content) - .filter(move |m| !ignore_hex || !is_hex(m.as_bytes())) + .filter(move |m| self.accept(m.as_bytes())) .filter_map(|m| { let s = std::str::from_utf8(m.as_bytes()).ok(); s.map(|s| Identifier::new_unchecked(s, m.start())) }) } + + fn accept(&self, contents: &[u8]) -> bool { + if is_number(contents) { + return false; + }; + + if self.ignore_hex { + return !is_hex(contents); + } + + true + } } impl Default for Parser { @@ -103,6 +113,15 @@ impl Default for Parser { } } +fn is_number(ident: &[u8]) -> bool { + lazy_static::lazy_static! { + // `_`: number literal separator in Rust and other languages + // `'`: number literal separator in C++ + static ref DIGITS: regex::bytes::Regex = regex::bytes::Regex::new(r#"^[0-9_']+$"#).unwrap(); + } + DIGITS.is_match(ident) +} + fn is_hex(ident: &[u8]) -> bool { lazy_static::lazy_static! 
{ // `_`: number literal separator in Rust and other languages From 68cd36d0de90226dbc9d31c2ce6d8bf6b69adb5c Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 1 Nov 2019 16:25:19 -0600 Subject: [PATCH 2/2] perf: Only do hex check if digits are in identifiers --- typos/src/tokens.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typos/src/tokens.rs b/typos/src/tokens.rs index fa2c755..883f1a5 100644 --- a/typos/src/tokens.rs +++ b/typos/src/tokens.rs @@ -50,7 +50,7 @@ impl ParserBuilder { Parser { words_str, words_bytes, - ignore_hex: self.ignore_hex, + ignore_hex: self.ignore_hex && self.include_digits, } } }