From b6aabc93923f6bfc99726f304c31dcf972010457 Mon Sep 17 00:00:00 2001
From: Ed Page
Date: Tue, 16 Apr 2019 18:15:12 -0600
Subject: [PATCH] refactor: Switch to bytes for symbol lookup

---
 src/dict.rs |  4 ++--
 src/lib.rs  | 29 ++++++++++++++---------------
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/src/dict.rs b/src/dict.rs
index 0248925..365e39d 100644
--- a/src/dict.rs
+++ b/src/dict.rs
@@ -12,7 +12,7 @@ impl Dictionary {
         DICTIONARY.get(word).map(|s| *s)
     }
 
-    pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s [u8]> {
-        std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| s.as_bytes())
+    pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s str> {
+        std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| *s)
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 7c09a67..2c43648 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -17,21 +17,20 @@ pub fn process_file(path: &std::path::Path, dictionary: &Dictionary, report: rep
     for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {
         let line_num = line_idx + 1;
         for token in identifier::tokenize(line) {
-            if let Some(word) = std::str::from_utf8(token.token).ok() {
-                // Correct tokens as-is
-                if let Some(correction) = dictionary.correct_str(word) {
-                    let col_num = token.offset;
-                    let msg = report::Message {
-                        path,
-                        line,
-                        line_num,
-                        col_num,
-                        word,
-                        correction,
-                        non_exhaustive: (),
-                    };
-                    report(msg);
-                }
+            // Correct tokens as-is
+            if let Some(correction) = dictionary.correct_bytes(token.token) {
+                let word = String::from_utf8_lossy(token.token);
+                let col_num = token.offset;
+                let msg = report::Message {
+                    path,
+                    line,
+                    line_num,
+                    col_num,
+                    word: word.as_ref(),
+                    correction,
+                    non_exhaustive: (),
+                };
+                report(msg);
             }
         }
     }