refactor: Switch to bytes for symbol lookup

This commit is contained in:
Ed Page 2019-04-16 18:15:12 -06:00
parent 779db94ecb
commit b6aabc9392
2 changed files with 16 additions and 17 deletions

View file

@ -12,7 +12,7 @@ impl Dictionary {
DICTIONARY.get(word).map(|s| *s) DICTIONARY.get(word).map(|s| *s)
} }
pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s [u8]> { pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s str> {
std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| s.as_bytes()) std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| *s)
} }
} }

View file

@ -17,21 +17,20 @@ pub fn process_file(path: &std::path::Path, dictionary: &Dictionary, report: rep
for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() { for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {
let line_num = line_idx + 1; let line_num = line_idx + 1;
for token in identifier::tokenize(line) { for token in identifier::tokenize(line) {
if let Some(word) = std::str::from_utf8(token.token).ok() { // Correct tokens as-is
// Correct tokens as-is if let Some(correction) = dictionary.correct_bytes(token.token) {
if let Some(correction) = dictionary.correct_str(word) { let word = String::from_utf8_lossy(token.token);
let col_num = token.offset; let col_num = token.offset;
let msg = report::Message { let msg = report::Message {
path, path,
line, line,
line_num, line_num,
col_num, col_num,
word, word: word.as_ref(),
correction, correction,
non_exhaustive: (), non_exhaustive: (),
}; };
report(msg); report(msg);
}
} }
} }
} }