Mirror of https://github.com/crate-ci/typos.git, synced 2024-11-21 16:41:01 -05:00
fix: Improve the quality of symbols being reported
parent c7ca904401
commit d78713dba1

2 changed files with 25 additions and 26 deletions
src/lib.rs (10 changes)
@@ -21,17 +21,16 @@ pub fn process_file(
     File::open(path)?.read_to_end(&mut buffer)?;
     for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {
         let line_num = line_idx + 1;
-        for token in tokens::Symbol::parse(line) {
-            if let Ok(word) = std::str::from_utf8(token.token) {
-                if let Some(correction) = dictionary.correct_str(word) {
-                    let col_num = token.offset;
+        for symbol in tokens::Symbol::parse(line) {
+            // Correct tokens as-is
+            if let Some(correction) = dictionary.correct_str(symbol.token) {
+                let col_num = symbol.offset;
                 let msg = report::Message {
                     path,
                     line,
                     line_num,
                     col_num,
-                    word,
+                    word: symbol.token,
                     correction,
                     non_exhaustive: (),
                 };
                 report(msg);
@@ -39,7 +38,6 @@ pub fn process_file(
             }
-            }
         }
     }
 
     Ok(())
 }
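For illustration, here is the shape this change gives the call site (a sketch, not the crate's code: Dictionary, correct_str, and the printed report below are stand-in simplifications; only the loop structure mirrors the diff). Dropping the per-call-site str::from_utf8 removes one nesting level and one silent failure path from every consumer:

    // Stand-in types; only the loop shape mirrors the diff above.
    struct Symbol<'t> {
        token: &'t str,
        offset: usize,
    }

    struct Dictionary;
    impl Dictionary {
        // Stand-in for dictionary.correct_str(): one hard-coded correction.
        fn correct_str(&self, word: &str) -> Option<&'static str> {
            if word == "calcualte" { Some("calculate") } else { None }
        }
    }

    fn process_line(line_num: usize, symbols: &[Symbol<'_>], dictionary: &Dictionary) {
        for symbol in symbols {
            // Correct tokens as-is; no str::from_utf8 at the call site anymore.
            if let Some(correction) = dictionary.correct_str(symbol.token) {
                println!("{}:{}: `{}` -> `{}`", line_num, symbol.offset, symbol.token, correction);
            }
        }
    }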
src/tokens.rs

@@ -1,11 +1,11 @@
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Symbol<'t> {
-    pub token: &'t [u8],
+    pub token: &'t str,
     pub offset: usize,
 }
 
 impl<'t> Symbol<'t> {
-    pub fn new(token: &'t [u8], offset: usize) -> Self {
+    pub fn new(token: &'t str, offset: usize) -> Self {
         Self { token, offset }
     }
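The practical effect of the &[u8] -> &str switch, sketched with a hypothetical consumer of the Symbol struct above: previously every user of a token had to run its own std::str::from_utf8 (and decide what to do on failure); now the UTF-8 guarantee travels in the type:

    // Hypothetical consumer of the Symbol defined above.
    // Before (token: &'t [u8]) it needed its own validation step, e.g.
    //     if let Ok(word) = std::str::from_utf8(symbol.token) { ... }
    // After (token: &'t str) the text is directly usable:
    fn display(symbol: &Symbol<'_>) -> String {
        format!("{} @ byte {}", symbol.token, symbol.offset)
    }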
@@ -15,9 +15,10 @@ impl<'t> Symbol<'t> {
             #[allow(clippy::invalid_regex)]
             static ref SPLIT: regex::bytes::Regex = regex::bytes::Regex::new(r#"\b(\p{Alphabetic}|\d|_)+\b"#).unwrap();
         }
-        SPLIT
-            .find_iter(content)
-            .map(|m| Symbol::new(m.as_bytes(), m.start()))
+        SPLIT.find_iter(content).filter_map(|m| {
+            let s = std::str::from_utf8(m.as_bytes()).ok();
+            s.map(|s| Symbol::new(s, m.start()))
+        })
     }
 }
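A self-contained version of the new parse() for experimentation (a sketch: it compiles the regex on every call and collects into a Vec, whereas the diff caches the regex in a lazy_static and returns an iterator). One observation: since the pattern runs in Unicode mode, every byte-level match should already be valid UTF-8, so the filter_map rarely, if ever, drops a token; its job is to carry that guarantee into the type.

    // Requires the `regex` crate; mirrors the pattern from the diff above.
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub struct Symbol<'t> {
        pub token: &'t str,
        pub offset: usize,
    }

    pub fn parse(content: &[u8]) -> Vec<Symbol<'_>> {
        // Compiled per call for brevity; the real code caches it.
        let split = regex::bytes::Regex::new(r"\b(\p{Alphabetic}|\d|_)+\b").unwrap();
        split
            .find_iter(content)
            .filter_map(|m| {
                // Skip (rather than report) any match that is not valid UTF-8.
                std::str::from_utf8(m.as_bytes())
                    .ok()
                    .map(|s| Symbol { token: s, offset: m.start() })
            })
            .collect()
    }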
@@ -36,7 +37,7 @@ mod test {
     #[test]
     fn tokenize_word_is_word() {
         let input = b"word";
-        let expected: Vec<Symbol> = vec![Symbol::new(b"word", 0)];
+        let expected: Vec<Symbol> = vec![Symbol::new("word", 0)];
         let actual: Vec<_> = Symbol::parse(input).collect();
         assert_eq!(expected, actual);
     }
@@ -44,7 +45,7 @@ mod test {
     #[test]
     fn tokenize_space_separated_words() {
         let input = b"A B";
-        let expected: Vec<Symbol> = vec![Symbol::new(b"A", 0), Symbol::new(b"B", 2)];
+        let expected: Vec<Symbol> = vec![Symbol::new("A", 0), Symbol::new("B", 2)];
         let actual: Vec<_> = Symbol::parse(input).collect();
         assert_eq!(expected, actual);
     }
@@ -52,7 +53,7 @@ mod test {
     #[test]
     fn tokenize_dot_separated_words() {
         let input = b"A.B";
-        let expected: Vec<Symbol> = vec![Symbol::new(b"A", 0), Symbol::new(b"B", 2)];
+        let expected: Vec<Symbol> = vec![Symbol::new("A", 0), Symbol::new("B", 2)];
         let actual: Vec<_> = Symbol::parse(input).collect();
         assert_eq!(expected, actual);
     }
@@ -60,7 +61,7 @@ mod test {
     #[test]
     fn tokenize_namespace_separated_words() {
         let input = b"A::B";
-        let expected: Vec<Symbol> = vec![Symbol::new(b"A", 0), Symbol::new(b"B", 3)];
+        let expected: Vec<Symbol> = vec![Symbol::new("A", 0), Symbol::new("B", 3)];
         let actual: Vec<_> = Symbol::parse(input).collect();
         assert_eq!(expected, actual);
     }
@@ -68,7 +69,7 @@ mod test {
     #[test]
    fn tokenize_underscore_doesnt_separate() {
         let input = b"A_B";
-        let expected: Vec<Symbol> = vec![Symbol::new(b"A_B", 0)];
+        let expected: Vec<Symbol> = vec![Symbol::new("A_B", 0)];
         let actual: Vec<_> = Symbol::parse(input).collect();
         assert_eq!(expected, actual);
     }
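Pulling the updated cases together, a quick runnable check against the parse() sketch above (hypothetical input combining the word, namespace, and underscore cases; offsets are byte positions into the input):

    fn main() {
        let symbols = parse(b"A::B_c.d2");
        let expected = vec![
            Symbol { token: "A", offset: 0 },
            Symbol { token: "B_c", offset: 3 },
            Symbol { token: "d2", offset: 7 },
        ];
        assert_eq!(expected, symbols);
    }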