From 7fdd0dee164f74ca4d0dd642254419b02639fdc8 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 28 Dec 2020 21:45:19 -0600 Subject: [PATCH 01/16] style(typos): Make parser ordering clearer --- crates/typos/src/tokens.rs | 112 ++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 9f2728c..0a9e7f2 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -1,11 +1,3 @@ -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Case { - Title, - Lower, - Scream, - None, -} - #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ParserBuilder { ignore_hex: bool, @@ -237,52 +229,8 @@ impl<'t> Word<'t> { } } -/// Tracks the current 'mode' of the transformation algorithm as it scans the input string. -/// -/// The mode is a tri-state which tracks the case of the last cased character of the current -/// word. If there is no cased character (either lowercase or uppercase) since the previous -/// word boundary, than the mode is `Boundary`. If the last cased character is lowercase, then -/// the mode is `Lowercase`. Otherrwise, the mode is `Uppercase`. -#[derive(Clone, Copy, PartialEq, Debug)] -enum WordMode { - /// There have been no lowercase or uppercase characters in the current word. - Boundary, - /// The previous cased character in the current word is lowercase. - Lowercase, - /// The previous cased character in the current word is uppercase. - Uppercase, - Number, -} - -impl WordMode { - fn classify(c: char) -> Self { - if c.is_lowercase() { - WordMode::Lowercase - } else if c.is_uppercase() { - WordMode::Uppercase - } else if c.is_ascii_digit() { - WordMode::Number - } else { - // This assumes all characters are either lower or upper case. 
- WordMode::Boundary - } - } - - fn case(self, last: WordMode) -> Case { - match (self, last) { - (WordMode::Uppercase, WordMode::Uppercase) => Case::Scream, - (WordMode::Uppercase, WordMode::Lowercase) => Case::Title, - (WordMode::Lowercase, WordMode::Lowercase) => Case::Lower, - (WordMode::Number, WordMode::Number) => Case::None, - (WordMode::Number, _) - | (_, WordMode::Number) - | (WordMode::Boundary, _) - | (_, WordMode::Boundary) - | (WordMode::Lowercase, WordMode::Uppercase) => { - unreachable!("Invalid case combination: ({:?}, {:?})", self, last) - } - } - } +fn split_ident(ident: &str, offset: usize) -> impl Iterator> { + SplitIdent::new(ident, offset) } struct SplitIdent<'s> { @@ -377,8 +325,60 @@ impl<'s> Iterator for SplitIdent<'s> { } } -fn split_ident(ident: &str, offset: usize) -> impl Iterator> { - SplitIdent::new(ident, offset) +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Case { + Title, + Lower, + Scream, + None, +} + +/// Tracks the current 'mode' of the transformation algorithm as it scans the input string. +/// +/// The mode is a tri-state which tracks the case of the last cased character of the current +/// word. If there is no cased character (either lowercase or uppercase) since the previous +/// word boundary, than the mode is `Boundary`. If the last cased character is lowercase, then +/// the mode is `Lowercase`. Otherrwise, the mode is `Uppercase`. +#[derive(Clone, Copy, PartialEq, Debug)] +enum WordMode { + /// There have been no lowercase or uppercase characters in the current word. + Boundary, + /// The previous cased character in the current word is lowercase. + Lowercase, + /// The previous cased character in the current word is uppercase. 
+ Uppercase, + Number, +} + +impl WordMode { + fn classify(c: char) -> Self { + if c.is_lowercase() { + WordMode::Lowercase + } else if c.is_uppercase() { + WordMode::Uppercase + } else if c.is_ascii_digit() { + WordMode::Number + } else { + // This assumes all characters are either lower or upper case. + WordMode::Boundary + } + } + + fn case(self, last: WordMode) -> Case { + match (self, last) { + (WordMode::Uppercase, WordMode::Uppercase) => Case::Scream, + (WordMode::Uppercase, WordMode::Lowercase) => Case::Title, + (WordMode::Lowercase, WordMode::Lowercase) => Case::Lower, + (WordMode::Number, WordMode::Number) => Case::None, + (WordMode::Number, _) + | (_, WordMode::Number) + | (WordMode::Boundary, _) + | (_, WordMode::Boundary) + | (WordMode::Lowercase, WordMode::Uppercase) => { + unreachable!("Invalid case combination: ({:?}, {:?})", self, last) + } + } + } } #[cfg(test)] From 1e64080c0534038a05c52f2cecb19dd40a7195c1 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 28 Dec 2020 21:51:44 -0600 Subject: [PATCH 02/16] refactor(typos): Open up the name Parser --- benches/checks.rs | 10 +++++----- benches/tokenize.rs | 12 ++++++------ crates/typos/src/checks.rs | 28 ++++++++++++++-------------- crates/typos/src/tokens.rs | 38 +++++++++++++++++++------------------- src/checks.rs | 6 +++--- src/main.rs | 2 +- 6 files changed, 48 insertions(+), 48 deletions(-) diff --git a/benches/checks.rs b/benches/checks.rs index 8f8695e..f6aa46c 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -9,7 +9,7 @@ use typos::checks::Check; fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); let checks = typos::checks::TyposSettings::new().build_identifier_parser(); b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent)); } @@ -46,7 +46,7 @@ fn parse_idents_corpus_str(b: &mut 
test::Bencher) { fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); let checks = typos::checks::TyposSettings::new().build_identifier_parser(); b.iter(|| { checks.check_bytes( @@ -90,7 +90,7 @@ fn parse_idents_corpus_bytes(b: &mut test::Bencher) { fn bench_parse_word_str(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); let checks = typos::checks::TyposSettings::new().build_word_parser(); b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent)); } @@ -127,7 +127,7 @@ fn parse_words_corpus(b: &mut test::Bencher) { fn bench_typos(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); let checks = typos::checks::TyposSettings::new().build_typos(); b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent)); } @@ -168,7 +168,7 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) { sample_path.write_str(data).unwrap(); let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); let checks = typos::checks::TyposSettings::new().build_typos(); b.iter(|| { checks.check_file( diff --git a/benches/tokenize.rs b/benches/tokenize.rs index 32e6a74..efcce0b 100644 --- a/benches/tokenize.rs +++ b/benches/tokenize.rs @@ -6,19 +6,19 @@ mod data; #[bench] fn ident_parse_empty(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); b.iter(|| parser.parse_bytes(data::EMPTY.as_bytes()).last()); } #[bench] fn 
ident_parse_no_tokens(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); b.iter(|| parser.parse_bytes(data::NO_TOKENS.as_bytes()).last()); } #[bench] fn ident_parse_single_token(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); b.iter(|| { parser.parse_bytes(data::SINGLE_TOKEN.as_bytes()).last(); }); @@ -26,19 +26,19 @@ fn ident_parse_single_token(b: &mut test::Bencher) { #[bench] fn ident_parse_sherlock(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); b.iter(|| parser.parse_bytes(data::SHERLOCK.as_bytes()).last()); } #[bench] fn ident_parse_code(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); b.iter(|| parser.parse_bytes(data::CODE.as_bytes()).last()); } #[bench] fn ident_parse_corpus(b: &mut test::Bencher) { - let parser = typos::tokens::Parser::new(); + let parser = typos::tokens::Tokenizer::new(); b.iter(|| parser.parse_bytes(data::CORPUS.as_bytes()).last()); } diff --git a/crates/typos/src/checks.rs b/crates/typos/src/checks.rs index 6040440..62bae7d 100644 --- a/crates/typos/src/checks.rs +++ b/crates/typos/src/checks.rs @@ -9,7 +9,7 @@ pub trait Check: Send + Sync { fn check_str( &self, buffer: &str, - parser: &tokens::Parser, + parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error>; @@ -17,7 +17,7 @@ pub trait Check: Send + Sync { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Parser, + parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error>; @@ -31,7 +31,7 @@ pub trait Check: Send + Sync { fn check_filename( &self, path: &std::path::Path, - parser: &tokens::Parser, + parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn 
report::Report, ) -> Result<(), std::io::Error> { @@ -54,7 +54,7 @@ pub trait Check: Send + Sync { &self, path: &std::path::Path, explicit: bool, - parser: &tokens::Parser, + parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -172,7 +172,7 @@ impl Check for Typos { fn check_str( &self, buffer: &str, - parser: &tokens::Parser, + parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -217,7 +217,7 @@ impl Check for Typos { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Parser, + parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -284,7 +284,7 @@ impl Check for ParseIdentifiers { fn check_str( &self, buffer: &str, - parser: &tokens::Parser, + parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -303,7 +303,7 @@ impl Check for ParseIdentifiers { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Parser, + parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -343,7 +343,7 @@ impl Check for ParseWords { fn check_str( &self, buffer: &str, - parser: &tokens::Parser, + parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -365,7 +365,7 @@ impl Check for ParseWords { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Parser, + parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -404,7 +404,7 @@ impl Check for Files { fn check_str( &self, _buffer: &str, - _parser: &tokens::Parser, + _parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, _reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -414,7 +414,7 @@ impl Check for Files { fn 
check_bytes( &self, _buffer: &[u8], - _parser: &tokens::Parser, + _parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, _reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -436,7 +436,7 @@ impl Check for Files { fn check_filename( &self, _path: &std::path::Path, - _parser: &tokens::Parser, + _parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, _reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { @@ -447,7 +447,7 @@ impl Check for Files { &self, path: &std::path::Path, _explicit: bool, - _parser: &tokens::Parser, + _parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index 0a9e7f2..f372c96 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -1,5 +1,5 @@ #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ParserBuilder { +pub struct TokenizerBuilder { ignore_hex: bool, leading_digits: bool, leading_chars: String, @@ -7,7 +7,7 @@ pub struct ParserBuilder { include_chars: String, } -impl ParserBuilder { +impl TokenizerBuilder { pub fn new() -> Self { Default::default() } @@ -37,7 +37,7 @@ impl ParserBuilder { self } - pub fn build(&self) -> Parser { + pub fn build(&self) -> Tokenizer { let mut pattern = r#"\b("#.to_owned(); Self::push_pattern(&mut pattern, self.leading_digits, &self.leading_chars); Self::push_pattern(&mut pattern, self.include_digits, &self.include_chars); @@ -46,7 +46,7 @@ impl ParserBuilder { let words_str = regex::Regex::new(&pattern).unwrap(); let words_bytes = regex::bytes::Regex::new(&pattern).unwrap(); - Parser { + Tokenizer { words_str, words_bytes, // `leading_digits` let's us bypass the regexes since you can't have a decimal or @@ -69,7 +69,7 @@ impl ParserBuilder { } } -impl Default for ParserBuilder { +impl Default for TokenizerBuilder { fn default() -> Self { Self { ignore_hex: true, @@ -82,16 +82,16 @@ impl Default for ParserBuilder 
{ } #[derive(Debug, Clone)] -pub struct Parser { +pub struct Tokenizer { words_str: regex::Regex, words_bytes: regex::bytes::Regex, ignore_numbers: bool, ignore_hex: bool, } -impl Parser { +impl Tokenizer { pub fn new() -> Self { - ParserBuilder::default().build() + TokenizerBuilder::default().build() } pub fn parse_str<'c>(&'c self, content: &'c str) -> impl Iterator> { @@ -124,7 +124,7 @@ impl Parser { } } -impl Default for Parser { +impl Default for Tokenizer { fn default() -> Self { Self::new() } @@ -387,7 +387,7 @@ mod test { #[test] fn tokenize_empty_is_empty() { - let parser = Parser::new(); + let parser = Tokenizer::new(); let input = ""; let expected: Vec = vec![]; @@ -399,7 +399,7 @@ mod test { #[test] fn tokenize_word_is_word() { - let parser = Parser::new(); + let parser = Tokenizer::new(); let input = "word"; let expected: Vec = vec![Identifier::new_unchecked("word", 0)]; @@ -411,7 +411,7 @@ mod test { #[test] fn tokenize_space_separated_words() { - let parser = Parser::new(); + let parser = Tokenizer::new(); let input = "A B"; let expected: Vec = vec![ @@ -426,7 +426,7 @@ mod test { #[test] fn tokenize_dot_separated_words() { - let parser = Parser::new(); + let parser = Tokenizer::new(); let input = "A.B"; let expected: Vec = vec![ @@ -441,7 +441,7 @@ mod test { #[test] fn tokenize_namespace_separated_words() { - let parser = Parser::new(); + let parser = Tokenizer::new(); let input = "A::B"; let expected: Vec = vec![ @@ -456,7 +456,7 @@ mod test { #[test] fn tokenize_underscore_doesnt_separate() { - let parser = Parser::new(); + let parser = Tokenizer::new(); let input = "A_B"; let expected: Vec = vec![Identifier::new_unchecked("A_B", 0)]; @@ -468,7 +468,7 @@ mod test { #[test] fn tokenize_ignore_hex_enabled() { - let parser = ParserBuilder::new().ignore_hex(true).build(); + let parser = TokenizerBuilder::new().ignore_hex(true).build(); let input = "Hello 0xDEADBEEF World"; let expected: Vec = vec![ @@ -483,7 +483,7 @@ mod test { #[test] fn 
tokenize_ignore_hex_disabled() { - let parser = ParserBuilder::new() + let parser = TokenizerBuilder::new() .ignore_hex(false) .leading_digits(true) .build(); @@ -523,11 +523,11 @@ mod test { &[("A", Case::Scream, 0), ("String", Case::Title, 1)], ), ( - "SimpleXMLParser", + "SimpleXMLTokenizer", &[ ("Simple", Case::Title, 0), ("XML", Case::Scream, 6), - ("Parser", Case::Title, 9), + ("Tokenizer", Case::Title, 9), ], ), ( diff --git a/src/checks.rs b/src/checks.rs index 35d51c6..78b9718 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -1,7 +1,7 @@ pub(crate) fn check_path( walk: ignore::Walk, checks: &dyn typos::checks::Check, - parser: &typos::tokens::Parser, + parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, reporter: &dyn typos::report::Report, ) -> Result<(), ignore::Error> { @@ -14,7 +14,7 @@ pub(crate) fn check_path( pub(crate) fn check_path_parallel( walk: ignore::WalkParallel, checks: &dyn typos::checks::Check, - parser: &typos::tokens::Parser, + parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, reporter: &dyn typos::report::Report, ) -> Result<(), ignore::Error> { @@ -37,7 +37,7 @@ pub(crate) fn check_path_parallel( fn check_entry( entry: Result, checks: &dyn typos::checks::Check, - parser: &typos::tokens::Parser, + parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, reporter: &dyn typos::report::Report, ) -> Result<(), ignore::Error> { diff --git a/src/main.rs b/src/main.rs index 80d0b99..6275b95 100644 --- a/src/main.rs +++ b/src/main.rs @@ -61,7 +61,7 @@ fn run() -> proc_exit::ExitResult { config.default.update(&args.overrides); let config = config; - let parser = typos::tokens::ParserBuilder::new() + let parser = typos::tokens::TokenizerBuilder::new() .ignore_hex(config.default.ignore_hex()) .leading_digits(config.default.identifier_leading_digits()) .leading_chars(config.default.identifier_leading_chars().to_owned()) From e741f96de33c05731328221b156d46ba59767a21 Mon Sep 17 00:00:00 2001 
From: Ed Page Date: Wed, 30 Dec 2020 18:58:35 -0600 Subject: [PATCH 03/16] refactor(typos): Decouple parsing from checks --- crates/typos/src/checks.rs | 170 ++++++++++++++----------------------- crates/typos/src/dict.rs | 15 ++++ crates/typos/src/lib.rs | 4 +- crates/typos/src/parser.rs | 164 +++++++++++++++++++++++++++++++++++ crates/typos/src/report.rs | 8 +- 5 files changed, 250 insertions(+), 111 deletions(-) create mode 100644 crates/typos/src/parser.rs diff --git a/crates/typos/src/checks.rs b/crates/typos/src/checks.rs index 62bae7d..027857c 100644 --- a/crates/typos/src/checks.rs +++ b/crates/typos/src/checks.rs @@ -3,7 +3,6 @@ use bstr::ByteSlice; use crate::report; use crate::tokens; use crate::Dictionary; -use crate::Status; pub trait Check: Send + Sync { fn check_str( @@ -172,44 +171,23 @@ impl Check for Typos { fn check_str( &self, buffer: &str, - parser: &tokens::Tokenizer, + tokenizer: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - for ident in parser.parse_str(buffer) { - match dictionary.correct_ident(ident) { - Some(Status::Valid) => {} - Some(corrections) => { - let byte_offset = ident.offset(); - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), - byte_offset, - typo: ident.token(), - corrections, - }; - reporter.report(msg.into())?; - } - None => { - for word in ident.split() { - match dictionary.correct_word(word) { - Some(Status::Valid) => {} - Some(corrections) => { - let byte_offset = word.offset(); - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), - byte_offset, - typo: word.token(), - corrections, - }; - reporter.report(msg.into())?; - } - None => {} - } - } - } - } + let parser = crate::ParserBuilder::new() + .tokenizer(tokenizer) + .dictionary(dictionary) + .typos(); + for typo in parser.parse_str(buffer) { + let msg = report::Typo { + context: None, + buffer: 
std::borrow::Cow::Borrowed(buffer.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo, + corrections: typo.corrections, + }; + reporter.report(msg.into())?; } Ok(()) } @@ -217,46 +195,24 @@ impl Check for Typos { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Tokenizer, + tokenizer: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - for ident in parser.parse_bytes(buffer) { - match dictionary.correct_ident(ident) { - Some(Status::Valid) => {} - Some(corrections) => { - let byte_offset = ident.offset(); - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer), - byte_offset, - typo: ident.token(), - corrections, - }; - reporter.report(msg.into())?; - } - None => { - for word in ident.split() { - match dictionary.correct_word(word) { - Some(Status::Valid) => {} - Some(corrections) => { - let byte_offset = word.offset(); - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer), - byte_offset, - typo: word.token(), - corrections, - }; - reporter.report(msg.into())?; - } - None => {} - } - } - } - } + let parser = crate::ParserBuilder::new() + .tokenizer(tokenizer) + .dictionary(dictionary) + .typos(); + for typo in parser.parse_bytes(buffer) { + let msg = report::Typo { + context: None, + buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo, + corrections: typo.corrections, + }; + reporter.report(msg.into())?; } - Ok(()) } @@ -284,16 +240,19 @@ impl Check for ParseIdentifiers { fn check_str( &self, buffer: &str, - parser: &tokens::Tokenizer, + tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Identifier, - data: parser.parse_str(buffer).map(|i| i.token()).collect(), - }; - if !msg.data.is_empty() { + let parser = 
crate::ParserBuilder::new() + .tokenizer(tokenizer) + .identifiers(); + for word in parser.parse_str(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; reporter.report(msg.into())?; } @@ -303,16 +262,19 @@ impl Check for ParseIdentifiers { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Tokenizer, + tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Identifier, - data: parser.parse_bytes(buffer).map(|i| i.token()).collect(), - }; - if !msg.data.is_empty() { + let parser = crate::ParserBuilder::new() + .tokenizer(tokenizer) + .identifiers(); + for word in parser.parse_bytes(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; reporter.report(msg.into())?; } @@ -343,19 +305,17 @@ impl Check for ParseWords { fn check_str( &self, buffer: &str, - parser: &tokens::Tokenizer, + tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: parser - .parse_str(buffer) - .flat_map(|ident| ident.split().map(|i| i.token())) - .collect(), - }; - if !msg.data.is_empty() { + let word_parser = crate::ParserBuilder::new().tokenizer(tokenizer).words(); + for word in word_parser.parse_str(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; reporter.report(msg.into())?; } @@ -365,19 +325,17 @@ impl Check for ParseWords { fn check_bytes( &self, buffer: &[u8], - parser: &tokens::Tokenizer, + tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: 
parser - .parse_bytes(buffer) - .flat_map(|ident| ident.split().map(|i| i.token())) - .collect(), - }; - if !msg.data.is_empty() { + let parser = crate::ParserBuilder::new().tokenizer(tokenizer).words(); + for word in parser.parse_bytes(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; reporter.report(msg.into())?; } diff --git a/crates/typos/src/dict.rs b/crates/typos/src/dict.rs index 2fded93..083ebe2 100644 --- a/crates/typos/src/dict.rs +++ b/crates/typos/src/dict.rs @@ -47,3 +47,18 @@ pub trait Dictionary: Send + Sync { fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option>; } + +pub(crate) struct NullDictionary; + +impl Dictionary for NullDictionary { + fn correct_ident<'s, 'w>( + &'s self, + _ident: crate::tokens::Identifier<'w>, + ) -> Option> { + None + } + + fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option> { + None + } +} diff --git a/crates/typos/src/lib.rs b/crates/typos/src/lib.rs index 1cb77c9..7c09efb 100644 --- a/crates/typos/src/lib.rs +++ b/crates/typos/src/lib.rs @@ -1,7 +1,9 @@ mod dict; +mod parser; pub mod checks; pub mod report; pub mod tokens; -pub use crate::dict::*; +pub use dict::*; +pub use parser::*; diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs new file mode 100644 index 0000000..613fdad --- /dev/null +++ b/crates/typos/src/parser.rs @@ -0,0 +1,164 @@ +use crate::tokens; +use crate::Dictionary; + +#[derive(Clone)] +pub struct ParserBuilder<'p, 'd> { + tokenizer: Option<&'p tokens::Tokenizer>, + dictionary: &'d dyn Dictionary, +} + +impl<'p> ParserBuilder<'p, 'static> { + pub fn new() -> Self { + Default::default() + } +} + +impl<'p, 'd> ParserBuilder<'p, 'd> { + pub fn tokenizer(mut self, tokenizer: &'p tokens::Tokenizer) -> Self { + self.tokenizer = Some(tokenizer); + self + } + + pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> { + ParserBuilder { + tokenizer: 
self.tokenizer, + dictionary: dictionary, + } + } + + pub fn typos(&self) -> TyposParser<'p, 'd> { + TyposParser { + tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), + dictionary: self.dictionary, + } + } + + pub fn identifiers(&self) -> IdentifiersParser<'p> { + IdentifiersParser { + tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), + } + } + + pub fn words(&self) -> WordsParser<'p> { + WordsParser { + tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), + } + } +} + +impl<'p> Default for ParserBuilder<'p, 'static> { + fn default() -> Self { + Self { + tokenizer: None, + dictionary: &crate::NullDictionary, + } + } +} + +static DEFAULT_TOKENIZER: once_cell::sync::Lazy = + once_cell::sync::Lazy::new(|| tokens::Tokenizer::new()); + +#[derive(Clone)] +pub struct TyposParser<'p, 'd> { + tokenizer: &'p tokens::Tokenizer, + dictionary: &'d dyn Dictionary, +} + +impl<'p, 'd> TyposParser<'p, 'd> { + pub fn parse_str<'b, 's: 'b>(&'s self, buffer: &'b str) -> impl Iterator> { + self.tokenizer + .parse_str(buffer) + .flat_map(move |ident| self.process_ident(ident)) + } + + pub fn parse_bytes<'b, 's: 'b>(&'s self, buffer: &'b [u8]) -> impl Iterator> { + self.tokenizer + .parse_bytes(buffer) + .flat_map(move |ident| self.process_ident(ident)) + } + + fn process_ident<'i, 's: 'i>( + &'s self, + ident: tokens::Identifier<'i>, + ) -> impl Iterator> { + match self.dictionary.correct_ident(ident) { + Some(crate::Status::Valid) => itertools::Either::Left(None.into_iter()), + Some(corrections) => { + let typo = Typo { + byte_offset: ident.offset(), + typo: ident.token(), + corrections, + }; + itertools::Either::Left(Some(typo).into_iter()) + } + None => itertools::Either::Right( + ident + .split() + .filter_map(move |word| self.process_word(word)), + ), + } + } + + fn process_word<'w, 's: 'w>(&'s self, word: tokens::Word<'w>) -> Option> { + match self.dictionary.correct_word(word) { + Some(crate::Status::Valid) => None, + Some(corrections) => 
{ + let typo = Typo { + byte_offset: word.offset(), + typo: word.token(), + corrections, + }; + Some(typo) + } + None => None, + } + } +} + +#[derive(Clone, Debug, derive_setters::Setters)] +#[non_exhaustive] +pub struct Typo<'m> { + pub byte_offset: usize, + pub typo: &'m str, + pub corrections: crate::Status<'m>, +} + +impl<'m> Default for Typo<'m> { + fn default() -> Self { + Self { + byte_offset: 0, + typo: "", + corrections: crate::Status::Invalid, + } + } +} + +#[derive(Debug, Clone)] +pub struct IdentifiersParser<'p> { + tokenizer: &'p tokens::Tokenizer, +} + +impl<'p> IdentifiersParser<'p> { + pub fn parse_str(&self, buffer: &'p str) -> impl Iterator> { + self.tokenizer.parse_str(buffer) + } + + pub fn parse_bytes(&self, buffer: &'p [u8]) -> impl Iterator> { + self.tokenizer.parse_bytes(buffer) + } +} + +#[derive(Debug, Clone)] +pub struct WordsParser<'p> { + tokenizer: &'p tokens::Tokenizer, +} + +impl<'p> WordsParser<'p> { + pub fn parse_str(&self, buffer: &'p str) -> impl Iterator> { + self.tokenizer.parse_str(buffer).flat_map(|i| i.split()) + } + + pub fn parse_bytes(&self, buffer: &'p [u8]) -> impl Iterator> { + self.tokenizer.parse_bytes(buffer).flat_map(|i| i.split()) + } +} diff --git a/crates/typos/src/report.rs b/crates/typos/src/report.rs index d2d7ce9..bce2d3d 100644 --- a/crates/typos/src/report.rs +++ b/crates/typos/src/report.rs @@ -168,7 +168,7 @@ pub struct Parse<'m> { #[serde(flatten)] pub context: Option>, pub kind: ParseKind, - pub data: Vec<&'m str>, + pub data: &'m str, } impl<'m> Default for Parse<'m> { @@ -176,7 +176,7 @@ impl<'m> Default for Parse<'m> { Self { context: None, kind: ParseKind::Identifier, - data: vec![], + data: "", } } } @@ -265,7 +265,7 @@ impl Report for PrintBrief { writeln!(io::stdout(), "{}", msg.path.display())?; } Message::Parse(msg) => { - writeln!(io::stdout(), "{}", itertools::join(msg.data.iter(), " "))?; + writeln!(io::stdout(), "{}", msg.data)?; } Message::Error(msg) => { log::error!("{}: {}", 
context_display(&msg.context), msg.msg); @@ -289,7 +289,7 @@ impl Report for PrintLong { writeln!(io::stdout(), "{}", msg.path.display())?; } Message::Parse(msg) => { - writeln!(io::stdout(), "{}", itertools::join(msg.data.iter(), " "))?; + writeln!(io::stdout(), "{}", msg.data)?; } Message::Error(msg) => { log::error!("{}: {}", context_display(&msg.context), msg.msg); From bc90bacff2f6e8502f6ed761b068e3033da629e5 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 30 Dec 2020 19:41:08 -0600 Subject: [PATCH 04/16] refactor(typos): Pull out file logic --- Cargo.lock | 144 +++++---- Cargo.toml | 6 + benches/checks.rs | 22 +- crates/typos/Cargo.toml | 5 - crates/typos/src/checks.rs | 447 --------------------------- crates/typos/src/dict.rs | 2 +- crates/typos/src/lib.rs | 2 - crates/typos/src/parser.rs | 2 +- src/args.rs | 10 +- src/checks.rs | 463 +++++++++++++++++++++++++++- src/diff.rs | 12 +- src/lib.rs | 4 + src/main.rs | 19 +- src/replace.rs | 40 +-- {crates/typos/src => src}/report.rs | 16 +- 15 files changed, 596 insertions(+), 598 deletions(-) delete mode 100644 crates/typos/src/checks.rs rename {crates/typos/src => src}/report.rs (96%) diff --git a/Cargo.lock b/Cargo.lock index 6630c4e..e624421 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,9 +2,9 @@ # It is not intended for manual editing. 
[[package]] name = "addr2line" -version = "0.14.0" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c0929d69e78dd9bf5408269919fcbcaeb2e35e5d43e5815517cdc6a8e11a423" +checksum = "a55f82cfe485775d02112886f4169bde0c5894d75e79ead7eafe7e40a25e45f7" dependencies = [ "gimli", ] @@ -17,9 +17,9 @@ checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" [[package]] name = "ahash" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "865f8b0b3fced577b7df82e9b0eb7609595d7209c0b39e78d0646672e244b1b1" +checksum = "a75b7e6a93ecd6dbd2c225154d0fa7f86205574ecaa6c87429fb5f66ee677c44" dependencies = [ "getrandom 0.2.0", "lazy_static", @@ -46,9 +46,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.34" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf8dcb5b4bbaa28653b647d8c77bd4ed40183b48882e130c1f1ffb73de069fd7" +checksum = "ee67c11feeac938fae061b232e38e0b6d94f97a9df10e6271319325ac4c56a86" [[package]] name = "arrayvec" @@ -207,12 +207,6 @@ dependencies = [ "unicase", ] -[[package]] -name = "const_fn" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c478836e029dcef17fb47c89023448c64f781a046e0300e257ad8225ae59afab" - [[package]] name = "content_inspector" version = "0.2.4" @@ -224,13 +218,12 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec91540d98355f690a86367e566ecad2e9e579f230230eb7c21398372be73ea5" +checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d" dependencies = [ "autocfg", "cfg-if 1.0.0", - "const_fn", "lazy_static", ] @@ -275,9 +268,9 @@ dependencies = [ "fnv", "ident_case", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "strsim 0.9.3", - "syn 1.0.50", + "syn 1.0.57", ] 
[[package]] @@ -287,8 +280,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" dependencies = [ "darling_core", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", ] [[package]] @@ -312,8 +305,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41cb0e6161ad61ed084a36ba71fbba9e3ac5aee3606fb607fe08da6acbcf3d8c" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", ] [[package]] @@ -324,8 +317,8 @@ checksum = "6604612c19dd3bb353650b715b61f09bcb089dd17bdca1a9a42637079bf5e428" dependencies = [ "darling", "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", ] [[package]] @@ -374,8 +367,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "946ee94e3dbf58fdd324f9ce245c7b238d46a66f00e86a020b71996349e46cce" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", ] [[package]] @@ -421,17 +414,17 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "funty" -version = "1.0.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ba62103ce691c2fd80fbae2213dfdda9ce60804973ac6b6e97de818ea7f52c8" +checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" [[package]] name = "getrandom" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", "libc", "wasi", ] @@ -478,9 +471,9 @@ dependencies = [ [[package]] name = "heck" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" +checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" dependencies = [ "unicode-segmentation", ] @@ -559,9 +552,9 @@ dependencies = [ [[package]] name = "itoa" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" +checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" [[package]] name = "lazy_static" @@ -584,9 +577,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.80" +version = "0.2.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614" +checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb" [[package]] name = "log" @@ -730,9 +723,9 @@ checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" [[package]] name = "predicates" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96bfead12e90dccead362d62bb2c90a5f6fc4584963645bc7f71a735e0b0735a" +checksum = "73dd9b7b200044694dfede9edf907c1ca19630908443e9447e624993700c6932" dependencies = [ "difference", "float-cmp", @@ -743,15 +736,15 @@ dependencies = [ [[package]] name = "predicates-core" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06075c3a3e92559ff8929e7a280684489ea27fe44805174c3ebd9328dcb37178" +checksum = "fb3dbeaaf793584e29c58c7e3a82bbb3c7c06b63cea68d13b0e3cddc124104dc" [[package]] name = "predicates-tree" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e63c4859013b38a76eca2414c64911fba30def9e3202ac461a2d22831220124" +checksum = "aee95d988ee893cb35c06b148c80ed2cd52c8eea927f50ba7a0be1a786aeab73" dependencies = [ 
"predicates-core", "treeline", @@ -771,8 +764,8 @@ checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", "version_check", ] @@ -783,7 +776,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "version_check", ] @@ -822,9 +815,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" dependencies = [ "proc-macro2 1.0.24", ] @@ -841,7 +834,7 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" dependencies = [ - "getrandom 0.1.15", + "getrandom 0.1.16", "libc", "rand_chacha", "rand_core", @@ -865,7 +858,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" dependencies = [ - "getrandom 0.1.15", + "getrandom 0.1.16", ] [[package]] @@ -975,29 +968,29 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.117" +version = "1.0.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a" +checksum = "06c64263859d87aa2eb554587e2d23183398d617427327cf2b3d0ed8c69e4800" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.117" +version = "1.0.118" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e" +checksum = "c84d3526699cd55261af4b941e4e725444df67aa4f9e6a3564f18030d12672df" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", ] [[package]] name = "serde_json" -version = "1.0.59" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcac07dbffa1c65e7f816ab9eba78eb142c6d44410f4eeba1e26e4f5dfa56b95" +checksum = "4fceb2595057b6891a4ee808f70054bd2d12f0e97f1cbb78689b59f676df325a" dependencies = [ "itoa", "ryu", @@ -1048,8 +1041,8 @@ dependencies = [ "heck", "proc-macro-error", "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 1.0.8", + "syn 1.0.57", ] [[package]] @@ -1065,12 +1058,12 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.50" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443b4178719c5a851e1bde36ce12da21d74a0e60b4d982ec3385a933c812f0f6" +checksum = "4211ce9909eb971f111059df92c45640aad50a619cf55cd76476be803c4c68e6" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "unicode-xid 0.2.1", ] @@ -1114,22 +1107,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e9ae34b84616eedaaf1e9dd6026dbe00dcafa92aa0c8077cb69df1fcfe5e53e" +checksum = "76cc616c6abf8c8928e2fdcc0dbfab37175edd8fb49a4641066ad1364fdab146" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ba20f23e85b10754cd195504aebf6a27e2e6cbe28c17778a0c930724628dd56" +checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", - "syn 1.0.50", + "quote 
1.0.8", + "syn 1.0.57", ] [[package]] @@ -1143,9 +1136,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75cf45bb0bef80604d001caaec0d09da99611b3c0fd39d3080468875cdb65645" +checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa" dependencies = [ "serde", ] @@ -1161,16 +1154,11 @@ name = "typos" version = "0.3.0" dependencies = [ "anyhow", - "bstr", - "content_inspector", - "derive_more 0.99.11", - "derive_setters", "itertools", "log", "once_cell", "regex", "serde", - "serde_json", "thiserror", "unicode-segmentation", ] @@ -1185,21 +1173,27 @@ dependencies = [ "bstr", "clap", "clap-verbosity-flag", + "content_inspector", + "derive_more 0.99.11", + "derive_setters", "difflib", "env_logger 0.8.2", "human-panic", "ignore", + "itertools", "log", "phf", "predicates", "proc-exit", "serde", + "serde_json", "structopt", "toml", "typos", "typos-dict", "typos-vars", "unicase", + "unicode-segmentation", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index bd427a7..32a6f22 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,12 @@ ahash = "0.6.1" difflib = "0.4" proc-exit = "1.0" human-panic = "1.0.3" +content_inspector = "0.2.4" +unicode-segmentation = "1.6.0" +derive_more = "0.99.11" +derive_setters = "0.1" +itertools = "0.9" +serde_json = "1.0" [dev-dependencies] assert_fs = "1.0" diff --git a/benches/checks.rs b/benches/checks.rs index f6aa46c..92e4eb8 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -5,13 +5,13 @@ extern crate test; mod data; use assert_fs::prelude::*; -use typos::checks::Check; +use typos_cli::checks::Check; fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); - let checks = typos::checks::TyposSettings::new().build_identifier_parser(); - b.iter(|| checks.check_str(data, &parser, 
&corrections, &typos::report::PrintSilent)); + let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser(); + b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent)); } #[bench] @@ -47,13 +47,13 @@ fn parse_idents_corpus_str(b: &mut test::Bencher) { fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); - let checks = typos::checks::TyposSettings::new().build_identifier_parser(); + let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser(); b.iter(|| { checks.check_bytes( data.as_bytes(), &parser, &corrections, - &typos::report::PrintSilent, + &typos_cli::report::PrintSilent, ) }); } @@ -91,8 +91,8 @@ fn parse_idents_corpus_bytes(b: &mut test::Bencher) { fn bench_parse_word_str(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); - let checks = typos::checks::TyposSettings::new().build_word_parser(); - b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent)); + let checks = typos_cli::checks::TyposSettings::new().build_word_parser(); + b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent)); } #[bench] @@ -128,8 +128,8 @@ fn parse_words_corpus(b: &mut test::Bencher) { fn bench_typos(data: &str, b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); - let checks = typos::checks::TyposSettings::new().build_typos(); - b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent)); + let checks = typos_cli::checks::TyposSettings::new().build_typos(); + b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent)); } #[bench] @@ -169,14 +169,14 @@ fn bench_check_file(data: &str, b: 
&mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); - let checks = typos::checks::TyposSettings::new().build_typos(); + let checks = typos_cli::checks::TyposSettings::new().build_typos(); b.iter(|| { checks.check_file( sample_path.path(), true, &parser, &corrections, - &typos::report::PrintSilent, + &typos_cli::report::PrintSilent, ) }); diff --git a/crates/typos/Cargo.toml b/crates/typos/Cargo.toml index 15a317e..a128a7c 100644 --- a/crates/typos/Cargo.toml +++ b/crates/typos/Cargo.toml @@ -20,11 +20,6 @@ thiserror = "1.0" regex = "1.3" once_cell = "1.2.0" serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" itertools = "0.9" -bstr = "0.2" log = "0.4" unicode-segmentation = "1.7.1" -derive_more = "0.99.11" -derive_setters = "0.1" -content_inspector = "0.2.4" diff --git a/crates/typos/src/checks.rs b/crates/typos/src/checks.rs deleted file mode 100644 index 027857c..0000000 --- a/crates/typos/src/checks.rs +++ /dev/null @@ -1,447 +0,0 @@ -use bstr::ByteSlice; - -use crate::report; -use crate::tokens; -use crate::Dictionary; - -pub trait Check: Send + Sync { - fn check_str( - &self, - buffer: &str, - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error>; - - fn check_bytes( - &self, - buffer: &[u8], - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error>; - - fn check_filenames(&self) -> bool; - - fn check_files(&self) -> bool; - - fn binary(&self) -> bool; - - fn check_filename( - &self, - path: &std::path::Path, - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - if !self.check_filenames() { - return Ok(()); - } - - if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - let context_reporter = ReportContext { - reporter, - 
context: report::PathContext { path }.into(), - }; - self.check_str(file_name, parser, dictionary, &context_reporter)?; - } - - Ok(()) - } - - fn check_file( - &self, - path: &std::path::Path, - explicit: bool, - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - if !self.check_files() { - return Ok(()); - } - - let buffer = read_file(path, reporter)?; - let (buffer, content_type) = massage_data(buffer)?; - if !explicit && !self.binary() && content_type.is_binary() { - let msg = report::BinaryFile { path }; - reporter.report(msg.into())?; - return Ok(()); - } - - for (line_idx, line) in buffer.lines().enumerate() { - let line_num = line_idx + 1; - let context_reporter = ReportContext { - reporter, - context: report::FileContext { path, line_num }.into(), - }; - self.check_bytes(line, parser, dictionary, &context_reporter)?; - } - - Ok(()) - } -} - -struct ReportContext<'m, 'r> { - reporter: &'r dyn report::Report, - context: report::Context<'m>, -} - -impl<'m, 'r> report::Report for ReportContext<'m, 'r> { - fn report(&self, msg: report::Message) -> Result<(), std::io::Error> { - let msg = msg.context(Some(self.context.clone())); - self.reporter.report(msg) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct TyposSettings { - check_filenames: bool, - check_files: bool, - binary: bool, -} - -impl TyposSettings { - pub fn new() -> Self { - Default::default() - } - - pub fn check_filenames(&mut self, yes: bool) -> &mut Self { - self.check_filenames = yes; - self - } - - pub fn check_files(&mut self, yes: bool) -> &mut Self { - self.check_files = yes; - self - } - - pub fn binary(&mut self, yes: bool) -> &mut Self { - self.binary = yes; - self - } - - pub fn build_typos(&self) -> Typos { - Typos { - check_filenames: self.check_filenames, - check_files: self.check_files, - binary: self.binary, - } - } - - pub fn build_identifier_parser(&self) -> ParseIdentifiers { - ParseIdentifiers 
{ - check_filenames: self.check_filenames, - check_files: self.check_files, - binary: self.binary, - } - } - - pub fn build_word_parser(&self) -> ParseWords { - ParseWords { - check_filenames: self.check_filenames, - check_files: self.check_files, - binary: self.binary, - } - } - - pub fn build_files(&self) -> Files { - Files {} - } -} - -impl Default for TyposSettings { - fn default() -> Self { - Self { - check_filenames: true, - check_files: true, - binary: false, - } - } -} - -#[derive(Debug, Clone)] -pub struct Typos { - check_filenames: bool, - check_files: bool, - binary: bool, -} - -impl Check for Typos { - fn check_str( - &self, - buffer: &str, - tokenizer: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let parser = crate::ParserBuilder::new() - .tokenizer(tokenizer) - .dictionary(dictionary) - .typos(); - for typo in parser.parse_str(buffer) { - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), - byte_offset: typo.byte_offset, - typo: typo.typo, - corrections: typo.corrections, - }; - reporter.report(msg.into())?; - } - Ok(()) - } - - fn check_bytes( - &self, - buffer: &[u8], - tokenizer: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let parser = crate::ParserBuilder::new() - .tokenizer(tokenizer) - .dictionary(dictionary) - .typos(); - for typo in parser.parse_bytes(buffer) { - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), - byte_offset: typo.byte_offset, - typo: typo.typo, - corrections: typo.corrections, - }; - reporter.report(msg.into())?; - } - Ok(()) - } - - fn check_filenames(&self) -> bool { - self.check_filenames - } - - fn check_files(&self) -> bool { - self.check_files - } - - fn binary(&self) -> bool { - self.binary - } -} - -#[derive(Debug, Clone)] -pub struct ParseIdentifiers { - 
check_filenames: bool, - check_files: bool, - binary: bool, -} - -impl Check for ParseIdentifiers { - fn check_str( - &self, - buffer: &str, - tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let parser = crate::ParserBuilder::new() - .tokenizer(tokenizer) - .identifiers(); - for word in parser.parse_str(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; - } - - Ok(()) - } - - fn check_bytes( - &self, - buffer: &[u8], - tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let parser = crate::ParserBuilder::new() - .tokenizer(tokenizer) - .identifiers(); - for word in parser.parse_bytes(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; - } - - Ok(()) - } - - fn check_filenames(&self) -> bool { - self.check_filenames - } - - fn check_files(&self) -> bool { - self.check_files - } - - fn binary(&self) -> bool { - self.binary - } -} - -#[derive(Debug, Clone)] -pub struct ParseWords { - check_filenames: bool, - check_files: bool, - binary: bool, -} - -impl Check for ParseWords { - fn check_str( - &self, - buffer: &str, - tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let word_parser = crate::ParserBuilder::new().tokenizer(tokenizer).words(); - for word in word_parser.parse_str(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; - } - - Ok(()) - } - - fn check_bytes( - &self, - buffer: &[u8], - tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let parser = 
crate::ParserBuilder::new().tokenizer(tokenizer).words(); - for word in parser.parse_bytes(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; - } - - Ok(()) - } - - fn check_filenames(&self) -> bool { - self.check_filenames - } - - fn check_files(&self) -> bool { - self.check_files - } - - fn binary(&self) -> bool { - self.binary - } -} - -#[derive(Debug, Clone)] -pub struct Files {} - -impl Check for Files { - fn check_str( - &self, - _buffer: &str, - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_bytes( - &self, - _buffer: &[u8], - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_filenames(&self) -> bool { - true - } - - fn check_files(&self) -> bool { - true - } - - fn binary(&self) -> bool { - true - } - - fn check_filename( - &self, - _path: &std::path::Path, - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_file( - &self, - path: &std::path::Path, - _explicit: bool, - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let msg = report::File::new(path); - reporter.report(msg.into())?; - - Ok(()) - } -} - -fn read_file( - path: &std::path::Path, - reporter: &dyn report::Report, -) -> Result, std::io::Error> { - let buffer = match std::fs::read(path) { - Ok(buffer) => buffer, - Err(err) => { - let msg = report::Error::new(err.to_string()); - reporter.report(msg.into())?; - Vec::new() - } - }; - Ok(buffer) -} - -fn massage_data( - buffer: Vec, -) -> Result<(Vec, content_inspector::ContentType), std::io::Error> { - let mut content_type = 
content_inspector::inspect(&buffer); - - // HACK: We only support UTF-8 at the moment - if content_type != content_inspector::ContentType::UTF_8_BOM - && content_type != content_inspector::ContentType::UTF_8 - { - content_type = content_inspector::ContentType::BINARY; - } - - Ok((buffer, content_type)) -} diff --git a/crates/typos/src/dict.rs b/crates/typos/src/dict.rs index 083ebe2..6e0a7f8 100644 --- a/crates/typos/src/dict.rs +++ b/crates/typos/src/dict.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize, derive_more::From)] +#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)] #[serde(rename_all = "snake_case")] #[serde(untagged)] pub enum Status<'c> { diff --git a/crates/typos/src/lib.rs b/crates/typos/src/lib.rs index 7c09efb..93ba77d 100644 --- a/crates/typos/src/lib.rs +++ b/crates/typos/src/lib.rs @@ -1,8 +1,6 @@ mod dict; mod parser; -pub mod checks; -pub mod report; pub mod tokens; pub use dict::*; diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs index 613fdad..d427da1 100644 --- a/crates/typos/src/parser.rs +++ b/crates/typos/src/parser.rs @@ -115,7 +115,7 @@ impl<'p, 'd> TyposParser<'p, 'd> { } } -#[derive(Clone, Debug, derive_setters::Setters)] +#[derive(Clone, Debug)] #[non_exhaustive] pub struct Typo<'m> { pub byte_offset: usize, diff --git a/src/args.rs b/src/args.rs index 1b1d153..4a3398a 100644 --- a/src/args.rs +++ b/src/args.rs @@ -12,13 +12,13 @@ arg_enum! 
{ } } -pub const PRINT_SILENT: typos::report::PrintSilent = typos::report::PrintSilent; -pub const PRINT_BRIEF: typos::report::PrintBrief = typos::report::PrintBrief; -pub const PRINT_LONG: typos::report::PrintLong = typos::report::PrintLong; -pub const PRINT_JSON: typos::report::PrintJson = typos::report::PrintJson; +pub const PRINT_SILENT: typos_cli::report::PrintSilent = typos_cli::report::PrintSilent; +pub const PRINT_BRIEF: typos_cli::report::PrintBrief = typos_cli::report::PrintBrief; +pub const PRINT_LONG: typos_cli::report::PrintLong = typos_cli::report::PrintLong; +pub const PRINT_JSON: typos_cli::report::PrintJson = typos_cli::report::PrintJson; impl Format { - pub(crate) fn reporter(self) -> &'static dyn typos::report::Report { + pub(crate) fn reporter(self) -> &'static dyn typos_cli::report::Report { match self { Format::Silent => &PRINT_SILENT, Format::Brief => &PRINT_BRIEF, diff --git a/src/checks.rs b/src/checks.rs index 78b9718..7692cc0 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -1,9 +1,456 @@ -pub(crate) fn check_path( +use bstr::ByteSlice; + +use crate::report; +use typos::tokens; +use typos::Dictionary; + +pub trait Check: Send + Sync { + fn check_str( + &self, + buffer: &str, + parser: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error>; + + fn check_bytes( + &self, + buffer: &[u8], + parser: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error>; + + fn check_filenames(&self) -> bool; + + fn check_files(&self) -> bool; + + fn binary(&self) -> bool; + + fn check_filename( + &self, + path: &std::path::Path, + parser: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + if !self.check_filenames() { + return Ok(()); + } + + if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { + let context_reporter = ReportContext { + reporter, + 
context: report::PathContext { path }.into(), + }; + self.check_str(file_name, parser, dictionary, &context_reporter)?; + } + + Ok(()) + } + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + parser: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + if !self.check_files() { + return Ok(()); + } + + let buffer = read_file(path, reporter)?; + let (buffer, content_type) = massage_data(buffer)?; + if !explicit && !self.binary() && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + return Ok(()); + } + + for (line_idx, line) in buffer.lines().enumerate() { + let line_num = line_idx + 1; + let context_reporter = ReportContext { + reporter, + context: report::FileContext { path, line_num }.into(), + }; + self.check_bytes(line, parser, dictionary, &context_reporter)?; + } + + Ok(()) + } +} + +struct ReportContext<'m, 'r> { + reporter: &'r dyn report::Report, + context: report::Context<'m>, +} + +impl<'m, 'r> report::Report for ReportContext<'m, 'r> { + fn report(&self, msg: report::Message) -> Result<(), std::io::Error> { + let msg = msg.context(Some(self.context.clone())); + self.reporter.report(msg) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TyposSettings { + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl TyposSettings { + pub fn new() -> Self { + Default::default() + } + + pub fn check_filenames(&mut self, yes: bool) -> &mut Self { + self.check_filenames = yes; + self + } + + pub fn check_files(&mut self, yes: bool) -> &mut Self { + self.check_files = yes; + self + } + + pub fn binary(&mut self, yes: bool) -> &mut Self { + self.binary = yes; + self + } + + pub fn build_typos(&self) -> Typos { + Typos { + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + } + } + + pub fn build_identifier_parser(&self) -> ParseIdentifiers { + ParseIdentifiers 
{ + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + } + } + + pub fn build_word_parser(&self) -> ParseWords { + ParseWords { + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + } + } + + pub fn build_files(&self) -> Files { + Files {} + } +} + +impl Default for TyposSettings { + fn default() -> Self { + Self { + check_filenames: true, + check_files: true, + binary: false, + } + } +} + +#[derive(Debug, Clone)] +pub struct Typos { + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl Check for Typos { + fn check_str( + &self, + buffer: &str, + tokenizer: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = typos::ParserBuilder::new() + .tokenizer(tokenizer) + .dictionary(dictionary) + .typos(); + for typo in parser.parse_str(buffer) { + let msg = report::Typo { + context: None, + buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo, + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + Ok(()) + } + + fn check_bytes( + &self, + buffer: &[u8], + tokenizer: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = typos::ParserBuilder::new() + .tokenizer(tokenizer) + .dictionary(dictionary) + .typos(); + for typo in parser.parse_bytes(buffer) { + let msg = report::Typo { + context: None, + buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo, + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + Ok(()) + } + + fn check_filenames(&self) -> bool { + self.check_filenames + } + + fn check_files(&self) -> bool { + self.check_files + } + + fn binary(&self) -> bool { + self.binary + } +} + +#[derive(Debug, Clone)] +pub struct ParseIdentifiers { + 
check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl Check for ParseIdentifiers { + fn check_str( + &self, + buffer: &str, + tokenizer: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = typos::ParserBuilder::new() + .tokenizer(tokenizer) + .identifiers(); + for word in parser.parse_str(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; + reporter.report(msg.into())?; + } + + Ok(()) + } + + fn check_bytes( + &self, + buffer: &[u8], + tokenizer: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = typos::ParserBuilder::new() + .tokenizer(tokenizer) + .identifiers(); + for word in parser.parse_bytes(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; + reporter.report(msg.into())?; + } + + Ok(()) + } + + fn check_filenames(&self) -> bool { + self.check_filenames + } + + fn check_files(&self) -> bool { + self.check_files + } + + fn binary(&self) -> bool { + self.binary + } +} + +#[derive(Debug, Clone)] +pub struct ParseWords { + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl Check for ParseWords { + fn check_str( + &self, + buffer: &str, + tokenizer: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let word_parser = typos::ParserBuilder::new().tokenizer(tokenizer).words(); + for word in word_parser.parse_str(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; + reporter.report(msg.into())?; + } + + Ok(()) + } + + fn check_bytes( + &self, + buffer: &[u8], + tokenizer: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = 
typos::ParserBuilder::new().tokenizer(tokenizer).words(); + for word in parser.parse_bytes(buffer) { + let msg = report::Parse { + context: None, + kind: report::ParseKind::Word, + data: word.token(), + }; + reporter.report(msg.into())?; + } + + Ok(()) + } + + fn check_filenames(&self) -> bool { + self.check_filenames + } + + fn check_files(&self) -> bool { + self.check_files + } + + fn binary(&self) -> bool { + self.binary + } +} + +#[derive(Debug, Clone)] +pub struct Files {} + +impl Check for Files { + fn check_str( + &self, + _buffer: &str, + _parser: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + _reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + Ok(()) + } + + fn check_bytes( + &self, + _buffer: &[u8], + _parser: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + _reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + Ok(()) + } + + fn check_filenames(&self) -> bool { + true + } + + fn check_files(&self) -> bool { + true + } + + fn binary(&self) -> bool { + true + } + + fn check_filename( + &self, + _path: &std::path::Path, + _parser: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + _reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + Ok(()) + } + + fn check_file( + &self, + path: &std::path::Path, + _explicit: bool, + _parser: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let msg = report::File::new(path); + reporter.report(msg.into())?; + + Ok(()) + } +} + +fn read_file( + path: &std::path::Path, + reporter: &dyn report::Report, +) -> Result, std::io::Error> { + let buffer = match std::fs::read(path) { + Ok(buffer) => buffer, + Err(err) => { + let msg = report::Error::new(err.to_string()); + reporter.report(msg.into())?; + Vec::new() + } + }; + Ok(buffer) +} + +fn massage_data( + buffer: Vec, +) -> Result<(Vec, content_inspector::ContentType), std::io::Error> { + let mut content_type = 
content_inspector::inspect(&buffer); + + // HACK: We only support UTF-8 at the moment + if content_type != content_inspector::ContentType::UTF_8_BOM + && content_type != content_inspector::ContentType::UTF_8 + { + content_type = content_inspector::ContentType::BINARY; + } + + Ok((buffer, content_type)) +} +pub fn check_path( walk: ignore::Walk, - checks: &dyn typos::checks::Check, + checks: &dyn Check, parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, - reporter: &dyn typos::report::Report, + reporter: &dyn report::Report, ) -> Result<(), ignore::Error> { for entry in walk { check_entry(entry, checks, parser, dictionary, reporter)?; @@ -11,12 +458,12 @@ pub(crate) fn check_path( Ok(()) } -pub(crate) fn check_path_parallel( +pub fn check_path_parallel( walk: ignore::WalkParallel, - checks: &dyn typos::checks::Check, + checks: &dyn Check, parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, - reporter: &dyn typos::report::Report, + reporter: &dyn report::Report, ) -> Result<(), ignore::Error> { let error: std::sync::Mutex> = std::sync::Mutex::new(Ok(())); walk.run(|| { @@ -36,10 +483,10 @@ pub(crate) fn check_path_parallel( fn check_entry( entry: Result, - checks: &dyn typos::checks::Check, + checks: &dyn Check, parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, - reporter: &dyn typos::report::Report, + reporter: &dyn report::Report, ) -> Result<(), ignore::Error> { let entry = entry?; if entry.file_type().map(|t| t.is_file()).unwrap_or(true) { diff --git a/src/diff.rs b/src/diff.rs index c99ac4e..8457588 100644 --- a/src/diff.rs +++ b/src/diff.rs @@ -4,12 +4,12 @@ use std::sync; use bstr::ByteSlice; pub struct Diff<'r> { - reporter: &'r dyn typos::report::Report, + reporter: &'r dyn crate::report::Report, deferred: sync::Mutex, } impl<'r> Diff<'r> { - pub(crate) fn new(reporter: &'r dyn typos::report::Report) -> Self { + pub fn new(reporter: &'r dyn crate::report::Report) -> Self { Self { reporter, deferred: 
sync::Mutex::new(crate::replace::Deferred::default()), @@ -56,10 +56,10 @@ impl<'r> Diff<'r> { } } -impl<'r> typos::report::Report for Diff<'r> { - fn report(&self, msg: typos::report::Message<'_>) -> Result<(), std::io::Error> { +impl<'r> crate::report::Report for Diff<'r> { + fn report(&self, msg: crate::report::Message<'_>) -> Result<(), std::io::Error> { let typo = match &msg { - typos::report::Message::Typo(typo) => typo, + crate::report::Message::Typo(typo) => typo, _ => return self.reporter.report(msg), }; @@ -69,7 +69,7 @@ impl<'r> typos::report::Report for Diff<'r> { }; match &typo.context { - Some(typos::report::Context::File(file)) => { + Some(crate::report::Context::File(file)) => { let path = file.path.to_owned(); let line_num = file.line_num; let correction = crate::replace::Correction::new( diff --git a/src/lib.rs b/src/lib.rs index db703d1..632c1b0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,6 @@ +pub mod checks; pub mod config; pub mod dict; +pub mod diff; +pub mod replace; +pub mod report; diff --git a/src/main.rs b/src/main.rs index 6275b95..d417642 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,11 +7,12 @@ use std::io::Write; use structopt::StructOpt; mod args; -mod checks; -mod config; -mod dict; -mod diff; -mod replace; +use typos_cli::checks; +use typos_cli::config; +use typos_cli::dict; +use typos_cli::diff; +use typos_cli::replace; +use typos_cli::report; use proc_exit::WithCodeResultExt; @@ -74,7 +75,7 @@ fn run() -> proc_exit::ExitResult { dictionary.identifiers(config.default.extend_identifiers()); dictionary.words(config.default.extend_words()); - let mut settings = typos::checks::TyposSettings::new(); + let mut settings = checks::TyposSettings::new(); settings .check_filenames(config.default.check_filename()) .check_files(config.default.check_file()) @@ -98,8 +99,8 @@ fn run() -> proc_exit::ExitResult { } else { args.format.reporter() }; - let status_reporter = typos::report::MessageStatus::new(output_reporter); - let mut 
reporter: &dyn typos::report::Report = &status_reporter; + let status_reporter = report::MessageStatus::new(output_reporter); + let mut reporter: &dyn report::Report = &status_reporter; let replace_reporter = replace::Replace::new(reporter); let diff_reporter = diff::Diff::new(reporter); if args.diff { @@ -109,7 +110,7 @@ fn run() -> proc_exit::ExitResult { } let (files, identifier_parser, word_parser, checks); - let selected_checks: &dyn typos::checks::Check = if args.files { + let selected_checks: &dyn checks::Check = if args.files { files = settings.build_files(); &files } else if args.identifiers { diff --git a/src/replace.rs b/src/replace.rs index 1ac129a..4bec030 100644 --- a/src/replace.rs +++ b/src/replace.rs @@ -6,12 +6,12 @@ use std::sync; use bstr::ByteSlice; pub struct Replace<'r> { - reporter: &'r dyn typos::report::Report, + reporter: &'r dyn crate::report::Report, deferred: sync::Mutex, } impl<'r> Replace<'r> { - pub(crate) fn new(reporter: &'r dyn typos::report::Report) -> Self { + pub fn new(reporter: &'r dyn crate::report::Report) -> Self { Self { reporter, deferred: sync::Mutex::new(Deferred::default()), @@ -54,10 +54,10 @@ impl<'r> Replace<'r> { } } -impl<'r> typos::report::Report for Replace<'r> { - fn report(&self, msg: typos::report::Message<'_>) -> Result<(), std::io::Error> { +impl<'r> crate::report::Report for Replace<'r> { + fn report(&self, msg: crate::report::Message<'_>) -> Result<(), std::io::Error> { let typo = match &msg { - typos::report::Message::Typo(typo) => typo, + crate::report::Message::Typo(typo) => typo, _ => return self.reporter.report(msg), }; @@ -67,7 +67,7 @@ impl<'r> typos::report::Report for Replace<'r> { }; match &typo.context { - Some(typos::report::Context::File(file)) => { + Some(crate::report::Context::File(file)) => { let path = file.path.to_owned(); let line_num = file.line_num; let correction = @@ -82,7 +82,7 @@ impl<'r> typos::report::Report for Replace<'r> { content.push(correction); Ok(()) } - 
Some(typos::report::Context::Path(path)) => { + Some(crate::report::Context::Path(path)) => { let path = path.path.to_owned(); let correction = Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref()); @@ -97,20 +97,20 @@ impl<'r> typos::report::Report for Replace<'r> { } #[derive(Clone, Debug, Default)] -pub(crate) struct Deferred { - pub(crate) content: BTreeMap>>, - pub(crate) paths: BTreeMap>, +pub struct Deferred { + pub content: BTreeMap>>, + pub paths: BTreeMap>, } #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] -pub(crate) struct Correction { +pub struct Correction { pub byte_offset: usize, pub typo: Vec, pub correction: Vec, } impl Correction { - pub(crate) fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { + pub fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { Self { byte_offset, typo: typo.as_bytes().to_vec(), @@ -119,7 +119,7 @@ impl Correction { } } -pub(crate) fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { +pub fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { let mut corrections: Vec<_> = corrections.iter().collect(); corrections.sort_unstable(); corrections.reverse(); @@ -137,8 +137,8 @@ pub(crate) fn correct(mut line: Vec, corrections: &[Correction]) -> Vec mod test { use super::*; + use crate::report::Report; use assert_fs::prelude::*; - use typos::report::Report; fn simple_correct(line: &str, corrections: Vec<(usize, &str, &str)>) -> String { let line = line.as_bytes().to_vec(); @@ -205,13 +205,13 @@ mod test { let input_file = temp.child("foo.txt"); input_file.write_str("1 foo 2\n3 4 5").unwrap(); - let primary = typos::report::PrintSilent; + let primary = crate::report::PrintSilent; let replace = Replace::new(&primary); replace .report( - typos::report::Typo::default() + crate::report::Typo::default() .context(Some( - typos::report::FileContext::default() + crate::report::FileContext::default() .path(input_file.path()) .line_num(1) .into(), @@ -236,13 +236,13 @@ 
mod test { let input_file = temp.child("foo.txt"); input_file.write_str("foo foo foo").unwrap(); - let primary = typos::report::PrintSilent; + let primary = crate::report::PrintSilent; let replace = Replace::new(&primary); replace .report( - typos::report::Typo::default() + crate::report::Typo::default() .context(Some( - typos::report::PathContext::default() + crate::report::PathContext::default() .path(input_file.path()) .into(), )) diff --git a/crates/typos/src/report.rs b/src/report.rs similarity index 96% rename from crates/typos/src/report.rs rename to src/report.rs index bce2d3d..3d213cd 100644 --- a/crates/typos/src/report.rs +++ b/src/report.rs @@ -72,7 +72,7 @@ pub struct Typo<'m> { pub buffer: Cow<'m, [u8]>, pub byte_offset: usize, pub typo: &'m str, - pub corrections: crate::Status<'m>, + pub corrections: typos::Status<'m>, } impl<'m> Default for Typo<'m> { @@ -82,7 +82,7 @@ impl<'m> Default for Typo<'m> { buffer: Cow::Borrowed(&[]), byte_offset: 0, typo: "", - corrections: crate::Status::Invalid, + corrections: typos::Status::Invalid, } } } @@ -308,8 +308,8 @@ fn print_brief_correction(msg: &Typo) -> Result<(), std::io::Error> { ) .count(); match &msg.corrections { - crate::Status::Valid => {} - crate::Status::Invalid => { + typos::Status::Valid => {} + typos::Status::Invalid => { writeln!( io::stdout(), "{}:{}: `{}` is disallowed", @@ -318,7 +318,7 @@ fn print_brief_correction(msg: &Typo) -> Result<(), std::io::Error> { msg.typo, )?; } - crate::Status::Corrections(corrections) => { + typos::Status::Corrections(corrections) => { writeln!( io::stdout(), "{}:{}: `{}` -> {}", @@ -345,11 +345,11 @@ fn print_long_correction(msg: &Typo) -> Result<(), std::io::Error> { ) .count(); match &msg.corrections { - crate::Status::Valid => {} - crate::Status::Invalid => { + typos::Status::Valid => {} + typos::Status::Invalid => { writeln!(handle, "error: `{}` is disallowed`", msg.typo,)?; } - crate::Status::Corrections(corrections) => { + 
typos::Status::Corrections(corrections) => { writeln!( handle, "error: `{}` should be {}", From 220a79ff300e002a99e663e224d8667ebf84ee6c Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 30 Dec 2020 21:13:20 -0600 Subject: [PATCH 05/16] refactor: Make room for parent function --- benches/checks.rs | 2 +- src/checks.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/benches/checks.rs b/benches/checks.rs index 92e4eb8..3379392 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -171,7 +171,7 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) { let parser = typos::tokens::Tokenizer::new(); let checks = typos_cli::checks::TyposSettings::new().build_typos(); b.iter(|| { - checks.check_file( + checks.check_file_content( sample_path.path(), true, &parser, diff --git a/src/checks.rs b/src/checks.rs index 7692cc0..01bfd91 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -49,7 +49,7 @@ pub trait Check: Send + Sync { Ok(()) } - fn check_file( + fn check_file_content( &self, path: &std::path::Path, explicit: bool, @@ -401,7 +401,7 @@ impl Check for Files { Ok(()) } - fn check_file( + fn check_file_content( &self, path: &std::path::Path, _explicit: bool, @@ -492,7 +492,7 @@ fn check_entry( if entry.file_type().map(|t| t.is_file()).unwrap_or(true) { let explicit = entry.depth() == 0; checks.check_filename(entry.path(), parser, dictionary, reporter)?; - checks.check_file(entry.path(), explicit, parser, dictionary, reporter)?; + checks.check_file_content(entry.path(), explicit, parser, dictionary, reporter)?; } Ok(()) From 6c28376e503ab48f230d06b2e41456e6bb3b12ff Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 30 Dec 2020 21:17:28 -0600 Subject: [PATCH 06/16] refactor: Give checks full control --- src/checks.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/checks.rs b/src/checks.rs index 01bfd91..c84c76a 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -80,6 +80,19 @@ pub trait Check: Send + Sync { 
Ok(()) } + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + parser: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + self.check_filename(path, parser, dictionary, reporter)?; + self.check_file_content(path, explicit, parser, dictionary, reporter)?; + Ok(()) + } } struct ReportContext<'m, 'r> { @@ -491,8 +504,7 @@ fn check_entry( let entry = entry?; if entry.file_type().map(|t| t.is_file()).unwrap_or(true) { let explicit = entry.depth() == 0; - checks.check_filename(entry.path(), parser, dictionary, reporter)?; - checks.check_file_content(entry.path(), explicit, parser, dictionary, reporter)?; + checks.check_file(entry.path(), explicit, parser, dictionary, reporter)?; } Ok(()) From d28174439b512a18d799a3116d15b9b8152863f5 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 30 Dec 2020 21:26:48 -0600 Subject: [PATCH 07/16] refactor: Switch FoundFiles to check_file --- src/checks.rs | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/src/checks.rs b/src/checks.rs index c84c76a..a6ef3d1 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -158,8 +158,10 @@ impl TyposSettings { } } - pub fn build_files(&self) -> Files { - Files {} + pub fn build_files(&self) -> FoundFiles { + FoundFiles { + binary: self.binary, + } } } @@ -369,9 +371,11 @@ impl Check for ParseWords { } #[derive(Debug, Clone)] -pub struct Files {} +pub struct FoundFiles { + binary: bool, +} -impl Check for Files { +impl Check for FoundFiles { fn check_str( &self, _buffer: &str, @@ -401,7 +405,7 @@ impl Check for Files { } fn binary(&self) -> bool { - true + self.binary } fn check_filename( @@ -416,14 +420,38 @@ impl Check for Files { fn check_file_content( &self, - path: &std::path::Path, + _path: &std::path::Path, _explicit: bool, _parser: &tokens::Tokenizer, _dictionary: &dyn Dictionary, + _reporter: &dyn report::Report, + ) -> Result<(), 
std::io::Error> { + Ok(()) + } + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + _parser: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let msg = report::File::new(path); - reporter.report(msg.into())?; + // Check `self.binary` first so we can easily check performance of walking vs reading + if self.binary { + let msg = report::File::new(path); + reporter.report(msg.into())?; + } else { + let buffer = read_file(path, reporter)?; + let (_buffer, content_type) = massage_data(buffer)?; + if !explicit && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + } else { + let msg = report::File::new(path); + reporter.report(msg.into())?; + } + } Ok(()) } @@ -458,6 +486,7 @@ fn massage_data( Ok((buffer, content_type)) } + pub fn check_path( walk: ignore::Walk, checks: &dyn Check, From 6e53d7e7196682a10933260bc1b7e669f1ae4f4e Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 30 Dec 2020 21:42:30 -0600 Subject: [PATCH 08/16] refactor: Switch Words/Identifiers to check_file --- src/checks.rs | 150 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 98 insertions(+), 52 deletions(-) diff --git a/src/checks.rs b/src/checks.rs index a6ef3d1..ea8e5cb 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -142,16 +142,16 @@ impl TyposSettings { } } - pub fn build_identifier_parser(&self) -> ParseIdentifiers { - ParseIdentifiers { + pub fn build_identifier_parser(&self) -> Identifiers { + Identifiers { check_filenames: self.check_filenames, check_files: self.check_files, binary: self.binary, } } - pub fn build_word_parser(&self) -> ParseWords { - ParseWords { + pub fn build_word_parser(&self) -> Words { + Words { check_filenames: self.check_filenames, check_files: self.check_files, binary: self.binary, @@ -245,38 +245,37 @@ impl Check for Typos { } #[derive(Debug, Clone)] -pub struct ParseIdentifiers { +pub struct Identifiers { 
check_filenames: bool, check_files: bool, binary: bool, } -impl Check for ParseIdentifiers { +impl Check for Identifiers { fn check_str( &self, - buffer: &str, - tokenizer: &tokens::Tokenizer, + _buffer: &str, + _tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, + _reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let parser = typos::ParserBuilder::new() - .tokenizer(tokenizer) - .identifiers(); - for word in parser.parse_str(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; - } - Ok(()) } fn check_bytes( &self, - buffer: &[u8], + _buffer: &[u8], + _tokenizer: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + _reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + Ok(()) + } + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, @@ -284,13 +283,36 @@ impl Check for ParseIdentifiers { let parser = typos::ParserBuilder::new() .tokenizer(tokenizer) .identifiers(); - for word in parser.parse_bytes(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; + + if self.check_filenames() { + if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { + for word in parser.parse_str(file_name) { + let msg = report::Parse { + context: Some(report::PathContext { path }.into()), + kind: report::ParseKind::Identifier, + data: word.token(), + }; + reporter.report(msg.into())?; + } + } + } + + if self.check_files() { + let buffer = read_file(path, reporter)?; + let (buffer, content_type) = massage_data(buffer)?; + if !explicit && !self.binary() && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + } else { + for word in parser.parse_bytes(&buffer) { + let 
msg = report::Parse { + context: Some(report::FileContext { path, line_num: 0 }.into()), + kind: report::ParseKind::Identifier, + data: word.token(), + }; + reporter.report(msg.into())?; + } + } } Ok(()) @@ -310,48 +332,72 @@ impl Check for ParseIdentifiers { } #[derive(Debug, Clone)] -pub struct ParseWords { +pub struct Words { check_filenames: bool, check_files: bool, binary: bool, } -impl Check for ParseWords { +impl Check for Words { fn check_str( &self, - buffer: &str, - tokenizer: &tokens::Tokenizer, + _buffer: &str, + _tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, - reporter: &dyn report::Report, + _reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let word_parser = typos::ParserBuilder::new().tokenizer(tokenizer).words(); - for word in word_parser.parse_str(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; - } - Ok(()) } fn check_bytes( &self, - buffer: &[u8], + _buffer: &[u8], + _tokenizer: &tokens::Tokenizer, + _dictionary: &dyn Dictionary, + _reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + Ok(()) + } + + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, tokenizer: &tokens::Tokenizer, _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { let parser = typos::ParserBuilder::new().tokenizer(tokenizer).words(); - for word in parser.parse_bytes(buffer) { - let msg = report::Parse { - context: None, - kind: report::ParseKind::Word, - data: word.token(), - }; - reporter.report(msg.into())?; + + if self.check_filenames() { + if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { + for word in parser.parse_str(file_name) { + let msg = report::Parse { + context: Some(report::PathContext { path }.into()), + kind: report::ParseKind::Word, + data: word.token(), + }; + reporter.report(msg.into())?; + } + } + } + + if self.check_files() { + let buffer 
= read_file(path, reporter)?; + let (buffer, content_type) = massage_data(buffer)?; + if !explicit && !self.binary() && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + } else { + for word in parser.parse_bytes(&buffer) { + let msg = report::Parse { + context: Some(report::FileContext { path, line_num: 0 }.into()), + kind: report::ParseKind::Word, + data: word.token(), + }; + reporter.report(msg.into())?; + } + } } Ok(()) From 663eb94d32a980ede97039a67cbe416e736f06b1 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 31 Dec 2020 17:41:32 -0600 Subject: [PATCH 09/16] refactor: Switch Typos to check_file --- benches/checks.rs | 236 +++++++++++++++--------------- src/checks.rs | 358 ++++++++++++---------------------------------- 2 files changed, 212 insertions(+), 382 deletions(-) diff --git a/benches/checks.rs b/benches/checks.rs index 3379392..dcb8dc2 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -7,129 +7,178 @@ mod data; use assert_fs::prelude::*; use typos_cli::checks::Check; -fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) { +fn bench_files(data: &str, b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data).unwrap(); + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); - let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser(); - b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent)); -} - -#[bench] -fn parse_idents_empty_str(b: &mut test::Bencher) { - bench_parse_ident_str(data::EMPTY, b); -} - -#[bench] -fn parse_idents_no_tokens_str(b: &mut test::Bencher) { - bench_parse_ident_str(data::NO_TOKENS, b); -} - -#[bench] -fn parse_idents_single_token_str(b: &mut test::Bencher) { - bench_parse_ident_str(data::SINGLE_TOKEN, b); -} - -#[bench] -fn parse_idents_sherlock_str(b: &mut 
test::Bencher) { - bench_parse_ident_str(data::SHERLOCK, b); -} - -#[bench] -fn parse_idents_code_str(b: &mut test::Bencher) { - bench_parse_ident_str(data::CODE, b); -} - -#[bench] -fn parse_idents_corpus_str(b: &mut test::Bencher) { - bench_parse_ident_str(data::CORPUS, b); -} - -fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) { - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Tokenizer::new(); - let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser(); + let checks = typos_cli::checks::TyposSettings::new().build_files(); b.iter(|| { - checks.check_bytes( - data.as_bytes(), + checks.check_file( + sample_path.path(), + true, &parser, &corrections, &typos_cli::report::PrintSilent, ) }); + + temp.close().unwrap(); } #[bench] -fn parse_idents_empty_bytes(b: &mut test::Bencher) { - bench_parse_ident_bytes(data::EMPTY, b); +fn files_empty(b: &mut test::Bencher) { + bench_files(data::EMPTY, b); } #[bench] -fn parse_idents_no_tokens_bytes(b: &mut test::Bencher) { - bench_parse_ident_bytes(data::NO_TOKENS, b); +fn files_no_tokens(b: &mut test::Bencher) { + bench_files(data::NO_TOKENS, b); } #[bench] -fn parse_idents_single_token_bytes(b: &mut test::Bencher) { - bench_parse_ident_bytes(data::SINGLE_TOKEN, b); +fn files_single_token(b: &mut test::Bencher) { + bench_files(data::SINGLE_TOKEN, b); } #[bench] -fn parse_idents_sherlock_bytes(b: &mut test::Bencher) { - bench_parse_ident_bytes(data::SHERLOCK, b); +fn files_sherlock(b: &mut test::Bencher) { + bench_files(data::SHERLOCK, b); } #[bench] -fn parse_idents_code_bytes(b: &mut test::Bencher) { - bench_parse_ident_bytes(data::CODE, b); +fn files_code(b: &mut test::Bencher) { + bench_files(data::CODE, b); } #[bench] -fn parse_idents_corpus_bytes(b: &mut test::Bencher) { - bench_parse_ident_bytes(data::CORPUS, b); +fn files_corpus(b: &mut test::Bencher) { + bench_files(data::CORPUS, b); } -fn bench_parse_word_str(data: &str, b: &mut 
test::Bencher) { +fn bench_identifiers(data: &str, b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data).unwrap(); + + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); + let parser = typos::tokens::Tokenizer::new(); + let checks = typos_cli::checks::TyposSettings::new().build_identifier_parser(); + b.iter(|| { + checks.check_file( + sample_path.path(), + true, + &parser, + &corrections, + &typos_cli::report::PrintSilent, + ) + }); + + temp.close().unwrap(); +} + +#[bench] +fn identifiers_empty(b: &mut test::Bencher) { + bench_identifiers(data::EMPTY, b); +} + +#[bench] +fn identifiers_no_tokens(b: &mut test::Bencher) { + bench_identifiers(data::NO_TOKENS, b); +} + +#[bench] +fn identifiers_single_token(b: &mut test::Bencher) { + bench_identifiers(data::SINGLE_TOKEN, b); +} + +#[bench] +fn identifiers_sherlock(b: &mut test::Bencher) { + bench_identifiers(data::SHERLOCK, b); +} + +#[bench] +fn identifiers_code(b: &mut test::Bencher) { + bench_identifiers(data::CODE, b); +} + +#[bench] +fn identifiers_corpus(b: &mut test::Bencher) { + bench_identifiers(data::CORPUS, b); +} + +fn bench_words(data: &str, b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data).unwrap(); + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); let checks = typos_cli::checks::TyposSettings::new().build_word_parser(); - b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent)); + b.iter(|| { + checks.check_file( + sample_path.path(), + true, + &parser, + &corrections, + &typos_cli::report::PrintSilent, + ) + }); + + temp.close().unwrap(); } #[bench] -fn parse_words_empty(b: &mut test::Bencher) { - bench_parse_word_str(data::EMPTY, b); +fn words_empty(b: &mut test::Bencher) { + bench_words(data::EMPTY, 
b); } #[bench] -fn parse_words_no_tokens(b: &mut test::Bencher) { - bench_parse_word_str(data::NO_TOKENS, b); +fn words_no_tokens(b: &mut test::Bencher) { + bench_words(data::NO_TOKENS, b); } #[bench] -fn parse_words_single_token(b: &mut test::Bencher) { - bench_parse_word_str(data::SINGLE_TOKEN, b); +fn words_single_token(b: &mut test::Bencher) { + bench_words(data::SINGLE_TOKEN, b); } #[bench] -fn parse_words_sherlock(b: &mut test::Bencher) { - bench_parse_word_str(data::SHERLOCK, b); +fn words_sherlock(b: &mut test::Bencher) { + bench_words(data::SHERLOCK, b); } #[bench] -fn parse_words_code(b: &mut test::Bencher) { - bench_parse_word_str(data::CODE, b); +fn words_code(b: &mut test::Bencher) { + bench_words(data::CODE, b); } #[bench] -fn parse_words_corpus(b: &mut test::Bencher) { - bench_parse_word_str(data::CORPUS, b); +fn words_corpus(b: &mut test::Bencher) { + bench_words(data::CORPUS, b); } fn bench_typos(data: &str, b: &mut test::Bencher) { + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child("sample"); + sample_path.write_str(data).unwrap(); + let corrections = typos_cli::dict::BuiltIn::new(Default::default()); let parser = typos::tokens::Tokenizer::new(); let checks = typos_cli::checks::TyposSettings::new().build_typos(); - b.iter(|| checks.check_str(data, &parser, &corrections, &typos_cli::report::PrintSilent)); + b.iter(|| { + checks.check_file( + sample_path.path(), + true, + &parser, + &corrections, + &typos_cli::report::PrintSilent, + ) + }); + + temp.close().unwrap(); } #[bench] @@ -161,54 +210,3 @@ fn typos_code(b: &mut test::Bencher) { fn typos_corpus(b: &mut test::Bencher) { bench_typos(data::CORPUS, b); } - -fn bench_check_file(data: &str, b: &mut test::Bencher) { - let temp = assert_fs::TempDir::new().unwrap(); - let sample_path = temp.child("sample"); - sample_path.write_str(data).unwrap(); - - let corrections = typos_cli::dict::BuiltIn::new(Default::default()); - let parser = typos::tokens::Tokenizer::new(); - let 
checks = typos_cli::checks::TyposSettings::new().build_typos(); - b.iter(|| { - checks.check_file_content( - sample_path.path(), - true, - &parser, - &corrections, - &typos_cli::report::PrintSilent, - ) - }); - - temp.close().unwrap(); -} - -#[bench] -fn check_file_empty(b: &mut test::Bencher) { - bench_check_file(data::EMPTY, b); -} - -#[bench] -fn check_file_no_tokens(b: &mut test::Bencher) { - bench_check_file(data::NO_TOKENS, b); -} - -#[bench] -fn check_file_single_token(b: &mut test::Bencher) { - bench_check_file(data::SINGLE_TOKEN, b); -} - -#[bench] -fn check_file_sherlock(b: &mut test::Bencher) { - bench_check_file(data::SHERLOCK, b); -} - -#[bench] -fn check_file_code(b: &mut test::Bencher) { - bench_check_file(data::CODE, b); -} - -#[bench] -fn check_file_corpus(b: &mut test::Bencher) { - bench_check_file(data::CORPUS, b); -} diff --git a/src/checks.rs b/src/checks.rs index ea8e5cb..65d7e48 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -5,82 +5,6 @@ use typos::tokens; use typos::Dictionary; pub trait Check: Send + Sync { - fn check_str( - &self, - buffer: &str, - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error>; - - fn check_bytes( - &self, - buffer: &[u8], - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error>; - - fn check_filenames(&self) -> bool; - - fn check_files(&self) -> bool; - - fn binary(&self) -> bool; - - fn check_filename( - &self, - path: &std::path::Path, - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - if !self.check_filenames() { - return Ok(()); - } - - if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - let context_reporter = ReportContext { - reporter, - context: report::PathContext { path }.into(), - }; - self.check_str(file_name, parser, dictionary, &context_reporter)?; - } - - 
Ok(()) - } - - fn check_file_content( - &self, - path: &std::path::Path, - explicit: bool, - parser: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - if !self.check_files() { - return Ok(()); - } - - let buffer = read_file(path, reporter)?; - let (buffer, content_type) = massage_data(buffer)?; - if !explicit && !self.binary() && content_type.is_binary() { - let msg = report::BinaryFile { path }; - reporter.report(msg.into())?; - return Ok(()); - } - - for (line_idx, line) in buffer.lines().enumerate() { - let line_num = line_idx + 1; - let context_reporter = ReportContext { - reporter, - context: report::FileContext { path, line_num }.into(), - }; - self.check_bytes(line, parser, dictionary, &context_reporter)?; - } - - Ok(()) - } - fn check_file( &self, path: &std::path::Path, @@ -88,23 +12,7 @@ pub trait Check: Send + Sync { parser: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - self.check_filename(path, parser, dictionary, reporter)?; - self.check_file_content(path, explicit, parser, dictionary, reporter)?; - Ok(()) - } -} - -struct ReportContext<'m, 'r> { - reporter: &'r dyn report::Report, - context: report::Context<'m>, -} - -impl<'m, 'r> report::Report for ReportContext<'m, 'r> { - fn report(&self, msg: report::Message) -> Result<(), std::io::Error> { - let msg = msg.context(Some(self.context.clone())); - self.reporter.report(msg) - } + ) -> Result<(), std::io::Error>; } #[derive(Debug, Clone, PartialEq, Eq)] @@ -183,9 +91,10 @@ pub struct Typos { } impl Check for Typos { - fn check_str( + fn check_file( &self, - buffer: &str, + path: &std::path::Path, + explicit: bool, tokenizer: &tokens::Tokenizer, dictionary: &dyn Dictionary, reporter: &dyn report::Report, @@ -194,54 +103,47 @@ impl Check for Typos { .tokenizer(tokenizer) .dictionary(dictionary) .typos(); - for typo in parser.parse_str(buffer) { - let msg = 
report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), - byte_offset: typo.byte_offset, - typo: typo.typo, - corrections: typo.corrections, - }; - reporter.report(msg.into())?; + + if self.check_filenames { + if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { + for typo in parser.parse_str(file_name) { + let msg = report::Typo { + context: Some(report::PathContext { path }.into()), + buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo, + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } } - Ok(()) - } - fn check_bytes( - &self, - buffer: &[u8], - tokenizer: &tokens::Tokenizer, - dictionary: &dyn Dictionary, - reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - let parser = typos::ParserBuilder::new() - .tokenizer(tokenizer) - .dictionary(dictionary) - .typos(); - for typo in parser.parse_bytes(buffer) { - let msg = report::Typo { - context: None, - buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()), - byte_offset: typo.byte_offset, - typo: typo.typo, - corrections: typo.corrections, - }; - reporter.report(msg.into())?; + if self.check_files { + let buffer = read_file(path, reporter)?; + let (buffer, content_type) = massage_data(buffer)?; + if !explicit && !self.binary && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + } else { + let mut accum_line_num = AccumulateLineNum::new(); + for typo in parser.parse_bytes(&buffer) { + let line_num = accum_line_num.line_num(&buffer, typo.byte_offset); + let (line, line_offset) = extract_line(&buffer, typo.byte_offset); + let msg = report::Typo { + context: Some(report::FileContext { path, line_num }.into()), + buffer: std::borrow::Cow::Borrowed(line), + byte_offset: line_offset, + typo: typo.typo, + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } } + Ok(()) } - - fn check_filenames(&self) -> 
bool { - self.check_filenames - } - - fn check_files(&self) -> bool { - self.check_files - } - - fn binary(&self) -> bool { - self.binary - } } #[derive(Debug, Clone)] @@ -252,26 +154,6 @@ pub struct Identifiers { } impl Check for Identifiers { - fn check_str( - &self, - _buffer: &str, - _tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_bytes( - &self, - _buffer: &[u8], - _tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - fn check_file( &self, path: &std::path::Path, @@ -284,7 +166,7 @@ impl Check for Identifiers { .tokenizer(tokenizer) .identifiers(); - if self.check_filenames() { + if self.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { for word in parser.parse_str(file_name) { let msg = report::Parse { @@ -297,16 +179,20 @@ impl Check for Identifiers { } } - if self.check_files() { + if self.check_files { let buffer = read_file(path, reporter)?; let (buffer, content_type) = massage_data(buffer)?; - if !explicit && !self.binary() && content_type.is_binary() { + if !explicit && !self.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; } else { for word in parser.parse_bytes(&buffer) { + // HACK: Don't look up the line_num per entry to better match the performance + // of Typos for comparison purposes. We don't really get much out of it + // anyway. 
+ let line_num = 0; let msg = report::Parse { - context: Some(report::FileContext { path, line_num: 0 }.into()), + context: Some(report::FileContext { path, line_num }.into()), kind: report::ParseKind::Identifier, data: word.token(), }; @@ -317,18 +203,6 @@ impl Check for Identifiers { Ok(()) } - - fn check_filenames(&self) -> bool { - self.check_filenames - } - - fn check_files(&self) -> bool { - self.check_files - } - - fn binary(&self) -> bool { - self.binary - } } #[derive(Debug, Clone)] @@ -339,26 +213,6 @@ pub struct Words { } impl Check for Words { - fn check_str( - &self, - _buffer: &str, - _tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_bytes( - &self, - _buffer: &[u8], - _tokenizer: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - fn check_file( &self, path: &std::path::Path, @@ -369,7 +223,7 @@ impl Check for Words { ) -> Result<(), std::io::Error> { let parser = typos::ParserBuilder::new().tokenizer(tokenizer).words(); - if self.check_filenames() { + if self.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { for word in parser.parse_str(file_name) { let msg = report::Parse { @@ -382,16 +236,20 @@ impl Check for Words { } } - if self.check_files() { + if self.check_files { let buffer = read_file(path, reporter)?; let (buffer, content_type) = massage_data(buffer)?; - if !explicit && !self.binary() && content_type.is_binary() { + if !explicit && !self.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; } else { for word in parser.parse_bytes(&buffer) { + // HACK: Don't look up the line_num per entry to better match the performance + // of Typos for comparison purposes. We don't really get much out of it + // anyway. 
+ let line_num = 0; let msg = report::Parse { - context: Some(report::FileContext { path, line_num: 0 }.into()), + context: Some(report::FileContext { path, line_num }.into()), kind: report::ParseKind::Word, data: word.token(), }; @@ -402,18 +260,6 @@ impl Check for Words { Ok(()) } - - fn check_filenames(&self) -> bool { - self.check_filenames - } - - fn check_files(&self) -> bool { - self.check_files - } - - fn binary(&self) -> bool { - self.binary - } } #[derive(Debug, Clone)] @@ -422,59 +268,6 @@ pub struct FoundFiles { } impl Check for FoundFiles { - fn check_str( - &self, - _buffer: &str, - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_bytes( - &self, - _buffer: &[u8], - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_filenames(&self) -> bool { - true - } - - fn check_files(&self) -> bool { - true - } - - fn binary(&self) -> bool { - self.binary - } - - fn check_filename( - &self, - _path: &std::path::Path, - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - - fn check_file_content( - &self, - _path: &std::path::Path, - _explicit: bool, - _parser: &tokens::Tokenizer, - _dictionary: &dyn Dictionary, - _reporter: &dyn report::Report, - ) -> Result<(), std::io::Error> { - Ok(()) - } - fn check_file( &self, path: &std::path::Path, @@ -533,6 +326,45 @@ fn massage_data( Ok((buffer, content_type)) } +struct AccumulateLineNum { + line_num: usize, + last_offset: usize, +} + +impl AccumulateLineNum { + fn new() -> Self { + Self { + // 1-indexed + line_num: 1, + last_offset: 0, + } + } + + fn line_num(&mut self, buffer: &[u8], byte_offset: usize) -> usize { + assert!(self.last_offset <= byte_offset); + let slice = &buffer[self.last_offset..byte_offset]; + let 
newlines = slice.lines().count(); + let line_num = self.line_num + newlines; + self.line_num = line_num; + self.last_offset = byte_offset; + line_num + } +} + +fn extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) { + let line_start = buffer[0..byte_offset] + .rfind_byte(b'\n') + // Skip the newline + .map(|s| s + 1) + .unwrap_or(0); + let line = buffer[line_start..] + .lines() + .next() + .expect("should always be at least a line"); + let line_offset = byte_offset - line_start; + (line, line_offset) +} + pub fn check_path( walk: ignore::Walk, checks: &dyn Check, From 48112a47e92d892c6dd8665315923daf90de96df Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 31 Dec 2020 19:29:45 -0600 Subject: [PATCH 10/16] refactor(parser): Abstract over lifetimes --- crates/typos/src/dict.rs | 14 ++++++++++++++ crates/typos/src/parser.rs | 27 +++++++++++++++++++++++---- src/checks.rs | 4 ++-- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/crates/typos/src/dict.rs b/crates/typos/src/dict.rs index 6e0a7f8..7c92d5b 100644 --- a/crates/typos/src/dict.rs +++ b/crates/typos/src/dict.rs @@ -27,6 +27,20 @@ impl<'c> Status<'c> { } } + pub fn into_owned(self) -> Status<'static> { + match self { + Status::Valid => Status::Valid, + Status::Invalid => Status::Invalid, + Status::Corrections(corrections) => { + let corrections = corrections + .into_iter() + .map(|c| Cow::Owned(c.into_owned())) + .collect(); + Status::Corrections(corrections) + } + } + } + pub fn borrow(&self) -> Status<'_> { match self { Status::Corrections(corrections) => { diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs index d427da1..95d0d4c 100644 --- a/crates/typos/src/parser.rs +++ b/crates/typos/src/parser.rs @@ -1,5 +1,6 @@ use crate::tokens; use crate::Dictionary; +use std::borrow::Cow; #[derive(Clone)] pub struct ParserBuilder<'p, 'd> { @@ -86,7 +87,7 @@ impl<'p, 'd> TyposParser<'p, 'd> { Some(corrections) => { let typo = Typo { byte_offset: ident.offset(), - typo: 
ident.token(), + typo: ident.token().into(), corrections, }; itertools::Either::Left(Some(typo).into_iter()) @@ -105,7 +106,7 @@ impl<'p, 'd> TyposParser<'p, 'd> { Some(corrections) => { let typo = Typo { byte_offset: word.offset(), - typo: word.token(), + typo: word.token().into(), corrections, }; Some(typo) @@ -119,15 +120,33 @@ impl<'p, 'd> TyposParser<'p, 'd> { #[non_exhaustive] pub struct Typo<'m> { pub byte_offset: usize, - pub typo: &'m str, + pub typo: Cow<'m, str>, pub corrections: crate::Status<'m>, } +impl<'m> Typo<'m> { + pub fn into_owned(self) -> Typo<'static> { + Typo { + byte_offset: self.byte_offset, + typo: Cow::Owned(self.typo.into_owned()), + corrections: self.corrections.into_owned(), + } + } + + pub fn borrow(&self) -> Typo<'_> { + Typo { + byte_offset: self.byte_offset, + typo: Cow::Borrowed(self.typo.as_ref()), + corrections: self.corrections.borrow(), + } + } +} + impl<'m> Default for Typo<'m> { fn default() -> Self { Self { byte_offset: 0, - typo: "", + typo: "".into(), corrections: crate::Status::Invalid, } } diff --git a/src/checks.rs b/src/checks.rs index 65d7e48..84e9c83 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -111,7 +111,7 @@ impl Check for Typos { context: Some(report::PathContext { path }.into()), buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()), byte_offset: typo.byte_offset, - typo: typo.typo, + typo: typo.typo.as_ref(), corrections: typo.corrections, }; reporter.report(msg.into())?; @@ -134,7 +134,7 @@ impl Check for Typos { context: Some(report::FileContext { path, line_num }.into()), buffer: std::borrow::Cow::Borrowed(line), byte_offset: line_offset, - typo: typo.typo, + typo: typo.typo.as_ref(), corrections: typo.corrections, }; reporter.report(msg.into())?; From c900e485938e0d46aefc88b76735ef87af510af4 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 1 Jan 2021 18:25:48 -0600 Subject: [PATCH 11/16] fix: Arg write-changes reports immediately --- src/checks.rs | 161 
+++++++++++++++++++++++++++++++++++++++----- src/lib.rs | 2 +- src/main.rs | 13 ++-- src/replace.rs | 178 +++---------------------------------------------- 4 files changed, 159 insertions(+), 195 deletions(-) diff --git a/src/checks.rs b/src/checks.rs index 84e9c83..7053d24 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -50,6 +50,14 @@ impl TyposSettings { } } + pub fn build_fix_typos(&self) -> FixTypos { + FixTypos { + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + } + } + pub fn build_identifier_parser(&self) -> Identifiers { Identifiers { check_filenames: self.check_filenames, @@ -120,8 +128,7 @@ impl Check for Typos { } if self.check_files { - let buffer = read_file(path, reporter)?; - let (buffer, content_type) = massage_data(buffer)?; + let (buffer, content_type) = read_file(path, reporter)?; if !explicit && !self.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; @@ -146,6 +153,91 @@ impl Check for Typos { } } +#[derive(Debug, Clone)] +pub struct FixTypos { + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl Check for FixTypos { + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + tokenizer: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = typos::ParserBuilder::new() + .tokenizer(tokenizer) + .dictionary(dictionary) + .typos(); + + if self.check_files { + let (buffer, content_type) = read_file(path, reporter)?; + if !explicit && !self.binary && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + } else { + let mut fixes = Vec::new(); + let mut accum_line_num = AccumulateLineNum::new(); + for typo in parser.parse_bytes(&buffer) { + if is_fixable(&typo) { + fixes.push(typo.into_owned()); + } else { + let line_num = accum_line_num.line_num(&buffer, typo.byte_offset); + let (line, 
line_offset) = extract_line(&buffer, typo.byte_offset); + let msg = report::Typo { + context: Some(report::FileContext { path, line_num }.into()), + buffer: std::borrow::Cow::Borrowed(line), + byte_offset: line_offset, + typo: typo.typo.as_ref(), + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } + if !fixes.is_empty() { + let buffer = fix_buffer(buffer, fixes.into_iter()); + write_file(path, content_type, &buffer, reporter)?; + } + } + } + + // Ensure the above write can happen before renaming the file. + if self.check_filenames { + if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { + let mut fixes = Vec::new(); + for typo in parser.parse_str(file_name) { + if is_fixable(&typo) { + fixes.push(typo.into_owned()); + } else { + let msg = report::Typo { + context: Some(report::PathContext { path }.into()), + buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo.as_ref(), + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } + if !fixes.is_empty() { + let file_name = file_name.to_owned().into_bytes(); + let new_name = fix_buffer(file_name, fixes.into_iter()); + let new_name = + String::from_utf8(new_name).expect("corrections are valid utf-8"); + let new_path = path.with_file_name(new_name); + std::fs::rename(path, new_path)?; + } + } + } + + Ok(()) + } +} + #[derive(Debug, Clone)] pub struct Identifiers { check_filenames: bool, @@ -180,8 +272,7 @@ impl Check for Identifiers { } if self.check_files { - let buffer = read_file(path, reporter)?; - let (buffer, content_type) = massage_data(buffer)?; + let (buffer, content_type) = read_file(path, reporter)?; if !explicit && !self.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; @@ -237,8 +328,7 @@ impl Check for Words { } if self.check_files { - let buffer = read_file(path, reporter)?; - let (buffer, content_type) = massage_data(buffer)?; + let 
(buffer, content_type) = read_file(path, reporter)?; if !explicit && !self.binary && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; @@ -281,8 +371,7 @@ impl Check for FoundFiles { let msg = report::File::new(path); reporter.report(msg.into())?; } else { - let buffer = read_file(path, reporter)?; - let (_buffer, content_type) = massage_data(buffer)?; + let (_buffer, content_type) = read_file(path, reporter)?; if !explicit && content_type.is_binary() { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; @@ -296,10 +385,10 @@ impl Check for FoundFiles { } } -fn read_file( +pub fn read_file( path: &std::path::Path, reporter: &dyn report::Report, -) -> Result, std::io::Error> { +) -> Result<(Vec, content_inspector::ContentType), std::io::Error> { let buffer = match std::fs::read(path) { Ok(buffer) => buffer, Err(err) => { @@ -308,14 +397,8 @@ fn read_file( Vec::new() } }; - Ok(buffer) -} -fn massage_data( - buffer: Vec, -) -> Result<(Vec, content_inspector::ContentType), std::io::Error> { let mut content_type = content_inspector::inspect(&buffer); - // HACK: We only support UTF-8 at the moment if content_type != content_inspector::ContentType::UTF_8_BOM && content_type != content_inspector::ContentType::UTF_8 @@ -326,6 +409,27 @@ fn massage_data( Ok((buffer, content_type)) } +pub fn write_file( + path: &std::path::Path, + content_type: content_inspector::ContentType, + buffer: &[u8], + reporter: &dyn report::Report, +) -> Result<(), std::io::Error> { + assert!( + content_type == content_inspector::ContentType::UTF_8_BOM + || content_type == content_inspector::ContentType::UTF_8 + || content_type == content_inspector::ContentType::BINARY + ); + match std::fs::write(path, buffer) { + Ok(()) => (), + Err(err) => { + let msg = report::Error::new(err.to_string()); + reporter.report(msg.into())?; + } + }; + Ok(()) +} + struct AccumulateLineNum { line_num: usize, last_offset: usize, @@ -365,6 +469,31 @@ fn 
extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) { (line, line_offset) } +fn extract_fix<'t>(typo: &'t typos::Typo<'t>) -> Option<&'t str> { + match &typo.corrections { + typos::Status::Corrections(c) if c.len() == 1 => Some(c[0].as_ref()), + _ => None, + } +} + +fn is_fixable<'t>(typo: &typos::Typo<'t>) -> bool { + extract_fix(typo).is_some() +} + +fn fix_buffer(mut buffer: Vec, typos: impl Iterator>) -> Vec { + let mut offset = 0isize; + for typo in typos { + let fix = extract_fix(&typo).expect("Caller only provides fixable typos"); + let start = ((typo.byte_offset as isize) + offset) as usize; + let end = start + typo.typo.len(); + + buffer.splice(start..end, fix.as_bytes().iter().copied()); + + offset += (fix.len() as isize) - (typo.typo.len() as isize); + } + buffer +} + pub fn check_path( walk: ignore::Walk, checks: &dyn Check, diff --git a/src/lib.rs b/src/lib.rs index 632c1b0..2584359 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,5 +2,5 @@ pub mod checks; pub mod config; pub mod dict; pub mod diff; -pub mod replace; +pub(crate) mod replace; pub mod report; diff --git a/src/main.rs b/src/main.rs index d417642..5e33fd5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,6 @@ use typos_cli::checks; use typos_cli::config; use typos_cli::dict; use typos_cli::diff; -use typos_cli::replace; use typos_cli::report; use proc_exit::WithCodeResultExt; @@ -101,15 +100,12 @@ fn run() -> proc_exit::ExitResult { }; let status_reporter = report::MessageStatus::new(output_reporter); let mut reporter: &dyn report::Report = &status_reporter; - let replace_reporter = replace::Replace::new(reporter); let diff_reporter = diff::Diff::new(reporter); if args.diff { reporter = &diff_reporter; - } else if args.write_changes { - reporter = &replace_reporter; } - let (files, identifier_parser, word_parser, checks); + let (files, identifier_parser, word_parser, checks, fixer); let selected_checks: &dyn checks::Check = if args.files { files = settings.build_files(); 
&files @@ -119,6 +115,9 @@ fn run() -> proc_exit::ExitResult { } else if args.words { word_parser = settings.build_word_parser(); &word_parser + } else if args.write_changes { + fixer = settings.build_fix_typos(); + &fixer } else { checks = settings.build_typos(); &checks @@ -156,10 +155,6 @@ fn run() -> proc_exit::ExitResult { if args.diff { diff_reporter.show().with_code(proc_exit::Code::FAILURE)?; - } else if args.write_changes { - replace_reporter - .write() - .with_code(proc_exit::Code::FAILURE)?; } } diff --git a/src/replace.rs b/src/replace.rs index 4bec030..78f2a17 100644 --- a/src/replace.rs +++ b/src/replace.rs @@ -1,116 +1,21 @@ use std::collections::BTreeMap; -use std::io::Write; use std::path; -use std::sync; - -use bstr::ByteSlice; - -pub struct Replace<'r> { - reporter: &'r dyn crate::report::Report, - deferred: sync::Mutex, -} - -impl<'r> Replace<'r> { - pub fn new(reporter: &'r dyn crate::report::Report) -> Self { - Self { - reporter, - deferred: sync::Mutex::new(Deferred::default()), - } - } - - pub fn write(&self) -> Result<(), std::io::Error> { - let deferred = self.deferred.lock().unwrap(); - - for (path, corrections) in deferred.content.iter() { - let buffer = std::fs::read(path)?; - - let mut file = std::fs::File::create(path)?; - for (line_idx, line) in buffer.lines_with_terminator().enumerate() { - let line_num = line_idx + 1; - if let Some(corrections) = corrections.get(&line_num) { - let line = line.to_vec(); - let line = correct(line, &corrections); - file.write_all(&line)?; - } else { - file.write_all(&line)?; - } - } - } - - for (path, corrections) in deferred.paths.iter() { - let orig_name = path - .file_name() - .and_then(|s| s.to_str()) - .expect("generating a correction requires the filename to be valid.") - .to_owned() - .into_bytes(); - let new_name = correct(orig_name, &corrections); - let new_name = String::from_utf8(new_name).expect("corrections are valid utf-8"); - let new_path = path.with_file_name(new_name); - 
std::fs::rename(path, new_path)?; - } - - Ok(()) - } -} - -impl<'r> crate::report::Report for Replace<'r> { - fn report(&self, msg: crate::report::Message<'_>) -> Result<(), std::io::Error> { - let typo = match &msg { - crate::report::Message::Typo(typo) => typo, - _ => return self.reporter.report(msg), - }; - - let corrections = match &typo.corrections { - typos::Status::Corrections(corrections) if corrections.len() == 1 => corrections, - _ => return self.reporter.report(msg), - }; - - match &typo.context { - Some(crate::report::Context::File(file)) => { - let path = file.path.to_owned(); - let line_num = file.line_num; - let correction = - Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref()); - let mut deferred = self.deferred.lock().unwrap(); - let content = deferred - .content - .entry(path) - .or_insert_with(BTreeMap::new) - .entry(line_num) - .or_insert_with(Vec::new); - content.push(correction); - Ok(()) - } - Some(crate::report::Context::Path(path)) => { - let path = path.path.to_owned(); - let correction = - Correction::new(typo.byte_offset, typo.typo, corrections[0].as_ref()); - let mut deferred = self.deferred.lock().unwrap(); - let content = deferred.paths.entry(path).or_insert_with(Vec::new); - content.push(correction); - Ok(()) - } - _ => self.reporter.report(msg), - } - } -} #[derive(Clone, Debug, Default)] -pub struct Deferred { - pub content: BTreeMap>>, - pub paths: BTreeMap>, +pub(crate) struct Deferred { + pub(crate) content: BTreeMap>>, + pub(crate) paths: BTreeMap>, } #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] -pub struct Correction { - pub byte_offset: usize, - pub typo: Vec, - pub correction: Vec, +pub(crate) struct Correction { + pub(crate) byte_offset: usize, + pub(crate) typo: Vec, + pub(crate) correction: Vec, } impl Correction { - pub fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { + pub(crate) fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { Self { byte_offset, typo: 
typo.as_bytes().to_vec(), @@ -119,7 +24,7 @@ impl Correction { } } -pub fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { +pub(crate) fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { let mut corrections: Vec<_> = corrections.iter().collect(); corrections.sort_unstable(); corrections.reverse(); @@ -137,9 +42,6 @@ pub fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { mod test { use super::*; - use crate::report::Report; - use assert_fs::prelude::*; - fn simple_correct(line: &str, corrections: Vec<(usize, &str, &str)>) -> String { let line = line.as_bytes().to_vec(); let corrections: Vec<_> = corrections @@ -198,66 +100,4 @@ mod test { ); assert_eq!(actual, "foo happy world"); } - - #[test] - fn test_replace_content() { - let temp = assert_fs::TempDir::new().unwrap(); - let input_file = temp.child("foo.txt"); - input_file.write_str("1 foo 2\n3 4 5").unwrap(); - - let primary = crate::report::PrintSilent; - let replace = Replace::new(&primary); - replace - .report( - crate::report::Typo::default() - .context(Some( - crate::report::FileContext::default() - .path(input_file.path()) - .line_num(1) - .into(), - )) - .buffer(std::borrow::Cow::Borrowed(b"1 foo 2\n3 4 5")) - .byte_offset(2) - .typo("foo") - .corrections(typos::Status::Corrections(vec![ - std::borrow::Cow::Borrowed("bar"), - ])) - .into(), - ) - .unwrap(); - replace.write().unwrap(); - - input_file.assert("1 bar 2\n3 4 5"); - } - - #[test] - fn test_replace_path() { - let temp = assert_fs::TempDir::new().unwrap(); - let input_file = temp.child("foo.txt"); - input_file.write_str("foo foo foo").unwrap(); - - let primary = crate::report::PrintSilent; - let replace = Replace::new(&primary); - replace - .report( - crate::report::Typo::default() - .context(Some( - crate::report::PathContext::default() - .path(input_file.path()) - .into(), - )) - .buffer(std::borrow::Cow::Borrowed(b"foo.txt")) - .byte_offset(0) - .typo("foo") - .corrections(typos::Status::Corrections(vec![ - 
std::borrow::Cow::Borrowed("bar"), - ])) - .into(), - ) - .unwrap(); - replace.write().unwrap(); - - input_file.assert(predicates::path::missing()); - temp.child("bar.txt").assert("foo foo foo"); - } } From 5f82dd60176e8a1378a8b9d068a38f988e5f4370 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 1 Jan 2021 21:16:20 -0600 Subject: [PATCH 12/16] fix: Arg diff reports immediately --- src/checks.rs | 125 +++++++++++++++++++++++++++++++++++++++++++++++++ src/diff.rs | 93 ------------------------------------ src/lib.rs | 2 - src/main.rs | 16 ++----- src/replace.rs | 103 ---------------------------------------- 5 files changed, 130 insertions(+), 209 deletions(-) delete mode 100644 src/diff.rs delete mode 100644 src/replace.rs diff --git a/src/checks.rs b/src/checks.rs index 7053d24..9349437 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -58,6 +58,14 @@ impl TyposSettings { } } + pub fn build_diff_typos(&self) -> DiffTypos { + DiffTypos { + check_filenames: self.check_filenames, + check_files: self.check_files, + binary: self.binary, + } + } + pub fn build_identifier_parser(&self) -> Identifiers { Identifiers { check_filenames: self.check_filenames, @@ -238,6 +246,123 @@ impl Check for FixTypos { } } +#[derive(Debug, Clone)] +pub struct DiffTypos { + check_filenames: bool, + check_files: bool, + binary: bool, +} + +impl Check for DiffTypos { + fn check_file( + &self, + path: &std::path::Path, + explicit: bool, + tokenizer: &tokens::Tokenizer, + dictionary: &dyn Dictionary, + reporter: &dyn report::Report, + ) -> Result<(), std::io::Error> { + let parser = typos::ParserBuilder::new() + .tokenizer(tokenizer) + .dictionary(dictionary) + .typos(); + + let mut content = Vec::new(); + let mut new_content = Vec::new(); + if self.check_files { + let (buffer, content_type) = read_file(path, reporter)?; + if !explicit && !self.binary && content_type.is_binary() { + let msg = report::BinaryFile { path }; + reporter.report(msg.into())?; + } else { + let mut fixes = Vec::new(); + 
let mut accum_line_num = AccumulateLineNum::new(); + for typo in parser.parse_bytes(&buffer) { + if is_fixable(&typo) { + fixes.push(typo.into_owned()); + } else { + let line_num = accum_line_num.line_num(&buffer, typo.byte_offset); + let (line, line_offset) = extract_line(&buffer, typo.byte_offset); + let msg = report::Typo { + context: Some(report::FileContext { path, line_num }.into()), + buffer: std::borrow::Cow::Borrowed(line), + byte_offset: line_offset, + typo: typo.typo.as_ref(), + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } + if !fixes.is_empty() { + new_content = fix_buffer(buffer.clone(), fixes.into_iter()); + content = buffer + } + } + } + + // Match FixTypos ordering for easy diffing. + let mut new_path = None; + if self.check_filenames { + if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { + let mut fixes = Vec::new(); + for typo in parser.parse_str(file_name) { + if is_fixable(&typo) { + fixes.push(typo.into_owned()); + } else { + let msg = report::Typo { + context: Some(report::PathContext { path }.into()), + buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()), + byte_offset: typo.byte_offset, + typo: typo.typo.as_ref(), + corrections: typo.corrections, + }; + reporter.report(msg.into())?; + } + } + if !fixes.is_empty() { + let file_name = file_name.to_owned().into_bytes(); + let new_name = fix_buffer(file_name, fixes.into_iter()); + let new_name = + String::from_utf8(new_name).expect("corrections are valid utf-8"); + new_path = Some(path.with_file_name(new_name)); + } + } + } + + if new_path.is_some() || !content.is_empty() { + let original_path = path.display().to_string(); + let fixed_path = new_path + .as_ref() + .map(|p| p.as_path()) + .unwrap_or(path) + .display() + .to_string(); + let original_content: Vec<_> = content + .lines_with_terminator() + .map(|s| String::from_utf8_lossy(s).into_owned()) + .collect(); + let fixed_content: Vec<_> = new_content + .lines_with_terminator() + .map(|s| 
String::from_utf8_lossy(s).into_owned()) + .collect(); + let diff = difflib::unified_diff( + &original_content, + &fixed_content, + original_path.as_str(), + fixed_path.as_str(), + "original", + "fixed", + 0, + ); + for line in diff { + print!("{}", line); + } + } + + Ok(()) + } +} + #[derive(Debug, Clone)] pub struct Identifiers { check_filenames: bool, diff --git a/src/diff.rs b/src/diff.rs deleted file mode 100644 index 8457588..0000000 --- a/src/diff.rs +++ /dev/null @@ -1,93 +0,0 @@ -use std::collections::BTreeMap; -use std::sync; - -use bstr::ByteSlice; - -pub struct Diff<'r> { - reporter: &'r dyn crate::report::Report, - deferred: sync::Mutex, -} - -impl<'r> Diff<'r> { - pub fn new(reporter: &'r dyn crate::report::Report) -> Self { - Self { - reporter, - deferred: sync::Mutex::new(crate::replace::Deferred::default()), - } - } - - pub fn show(&self) -> Result<(), std::io::Error> { - let deferred = self.deferred.lock().unwrap(); - - for (path, corrections) in deferred.content.iter() { - let buffer = std::fs::read(path)?; - - let mut original = Vec::new(); - let mut corrected = Vec::new(); - for (line_idx, line) in buffer.lines_with_terminator().enumerate() { - original.push(String::from_utf8_lossy(line).into_owned()); - - let line_num = line_idx + 1; - let line = if let Some(corrections) = corrections.get(&line_num) { - let line = line.to_vec(); - crate::replace::correct(line, &corrections) - } else { - line.to_owned() - }; - corrected.push(String::from_utf8_lossy(&line).into_owned()) - } - - let display_path = path.display().to_string(); - let diff = difflib::unified_diff( - &original, - &corrected, - display_path.as_str(), - display_path.as_str(), - "original", - "corrected", - 0, - ); - for line in diff { - print!("{}", line); - } - } - - Ok(()) - } -} - -impl<'r> crate::report::Report for Diff<'r> { - fn report(&self, msg: crate::report::Message<'_>) -> Result<(), std::io::Error> { - let typo = match &msg { - crate::report::Message::Typo(typo) => typo, - _ 
=> return self.reporter.report(msg), - }; - - let corrections = match &typo.corrections { - typos::Status::Corrections(corrections) if corrections.len() == 1 => corrections, - _ => return self.reporter.report(msg), - }; - - match &typo.context { - Some(crate::report::Context::File(file)) => { - let path = file.path.to_owned(); - let line_num = file.line_num; - let correction = crate::replace::Correction::new( - typo.byte_offset, - typo.typo, - corrections[0].as_ref(), - ); - let mut deferred = self.deferred.lock().unwrap(); - let content = deferred - .content - .entry(path) - .or_insert_with(BTreeMap::new) - .entry(line_num) - .or_insert_with(Vec::new); - content.push(correction); - Ok(()) - } - _ => self.reporter.report(msg), - } - } -} diff --git a/src/lib.rs b/src/lib.rs index 2584359..4d0e01e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,4 @@ pub mod checks; pub mod config; pub mod dict; -pub mod diff; -pub(crate) mod replace; pub mod report; diff --git a/src/main.rs b/src/main.rs index 5e33fd5..ade1fee 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,6 @@ mod args; use typos_cli::checks; use typos_cli::config; use typos_cli::dict; -use typos_cli::diff; use typos_cli::report; use proc_exit::WithCodeResultExt; @@ -99,13 +98,9 @@ fn run() -> proc_exit::ExitResult { args.format.reporter() }; let status_reporter = report::MessageStatus::new(output_reporter); - let mut reporter: &dyn report::Report = &status_reporter; - let diff_reporter = diff::Diff::new(reporter); - if args.diff { - reporter = &diff_reporter; - } + let reporter: &dyn report::Report = &status_reporter; - let (files, identifier_parser, word_parser, checks, fixer); + let (files, identifier_parser, word_parser, checks, fixer, differ); let selected_checks: &dyn checks::Check = if args.files { files = settings.build_files(); &files @@ -118,6 +113,9 @@ fn run() -> proc_exit::ExitResult { } else if args.write_changes { fixer = settings.build_fix_typos(); &fixer + } else if args.diff { + 
differ = settings.build_diff_typos(); + &differ } else { checks = settings.build_typos(); &checks @@ -152,10 +150,6 @@ fn run() -> proc_exit::ExitResult { if status_reporter.errors_found() { errors_found = true; } - - if args.diff { - diff_reporter.show().with_code(proc_exit::Code::FAILURE)?; - } } if errors_found { diff --git a/src/replace.rs b/src/replace.rs deleted file mode 100644 index 78f2a17..0000000 --- a/src/replace.rs +++ /dev/null @@ -1,103 +0,0 @@ -use std::collections::BTreeMap; -use std::path; - -#[derive(Clone, Debug, Default)] -pub(crate) struct Deferred { - pub(crate) content: BTreeMap>>, - pub(crate) paths: BTreeMap>, -} - -#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] -pub(crate) struct Correction { - pub(crate) byte_offset: usize, - pub(crate) typo: Vec, - pub(crate) correction: Vec, -} - -impl Correction { - pub(crate) fn new(byte_offset: usize, typo: &str, correction: &str) -> Self { - Self { - byte_offset, - typo: typo.as_bytes().to_vec(), - correction: correction.as_bytes().to_vec(), - } - } -} - -pub(crate) fn correct(mut line: Vec, corrections: &[Correction]) -> Vec { - let mut corrections: Vec<_> = corrections.iter().collect(); - corrections.sort_unstable(); - corrections.reverse(); - - for correction in corrections { - let start = correction.byte_offset; - let end = start + correction.typo.len(); - line.splice(start..end, correction.correction.iter().copied()); - } - - line -} - -#[cfg(test)] -mod test { - use super::*; - - fn simple_correct(line: &str, corrections: Vec<(usize, &str, &str)>) -> String { - let line = line.as_bytes().to_vec(); - let corrections: Vec<_> = corrections - .into_iter() - .map(|(byte_offset, typo, correction)| Correction { - byte_offset, - typo: typo.as_bytes().to_vec(), - correction: correction.as_bytes().to_vec(), - }) - .collect(); - let actual = correct(line, &corrections); - String::from_utf8(actual).unwrap() - } - - #[test] - fn test_correct_single() { - let actual = simple_correct("foo foo foo", 
vec![(4, "foo", "bar")]); - assert_eq!(actual, "foo bar foo"); - } - - #[test] - fn test_correct_single_grow() { - let actual = simple_correct("foo foo foo", vec![(4, "foo", "happy")]); - assert_eq!(actual, "foo happy foo"); - } - - #[test] - fn test_correct_single_shrink() { - let actual = simple_correct("foo foo foo", vec![(4, "foo", "if")]); - assert_eq!(actual, "foo if foo"); - } - - #[test] - fn test_correct_start() { - let actual = simple_correct("foo foo foo", vec![(0, "foo", "bar")]); - assert_eq!(actual, "bar foo foo"); - } - - #[test] - fn test_correct_end() { - let actual = simple_correct("foo foo foo", vec![(8, "foo", "bar")]); - assert_eq!(actual, "foo foo bar"); - } - - #[test] - fn test_correct_end_grow() { - let actual = simple_correct("foo foo foo", vec![(8, "foo", "happy")]); - assert_eq!(actual, "foo foo happy"); - } - - #[test] - fn test_correct_multiple() { - let actual = simple_correct( - "foo foo foo", - vec![(4, "foo", "happy"), (8, "foo", "world")], - ); - assert_eq!(actual, "foo happy world"); - } -} From aba85df4350c7bbbf127448f84c1b61ebcd6c3f3 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 1 Jan 2021 21:35:49 -0600 Subject: [PATCH 13/16] docs(typos): Clarify intent --- crates/typos/src/dict.rs | 51 ++++++++++++++++++++++---------------- crates/typos/src/parser.rs | 9 +++++++ crates/typos/src/tokens.rs | 11 ++++++++ 3 files changed, 49 insertions(+), 22 deletions(-) diff --git a/crates/typos/src/dict.rs b/crates/typos/src/dict.rs index 7c92d5b..971ca86 100644 --- a/crates/typos/src/dict.rs +++ b/crates/typos/src/dict.rs @@ -1,5 +1,34 @@ use std::borrow::Cow; +/// Look up the validity of a term. +pub trait Dictionary: Send + Sync { + /// Look up the validity of an Identifier. + /// + /// `None` if the status is unknown. + fn correct_ident<'s, 'w>(&'s self, ident: crate::tokens::Identifier<'w>) -> Option>; + + /// Look up the validity of a Word. + /// + /// `None` if the status is unknown. 
+ fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option>; +} + +pub(crate) struct NullDictionary; + +impl Dictionary for NullDictionary { + fn correct_ident<'s, 'w>( + &'s self, + _ident: crate::tokens::Identifier<'w>, + ) -> Option> { + None + } + + fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option> { + None + } +} + +/// Validity of a term in a Dictionary. #[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)] #[serde(rename_all = "snake_case")] #[serde(untagged)] @@ -54,25 +83,3 @@ impl<'c> Status<'c> { } } } - -pub trait Dictionary: Send + Sync { - fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>) - -> Option>; - - fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option>; -} - -pub(crate) struct NullDictionary; - -impl Dictionary for NullDictionary { - fn correct_ident<'s, 'w>( - &'s self, - _ident: crate::tokens::Identifier<'w>, - ) -> Option> { - None - } - - fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option> { - None - } -} diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs index 95d0d4c..ae23bfd 100644 --- a/crates/typos/src/parser.rs +++ b/crates/typos/src/parser.rs @@ -15,11 +15,13 @@ impl<'p> ParserBuilder<'p, 'static> { } impl<'p, 'd> ParserBuilder<'p, 'd> { + /// Set the Tokenizer used when parsing. pub fn tokenizer(mut self, tokenizer: &'p tokens::Tokenizer) -> Self { self.tokenizer = Some(tokenizer); self } + /// Set the dictionary used when parsing. pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> { ParserBuilder { tokenizer: self.tokenizer, @@ -27,6 +29,7 @@ impl<'p, 'd> ParserBuilder<'p, 'd> { } } + /// Extract typos from the buffer. pub fn typos(&self) -> TyposParser<'p, 'd> { TyposParser { tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), @@ -34,12 +37,14 @@ impl<'p, 'd> ParserBuilder<'p, 'd> { } } + /// Parse for Identifiers. 
pub fn identifiers(&self) -> IdentifiersParser<'p> { IdentifiersParser { tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), } } + /// Parse for Words. pub fn words(&self) -> WordsParser<'p> { WordsParser { tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), } } @@ -59,6 +64,7 @@ impl<'p> Default for ParserBuilder<'p, 'static> { static DEFAULT_TOKENIZER: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| tokens::Tokenizer::new()); +/// Extract typos from the buffer. #[derive(Clone)] pub struct TyposParser<'p, 'd> { tokenizer: &'p tokens::Tokenizer, @@ -116,6 +122,7 @@ impl<'p, 'd> TyposParser<'p, 'd> { } } +/// An invalid term found in the buffer. #[derive(Clone, Debug)] #[non_exhaustive] pub struct Typo<'m> { @@ -152,6 +159,7 @@ impl<'m> Default for Typo<'m> { } } +/// Parse for Identifiers. #[derive(Debug, Clone)] pub struct IdentifiersParser<'p> { tokenizer: &'p tokens::Tokenizer, @@ -167,6 +175,7 @@ impl<'p> IdentifiersParser<'p> { } } +/// Parse for Words. #[derive(Debug, Clone)] pub struct WordsParser<'p> { tokenizer: &'p tokens::Tokenizer, diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index f372c96..3f5aefc 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -1,3 +1,4 @@ +/// Define rules for tokenizing a buffer. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TokenizerBuilder { ignore_hex: bool, @@ -12,26 +13,31 @@ impl TokenizerBuilder { Default::default() } + /// Specify that hexadecimal numbers should be ignored. pub fn ignore_hex(&mut self, yes: bool) -> &mut Self { self.ignore_hex = yes; self } + /// Specify that leading digits are allowed for Identifiers. pub fn leading_digits(&mut self, yes: bool) -> &mut Self { self.leading_digits = yes; self } + /// Extend accepted leading characters for Identifiers. pub fn leading_chars(&mut self, chars: String) -> &mut Self { self.leading_chars = chars; self } + /// Specify that digits can be included in Identifiers. 
pub fn include_digits(&mut self, yes: bool) -> &mut Self { self.include_digits = yes; self } + /// Extend accepted characters for Identifiers. pub fn include_chars(&mut self, chars: String) -> &mut Self { self.include_chars = chars; self @@ -81,6 +87,7 @@ impl Default for TokenizerBuilder { } } +/// Extract Identifiers from a buffer. #[derive(Debug, Clone)] pub struct Tokenizer { words_str: regex::Regex, @@ -148,6 +155,7 @@ fn is_hex(ident: &[u8]) -> bool { HEX.is_match(ident) } +/// A term composed of Words. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Identifier<'t> { token: &'t str, @@ -171,11 +179,13 @@ impl<'t> Identifier<'t> { self.offset } + /// Split into individual Words. pub fn split(&self) -> impl Iterator> { split_ident(self.token, self.offset) } } +/// An indivisible term. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Word<'t> { token: &'t str, @@ -325,6 +335,7 @@ impl<'s> Iterator for SplitIdent<'s> { } } +/// Format of the term. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Case { Title, From 692f0ac095948f58d80fcf29fefd383838087ae8 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Sat, 2 Jan 2021 12:51:35 -0600 Subject: [PATCH 14/16] refactor(typos): Focus API on primary use case --- crates/typos/src/parser.rs | 49 ++------------------------------------ src/checks.rs | 20 ++++++---------- 2 files changed, 9 insertions(+), 60 deletions(-) diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs index ae23bfd..f62bffb 100644 --- a/crates/typos/src/parser.rs +++ b/crates/typos/src/parser.rs @@ -2,6 +2,7 @@ use crate::tokens; use crate::Dictionary; use std::borrow::Cow; +/// Extract typos from the buffer. #[derive(Clone)] pub struct ParserBuilder<'p, 'd> { tokenizer: Option<&'p tokens::Tokenizer>, @@ -30,26 +31,12 @@ impl<'p, 'd> ParserBuilder<'p, 'd> { } /// Extract typos from the buffer. 
- pub fn typos(&self) -> TyposParser<'p, 'd> { + pub fn build(&self) -> TyposParser<'p, 'd> { TyposParser { tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), dictionary: self.dictionary, } } - - /// Parse for Identifiers. - pub fn identifiers(&self) -> IdentifiersParser<'p> { - IdentifiersParser { - tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), - } - } - - /// Parse for Words. - pub fn words(&self) -> WordsParser<'p> { - WordsParser { - tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), - } - } } impl<'p> Default for ParserBuilder<'p, 'static> { @@ -158,35 +145,3 @@ impl<'m> Default for Typo<'m> { } } } - -/// Parse for Identifiers. -#[derive(Debug, Clone)] -pub struct IdentifiersParser<'p> { - tokenizer: &'p tokens::Tokenizer, -} - -impl<'p> IdentifiersParser<'p> { - pub fn parse_str(&self, buffer: &'p str) -> impl Iterator> { - self.tokenizer.parse_str(buffer) - } - - pub fn parse_bytes(&self, buffer: &'p [u8]) -> impl Iterator> { - self.tokenizer.parse_bytes(buffer) - } -} - -/// Parse for Words. 
-#[derive(Debug, Clone)] -pub struct WordsParser<'p> { - tokenizer: &'p tokens::Tokenizer, -} - -impl<'p> WordsParser<'p> { - pub fn parse_str(&self, buffer: &'p str) -> impl Iterator> { - self.tokenizer.parse_str(buffer).flat_map(|i| i.split()) - } - - pub fn parse_bytes(&self, buffer: &'p [u8]) -> impl Iterator> { - self.tokenizer.parse_bytes(buffer).flat_map(|i| i.split()) - } -} diff --git a/src/checks.rs b/src/checks.rs index 9349437..3778396 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -118,7 +118,7 @@ impl Check for Typos { let parser = typos::ParserBuilder::new() .tokenizer(tokenizer) .dictionary(dictionary) - .typos(); + .build(); if self.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { @@ -180,7 +180,7 @@ impl Check for FixTypos { let parser = typos::ParserBuilder::new() .tokenizer(tokenizer) .dictionary(dictionary) - .typos(); + .build(); if self.check_files { let (buffer, content_type) = read_file(path, reporter)?; @@ -265,7 +265,7 @@ impl Check for DiffTypos { let parser = typos::ParserBuilder::new() .tokenizer(tokenizer) .dictionary(dictionary) - .typos(); + .build(); let mut content = Vec::new(); let mut new_content = Vec::new(); @@ -379,13 +379,9 @@ impl Check for Identifiers { _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let parser = typos::ParserBuilder::new() - .tokenizer(tokenizer) - .identifiers(); - if self.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - for word in parser.parse_str(file_name) { + for word in tokenizer.parse_str(file_name) { let msg = report::Parse { context: Some(report::PathContext { path }.into()), kind: report::ParseKind::Identifier, @@ -402,7 +398,7 @@ impl Check for Identifiers { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; } else { - for word in parser.parse_bytes(&buffer) { + for word in tokenizer.parse_bytes(&buffer) { // HACK: Don't look up the line_num per 
entry to better match the performance // of Typos for comparison purposes. We don't really get much out of it // anyway. @@ -437,11 +433,9 @@ impl Check for Words { _dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let parser = typos::ParserBuilder::new().tokenizer(tokenizer).words(); - if self.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - for word in parser.parse_str(file_name) { + for word in tokenizer.parse_str(file_name).flat_map(|i| i.split()) { let msg = report::Parse { context: Some(report::PathContext { path }.into()), kind: report::ParseKind::Word, @@ -458,7 +452,7 @@ impl Check for Words { let msg = report::BinaryFile { path }; reporter.report(msg.into())?; } else { - for word in parser.parse_bytes(&buffer) { + for word in tokenizer.parse_bytes(&buffer).flat_map(|i| i.split()) { // HACK: Don't look up the line_num per entry to better match the performance // of Typos for comparison purposes. We don't really get much out of it // anyway. 
From e6a4f49eb54eefcd34e815be536137fdac345758 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Sat, 2 Jan 2021 12:56:20 -0600 Subject: [PATCH 15/16] refactor: Clarify names --- benches/checks.rs | 2 +- src/checks.rs | 30 +++++++++++++++--------------- src/main.rs | 6 +++--- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/benches/checks.rs b/benches/checks.rs index dcb8dc2..fbf1f42 100644 --- a/benches/checks.rs +++ b/benches/checks.rs @@ -5,7 +5,7 @@ extern crate test; mod data; use assert_fs::prelude::*; -use typos_cli::checks::Check; +use typos_cli::checks::FileChecker; fn bench_files(data: &str, b: &mut test::Bencher) { let temp = assert_fs::TempDir::new().unwrap(); diff --git a/src/checks.rs b/src/checks.rs index 3778396..36d8980 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -4,7 +4,7 @@ use crate::report; use typos::tokens; use typos::Dictionary; -pub trait Check: Send + Sync { +pub trait FileChecker: Send + Sync { fn check_file( &self, path: &std::path::Path, @@ -106,7 +106,7 @@ pub struct Typos { binary: bool, } -impl Check for Typos { +impl FileChecker for Typos { fn check_file( &self, path: &std::path::Path, @@ -168,7 +168,7 @@ pub struct FixTypos { binary: bool, } -impl Check for FixTypos { +impl FileChecker for FixTypos { fn check_file( &self, path: &std::path::Path, @@ -253,7 +253,7 @@ pub struct DiffTypos { binary: bool, } -impl Check for DiffTypos { +impl FileChecker for DiffTypos { fn check_file( &self, path: &std::path::Path, @@ -370,7 +370,7 @@ pub struct Identifiers { binary: bool, } -impl Check for Identifiers { +impl FileChecker for Identifiers { fn check_file( &self, path: &std::path::Path, @@ -424,7 +424,7 @@ pub struct Words { binary: bool, } -impl Check for Words { +impl FileChecker for Words { fn check_file( &self, path: &std::path::Path, @@ -476,7 +476,7 @@ pub struct FoundFiles { binary: bool, } -impl Check for FoundFiles { +impl FileChecker for FoundFiles { fn check_file( &self, path: &std::path::Path, @@ -613,22 +613,22 
@@ fn fix_buffer(mut buffer: Vec, typos: impl Iterator Result<(), ignore::Error> { for entry in walk { - check_entry(entry, checks, parser, dictionary, reporter)?; + walk_entry(entry, checks, parser, dictionary, reporter)?; } Ok(()) } -pub fn check_path_parallel( +pub fn walk_path_parallel( walk: ignore::WalkParallel, - checks: &dyn Check, + checks: &dyn FileChecker, parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, reporter: &dyn report::Report, @@ -636,7 +636,7 @@ pub fn check_path_parallel( let error: std::sync::Mutex> = std::sync::Mutex::new(Ok(())); walk.run(|| { Box::new(|entry: Result| { - match check_entry(entry, checks, parser, dictionary, reporter) { + match walk_entry(entry, checks, parser, dictionary, reporter) { Ok(()) => ignore::WalkState::Continue, Err(err) => { *error.lock().unwrap() = Err(err); @@ -649,9 +649,9 @@ pub fn check_path_parallel( error.into_inner().unwrap() } -fn check_entry( +fn walk_entry( entry: Result, - checks: &dyn Check, + checks: &dyn FileChecker, parser: &typos::tokens::Tokenizer, dictionary: &dyn typos::Dictionary, reporter: &dyn report::Report, diff --git a/src/main.rs b/src/main.rs index ade1fee..f5e206d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -101,7 +101,7 @@ fn run() -> proc_exit::ExitResult { let reporter: &dyn report::Report = &status_reporter; let (files, identifier_parser, word_parser, checks, fixer, differ); - let selected_checks: &dyn checks::Check = if args.files { + let selected_checks: &dyn checks::FileChecker = if args.files { files = settings.build_files(); &files } else if args.identifiers { @@ -122,7 +122,7 @@ fn run() -> proc_exit::ExitResult { }; if single_threaded { - checks::check_path( + checks::walk_path( walk.build(), selected_checks, &parser, @@ -130,7 +130,7 @@ fn run() -> proc_exit::ExitResult { reporter, ) } else { - checks::check_path_parallel( + checks::walk_path_parallel( walk.build_parallel(), selected_checks, &parser, From 67222e9338c695cf7ff24822076fa7af2851be09 Mon 
Sep 17 00:00:00 2001 From: Ed Page Date: Sat, 2 Jan 2021 13:17:15 -0600 Subject: [PATCH 16/16] style: Address clippy --- crates/typos/src/parser.rs | 6 +++--- src/checks.rs | 9 ++------- src/report.rs | 19 +++++++++++++++---- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs index f62bffb..883a730 100644 --- a/crates/typos/src/parser.rs +++ b/crates/typos/src/parser.rs @@ -26,14 +26,14 @@ impl<'p, 'd> ParserBuilder<'p, 'd> { pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> { ParserBuilder { tokenizer: self.tokenizer, - dictionary: dictionary, + dictionary, } } /// Extract typos from the buffer. pub fn build(&self) -> TyposParser<'p, 'd> { TyposParser { - tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER), + tokenizer: self.tokenizer.unwrap_or(&DEFAULT_TOKENIZER), dictionary: self.dictionary, } } @@ -49,7 +49,7 @@ impl<'p> Default for ParserBuilder<'p, 'static> { } static DEFAULT_TOKENIZER: once_cell::sync::Lazy = - once_cell::sync::Lazy::new(|| tokens::Tokenizer::new()); + once_cell::sync::Lazy::new(tokens::Tokenizer::new); /// Extract typos from the buffer. 
#[derive(Clone)] diff --git a/src/checks.rs b/src/checks.rs index 36d8980..3bfb25b 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -331,12 +331,7 @@ impl FileChecker for DiffTypos { if new_path.is_some() || !content.is_empty() { let original_path = path.display().to_string(); - let fixed_path = new_path - .as_ref() - .map(|p| p.as_path()) - .unwrap_or(path) - .display() - .to_string(); + let fixed_path = new_path.as_deref().unwrap_or(path).display().to_string(); let original_content: Vec<_> = content .lines_with_terminator() .map(|s| String::from_utf8_lossy(s).into_owned()) @@ -595,7 +590,7 @@ fn extract_fix<'t>(typo: &'t typos::Typo<'t>) -> Option<&'t str> { } } -fn is_fixable<'t>(typo: &typos::Typo<'t>) -> bool { +fn is_fixable(typo: &typos::Typo<'_>) -> bool { extract_fix(typo).is_some() } diff --git a/src/report.rs b/src/report.rs index 3d213cd..08b2f28 100644 --- a/src/report.rs +++ b/src/report.rs @@ -234,10 +234,21 @@ impl<'r> MessageStatus<'r> { impl<'r> Report for MessageStatus<'r> { fn report(&self, msg: Message) -> Result<(), std::io::Error> { - self.typos_found - .compare_and_swap(false, msg.is_correction(), atomic::Ordering::Relaxed); - self.errors_found - .compare_and_swap(false, msg.is_error(), atomic::Ordering::Relaxed); + let _ = self.typos_found.compare_exchange( + false, + msg.is_correction(), + atomic::Ordering::Relaxed, + atomic::Ordering::Relaxed, + ); + let _ = self + .errors_found + .compare_exchange( + false, + msg.is_error(), + atomic::Ordering::Relaxed, + atomic::Ordering::Relaxed, + ) + .unwrap(); self.reporter.report(msg) } }