Merge pull request #64 from epage/debug

feat: Dump files, identifiers, and words
2024-11-22 00:51:11 -05:00 · 2019-10-31 11:40:43 -06:00 · 2019-10-31 11:40:43 -06:00 · cc4b53a1b4
commit cc4b53a1b4
parent a48a457cc3 ce365ae12e
5 changed files with 509 additions and 306 deletions
--- a/benches/file.rs
+++ b/benches/file.rs
@@ -7,374 +7,203 @@ mod data;
 use assert_fs::prelude::*;
 use bstr::ByteSlice;

-#[bench]
-fn check_file_empty(b: &mut test::Bencher) {
+fn bench_read(data: &str, b: &mut test::Bencher) {
    let temp = assert_fs::TempDir::new().unwrap();
    let sample_path = temp.child("sample");
-    sample_path.write_str(data::EMPTY).unwrap();
+    sample_path.write_str(data).unwrap();

-    let corrections = typos_cli::dict::BuiltIn::new();
-    let parser = typos::tokens::Parser::new();
-    let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
-    b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
-
-    temp.close().unwrap();
-}
-
-#[bench]
-fn check_file_no_tokens(b: &mut test::Bencher) {
-    let temp = assert_fs::TempDir::new().unwrap();
-    let sample_path = temp.child("sample");
-    sample_path.write_str(data::NO_TOKENS).unwrap();
-
-    let corrections = typos_cli::dict::BuiltIn::new();
-    let parser = typos::tokens::Parser::new();
-    let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
-    b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
-
-    temp.close().unwrap();
-}
-
-#[bench]
-fn check_file_single_token(b: &mut test::Bencher) {
-    let temp = assert_fs::TempDir::new().unwrap();
-    let sample_path = temp.child("sample");
-    sample_path.write_str(data::SINGLE_TOKEN).unwrap();
-
-    let corrections = typos_cli::dict::BuiltIn::new();
-    let parser = typos::tokens::Parser::new();
-    let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
-    b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
-
-    temp.close().unwrap();
-}
-
-#[bench]
-fn check_file_sherlock(b: &mut test::Bencher) {
-    let temp = assert_fs::TempDir::new().unwrap();
-    let sample_path = temp.child("sample");
-    sample_path.write_str(data::SHERLOCK).unwrap();
-
-    let corrections = typos_cli::dict::BuiltIn::new();
-    let parser = typos::tokens::Parser::new();
-    let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
-    b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
-
-    temp.close().unwrap();
-}
-
-#[bench]
-fn check_file_code(b: &mut test::Bencher) {
-    let temp = assert_fs::TempDir::new().unwrap();
-    let sample_path = temp.child("sample");
-    sample_path.write_str(data::CODE).unwrap();
-
-    let corrections = typos_cli::dict::BuiltIn::new();
-    let parser = typos::tokens::Parser::new();
-    let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
-    b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
-
-    temp.close().unwrap();
-}
-
-#[bench]
-fn check_file_corpus(b: &mut test::Bencher) {
-    let temp = assert_fs::TempDir::new().unwrap();
-    let sample_path = temp.child("sample");
-    sample_path.write_str(data::CORPUS).unwrap();
-
-    let corrections = typos_cli::dict::BuiltIn::new();
-    let parser = typos::tokens::Parser::new();
-    let checks = typos::checks::CheckSettings::new().build(&corrections, &parser);
-    b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
+    b.iter(|| std::fs::read(sample_path.path()));

    temp.close().unwrap();
 }

 #[bench]
 fn read_empty(b: &mut test::Bencher) {
-    let temp = assert_fs::TempDir::new().unwrap();
-    let sample_path = temp.child("sample");
-    sample_path.write_str(data::EMPTY).unwrap();
-
-    b.iter(|| std::fs::read(sample_path.path()));
-
-    temp.close().unwrap();
+    bench_read(data::EMPTY, b);
 }

 #[bench]
 fn read_no_tokens(b: &mut test::Bencher) {
-    let temp = assert_fs::TempDir::new().unwrap();
-    let sample_path = temp.child("sample");
-    sample_path.write_str(data::NO_TOKENS).unwrap();
-
-    b.iter(|| std::fs::read(sample_path.path()));
-
-    temp.close().unwrap();
+    bench_read(data::NO_TOKENS, b);
 }

 #[bench]
 fn read_single_token(b: &mut test::Bencher) {
-    let temp = assert_fs::TempDir::new().unwrap();
-    let sample_path = temp.child("sample");
-    sample_path.write_str(data::SINGLE_TOKEN).unwrap();
-
-    b.iter(|| std::fs::read(sample_path.path()));
-
-    temp.close().unwrap();
+    bench_read(data::SINGLE_TOKEN, b);
 }

 #[bench]
 fn read_sherlock(b: &mut test::Bencher) {
-    let temp = assert_fs::TempDir::new().unwrap();
-    let sample_path = temp.child("sample");
-    sample_path.write_str(data::SHERLOCK).unwrap();
-
-    b.iter(|| std::fs::read(sample_path.path()));
-
-    temp.close().unwrap();
+    bench_read(data::SHERLOCK, b);
 }

 #[bench]
 fn read_code(b: &mut test::Bencher) {
-    let temp = assert_fs::TempDir::new().unwrap();
-    let sample_path = temp.child("sample");
-    sample_path.write_str(data::CODE).unwrap();
-
-    b.iter(|| std::fs::read(sample_path.path()));
-
-    temp.close().unwrap();
+    bench_read(data::CODE, b);
 }

 #[bench]
 fn read_corpus(b: &mut test::Bencher) {
+    bench_read(data::CORPUS, b);
+}
+
+fn bench_split_lines(data: &str, b: &mut test::Bencher) {
+    b.iter(|| data.as_bytes().lines().enumerate().last());
+}
+
+#[bench]
+fn parse_words_lines_empty(b: &mut test::Bencher) {
+    bench_split_lines(data::EMPTY, b);
+}
+
+#[bench]
+fn parse_words_lines_no_tokens(b: &mut test::Bencher) {
+    bench_split_lines(data::NO_TOKENS, b);
+}
+
+#[bench]
+fn parse_words_lines_single_token(b: &mut test::Bencher) {
+    bench_split_lines(data::SINGLE_TOKEN, b);
+}
+
+#[bench]
+fn parse_words_lines_sherlock(b: &mut test::Bencher) {
+    bench_split_lines(data::SHERLOCK, b);
+}
+
+#[bench]
+fn parse_words_lines_code(b: &mut test::Bencher) {
+    bench_split_lines(data::CODE, b);
+}
+
+#[bench]
+fn parse_words_lines_corpus(b: &mut test::Bencher) {
+    bench_split_lines(data::CORPUS, b);
+}
+
+fn bench_parse_ident(data: &str, b: &mut test::Bencher) {
    let temp = assert_fs::TempDir::new().unwrap();
    let sample_path = temp.child("sample");
-    sample_path.write_str(data::CORPUS).unwrap();
+    sample_path.write_str(data).unwrap();

-    b.iter(|| std::fs::read(sample_path.path()));
+    let parser = typos::tokens::Parser::new();
+    let checks = typos::checks::TyposSettings::new().build_identifier_parser(&parser);
+    b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));

    temp.close().unwrap();
 }

 #[bench]
-fn split_lines_empty(b: &mut test::Bencher) {
-    b.iter(|| data::EMPTY.as_bytes().lines().enumerate().last());
+fn parse_idents_empty(b: &mut test::Bencher) {
+    bench_parse_ident(data::EMPTY, b);
 }

 #[bench]
-fn split_lines_no_tokens(b: &mut test::Bencher) {
-    b.iter(|| data::NO_TOKENS.as_bytes().lines().enumerate().last());
+fn parse_idents_no_tokens(b: &mut test::Bencher) {
+    bench_parse_ident(data::NO_TOKENS, b);
 }

 #[bench]
-fn split_lines_single_token(b: &mut test::Bencher) {
-    b.iter(|| data::SINGLE_TOKEN.as_bytes().lines().enumerate().last());
+fn parse_idents_single_token(b: &mut test::Bencher) {
+    bench_parse_ident(data::SINGLE_TOKEN, b);
 }

 #[bench]
-fn split_lines_sherlock(b: &mut test::Bencher) {
-    b.iter(|| data::SHERLOCK.as_bytes().lines().enumerate().last());
+fn parse_idents_sherlock(b: &mut test::Bencher) {
+    bench_parse_ident(data::SHERLOCK, b);
 }

 #[bench]
-fn split_lines_code(b: &mut test::Bencher) {
-    b.iter(|| data::CODE.as_bytes().lines().enumerate().last());
+fn parse_idents_code(b: &mut test::Bencher) {
+    bench_parse_ident(data::CODE, b);
 }

 #[bench]
-fn split_lines_corpus(b: &mut test::Bencher) {
-    b.iter(|| data::CORPUS.as_bytes().lines().enumerate().last());
+fn parse_idents_corpus(b: &mut test::Bencher) {
+    bench_parse_ident(data::CORPUS, b);
 }

-#[bench]
-fn parse_empty(b: &mut test::Bencher) {
+fn bench_parse_word(data: &str, b: &mut test::Bencher) {
+    let temp = assert_fs::TempDir::new().unwrap();
+    let sample_path = temp.child("sample");
+    sample_path.write_str(data).unwrap();
+
    let parser = typos::tokens::Parser::new();
-    b.iter(|| {
-        data::EMPTY
-            .as_bytes()
-            .lines()
-            .enumerate()
-            .for_each(|(_idx, l)| {
-                parser.parse_bytes(l).last();
-                ()
-            })
-    });
+    let checks = typos::checks::TyposSettings::new().build_word_parser(&parser);
+    b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
+
+    temp.close().unwrap();
 }

 #[bench]
-fn parse_no_tokens(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
-    b.iter(|| {
-        data::NO_TOKENS
-            .as_bytes()
-            .lines()
-            .enumerate()
-            .for_each(|(_idx, l)| {
-                parser.parse_bytes(l).last();
-                ()
-            })
-    });
+fn parse_words_empty(b: &mut test::Bencher) {
+    bench_parse_word(data::EMPTY, b);
 }

 #[bench]
-fn parse_single_token(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
-    b.iter(|| {
-        data::SINGLE_TOKEN
-            .as_bytes()
-            .lines()
-            .enumerate()
-            .for_each(|(_idx, l)| {
-                parser.parse_bytes(l).last();
-                ()
-            })
-    });
+fn parse_words_no_tokens(b: &mut test::Bencher) {
+    bench_parse_word(data::NO_TOKENS, b);
 }

 #[bench]
-fn parse_sherlock(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
-    b.iter(|| {
-        data::SHERLOCK
-            .as_bytes()
-            .lines()
-            .enumerate()
-            .for_each(|(_idx, l)| {
-                parser.parse_bytes(l).last();
-                ()
-            })
-    });
+fn parse_words_single_token(b: &mut test::Bencher) {
+    bench_parse_word(data::SINGLE_TOKEN, b);
 }

 #[bench]
-fn parse_code(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
-    b.iter(|| {
-        data::CODE
-            .as_bytes()
-            .lines()
-            .enumerate()
-            .for_each(|(_idx, l)| {
-                parser.parse_bytes(l).last();
-                ()
-            })
-    });
+fn parse_words_sherlock(b: &mut test::Bencher) {
+    bench_parse_word(data::SHERLOCK, b);
 }

 #[bench]
-fn parse_corpus(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
-    b.iter(|| {
-        data::CORPUS
-            .as_bytes()
-            .lines()
-            .enumerate()
-            .for_each(|(_idx, l)| {
-                parser.parse_bytes(l).last();
-                ()
-            })
-    });
+fn parse_words_code(b: &mut test::Bencher) {
+    bench_parse_word(data::CODE, b);
 }

 #[bench]
-fn split_empty(b: &mut test::Bencher) {
+fn parse_words_corpus(b: &mut test::Bencher) {
+    bench_parse_word(data::CORPUS, b);
+}
+
+fn bench_check_file(data: &str, b: &mut test::Bencher) {
+    let temp = assert_fs::TempDir::new().unwrap();
+    let sample_path = temp.child("sample");
+    sample_path.write_str(data).unwrap();
+
+    let corrections = typos_cli::dict::BuiltIn::new();
    let parser = typos::tokens::Parser::new();
-    b.iter(|| {
-        data::EMPTY
-            .as_bytes()
-            .lines()
-            .enumerate()
-            .for_each(|(_idx, l)| {
-                parser.parse_bytes(l).for_each(|l| {
-                    l.split().last();
-                    ()
-                })
-            })
-    });
+    let checks = typos::checks::TyposSettings::new().build_checks(&corrections, &parser);
+    b.iter(|| checks.check_file(sample_path.path(), true, typos::report::print_silent));
+
+    temp.close().unwrap();
 }

 #[bench]
-fn split_no_tokens(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
-    b.iter(|| {
-        data::NO_TOKENS
-            .as_bytes()
-            .lines()
-            .enumerate()
-            .for_each(|(_idx, l)| {
-                parser.parse_bytes(l).for_each(|l| {
-                    l.split().last();
-                    ()
-                })
-            })
-    });
+fn check_file_empty(b: &mut test::Bencher) {
+    bench_check_file(data::EMPTY, b);
 }

 #[bench]
-fn split_single_token(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
-    b.iter(|| {
-        data::SINGLE_TOKEN
-            .as_bytes()
-            .lines()
-            .enumerate()
-            .for_each(|(_idx, l)| {
-                parser.parse_bytes(l).for_each(|l| {
-                    l.split().last();
-                    ()
-                })
-            })
-    });
+fn check_file_no_tokens(b: &mut test::Bencher) {
+    bench_check_file(data::NO_TOKENS, b);
 }

 #[bench]
-fn split_sherlock(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
-    b.iter(|| {
-        data::SHERLOCK
-            .as_bytes()
-            .lines()
-            .enumerate()
-            .for_each(|(_idx, l)| {
-                parser.parse_bytes(l).for_each(|l| {
-                    l.split().last();
-                    ()
-                })
-            })
-    });
+fn check_file_single_token(b: &mut test::Bencher) {
+    bench_check_file(data::SINGLE_TOKEN, b);
 }

 #[bench]
-fn split_code(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
-    b.iter(|| {
-        data::CODE
-            .as_bytes()
-            .lines()
-            .enumerate()
-            .for_each(|(_idx, l)| {
-                parser.parse_bytes(l).for_each(|l| {
-                    l.split().last();
-                    ()
-                })
-            })
-    });
+fn check_file_sherlock(b: &mut test::Bencher) {
+    bench_check_file(data::SHERLOCK, b);
 }

 #[bench]
-fn split_corpus(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
-    b.iter(|| {
-        data::CORPUS
-            .as_bytes()
-            .lines()
-            .enumerate()
-            .for_each(|(_idx, l)| {
-                parser.parse_bytes(l).for_each(|l| {
-                    l.split().last();
-                    ()
-                })
-            })
-    });
+fn check_file_code(b: &mut test::Bencher) {
+    bench_check_file(data::CODE, b);
+}
+
+#[bench]
+fn check_file_corpus(b: &mut test::Bencher) {
+    bench_check_file(data::CORPUS, b);
 }
--- a/src/main.rs
+++ b/src/main.rs
@@ -56,6 +56,18 @@ struct Args {
    /// Ignore implicit configuration files.
    isolated: bool,

+    #[structopt(long)]
+    /// Print each file that would be spellchecked.
+    files: bool,
+
+    #[structopt(long)]
+    /// Print each identifier that would be spellchecked.
+    identifiers: bool,
+
+    #[structopt(long)]
+    /// Print each word that would be spellchecked.
+    words: bool,
+
    #[structopt(flatten)]
    overrides: FileArgs,

@@ -249,7 +261,79 @@ impl config::WalkSource for WalkArgs {
    }
 }

-pub fn init_logging(level: Option<log::Level>) {
+trait Checks {
+    fn check_filename(
+        &self,
+        path: &std::path::Path,
+        report: typos::report::Report,
+    ) -> Result<bool, typos::Error>;
+
+    fn check_file(
+        &self,
+        path: &std::path::Path,
+        explicit: bool,
+        report: typos::report::Report,
+    ) -> Result<bool, typos::Error>;
+}
+
+impl<'p> Checks for typos::checks::ParseIdentifiers<'p> {
+    fn check_filename(
+        &self,
+        path: &std::path::Path,
+        report: typos::report::Report,
+    ) -> Result<bool, typos::Error> {
+        self.check_filename(path, report)
+    }
+
+    fn check_file(
+        &self,
+        path: &std::path::Path,
+        explicit: bool,
+        report: typos::report::Report,
+    ) -> Result<bool, typos::Error> {
+        self.check_file(path, explicit, report)
+    }
+}
+
+impl<'p> Checks for typos::checks::ParseWords<'p> {
+    fn check_filename(
+        &self,
+        path: &std::path::Path,
+        report: typos::report::Report,
+    ) -> Result<bool, typos::Error> {
+        self.check_filename(path, report)
+    }
+
+    fn check_file(
+        &self,
+        path: &std::path::Path,
+        explicit: bool,
+        report: typos::report::Report,
+    ) -> Result<bool, typos::Error> {
+        self.check_file(path, explicit, report)
+    }
+}
+
+impl<'d, 'p> Checks for typos::checks::Checks<'d, 'p> {
+    fn check_filename(
+        &self,
+        path: &std::path::Path,
+        report: typos::report::Report,
+    ) -> Result<bool, typos::Error> {
+        self.check_filename(path, report)
+    }
+
+    fn check_file(
+        &self,
+        path: &std::path::Path,
+        explicit: bool,
+        report: typos::report::Report,
+    ) -> Result<bool, typos::Error> {
+        self.check_file(path, explicit, report)
+    }
+}
+
+fn init_logging(level: Option<log::Level>) {
    if let Some(level) = level {
        let mut builder = env_logger::Builder::new();

@@ -274,18 +358,18 @@ pub fn init_logging(level: Option<log::Level>) {

 fn check_entry(
    entry: Result<ignore::DirEntry, ignore::Error>,
-    args: &Args,
-    checks: &typos::checks::Checks,
+    format: Format,
+    checks: &dyn Checks,
 ) -> Result<bool, anyhow::Error> {
    let mut typos_found = false;

    let entry = entry?;
    if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
        let explicit = entry.depth() == 0;
-        if checks.check_filename(entry.path(), args.format.report())? {
+        if checks.check_filename(entry.path(), format.report())? {
            typos_found = true;
        }
-        if checks.check_file(entry.path(), explicit, args.format.report())? {
+        if checks.check_file(entry.path(), explicit, format.report())? {
            typos_found = true;
        }
    }
@@ -332,11 +416,11 @@ fn run() -> Result<i32, anyhow::Error> {
            .include_chars(config.default.identifier_include_chars().to_owned())
            .build();

-        let checks = typos::checks::CheckSettings::new()
+        let mut settings = typos::checks::TyposSettings::new();
+        settings
            .check_filenames(config.default.check_filename())
            .check_files(config.default.check_file())
-            .binary(config.files.binary())
-            .build(&dictionary, &parser);
+            .binary(config.files.binary());

        let mut walk = ignore::WalkBuilder::new(path);
        walk.hidden(config.files.ignore_hidden())
@@ -345,8 +429,24 @@ fn run() -> Result<i32, anyhow::Error> {
            .git_ignore(config.files.ignore_vcs())
            .git_exclude(config.files.ignore_vcs())
            .parents(config.files.ignore_parent());
+        if args.files {
            for entry in walk.build() {
-            match check_entry(entry, &args, &checks) {
+                match entry {
+                    Ok(entry) => {
+                        let msg = typos::report::File::new(entry.path());
+                        args.format.report()(msg.into());
+                    }
+                    Err(err) => {
+                        let msg = typos::report::Error::new(err.to_string());
+                        args.format.report()(msg.into());
+                        errors_found = true
+                    }
+                }
+            }
+        } else if args.identifiers {
+            let checks = settings.build_identifier_parser(&parser);
+            for entry in walk.build() {
+                match check_entry(entry, args.format, &checks) {
                    Ok(true) => typos_found = true,
                    Err(err) => {
                        let msg = typos::report::Error::new(err.to_string());
@@ -356,6 +456,33 @@ fn run() -> Result<i32, anyhow::Error> {
                    _ => (),
                }
            }
+        } else if args.words {
+            let checks = settings.build_word_parser(&parser);
+            for entry in walk.build() {
+                match check_entry(entry, args.format, &checks) {
+                    Ok(true) => typos_found = true,
+                    Err(err) => {
+                        let msg = typos::report::Error::new(err.to_string());
+                        args.format.report()(msg.into());
+                        errors_found = true
+                    }
+                    _ => (),
+                }
+            }
+        } else {
+            let checks = settings.build_checks(&dictionary, &parser);
+            for entry in walk.build() {
+                match check_entry(entry, args.format, &checks) {
+                    Ok(true) => typos_found = true,
+                    Err(err) => {
+                        let msg = typos::report::Error::new(err.to_string());
+                        args.format.report()(msg.into());
+                        errors_found = true
+                    }
+                    _ => (),
+                }
+            }
+        }
    }

    if errors_found {
--- a/typos/src/checks.rs
+++ b/typos/src/checks.rs
@@ -5,13 +5,13 @@ use crate::tokens;
 use crate::Dictionary;

 #[derive(Debug, Clone, PartialEq, Eq)]
-pub struct CheckSettings {
+pub struct TyposSettings {
    check_filenames: bool,
    check_files: bool,
    binary: bool,
 }

-impl CheckSettings {
+impl TyposSettings {
    pub fn new() -> Self {
        Default::default()
    }
@@ -31,7 +31,7 @@ impl CheckSettings {
        self
    }

-    pub fn build<'d, 'p>(
+    pub fn build_checks<'d, 'p>(
        &self,
        dictionary: &'d dyn Dictionary,
        parser: &'p tokens::Parser,
@@ -44,9 +44,27 @@ impl CheckSettings {
            binary: self.binary,
        }
    }
+
+    pub fn build_identifier_parser<'p>(&self, parser: &'p tokens::Parser) -> ParseIdentifiers<'p> {
+        ParseIdentifiers {
+            parser,
+            check_filenames: self.check_filenames,
+            check_files: self.check_files,
+            binary: self.binary,
+        }
+    }
+
+    pub fn build_word_parser<'p>(&self, parser: &'p tokens::Parser) -> ParseWords<'p> {
+        ParseWords {
+            parser,
+            check_filenames: self.check_filenames,
+            check_files: self.check_files,
+            binary: self.binary,
+        }
+    }
 }

-impl Default for CheckSettings {
+impl Default for TyposSettings {
    fn default() -> Self {
        Self {
            check_filenames: true,
@@ -56,6 +74,176 @@ impl Default for CheckSettings {
    }
 }

+#[derive(Clone)]
+pub struct ParseIdentifiers<'p> {
+    parser: &'p tokens::Parser,
+    check_filenames: bool,
+    check_files: bool,
+    binary: bool,
+}
+
+impl<'p> ParseIdentifiers<'p> {
+    pub fn check_filename(
+        &self,
+        path: &std::path::Path,
+        report: report::Report,
+    ) -> Result<bool, crate::Error> {
+        let typos_found = false;
+
+        if !self.check_filenames {
+            return Ok(typos_found);
+        }
+
+        for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
+            let msg = report::Parse {
+                path,
+                kind: report::ParseKind::Identifier,
+                data: self.parser.parse(part).map(|i| i.token()).collect(),
+                non_exhaustive: (),
+            };
+            report(msg.into());
+        }
+
+        Ok(typos_found)
+    }
+
+    pub fn check_file(
+        &self,
+        path: &std::path::Path,
+        explicit: bool,
+        report: report::Report,
+    ) -> Result<bool, crate::Error> {
+        let typos_found = false;
+
+        if !self.check_files {
+            return Ok(typos_found);
+        }
+
+        let buffer = std::fs::read(path)
+            .map_err(|e| crate::ErrorKind::IoError.into_error().with_source(e))?;
+        if !explicit && !self.binary && is_binary(&buffer) {
+            let msg = report::BinaryFile {
+                path,
+                non_exhaustive: (),
+            };
+            report(msg.into());
+            return Ok(typos_found);
+        }
+
+        for line in buffer.lines() {
+            let msg = report::Parse {
+                path,
+                kind: report::ParseKind::Identifier,
+                data: self.parser.parse_bytes(line).map(|i| i.token()).collect(),
+                non_exhaustive: (),
+            };
+            report(msg.into());
+        }
+
+        Ok(typos_found)
+    }
+}
+
+impl std::fmt::Debug for ParseIdentifiers<'_> {
+    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
+        fmt.debug_struct("ParseIdentifiers")
+            .field("parser", self.parser)
+            .field("check_filenames", &self.check_filenames)
+            .field("check_files", &self.check_files)
+            .field("binary", &self.binary)
+            .finish()
+    }
+}
+
+#[derive(Clone)]
+pub struct ParseWords<'p> {
+    parser: &'p tokens::Parser,
+    check_filenames: bool,
+    check_files: bool,
+    binary: bool,
+}
+
+impl<'p> ParseWords<'p> {
+    pub fn check_filename(
+        &self,
+        path: &std::path::Path,
+        report: report::Report,
+    ) -> Result<bool, crate::Error> {
+        let typos_found = false;
+
+        if !self.check_filenames {
+            return Ok(typos_found);
+        }
+
+        for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
+            let msg = report::Parse {
+                path,
+                kind: report::ParseKind::Word,
+                data: self
+                    .parser
+                    .parse(part)
+                    .flat_map(|ident| ident.split().map(|i| i.token()))
+                    .collect(),
+                non_exhaustive: (),
+            };
+            report(msg.into());
+        }
+
+        Ok(typos_found)
+    }
+
+    pub fn check_file(
+        &self,
+        path: &std::path::Path,
+        explicit: bool,
+        report: report::Report,
+    ) -> Result<bool, crate::Error> {
+        let typos_found = false;
+
+        if !self.check_files {
+            return Ok(typos_found);
+        }
+
+        let buffer = std::fs::read(path)
+            .map_err(|e| crate::ErrorKind::IoError.into_error().with_source(e))?;
+        if !explicit && !self.binary && is_binary(&buffer) {
+            let msg = report::BinaryFile {
+                path,
+                non_exhaustive: (),
+            };
+            report(msg.into());
+            return Ok(typos_found);
+        }
+
+        for line in buffer.lines() {
+            let msg = report::Parse {
+                path,
+                kind: report::ParseKind::Word,
+                data: self
+                    .parser
+                    .parse_bytes(line)
+                    .flat_map(|ident| ident.split().map(|i| i.token()))
+                    .collect(),
+                non_exhaustive: (),
+            };
+            report(msg.into());
+        }
+
+        Ok(typos_found)
+    }
+}
+
+impl std::fmt::Debug for ParseWords<'_> {
+    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
+        fmt.debug_struct("ParseWords")
+            .field("parser", self.parser)
+            .field("check_filenames", &self.check_filenames)
+            .field("check_files", &self.check_files)
+            .field("binary", &self.binary)
+            .finish()
+    }
+}
+
 #[derive(Clone)]
 pub struct Checks<'d, 'p> {
    dictionary: &'d dyn Dictionary,
@@ -122,8 +310,7 @@ impl<'d, 'p> Checks<'d, 'p> {

        let buffer = std::fs::read(path)
            .map_err(|e| crate::ErrorKind::IoError.into_error().with_source(e))?;
-        let null_max = std::cmp::min(buffer.len(), 1024);
-        if !explicit && !self.binary && buffer[0..null_max].find_byte(b'\0').is_some() {
+        if !explicit && !self.binary && is_binary(&buffer) {
            let msg = report::BinaryFile {
                path,
                non_exhaustive: (),
@@ -183,3 +370,8 @@ impl std::fmt::Debug for Checks<'_, '_> {
            .finish()
    }
 }
+
+fn is_binary(buffer: &[u8]) -> bool {
+    let null_max = std::cmp::min(buffer.len(), 1024);
+    buffer[0..null_max].find_byte(b'\0').is_some()
+}
--- a/typos/src/report.rs
+++ b/typos/src/report.rs
@@ -8,8 +8,12 @@ pub enum Message<'m> {
    BinaryFile(BinaryFile<'m>),
    Correction(Correction<'m>),
    FilenameCorrection(FilenameCorrection<'m>),
+    File(File<'m>),
+    Parse(Parse<'m>),
    PathError(PathError<'m>),
    Error(Error),
+    #[serde(skip)]
+    __NonExhaustive,
 }

 #[derive(Clone, Debug, serde::Serialize, derive_more::Display)]
@@ -42,6 +46,39 @@ pub struct FilenameCorrection<'m> {
    pub(crate) non_exhaustive: (),
 }

+#[derive(Copy, Clone, Debug, serde::Serialize)]
+pub enum ParseKind {
+    Identifier,
+    Word,
+    #[doc(hidden)]
+    __NonExhaustive,
+}
+
+#[derive(Clone, Debug, serde::Serialize)]
+pub struct File<'m> {
+    pub path: &'m std::path::Path,
+    #[serde(skip)]
+    pub(crate) non_exhaustive: (),
+}
+
+impl<'m> File<'m> {
+    pub fn new(path: &'m std::path::Path) -> Self {
+        Self {
+            path,
+            non_exhaustive: (),
+        }
+    }
+}
+
+#[derive(Clone, Debug, serde::Serialize)]
+pub struct Parse<'m> {
+    pub path: &'m std::path::Path,
+    pub kind: ParseKind,
+    pub data: Vec<&'m str>,
+    #[serde(skip)]
+    pub(crate) non_exhaustive: (),
+}
+
 #[derive(Clone, Debug, serde::Serialize)]
 pub struct PathError<'m> {
    pub path: &'m std::path::Path,
@@ -88,12 +125,21 @@ pub fn print_brief(msg: Message) {
        Message::FilenameCorrection(msg) => {
            println!("{}: {} -> {}", msg.path.display(), msg.typo, msg.correction);
        }
+        Message::File(msg) => {
+            println!("{}", msg.path.display());
+        }
+        Message::Parse(msg) => {
+            println!("{}", itertools::join(msg.data.iter(), " "));
+        }
        Message::PathError(msg) => {
            println!("{}: {}", msg.path.display(), msg.msg);
        }
        Message::Error(msg) => {
            println!("{}", msg.msg);
        }
+        Message::__NonExhaustive => {
+            unreachable!("Non-creatable case");
+        }
    }
 }

@@ -111,12 +157,21 @@ pub fn print_long(msg: Message) {
                msg.correction
            );
        }
+        Message::File(msg) => {
+            println!("{}", msg.path.display());
+        }
+        Message::Parse(msg) => {
+            println!("{}", itertools::join(msg.data.iter(), " "));
+        }
        Message::PathError(msg) => {
            println!("{}: {}", msg.path.display(), msg.msg);
        }
        Message::Error(msg) => {
            println!("{}", msg.msg);
        }
+        Message::__NonExhaustive => {
+            unreachable!("Non-creatable case");
+        }
    }
 }

--- a/typos/src/tokens.rs
+++ b/typos/src/tokens.rs
@@ -123,7 +123,7 @@ impl<'t> Identifier<'t> {
        Self { token, offset }
    }

-    pub fn token(&self) -> &str {
+    pub fn token(&self) -> &'t str {
        self.token
    }

@@ -135,7 +135,7 @@ impl<'t> Identifier<'t> {
        self.offset
    }

-    pub fn split(&self) -> impl Iterator<Item = Word<'_>> {
+    pub fn split(&self) -> impl Iterator<Item = Word<'t>> {
        split_ident(self.token, self.offset)
    }
 }
@@ -177,7 +177,7 @@ impl<'t> Word<'t> {
        }
    }

-    pub fn token(&self) -> &str {
+    pub fn token(&self) -> &'t str {
        self.token
    }