Merge pull request #652 from epage/type

feat(cli): '--file-types' flag to help debug detection issues
This commit is contained in:
Ed Page 2023-01-13 21:17:39 -06:00 committed by GitHub
commit 926bad3f6d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 136 additions and 54 deletions

16
Cargo.lock generated
View file

@ -196,12 +196,12 @@ dependencies = [
[[package]] [[package]]
name = "clap" name = "clap"
version = "4.0.32" version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7db700bc935f9e43e88d00b0850dae18a63773cfbec6d8e070fccf7fef89a39" checksum = "aa91278560fc226a5d9d736cc21e485ff9aad47d26b8ffe1f54cba868b684b9f"
dependencies = [ dependencies = [
"bitflags", "bitflags",
"clap_derive 4.0.21", "clap_derive 4.1.0",
"clap_lex 0.3.0", "clap_lex 0.3.0",
"is-terminal", "is-terminal",
"once_cell", "once_cell",
@ -215,7 +215,7 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23e2b6c3dcdb73299f48ae05b294da14e2f560b3ed2c09e742269eb1b22af231" checksum = "23e2b6c3dcdb73299f48ae05b294da14e2f560b3ed2c09e742269eb1b22af231"
dependencies = [ dependencies = [
"clap 4.0.32", "clap 4.1.0",
"log", "log",
] ]
@ -234,9 +234,9 @@ dependencies = [
[[package]] [[package]]
name = "clap_derive" name = "clap_derive"
version = "4.0.21" version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014" checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8"
dependencies = [ dependencies = [
"heck", "heck",
"proc-macro-error", "proc-macro-error",
@ -303,7 +303,7 @@ version = "0.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64a9572308414bfdbda25e41cdd4a15a90e68d93f2d5a66e3ff6e1cdc856f923" checksum = "64a9572308414bfdbda25e41cdd4a15a90e68d93f2d5a66e3ff6e1cdc856f923"
dependencies = [ dependencies = [
"clap 4.0.32", "clap 4.1.0",
"concolor", "concolor",
] ]
@ -1619,7 +1619,7 @@ dependencies = [
"assert_fs", "assert_fs",
"atty", "atty",
"bstr 1.1.0", "bstr 1.1.0",
"clap 4.0.32", "clap 4.1.0",
"clap-verbosity-flag", "clap-verbosity-flag",
"concolor", "concolor",
"concolor-clap", "concolor-clap",

View file

@ -65,7 +65,7 @@ typos-dict = { version = "^0.9", path = "crates/typos-dict", optional = true }
typos-vars = { version = "^0.8", path = "crates/typos-vars", optional = true } typos-vars = { version = "^0.8", path = "crates/typos-vars", optional = true }
unicase = "2.6" unicase = "2.6"
anyhow = "1.0" anyhow = "1.0"
clap = { version = "4.0.32", features = ["derive"] } clap = { version = "4.1.0", features = ["derive"] }
clap-verbosity-flag = "2.0" clap-verbosity-flag = "2.0"
ignore = "0.4" ignore = "0.4"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }

View file

@ -46,51 +46,55 @@ impl Default for Format {
)] )]
#[command(group = clap::ArgGroup::new("mode").multiple(false))] #[command(group = clap::ArgGroup::new("mode").multiple(false))]
pub(crate) struct Args { pub(crate) struct Args {
#[arg(default_value = ".")]
/// Paths to check with `-` for stdin /// Paths to check with `-` for stdin
#[arg(default_value = ".")]
pub(crate) path: Vec<std::path::PathBuf>, pub(crate) path: Vec<std::path::PathBuf>,
#[arg(short = 'c', long = "config")]
/// Custom config file /// Custom config file
#[arg(short = 'c', long = "config")]
pub(crate) custom_config: Option<std::path::PathBuf>, pub(crate) custom_config: Option<std::path::PathBuf>,
#[arg(long)]
/// Ignore implicit configuration files. /// Ignore implicit configuration files.
#[arg(long)]
pub(crate) isolated: bool, pub(crate) isolated: bool,
#[arg(long, group = "mode")]
/// Print a diff of what would change /// Print a diff of what would change
#[arg(long, group = "mode")]
pub(crate) diff: bool, pub(crate) diff: bool,
#[arg(long, short = 'w', group = "mode")]
/// Write fixes out /// Write fixes out
#[arg(long, short = 'w', group = "mode")]
pub(crate) write_changes: bool, pub(crate) write_changes: bool,
#[arg(long, group = "mode")]
/// Debug: Print each file that would be spellchecked. /// Debug: Print each file that would be spellchecked.
#[arg(long, group = "mode")]
pub(crate) files: bool, pub(crate) files: bool,
/// Debug: Print each file's type
#[arg(long, group = "mode")] #[arg(long, group = "mode")]
pub(crate) file_types: bool,
/// Debug: Print each identifier that would be spellchecked. /// Debug: Print each identifier that would be spellchecked.
#[arg(long, group = "mode")]
pub(crate) identifiers: bool, pub(crate) identifiers: bool,
#[arg(long, group = "mode")]
/// Debug: Print each word that would be spellchecked. /// Debug: Print each word that would be spellchecked.
#[arg(long, group = "mode")]
pub(crate) words: bool, pub(crate) words: bool,
#[arg(long, group = "mode")]
/// Write the current configuration to file with `-` for stdout /// Write the current configuration to file with `-` for stdout
#[arg(long, group = "mode")]
pub(crate) dump_config: Option<std::path::PathBuf>, pub(crate) dump_config: Option<std::path::PathBuf>,
#[arg(long, group = "mode")]
/// Show all supported file types. /// Show all supported file types.
#[arg(long, group = "mode")]
pub(crate) type_list: bool, pub(crate) type_list: bool,
#[arg(long, value_enum, ignore_case = true, default_value("long"))] #[arg(long, value_enum, ignore_case = true, default_value("long"))]
pub(crate) format: Format, pub(crate) format: Format,
#[arg(short = 'j', long = "threads", default_value = "0")]
/// The approximate number of threads to use. /// The approximate number of threads to use.
#[arg(short = 'j', long = "threads", default_value = "0")]
pub(crate) threads: usize, pub(crate) threads: usize,
#[command(flatten)] #[command(flatten)]
@ -106,28 +110,28 @@ pub(crate) struct Args {
#[derive(Debug, Clone, clap::Args)] #[derive(Debug, Clone, clap::Args)]
#[command(rename_all = "kebab-case")] #[command(rename_all = "kebab-case")]
pub(crate) struct FileArgs { pub(crate) struct FileArgs {
#[arg(long, overrides_with("no_binary"))]
/// Search binary files. /// Search binary files.
#[arg(long, overrides_with("no_binary"))]
binary: bool, binary: bool,
#[arg(long, overrides_with("binary"), hide = true)] #[arg(long, overrides_with("binary"), hide = true)]
no_binary: bool, no_binary: bool,
#[arg(long, overrides_with("check_filenames"))]
/// Skip verifying spelling in file names. /// Skip verifying spelling in file names.
#[arg(long, overrides_with("check_filenames"))]
no_check_filenames: bool, no_check_filenames: bool,
#[arg(long, overrides_with("no_check_filenames"), hide = true)] #[arg(long, overrides_with("no_check_filenames"), hide = true)]
check_filenames: bool, check_filenames: bool,
#[arg(long, overrides_with("check_files"))]
/// Skip verifying spelling in files. /// Skip verifying spelling in files.
#[arg(long, overrides_with("check_files"))]
no_check_files: bool, no_check_files: bool,
#[arg(long, overrides_with("no_check_files"), hide = true)] #[arg(long, overrides_with("no_check_files"), hide = true)]
check_files: bool, check_files: bool,
#[arg(long, overrides_with("no_unicode"), hide = true)] #[arg(long, overrides_with("no_unicode"), hide = true)]
unicode: bool, unicode: bool,
#[arg(long, overrides_with("unicode"))]
/// Only allow ASCII characters in identifiers /// Only allow ASCII characters in identifiers
#[arg(long, overrides_with("unicode"))]
no_unicode: bool, no_unicode: bool,
#[arg(long)] #[arg(long)]
@ -173,11 +177,11 @@ impl FileArgs {
} }
#[derive(Debug, clap::Args)] #[derive(Debug, clap::Args)]
#[clap(rename_all = "kebab-case")] #[command(rename_all = "kebab-case")]
pub(crate) struct ConfigArgs { pub(crate) struct ConfigArgs {
#[clap(flatten)] #[command(flatten)]
walk: WalkArgs, walk: WalkArgs,
#[clap(flatten)] #[command(flatten)]
overrides: FileArgs, overrides: FileArgs,
} }
@ -192,46 +196,46 @@ impl ConfigArgs {
} }
#[derive(Debug, clap::Args)] #[derive(Debug, clap::Args)]
#[clap(rename_all = "kebab-case")] #[command(rename_all = "kebab-case")]
pub(crate) struct WalkArgs { pub(crate) struct WalkArgs {
#[clap(long, name = "GLOB")]
/// Ignore files & directories matching the glob. /// Ignore files & directories matching the glob.
#[arg(long, value_name = "GLOB")]
exclude: Vec<String>, exclude: Vec<String>,
#[clap(long, overrides_with("no_hidden"))]
/// Search hidden files and directories. /// Search hidden files and directories.
#[arg(long, overrides_with("no_hidden"))]
hidden: bool, hidden: bool,
#[clap(long, overrides_with("hidden"), hide = true)] #[arg(long, overrides_with("hidden"), hide = true)]
no_hidden: bool, no_hidden: bool,
#[clap(long, overrides_with("ignore"))]
/// Don't respect ignore files. /// Don't respect ignore files.
#[arg(long, overrides_with("ignore"))]
no_ignore: bool, no_ignore: bool,
#[clap(long, overrides_with("no_ignore"), hide = true)] #[arg(long, overrides_with("no_ignore"), hide = true)]
ignore: bool, ignore: bool,
#[clap(long, overrides_with("ignore_dot"))]
/// Don't respect .ignore files. /// Don't respect .ignore files.
#[arg(long, overrides_with("ignore_dot"))]
no_ignore_dot: bool, no_ignore_dot: bool,
#[clap(long, overrides_with("no_ignore_dot"), hide = true)] #[arg(long, overrides_with("no_ignore_dot"), hide = true)]
ignore_dot: bool, ignore_dot: bool,
#[clap(long, overrides_with("ignore_global"))]
/// Don't respect global ignore files. /// Don't respect global ignore files.
#[arg(long, overrides_with("ignore_global"))]
no_ignore_global: bool, no_ignore_global: bool,
#[clap(long, overrides_with("no_ignore_global"), hide = true)] #[arg(long, overrides_with("no_ignore_global"), hide = true)]
ignore_global: bool, ignore_global: bool,
#[clap(long, overrides_with("ignore_parent"))]
/// Don't respect ignore files in parent directories. /// Don't respect ignore files in parent directories.
#[arg(long, overrides_with("ignore_parent"))]
no_ignore_parent: bool, no_ignore_parent: bool,
#[clap(long, overrides_with("no_ignore_parent"), hide = true)] #[arg(long, overrides_with("no_ignore_parent"), hide = true)]
ignore_parent: bool, ignore_parent: bool,
#[clap(long, overrides_with("ignore_vcs"))]
/// Don't respect ignore files in vcs directories. /// Don't respect ignore files in vcs directories.
#[arg(long, overrides_with("ignore_vcs"))]
no_ignore_vcs: bool, no_ignore_vcs: bool,
#[clap(long, overrides_with("no_ignore_vcs"), hide = true)] #[arg(long, overrides_with("no_ignore_vcs"), hide = true)]
ignore_vcs: bool, ignore_vcs: bool,
} }

View file

@ -227,6 +227,8 @@ fn run_checks(
let selected_checks: &dyn typos_cli::file::FileChecker = if args.files { let selected_checks: &dyn typos_cli::file::FileChecker = if args.files {
&typos_cli::file::FoundFiles &typos_cli::file::FoundFiles
} else if args.file_types {
&typos_cli::file::FileTypes
} else if args.identifiers { } else if args.identifiers {
&typos_cli::file::Identifiers &typos_cli::file::Identifiers
} else if args.words { } else if args.words {

View file

@ -89,6 +89,14 @@ impl Report for PrintBrief {
log::info!("{}", msg); log::info!("{}", msg);
} }
Message::Typo(msg) => print_brief_correction(msg, self.stdout_palette)?, Message::Typo(msg) => print_brief_correction(msg, self.stdout_palette)?,
Message::FileType(msg) => {
writeln!(
io::stdout(),
"{}:{}",
msg.path.display(),
msg.file_type.unwrap_or("-")
)?;
}
Message::File(msg) => { Message::File(msg) => {
writeln!(io::stdout(), "{}", msg.path.display())?; writeln!(io::stdout(), "{}", msg.path.display())?;
} }
@ -116,6 +124,14 @@ impl Report for PrintLong {
log::info!("{}", msg); log::info!("{}", msg);
} }
Message::Typo(msg) => print_long_correction(msg, self.stdout_palette)?, Message::Typo(msg) => print_long_correction(msg, self.stdout_palette)?,
Message::FileType(msg) => {
writeln!(
io::stdout(),
"{}:{}",
msg.path.display(),
msg.file_type.unwrap_or("-")
)?;
}
Message::File(msg) => { Message::File(msg) => {
writeln!(io::stdout(), "{}", msg.path.display())?; writeln!(io::stdout(), "{}", msg.path.display())?;
} }

View file

@ -352,6 +352,36 @@ impl FileChecker for Words {
} }
} }
/// File checker that reports each file's detected type rather than spellchecking it.
#[derive(Debug, Clone, Copy)]
pub struct FileTypes;

impl FileChecker for FileTypes {
    /// Reports `path` with `policy.file_type`, or as a binary file.
    ///
    /// When `policy.binary` is set the file is never read and its type is
    /// reported directly. Otherwise the file is read; if its content is
    /// detected as binary and `explicit` is `false`, a `BinaryFile` message is
    /// reported instead of the file type.
    ///
    /// # Errors
    ///
    /// Propagates any `std::io::Error` returned by `read_file` or by
    /// `reporter.report`.
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        policy: &crate::policy::Policy,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        // `policy.binary` is consulted before any read so the cost of walking
        // can easily be compared against the cost of reading.
        let skip_as_binary = if policy.binary {
            false
        } else {
            let (_contents, content_type) = read_file(path, reporter)?;
            !explicit && content_type.is_binary()
        };

        if skip_as_binary {
            reporter.report(report::BinaryFile { path }.into())?;
        } else {
            reporter.report(report::FileType::new(path, policy.file_type).into())?;
        }
        Ok(())
    }
}
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub struct FoundFiles; pub struct FoundFiles;

View file

@ -91,10 +91,11 @@ impl<'s> ConfigEngine<'s> {
pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_> { pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_> {
debug_assert!(path.is_absolute(), "{} is not absolute", path.display()); debug_assert!(path.is_absolute(), "{} is not absolute", path.display());
let dir = self.get_dir(path).expect("`walk()` should be called first"); let dir = self.get_dir(path).expect("`walk()` should be called first");
let file_config = dir.get_file_config(path); let (file_type, file_config) = dir.get_file_config(path);
Policy { Policy {
check_filenames: file_config.check_filenames, check_filenames: file_config.check_filenames,
check_files: file_config.check_files, check_files: file_config.check_files,
file_type,
binary: file_config.binary, binary: file_config.binary,
tokenizer: self.get_tokenizer(&file_config), tokenizer: self.get_tokenizer(&file_config),
dict: self.get_dict(&file_config), dict: self.get_dict(&file_config),
@ -299,21 +300,23 @@ struct DirConfig {
} }
impl DirConfig { impl DirConfig {
fn get_file_config(&self, path: &std::path::Path) -> FileConfig { fn get_file_config(&self, path: &std::path::Path) -> (Option<&str>, FileConfig) {
let name = self.type_matcher.file_matched(path); let name = self.type_matcher.file_matched(path);
name.and_then(|name| { let config = name
log::debug!("{}: `{}` policy", path.display(), name); .and_then(|name| {
self.types.get(name).copied() log::debug!("{}: `{}` policy", path.display(), name);
}) self.types.get(name).copied()
.unwrap_or_else(|| { })
log::debug!( .unwrap_or_else(|| {
"{}: default policy for `{}` file type", log::debug!(
path.display(), "{}: default policy for `{}` file type",
name.unwrap_or("<unknown>") path.display(),
); name.unwrap_or("<unknown>")
self.default );
}) self.default
});
(name, config)
} }
} }
@ -331,6 +334,7 @@ struct FileConfig {
pub struct Policy<'t, 'd> { pub struct Policy<'t, 'd> {
pub check_filenames: bool, pub check_filenames: bool,
pub check_files: bool, pub check_files: bool,
pub file_type: Option<&'d str>,
pub binary: bool, pub binary: bool,
pub tokenizer: &'t typos::tokens::Tokenizer, pub tokenizer: &'t typos::tokens::Tokenizer,
pub dict: &'d dyn typos::Dictionary, pub dict: &'d dyn typos::Dictionary,
@ -351,6 +355,7 @@ impl<'t, 'd> Default for Policy<'t, 'd> {
Self { Self {
check_filenames: true, check_filenames: true,
check_files: true, check_files: true,
file_type: None,
binary: false, binary: false,
tokenizer: &DEFAULT_TOKENIZER, tokenizer: &DEFAULT_TOKENIZER,
dict: &DEFAULT_DICT, dict: &DEFAULT_DICT,

View file

@ -13,6 +13,7 @@ pub trait Report: Send + Sync {
pub enum Message<'m> { pub enum Message<'m> {
BinaryFile(BinaryFile<'m>), BinaryFile(BinaryFile<'m>),
Typo(Typo<'m>), Typo(Typo<'m>),
FileType(FileType<'m>),
File(File<'m>), File(File<'m>),
Parse(Parse<'m>), Parse(Parse<'m>),
Error(Error<'m>), Error(Error<'m>),
@ -23,6 +24,7 @@ impl<'m> Message<'m> {
match self { match self {
Message::BinaryFile(_) => false, Message::BinaryFile(_) => false,
Message::Typo(c) => c.corrections.is_correction(), Message::Typo(c) => c.corrections.is_correction(),
Message::FileType(_) => false,
Message::File(_) => false, Message::File(_) => false,
Message::Parse(_) => false, Message::Parse(_) => false,
Message::Error(_) => false, Message::Error(_) => false,
@ -33,6 +35,7 @@ impl<'m> Message<'m> {
match self { match self {
Message::BinaryFile(_) => false, Message::BinaryFile(_) => false,
Message::Typo(_) => false, Message::Typo(_) => false,
Message::FileType(_) => false,
Message::File(_) => false, Message::File(_) => false,
Message::Parse(_) => false, Message::Parse(_) => false,
Message::Error(_) => true, Message::Error(_) => true,
@ -144,6 +147,28 @@ pub enum ParseKind {
Word, Word,
} }
/// Report message pairing a file's path with the file type detected for it.
///
/// Serializable via `serde::Serialize`; marked `#[non_exhaustive]`.
#[derive(Clone, Debug, serde::Serialize, derive_setters::Setters)]
#[non_exhaustive]
pub struct FileType<'m> {
/// Path of the file this message describes.
pub path: &'m std::path::Path,
/// Name of the file type, or `None` when there is none.
// NOTE(review): presumably `None` means no type definition matched the path; confirm against the caller.
pub file_type: Option<&'m str>,
}
impl<'m> FileType<'m> {
pub fn new(path: &'m std::path::Path, file_type: Option<&'m str>) -> Self {
Self { path, file_type }
}
}
impl<'m> Default for FileType<'m> {
    /// Returns a placeholder message whose path is `-` and whose file type is unset.
    fn default() -> Self {
        let path = std::path::Path::new("-");
        Self {
            path,
            file_type: None,
        }
    }
}
#[derive(Clone, Debug, serde::Serialize, derive_setters::Setters)] #[derive(Clone, Debug, serde::Serialize, derive_setters::Setters)]
#[non_exhaustive] #[non_exhaustive]
pub struct File<'m> { pub struct File<'m> {