Merge pull request #652 from epage/type

feat(cli): '--file-types' flag to help debug detection issues
This commit is contained in:
Ed Page 2023-01-13 21:17:39 -06:00 committed by GitHub
commit 926bad3f6d
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 136 additions and 54 deletions

16
Cargo.lock generated
View file

@ -196,12 +196,12 @@ dependencies = [
[[package]]
name = "clap"
version = "4.0.32"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7db700bc935f9e43e88d00b0850dae18a63773cfbec6d8e070fccf7fef89a39"
checksum = "aa91278560fc226a5d9d736cc21e485ff9aad47d26b8ffe1f54cba868b684b9f"
dependencies = [
"bitflags",
"clap_derive 4.0.21",
"clap_derive 4.1.0",
"clap_lex 0.3.0",
"is-terminal",
"once_cell",
@ -215,7 +215,7 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23e2b6c3dcdb73299f48ae05b294da14e2f560b3ed2c09e742269eb1b22af231"
dependencies = [
"clap 4.0.32",
"clap 4.1.0",
"log",
]
@ -234,9 +234,9 @@ dependencies = [
[[package]]
name = "clap_derive"
version = "4.0.21"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014"
checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8"
dependencies = [
"heck",
"proc-macro-error",
@ -303,7 +303,7 @@ version = "0.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64a9572308414bfdbda25e41cdd4a15a90e68d93f2d5a66e3ff6e1cdc856f923"
dependencies = [
"clap 4.0.32",
"clap 4.1.0",
"concolor",
]
@ -1619,7 +1619,7 @@ dependencies = [
"assert_fs",
"atty",
"bstr 1.1.0",
"clap 4.0.32",
"clap 4.1.0",
"clap-verbosity-flag",
"concolor",
"concolor-clap",

View file

@ -65,7 +65,7 @@ typos-dict = { version = "^0.9", path = "crates/typos-dict", optional = true }
typos-vars = { version = "^0.8", path = "crates/typos-vars", optional = true }
unicase = "2.6"
anyhow = "1.0"
clap = { version = "4.0.32", features = ["derive"] }
clap = { version = "4.1.0", features = ["derive"] }
clap-verbosity-flag = "2.0"
ignore = "0.4"
serde = { version = "1.0", features = ["derive"] }

View file

@ -46,51 +46,55 @@ impl Default for Format {
)]
#[command(group = clap::ArgGroup::new("mode").multiple(false))]
pub(crate) struct Args {
#[arg(default_value = ".")]
/// Paths to check with `-` for stdin
#[arg(default_value = ".")]
pub(crate) path: Vec<std::path::PathBuf>,
#[arg(short = 'c', long = "config")]
/// Custom config file
#[arg(short = 'c', long = "config")]
pub(crate) custom_config: Option<std::path::PathBuf>,
#[arg(long)]
/// Ignore implicit configuration files.
#[arg(long)]
pub(crate) isolated: bool,
#[arg(long, group = "mode")]
/// Print a diff of what would change
#[arg(long, group = "mode")]
pub(crate) diff: bool,
#[arg(long, short = 'w', group = "mode")]
/// Write fixes out
#[arg(long, short = 'w', group = "mode")]
pub(crate) write_changes: bool,
#[arg(long, group = "mode")]
/// Debug: Print each file that would be spellchecked.
#[arg(long, group = "mode")]
pub(crate) files: bool,
/// Debug: Print each file's type
#[arg(long, group = "mode")]
pub(crate) file_types: bool,
/// Debug: Print each identifier that would be spellchecked.
#[arg(long, group = "mode")]
pub(crate) identifiers: bool,
#[arg(long, group = "mode")]
/// Debug: Print each word that would be spellchecked.
#[arg(long, group = "mode")]
pub(crate) words: bool,
#[arg(long, group = "mode")]
/// Write the current configuration to file with `-` for stdout
#[arg(long, group = "mode")]
pub(crate) dump_config: Option<std::path::PathBuf>,
#[arg(long, group = "mode")]
/// Show all supported file types.
#[arg(long, group = "mode")]
pub(crate) type_list: bool,
#[arg(long, value_enum, ignore_case = true, default_value("long"))]
pub(crate) format: Format,
#[arg(short = 'j', long = "threads", default_value = "0")]
/// The approximate number of threads to use.
#[arg(short = 'j', long = "threads", default_value = "0")]
pub(crate) threads: usize,
#[command(flatten)]
@ -106,28 +110,28 @@ pub(crate) struct Args {
#[derive(Debug, Clone, clap::Args)]
#[command(rename_all = "kebab-case")]
pub(crate) struct FileArgs {
#[arg(long, overrides_with("no_binary"))]
/// Search binary files.
#[arg(long, overrides_with("no_binary"))]
binary: bool,
#[arg(long, overrides_with("binary"), hide = true)]
no_binary: bool,
#[arg(long, overrides_with("check_filenames"))]
/// Skip verifying spelling in file names.
#[arg(long, overrides_with("check_filenames"))]
no_check_filenames: bool,
#[arg(long, overrides_with("no_check_filenames"), hide = true)]
check_filenames: bool,
#[arg(long, overrides_with("check_files"))]
/// Skip verifying spelling in files.
#[arg(long, overrides_with("check_files"))]
no_check_files: bool,
#[arg(long, overrides_with("no_check_files"), hide = true)]
check_files: bool,
#[arg(long, overrides_with("no_unicode"), hide = true)]
unicode: bool,
#[arg(long, overrides_with("unicode"))]
/// Only allow ASCII characters in identifiers
#[arg(long, overrides_with("unicode"))]
no_unicode: bool,
#[arg(long)]
@ -173,11 +177,11 @@ impl FileArgs {
}
#[derive(Debug, clap::Args)]
#[clap(rename_all = "kebab-case")]
#[command(rename_all = "kebab-case")]
pub(crate) struct ConfigArgs {
#[clap(flatten)]
#[command(flatten)]
walk: WalkArgs,
#[clap(flatten)]
#[command(flatten)]
overrides: FileArgs,
}
@ -192,46 +196,46 @@ impl ConfigArgs {
}
#[derive(Debug, clap::Args)]
#[clap(rename_all = "kebab-case")]
#[command(rename_all = "kebab-case")]
pub(crate) struct WalkArgs {
#[clap(long, name = "GLOB")]
/// Ignore files & directories matching the glob.
#[arg(long, value_name = "GLOB")]
exclude: Vec<String>,
#[clap(long, overrides_with("no_hidden"))]
/// Search hidden files and directories.
#[arg(long, overrides_with("no_hidden"))]
hidden: bool,
#[clap(long, overrides_with("hidden"), hide = true)]
#[arg(long, overrides_with("hidden"), hide = true)]
no_hidden: bool,
#[clap(long, overrides_with("ignore"))]
/// Don't respect ignore files.
#[arg(long, overrides_with("ignore"))]
no_ignore: bool,
#[clap(long, overrides_with("no_ignore"), hide = true)]
#[arg(long, overrides_with("no_ignore"), hide = true)]
ignore: bool,
#[clap(long, overrides_with("ignore_dot"))]
/// Don't respect .ignore files.
#[arg(long, overrides_with("ignore_dot"))]
no_ignore_dot: bool,
#[clap(long, overrides_with("no_ignore_dot"), hide = true)]
#[arg(long, overrides_with("no_ignore_dot"), hide = true)]
ignore_dot: bool,
#[clap(long, overrides_with("ignore_global"))]
/// Don't respect global ignore files.
#[arg(long, overrides_with("ignore_global"))]
no_ignore_global: bool,
#[clap(long, overrides_with("no_ignore_global"), hide = true)]
#[arg(long, overrides_with("no_ignore_global"), hide = true)]
ignore_global: bool,
#[clap(long, overrides_with("ignore_parent"))]
/// Don't respect ignore files in parent directories.
#[arg(long, overrides_with("ignore_parent"))]
no_ignore_parent: bool,
#[clap(long, overrides_with("no_ignore_parent"), hide = true)]
#[arg(long, overrides_with("no_ignore_parent"), hide = true)]
ignore_parent: bool,
#[clap(long, overrides_with("ignore_vcs"))]
/// Don't respect ignore files in vcs directories.
#[arg(long, overrides_with("ignore_vcs"))]
no_ignore_vcs: bool,
#[clap(long, overrides_with("no_ignore_vcs"), hide = true)]
#[arg(long, overrides_with("no_ignore_vcs"), hide = true)]
ignore_vcs: bool,
}

View file

@ -227,6 +227,8 @@ fn run_checks(
let selected_checks: &dyn typos_cli::file::FileChecker = if args.files {
&typos_cli::file::FoundFiles
} else if args.file_types {
&typos_cli::file::FileTypes
} else if args.identifiers {
&typos_cli::file::Identifiers
} else if args.words {

View file

@ -89,6 +89,14 @@ impl Report for PrintBrief {
log::info!("{}", msg);
}
Message::Typo(msg) => print_brief_correction(msg, self.stdout_palette)?,
Message::FileType(msg) => {
writeln!(
io::stdout(),
"{}:{}",
msg.path.display(),
msg.file_type.unwrap_or("-")
)?;
}
Message::File(msg) => {
writeln!(io::stdout(), "{}", msg.path.display())?;
}
@ -116,6 +124,14 @@ impl Report for PrintLong {
log::info!("{}", msg);
}
Message::Typo(msg) => print_long_correction(msg, self.stdout_palette)?,
Message::FileType(msg) => {
writeln!(
io::stdout(),
"{}:{}",
msg.path.display(),
msg.file_type.unwrap_or("-")
)?;
}
Message::File(msg) => {
writeln!(io::stdout(), "{}", msg.path.display())?;
}

View file

@ -352,6 +352,36 @@ impl FileChecker for Words {
}
}
/// Debug checker that reports each file's detected type instead of spellchecking it.
#[derive(Debug, Clone, Copy)]
pub struct FileTypes;

impl FileChecker for FileTypes {
    /// Reports `policy.file_type` for `path` through `reporter`.
    ///
    /// When `policy.binary` is not set, the file is read first; if its content is
    /// detected as binary and the path was not passed explicitly, a `BinaryFile`
    /// message is reported instead of a `FileType` message.
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        policy: &crate::policy::Policy,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        // Check `policy.binary` first so we can easily check performance of walking vs reading
        if !policy.binary {
            let (_buffer, content_type) = read_file(path, reporter)?;
            let skip_as_binary = !explicit && content_type.is_binary();
            if skip_as_binary {
                reporter.report(report::BinaryFile { path }.into())?;
                return Ok(());
            }
        }

        // Both the "binary allowed" and the "readable, non-binary" paths end up here.
        reporter.report(report::FileType::new(path, policy.file_type).into())?;
        Ok(())
    }
}
#[derive(Debug, Clone, Copy)]
pub struct FoundFiles;

View file

@ -91,10 +91,11 @@ impl<'s> ConfigEngine<'s> {
pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_> {
debug_assert!(path.is_absolute(), "{} is not absolute", path.display());
let dir = self.get_dir(path).expect("`walk()` should be called first");
let file_config = dir.get_file_config(path);
let (file_type, file_config) = dir.get_file_config(path);
Policy {
check_filenames: file_config.check_filenames,
check_files: file_config.check_files,
file_type,
binary: file_config.binary,
tokenizer: self.get_tokenizer(&file_config),
dict: self.get_dict(&file_config),
@ -299,21 +300,23 @@ struct DirConfig {
}
impl DirConfig {
fn get_file_config(&self, path: &std::path::Path) -> FileConfig {
fn get_file_config(&self, path: &std::path::Path) -> (Option<&str>, FileConfig) {
let name = self.type_matcher.file_matched(path);
name.and_then(|name| {
log::debug!("{}: `{}` policy", path.display(), name);
self.types.get(name).copied()
})
.unwrap_or_else(|| {
log::debug!(
"{}: default policy for `{}` file type",
path.display(),
name.unwrap_or("<unknown>")
);
self.default
})
let config = name
.and_then(|name| {
log::debug!("{}: `{}` policy", path.display(), name);
self.types.get(name).copied()
})
.unwrap_or_else(|| {
log::debug!(
"{}: default policy for `{}` file type",
path.display(),
name.unwrap_or("<unknown>")
);
self.default
});
(name, config)
}
}
@ -331,6 +334,7 @@ struct FileConfig {
pub struct Policy<'t, 'd> {
pub check_filenames: bool,
pub check_files: bool,
pub file_type: Option<&'d str>,
pub binary: bool,
pub tokenizer: &'t typos::tokens::Tokenizer,
pub dict: &'d dyn typos::Dictionary,
@ -351,6 +355,7 @@ impl<'t, 'd> Default for Policy<'t, 'd> {
Self {
check_filenames: true,
check_files: true,
file_type: None,
binary: false,
tokenizer: &DEFAULT_TOKENIZER,
dict: &DEFAULT_DICT,

View file

@ -13,6 +13,7 @@ pub trait Report: Send + Sync {
pub enum Message<'m> {
BinaryFile(BinaryFile<'m>),
Typo(Typo<'m>),
FileType(FileType<'m>),
File(File<'m>),
Parse(Parse<'m>),
Error(Error<'m>),
@ -23,6 +24,7 @@ impl<'m> Message<'m> {
match self {
Message::BinaryFile(_) => false,
Message::Typo(c) => c.corrections.is_correction(),
Message::FileType(_) => false,
Message::File(_) => false,
Message::Parse(_) => false,
Message::Error(_) => false,
@ -33,6 +35,7 @@ impl<'m> Message<'m> {
match self {
Message::BinaryFile(_) => false,
Message::Typo(_) => false,
Message::FileType(_) => false,
Message::File(_) => false,
Message::Parse(_) => false,
Message::Error(_) => true,
@ -144,6 +147,28 @@ pub enum ParseKind {
Word,
}
/// Report message pairing a path with the name of its file type, if any.
#[derive(Clone, Debug, serde::Serialize, derive_setters::Setters)]
#[non_exhaustive]
pub struct FileType<'m> {
    /// Path of the file the message is about.
    pub path: &'m std::path::Path,
    /// Name of the file type; `None` when no type is known for the path.
    pub file_type: Option<&'m str>,
}

impl<'m> FileType<'m> {
    /// Builds a message from a path and its optional file type name.
    pub fn new(path: &'m std::path::Path, file_type: Option<&'m str>) -> Self {
        FileType { path, file_type }
    }
}

impl<'m> Default for FileType<'m> {
    /// Defaults to the placeholder path `-` with no file type.
    fn default() -> Self {
        Self::new(std::path::Path::new("-"), None)
    }
}
#[derive(Clone, Debug, serde::Serialize, derive_setters::Setters)]
#[non_exhaustive]
pub struct File<'m> {