feat(cli): '--file-types' flag to help debug detection issues

Like #567
This commit is contained in:
Ed Page 2023-01-13 20:59:49 -06:00
parent 222da3d81a
commit b92a8c1eea
6 changed files with 96 additions and 14 deletions

View file

@ -70,6 +70,10 @@ pub(crate) struct Args {
#[arg(long, group = "mode")] #[arg(long, group = "mode")]
pub(crate) files: bool, pub(crate) files: bool,
/// Debug: Print each file's type
#[arg(long, group = "mode")]
pub(crate) file_types: bool,
/// Debug: Print each identifier that would be spellchecked. /// Debug: Print each identifier that would be spellchecked.
#[arg(long, group = "mode")] #[arg(long, group = "mode")]
pub(crate) identifiers: bool, pub(crate) identifiers: bool,

View file

@ -227,6 +227,8 @@ fn run_checks(
let selected_checks: &dyn typos_cli::file::FileChecker = if args.files { let selected_checks: &dyn typos_cli::file::FileChecker = if args.files {
&typos_cli::file::FoundFiles &typos_cli::file::FoundFiles
} else if args.file_types {
&typos_cli::file::FileTypes
} else if args.identifiers { } else if args.identifiers {
&typos_cli::file::Identifiers &typos_cli::file::Identifiers
} else if args.words { } else if args.words {

View file

@ -89,6 +89,14 @@ impl Report for PrintBrief {
log::info!("{}", msg); log::info!("{}", msg);
} }
Message::Typo(msg) => print_brief_correction(msg, self.stdout_palette)?, Message::Typo(msg) => print_brief_correction(msg, self.stdout_palette)?,
Message::FileType(msg) => {
writeln!(
io::stdout(),
"{}:{}",
msg.path.display(),
msg.file_type.unwrap_or("-")
)?;
}
Message::File(msg) => { Message::File(msg) => {
writeln!(io::stdout(), "{}", msg.path.display())?; writeln!(io::stdout(), "{}", msg.path.display())?;
} }
@ -116,6 +124,14 @@ impl Report for PrintLong {
log::info!("{}", msg); log::info!("{}", msg);
} }
Message::Typo(msg) => print_long_correction(msg, self.stdout_palette)?, Message::Typo(msg) => print_long_correction(msg, self.stdout_palette)?,
Message::FileType(msg) => {
writeln!(
io::stdout(),
"{}:{}",
msg.path.display(),
msg.file_type.unwrap_or("-")
)?;
}
Message::File(msg) => { Message::File(msg) => {
writeln!(io::stdout(), "{}", msg.path.display())?; writeln!(io::stdout(), "{}", msg.path.display())?;
} }

View file

@ -352,6 +352,36 @@ impl FileChecker for Words {
} }
} }
/// Debug checker that reports the file type recorded in the policy for each
/// file, rather than spellchecking the file's content.
#[derive(Debug, Clone, Copy)]
pub struct FileTypes;

impl FileChecker for FileTypes {
    /// Reports `path` together with `policy.file_type`.
    ///
    /// When `policy.binary` is not set, the file is read first; if its content
    /// is detected as binary and `explicit` is false, a `BinaryFile` message is
    /// reported in place of the file type.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `read_file` or by the reporter.
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        policy: &crate::policy::Policy,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        // Check `policy.binary` first so we can easily check performance of walking vs reading
        let skipped_as_binary = if policy.binary {
            // No read is performed at all on this path.
            false
        } else {
            let (_buffer, content_type) = read_file(path, reporter)?;
            !explicit && content_type.is_binary()
        };

        if skipped_as_binary {
            reporter.report(report::BinaryFile { path }.into())?;
        } else {
            reporter.report(report::FileType::new(path, policy.file_type).into())?;
        }
        Ok(())
    }
}
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub struct FoundFiles; pub struct FoundFiles;

View file

@ -91,10 +91,11 @@ impl<'s> ConfigEngine<'s> {
pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_> { pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_> {
debug_assert!(path.is_absolute(), "{} is not absolute", path.display()); debug_assert!(path.is_absolute(), "{} is not absolute", path.display());
let dir = self.get_dir(path).expect("`walk()` should be called first"); let dir = self.get_dir(path).expect("`walk()` should be called first");
let file_config = dir.get_file_config(path); let (file_type, file_config) = dir.get_file_config(path);
Policy { Policy {
check_filenames: file_config.check_filenames, check_filenames: file_config.check_filenames,
check_files: file_config.check_files, check_files: file_config.check_files,
file_type,
binary: file_config.binary, binary: file_config.binary,
tokenizer: self.get_tokenizer(&file_config), tokenizer: self.get_tokenizer(&file_config),
dict: self.get_dict(&file_config), dict: self.get_dict(&file_config),
@ -299,21 +300,23 @@ struct DirConfig {
} }
impl DirConfig { impl DirConfig {
fn get_file_config(&self, path: &std::path::Path) -> FileConfig { fn get_file_config(&self, path: &std::path::Path) -> (Option<&str>, FileConfig) {
let name = self.type_matcher.file_matched(path); let name = self.type_matcher.file_matched(path);
name.and_then(|name| { let config = name
log::debug!("{}: `{}` policy", path.display(), name); .and_then(|name| {
self.types.get(name).copied() log::debug!("{}: `{}` policy", path.display(), name);
}) self.types.get(name).copied()
.unwrap_or_else(|| { })
log::debug!( .unwrap_or_else(|| {
"{}: default policy for `{}` file type", log::debug!(
path.display(), "{}: default policy for `{}` file type",
name.unwrap_or("<unknown>") path.display(),
); name.unwrap_or("<unknown>")
self.default );
}) self.default
});
(name, config)
} }
} }
@ -331,6 +334,7 @@ struct FileConfig {
pub struct Policy<'t, 'd> { pub struct Policy<'t, 'd> {
pub check_filenames: bool, pub check_filenames: bool,
pub check_files: bool, pub check_files: bool,
pub file_type: Option<&'d str>,
pub binary: bool, pub binary: bool,
pub tokenizer: &'t typos::tokens::Tokenizer, pub tokenizer: &'t typos::tokens::Tokenizer,
pub dict: &'d dyn typos::Dictionary, pub dict: &'d dyn typos::Dictionary,
@ -351,6 +355,7 @@ impl<'t, 'd> Default for Policy<'t, 'd> {
Self { Self {
check_filenames: true, check_filenames: true,
check_files: true, check_files: true,
file_type: None,
binary: false, binary: false,
tokenizer: &DEFAULT_TOKENIZER, tokenizer: &DEFAULT_TOKENIZER,
dict: &DEFAULT_DICT, dict: &DEFAULT_DICT,

View file

@ -13,6 +13,7 @@ pub trait Report: Send + Sync {
pub enum Message<'m> { pub enum Message<'m> {
BinaryFile(BinaryFile<'m>), BinaryFile(BinaryFile<'m>),
Typo(Typo<'m>), Typo(Typo<'m>),
FileType(FileType<'m>),
File(File<'m>), File(File<'m>),
Parse(Parse<'m>), Parse(Parse<'m>),
Error(Error<'m>), Error(Error<'m>),
@ -23,6 +24,7 @@ impl<'m> Message<'m> {
match self { match self {
Message::BinaryFile(_) => false, Message::BinaryFile(_) => false,
Message::Typo(c) => c.corrections.is_correction(), Message::Typo(c) => c.corrections.is_correction(),
Message::FileType(_) => false,
Message::File(_) => false, Message::File(_) => false,
Message::Parse(_) => false, Message::Parse(_) => false,
Message::Error(_) => false, Message::Error(_) => false,
@ -33,6 +35,7 @@ impl<'m> Message<'m> {
match self { match self {
Message::BinaryFile(_) => false, Message::BinaryFile(_) => false,
Message::Typo(_) => false, Message::Typo(_) => false,
Message::FileType(_) => false,
Message::File(_) => false, Message::File(_) => false,
Message::Parse(_) => false, Message::Parse(_) => false,
Message::Error(_) => true, Message::Error(_) => true,
@ -144,6 +147,28 @@ pub enum ParseKind {
Word, Word,
} }
/// Report message pairing a file path with the file type associated with it.
#[derive(Clone, Debug, serde::Serialize, derive_setters::Setters)]
#[non_exhaustive]
pub struct FileType<'m> {
/// Path of the file this message is about.
pub path: &'m std::path::Path,
/// Name of the file type, if one is known for `path`.
pub file_type: Option<&'m str>,
}
impl<'m> FileType<'m> {
pub fn new(path: &'m std::path::Path, file_type: Option<&'m str>) -> Self {
Self { path, file_type }
}
}
impl<'m> Default for FileType<'m> {
    /// Returns a message whose path is the placeholder `-` and whose file
    /// type is `None`.
    fn default() -> Self {
        let placeholder = std::path::Path::new("-");
        Self::new(placeholder, None)
    }
}
#[derive(Clone, Debug, serde::Serialize, derive_setters::Setters)] #[derive(Clone, Debug, serde::Serialize, derive_setters::Setters)]
#[non_exhaustive] #[non_exhaustive]
pub struct File<'m> { pub struct File<'m> {