diff --git a/Cargo.lock b/Cargo.lock index de48d97..2f268cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -651,9 +651,9 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "globset" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10463d9ff00a2a068db14231982f5132edebad0d7660cd956a1c30292dbcbfbd" +checksum = "0a1e17342619edbc21a964c2afbeb6c820c6a2560032872f397bb97ea127bd0a" dependencies = [ "aho-corasick", "bstr", @@ -1541,6 +1541,7 @@ dependencies = [ "difflib", "encoding", "env_logger", + "globset", "human-panic", "ignore", "itertools", @@ -1551,6 +1552,7 @@ dependencies = [ "proc-exit", "serde", "serde_json", + "thread_local", "toml_edit", "trycmd", "typed-arena", diff --git a/Cargo.toml b/Cargo.toml index 7e70a51..e3a6d77 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -94,6 +94,8 @@ typed-arena = "2.0.1" maplit = "1.0" unicode-width = "0.1.9" unic-emoji-char = "0.9.0" +thread_local = "1.1.4" +globset = "0.4.9" [dev-dependencies] assert_fs = "1.0" diff --git a/src/bin/typos-cli/main.rs b/src/bin/typos-cli/main.rs index 171a73e..aaf8fcf 100644 --- a/src/bin/typos-cli/main.rs +++ b/src/bin/typos-cli/main.rs @@ -131,13 +131,8 @@ fn run_type_list(args: &args::Args) -> proc_exit::ExitResult { let stdout = std::io::stdout(); let mut handle = stdout.lock(); - for def in definitions { - writeln!( - handle, - "{}: {}", - def.name(), - itertools::join(def.globs(), ", ") - )?; + for (name, globs) in definitions { + writeln!(handle, "{}: {}", name, itertools::join(globs, ", "))?; } Ok(()) diff --git a/src/file_type.rs b/src/file_type.rs new file mode 100644 index 0000000..4b044bc --- /dev/null +++ b/src/file_type.rs @@ -0,0 +1,183 @@ +use std::collections::BTreeMap; + +use kstring::KString; + +#[derive(Default, Clone, Debug)] +pub struct TypesBuilder { + definitions: BTreeMap>, +} + +impl TypesBuilder { + pub fn new() -> Self { + Default::default() + } + + pub fn add_defaults(&mut self) { + self.definitions.extend( + crate::default_types::DEFAULT_TYPES + .iter() + .map(|(name, glob)| { + let name = KString::from(*name); + let globs = glob.iter().map(|s| KString::from(*s)).collect(); + (name, globs) + }), + ); + } + + pub fn contains_name(&self, name: &str) -> bool { + self.definitions.contains_key(name) + } + + pub fn add(&mut self, name: impl Into, glob: impl Into) { + let name = name.into(); + let glob = glob.into(); + self.definitions.entry(name).or_default().push(glob); + } + + pub fn build(self) -> Result { + let mut definitions = self + .definitions + .iter() + .flat_map(|(name, globs)| { + globs.iter().map(move |glob| { + let sort = sort_key(glob); + (sort, name, glob) + }) + }) + .collect::>(); + definitions.sort(); + + let mut glob_to_name = Vec::new(); + let mut build_set = globset::GlobSetBuilder::new(); + for (_, name, glob) in definitions { + glob_to_name.push(name.clone()); + build_set.add( + globset::GlobBuilder::new(glob) + .literal_separator(true) + .build()?, + ); + } + let set = build_set.build()?; + + Ok(Types { + definitions: self.definitions, + glob_to_name, + set, + matches: std::sync::Arc::new(thread_local::ThreadLocal::default()), + }) + } +} + +fn sort_key(glob: &str) -> Vec> { + let mut key = glob + .split('.') + .map(|s| { + if s == "*" { + GlobPart::Wild(s) + } else if s.contains('*') { + GlobPart::PartialWild(s) + } else { + GlobPart::Literalish(s) + } + }) + .collect::>(); + key.reverse(); + key +} + +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +enum GlobPart<'s> { + Wild(&'s str), + PartialWild(&'s str), + Literalish(&'s str), +} + +#[derive(Default, Clone, Debug)] +pub struct Types { + definitions: BTreeMap>, + glob_to_name: Vec, + set: globset::GlobSet, + /// Temporary storage for globs that match. + matches: std::sync::Arc>>>, +} + +impl Types { + pub fn definitions(&self) -> &BTreeMap> { + &self.definitions + } + + pub fn file_matched(&self, path: &std::path::Path) -> Option<&str> { + let file_name = path.file_name()?; + let mut matches = self.matches.get_or_default().borrow_mut(); + self.set.matches_into(file_name, &mut *matches); + matches + .last() + .copied() + .map(|i| self.glob_to_name[i].as_str()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! matched { + ($name:ident, $types:expr, $path:expr, $matched:expr) => { + #[test] + fn $name() { + let mut btypes = TypesBuilder::new(); + for (name, globs) in $types { + for glob in *globs { + btypes.add(*name, *glob); + } + } + let types = btypes.build().unwrap(); + let actual = types.file_matched(std::path::Path::new($path)); + let expected: Option<&str> = $matched.into(); + assert_eq!(expected, actual, "{}", $path); + } + }; + } + + fn types() -> &'static [(&'static str, &'static [&'static str])] { + &[ + ("html", &["*.html", "*.htm"]), + ("js", &["*.js"]), + ("json", &["*.json"]), + ("lock", &["package-lock.json", "*.lock"]), + ] + } + + matched!(basic_match, types(), "leftpad.js", "js"); + matched!(multi_def_1, types(), "index.html", "html"); + matched!(multi_def_2, types(), "index.htm", "html"); + matched!(no_match, types(), "leftpad.ada", None); + matched!(more_specific, types(), "package-lock.json", "lock"); + + macro_rules! sort { + ($name:ident, $actual:expr, $expected:expr) => { + #[test] + fn $name() { + let expected = $expected.into_iter().collect::>(); + + let mut actual = $actual.into_iter().collect::>(); + actual.sort_by_key(|s| sort_key(s)); + + assert_eq!(expected, actual); + } + }; + } + + sort!(literal_sort, ["b", "c", "a"], ["a", "b", "c"]); + sort!( + basic_glob_sort, + ["a_specific", "z_partial*"], + ["z_partial*", "a_specific"] + ); + sort!( + nested_glob_sort, + ["a.specific", "z*.partial", "z.partial*"], + ["z.partial*", "z*.partial", "a.specific"] + ); + sort!(most_specific, ["*.txt.in", "*.in"], ["*.in", "*.txt.in"]); +} diff --git a/src/lib.rs b/src/lib.rs index c836c0d..4437e2d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,3 +13,4 @@ pub mod policy; pub mod report; mod default_types; +mod file_type; diff --git a/src/policy.rs b/src/policy.rs index 9fe71ad..42948db 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -76,7 +76,10 @@ impl<'s> ConfigEngine<'s> { self.get_walk(dir) } - pub fn file_types(&self, cwd: &std::path::Path) -> &[ignore::types::FileTypeDef] { + pub fn file_types( + &self, + cwd: &std::path::Path, + ) -> &std::collections::BTreeMap> { debug_assert!(cwd.is_absolute(), "{} is not absolute", cwd.display()); let dir = self .configs @@ -176,25 +179,17 @@ impl<'s> ConfigEngine<'s> { let walk = self.walk.intern(files); - let mut type_matcher = ignore::types::TypesBuilder::new(); - for &(name, exts) in crate::default_types::DEFAULT_TYPES { - for ext in exts { - type_matcher.add(name, ext).expect("all defaults are valid"); - } - } + let mut type_matcher = crate::file_type::TypesBuilder::new(); + type_matcher.add_defaults(); let mut types: std::collections::HashMap<_, _> = Default::default(); for (type_name, type_engine) in type_.patterns() { if type_engine.extend_glob.is_empty() { - if type_matcher - .definitions() - .iter() - .all(|def| def.name() != type_name.as_str()) - { + if !type_matcher.contains_name(&type_name) { anyhow::bail!("Unknown type definition `{}`, pass `--type-list` to see valid names or set `extend_glob` to add a new one.", type_name); } } else { for glob in type_engine.extend_glob.iter() { - type_matcher.add(type_name.as_str(), glob.as_str())?; + type_matcher.add(type_name.as_ref(), glob.as_ref()); } } @@ -208,8 +203,6 @@ impl<'s> ConfigEngine<'s> { default.update(&overrides); let default = self.init_file_config(default); - type_matcher.select("all"); - let dir = DirConfig { walk, default, @@ -302,16 +295,12 @@ struct DirConfig { walk: usize, default: FileConfig, types: std::collections::HashMap, - type_matcher: ignore::types::Types, + type_matcher: crate::file_type::Types, } impl DirConfig { fn get_file_config(&self, path: &std::path::Path) -> FileConfig { - let match_ = self.type_matcher.matched(path, false); - let name = match_ - .inner() - .and_then(|g| g.file_type_def()) - .map(|f| f.name()); + let name = self.type_matcher.file_matched(path); name.and_then(|name| { log::debug!("{}: `{}` policy", path.display(), name);