mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-24 18:10:56 -05:00
fix(config): Resolve ambiguous file types
Before, when two file types matched the same glob, the file type that one was non-deterministic. Now, "the more specific" file type wins. What this means is that we break up the file by its extensions and prioritize the more literal glob - If its just `*`, then its lowest priority - If it contains `*` and other logic, then its next - If it doesn't contain a `*`, then its the highest priority This leaves out other glob syntax like `{one,two}` as those are closed-ended and so considered specific still. Fixes #487
This commit is contained in:
parent
4cf566d25f
commit
0bb32cc473
6 changed files with 202 additions and 30 deletions
6
Cargo.lock
generated
6
Cargo.lock
generated
|
@ -651,9 +651,9 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
|||
|
||||
[[package]]
|
||||
name = "globset"
|
||||
version = "0.4.8"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "10463d9ff00a2a068db14231982f5132edebad0d7660cd956a1c30292dbcbfbd"
|
||||
checksum = "0a1e17342619edbc21a964c2afbeb6c820c6a2560032872f397bb97ea127bd0a"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"bstr",
|
||||
|
@ -1541,6 +1541,7 @@ dependencies = [
|
|||
"difflib",
|
||||
"encoding",
|
||||
"env_logger",
|
||||
"globset",
|
||||
"human-panic",
|
||||
"ignore",
|
||||
"itertools",
|
||||
|
@ -1551,6 +1552,7 @@ dependencies = [
|
|||
"proc-exit",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thread_local",
|
||||
"toml_edit",
|
||||
"trycmd",
|
||||
"typed-arena",
|
||||
|
|
|
@ -94,6 +94,8 @@ typed-arena = "2.0.1"
|
|||
maplit = "1.0"
|
||||
unicode-width = "0.1.9"
|
||||
unic-emoji-char = "0.9.0"
|
||||
thread_local = "1.1.4"
|
||||
globset = "0.4.9"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_fs = "1.0"
|
||||
|
|
|
@ -131,13 +131,8 @@ fn run_type_list(args: &args::Args) -> proc_exit::ExitResult {
|
|||
|
||||
let stdout = std::io::stdout();
|
||||
let mut handle = stdout.lock();
|
||||
for def in definitions {
|
||||
writeln!(
|
||||
handle,
|
||||
"{}: {}",
|
||||
def.name(),
|
||||
itertools::join(def.globs(), ", ")
|
||||
)?;
|
||||
for (name, globs) in definitions {
|
||||
writeln!(handle, "{}: {}", name, itertools::join(globs, ", "))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
183
src/file_type.rs
Normal file
183
src/file_type.rs
Normal file
|
@ -0,0 +1,183 @@
|
|||
use std::collections::BTreeMap;
|
||||
|
||||
use kstring::KString;
|
||||
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct TypesBuilder {
|
||||
definitions: BTreeMap<KString, Vec<KString>>,
|
||||
}
|
||||
|
||||
impl TypesBuilder {
|
||||
pub fn new() -> Self {
|
||||
Default::default()
|
||||
}
|
||||
|
||||
pub fn add_defaults(&mut self) {
|
||||
self.definitions.extend(
|
||||
crate::default_types::DEFAULT_TYPES
|
||||
.iter()
|
||||
.map(|(name, glob)| {
|
||||
let name = KString::from(*name);
|
||||
let globs = glob.iter().map(|s| KString::from(*s)).collect();
|
||||
(name, globs)
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn contains_name(&self, name: &str) -> bool {
|
||||
self.definitions.contains_key(name)
|
||||
}
|
||||
|
||||
pub fn add(&mut self, name: impl Into<KString>, glob: impl Into<KString>) {
|
||||
let name = name.into();
|
||||
let glob = glob.into();
|
||||
self.definitions.entry(name).or_default().push(glob);
|
||||
}
|
||||
|
||||
pub fn build(self) -> Result<Types, anyhow::Error> {
|
||||
let mut definitions = self
|
||||
.definitions
|
||||
.iter()
|
||||
.flat_map(|(name, globs)| {
|
||||
globs.iter().map(move |glob| {
|
||||
let sort = sort_key(glob);
|
||||
(sort, name, glob)
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
definitions.sort();
|
||||
|
||||
let mut glob_to_name = Vec::new();
|
||||
let mut build_set = globset::GlobSetBuilder::new();
|
||||
for (_, name, glob) in definitions {
|
||||
glob_to_name.push(name.clone());
|
||||
build_set.add(
|
||||
globset::GlobBuilder::new(glob)
|
||||
.literal_separator(true)
|
||||
.build()?,
|
||||
);
|
||||
}
|
||||
let set = build_set.build()?;
|
||||
|
||||
Ok(Types {
|
||||
definitions: self.definitions,
|
||||
glob_to_name,
|
||||
set,
|
||||
matches: std::sync::Arc::new(thread_local::ThreadLocal::default()),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn sort_key(glob: &str) -> Vec<GlobPart<'_>> {
|
||||
let mut key = glob
|
||||
.split('.')
|
||||
.map(|s| {
|
||||
if s == "*" {
|
||||
GlobPart::Wild(s)
|
||||
} else if s.contains('*') {
|
||||
GlobPart::PartialWild(s)
|
||||
} else {
|
||||
GlobPart::Literalish(s)
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
key.reverse();
|
||||
key
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
||||
enum GlobPart<'s> {
|
||||
Wild(&'s str),
|
||||
PartialWild(&'s str),
|
||||
Literalish(&'s str),
|
||||
}
|
||||
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct Types {
|
||||
definitions: BTreeMap<KString, Vec<KString>>,
|
||||
glob_to_name: Vec<KString>,
|
||||
set: globset::GlobSet,
|
||||
/// Temporary storage for globs that match.
|
||||
matches: std::sync::Arc<thread_local::ThreadLocal<std::cell::RefCell<Vec<usize>>>>,
|
||||
}
|
||||
|
||||
impl Types {
|
||||
pub fn definitions(&self) -> &BTreeMap<KString, Vec<KString>> {
|
||||
&self.definitions
|
||||
}
|
||||
|
||||
pub fn file_matched(&self, path: &std::path::Path) -> Option<&str> {
|
||||
let file_name = path.file_name()?;
|
||||
let mut matches = self.matches.get_or_default().borrow_mut();
|
||||
self.set.matches_into(file_name, &mut *matches);
|
||||
matches
|
||||
.last()
|
||||
.copied()
|
||||
.map(|i| self.glob_to_name[i].as_str())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
macro_rules! matched {
|
||||
($name:ident, $types:expr, $path:expr, $matched:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let mut btypes = TypesBuilder::new();
|
||||
for (name, globs) in $types {
|
||||
for glob in *globs {
|
||||
btypes.add(*name, *glob);
|
||||
}
|
||||
}
|
||||
let types = btypes.build().unwrap();
|
||||
let actual = types.file_matched(std::path::Path::new($path));
|
||||
let expected: Option<&str> = $matched.into();
|
||||
assert_eq!(expected, actual, "{}", $path);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
fn types() -> &'static [(&'static str, &'static [&'static str])] {
|
||||
&[
|
||||
("html", &["*.html", "*.htm"]),
|
||||
("js", &["*.js"]),
|
||||
("json", &["*.json"]),
|
||||
("lock", &["package-lock.json", "*.lock"]),
|
||||
]
|
||||
}
|
||||
|
||||
matched!(basic_match, types(), "leftpad.js", "js");
|
||||
matched!(multi_def_1, types(), "index.html", "html");
|
||||
matched!(multi_def_2, types(), "index.htm", "html");
|
||||
matched!(no_match, types(), "leftpad.ada", None);
|
||||
matched!(more_specific, types(), "package-lock.json", "lock");
|
||||
|
||||
macro_rules! sort {
|
||||
($name:ident, $actual:expr, $expected:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let expected = $expected.into_iter().collect::<Vec<&str>>();
|
||||
|
||||
let mut actual = $actual.into_iter().collect::<Vec<&str>>();
|
||||
actual.sort_by_key(|s| sort_key(s));
|
||||
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
sort!(literal_sort, ["b", "c", "a"], ["a", "b", "c"]);
|
||||
sort!(
|
||||
basic_glob_sort,
|
||||
["a_specific", "z_partial*"],
|
||||
["z_partial*", "a_specific"]
|
||||
);
|
||||
sort!(
|
||||
nested_glob_sort,
|
||||
["a.specific", "z*.partial", "z.partial*"],
|
||||
["z.partial*", "z*.partial", "a.specific"]
|
||||
);
|
||||
sort!(most_specific, ["*.txt.in", "*.in"], ["*.in", "*.txt.in"]);
|
||||
}
|
|
@ -13,3 +13,4 @@ pub mod policy;
|
|||
pub mod report;
|
||||
|
||||
mod default_types;
|
||||
mod file_type;
|
||||
|
|
|
@ -76,7 +76,10 @@ impl<'s> ConfigEngine<'s> {
|
|||
self.get_walk(dir)
|
||||
}
|
||||
|
||||
pub fn file_types(&self, cwd: &std::path::Path) -> &[ignore::types::FileTypeDef] {
|
||||
pub fn file_types(
|
||||
&self,
|
||||
cwd: &std::path::Path,
|
||||
) -> &std::collections::BTreeMap<kstring::KString, Vec<kstring::KString>> {
|
||||
debug_assert!(cwd.is_absolute(), "{} is not absolute", cwd.display());
|
||||
let dir = self
|
||||
.configs
|
||||
|
@ -176,25 +179,17 @@ impl<'s> ConfigEngine<'s> {
|
|||
|
||||
let walk = self.walk.intern(files);
|
||||
|
||||
let mut type_matcher = ignore::types::TypesBuilder::new();
|
||||
for &(name, exts) in crate::default_types::DEFAULT_TYPES {
|
||||
for ext in exts {
|
||||
type_matcher.add(name, ext).expect("all defaults are valid");
|
||||
}
|
||||
}
|
||||
let mut type_matcher = crate::file_type::TypesBuilder::new();
|
||||
type_matcher.add_defaults();
|
||||
let mut types: std::collections::HashMap<_, _> = Default::default();
|
||||
for (type_name, type_engine) in type_.patterns() {
|
||||
if type_engine.extend_glob.is_empty() {
|
||||
if type_matcher
|
||||
.definitions()
|
||||
.iter()
|
||||
.all(|def| def.name() != type_name.as_str())
|
||||
{
|
||||
if !type_matcher.contains_name(&type_name) {
|
||||
anyhow::bail!("Unknown type definition `{}`, pass `--type-list` to see valid names or set `extend_glob` to add a new one.", type_name);
|
||||
}
|
||||
} else {
|
||||
for glob in type_engine.extend_glob.iter() {
|
||||
type_matcher.add(type_name.as_str(), glob.as_str())?;
|
||||
type_matcher.add(type_name.as_ref(), glob.as_ref());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -208,8 +203,6 @@ impl<'s> ConfigEngine<'s> {
|
|||
default.update(&overrides);
|
||||
let default = self.init_file_config(default);
|
||||
|
||||
type_matcher.select("all");
|
||||
|
||||
let dir = DirConfig {
|
||||
walk,
|
||||
default,
|
||||
|
@ -302,16 +295,12 @@ struct DirConfig {
|
|||
walk: usize,
|
||||
default: FileConfig,
|
||||
types: std::collections::HashMap<kstring::KString, FileConfig>,
|
||||
type_matcher: ignore::types::Types,
|
||||
type_matcher: crate::file_type::Types,
|
||||
}
|
||||
|
||||
impl DirConfig {
|
||||
fn get_file_config(&self, path: &std::path::Path) -> FileConfig {
|
||||
let match_ = self.type_matcher.matched(path, false);
|
||||
let name = match_
|
||||
.inner()
|
||||
.and_then(|g| g.file_type_def())
|
||||
.map(|f| f.name());
|
||||
let name = self.type_matcher.file_matched(path);
|
||||
|
||||
name.and_then(|name| {
|
||||
log::debug!("{}: `{}` policy", path.display(), name);
|
||||
|
|
Loading…
Reference in a new issue