mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-25 10:31:02 -05:00
fix(config): Resolve ambiguous file types
Before, when two file types matched the same glob, the file type that won was non-deterministic. Now, "the more specific" file type wins. What this means is that we break up the file by its extensions and prioritize the more literal glob - If it's just `*`, then it's the lowest priority - If it contains `*` and other logic, then it's next - If it doesn't contain a `*`, then it's the highest priority. This leaves out other glob syntax like `{one,two}`, as those are closed-ended and so are still considered specific. Fixes #487
This commit is contained in:
parent
4cf566d25f
commit
0bb32cc473
6 changed files with 202 additions and 30 deletions
6
Cargo.lock
generated
6
Cargo.lock
generated
|
@ -651,9 +651,9 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "globset"
|
name = "globset"
|
||||||
version = "0.4.8"
|
version = "0.4.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "10463d9ff00a2a068db14231982f5132edebad0d7660cd956a1c30292dbcbfbd"
|
checksum = "0a1e17342619edbc21a964c2afbeb6c820c6a2560032872f397bb97ea127bd0a"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick",
|
"aho-corasick",
|
||||||
"bstr",
|
"bstr",
|
||||||
|
@ -1541,6 +1541,7 @@ dependencies = [
|
||||||
"difflib",
|
"difflib",
|
||||||
"encoding",
|
"encoding",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
|
"globset",
|
||||||
"human-panic",
|
"human-panic",
|
||||||
"ignore",
|
"ignore",
|
||||||
"itertools",
|
"itertools",
|
||||||
|
@ -1551,6 +1552,7 @@ dependencies = [
|
||||||
"proc-exit",
|
"proc-exit",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"thread_local",
|
||||||
"toml_edit",
|
"toml_edit",
|
||||||
"trycmd",
|
"trycmd",
|
||||||
"typed-arena",
|
"typed-arena",
|
||||||
|
|
|
@ -94,6 +94,8 @@ typed-arena = "2.0.1"
|
||||||
maplit = "1.0"
|
maplit = "1.0"
|
||||||
unicode-width = "0.1.9"
|
unicode-width = "0.1.9"
|
||||||
unic-emoji-char = "0.9.0"
|
unic-emoji-char = "0.9.0"
|
||||||
|
thread_local = "1.1.4"
|
||||||
|
globset = "0.4.9"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
assert_fs = "1.0"
|
assert_fs = "1.0"
|
||||||
|
|
|
@ -131,13 +131,8 @@ fn run_type_list(args: &args::Args) -> proc_exit::ExitResult {
|
||||||
|
|
||||||
let stdout = std::io::stdout();
|
let stdout = std::io::stdout();
|
||||||
let mut handle = stdout.lock();
|
let mut handle = stdout.lock();
|
||||||
for def in definitions {
|
for (name, globs) in definitions {
|
||||||
writeln!(
|
writeln!(handle, "{}: {}", name, itertools::join(globs, ", "))?;
|
||||||
handle,
|
|
||||||
"{}: {}",
|
|
||||||
def.name(),
|
|
||||||
itertools::join(def.globs(), ", ")
|
|
||||||
)?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
183
src/file_type.rs
Normal file
183
src/file_type.rs
Normal file
|
@ -0,0 +1,183 @@
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
use kstring::KString;
|
||||||
|
|
||||||
|
#[derive(Default, Clone, Debug)]
|
||||||
|
pub struct TypesBuilder {
|
||||||
|
definitions: BTreeMap<KString, Vec<KString>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TypesBuilder {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Default::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_defaults(&mut self) {
|
||||||
|
self.definitions.extend(
|
||||||
|
crate::default_types::DEFAULT_TYPES
|
||||||
|
.iter()
|
||||||
|
.map(|(name, glob)| {
|
||||||
|
let name = KString::from(*name);
|
||||||
|
let globs = glob.iter().map(|s| KString::from(*s)).collect();
|
||||||
|
(name, globs)
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn contains_name(&self, name: &str) -> bool {
|
||||||
|
self.definitions.contains_key(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add(&mut self, name: impl Into<KString>, glob: impl Into<KString>) {
|
||||||
|
let name = name.into();
|
||||||
|
let glob = glob.into();
|
||||||
|
self.definitions.entry(name).or_default().push(glob);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build(self) -> Result<Types, anyhow::Error> {
|
||||||
|
let mut definitions = self
|
||||||
|
.definitions
|
||||||
|
.iter()
|
||||||
|
.flat_map(|(name, globs)| {
|
||||||
|
globs.iter().map(move |glob| {
|
||||||
|
let sort = sort_key(glob);
|
||||||
|
(sort, name, glob)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
definitions.sort();
|
||||||
|
|
||||||
|
let mut glob_to_name = Vec::new();
|
||||||
|
let mut build_set = globset::GlobSetBuilder::new();
|
||||||
|
for (_, name, glob) in definitions {
|
||||||
|
glob_to_name.push(name.clone());
|
||||||
|
build_set.add(
|
||||||
|
globset::GlobBuilder::new(glob)
|
||||||
|
.literal_separator(true)
|
||||||
|
.build()?,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let set = build_set.build()?;
|
||||||
|
|
||||||
|
Ok(Types {
|
||||||
|
definitions: self.definitions,
|
||||||
|
glob_to_name,
|
||||||
|
set,
|
||||||
|
matches: std::sync::Arc::new(thread_local::ThreadLocal::default()),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds the specificity key used to order globs.
///
/// The glob is split on `.` and the parts are returned last-to-first, so the
/// final extension is compared before anything to its left. Keys compare
/// lexicographically: a less specific glob sorts before a more specific one.
fn sort_key(glob: &str) -> Vec<GlobPart<'_>> {
    glob.split('.')
        .rev()
        .map(|part| {
            if part == "*" {
                GlobPart::Wild(part)
            } else if part.contains('*') {
                GlobPart::PartialWild(part)
            } else {
                // Anything without a `*` counts as literal, including other
                // glob syntax such as `{one,two}`.
                GlobPart::Literalish(part)
            }
        })
        .collect()
}

/// One `.`-separated part of a glob, classified by how literal it is.
///
/// The variant order is significant: the derived `Ord` ranks
/// `Wild < PartialWild < Literalish`, and parts of the same kind fall back to
/// comparing their text.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum GlobPart<'s> {
    /// The part is exactly `*`.
    Wild(&'s str),
    /// The part contains `*` together with other characters.
    PartialWild(&'s str),
    /// The part contains no `*`.
    Literalish(&'s str),
}
|
||||||
|
|
||||||
|
#[derive(Default, Clone, Debug)]
|
||||||
|
pub struct Types {
|
||||||
|
definitions: BTreeMap<KString, Vec<KString>>,
|
||||||
|
glob_to_name: Vec<KString>,
|
||||||
|
set: globset::GlobSet,
|
||||||
|
/// Temporary storage for globs that match.
|
||||||
|
matches: std::sync::Arc<thread_local::ThreadLocal<std::cell::RefCell<Vec<usize>>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Types {
|
||||||
|
pub fn definitions(&self) -> &BTreeMap<KString, Vec<KString>> {
|
||||||
|
&self.definitions
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn file_matched(&self, path: &std::path::Path) -> Option<&str> {
|
||||||
|
let file_name = path.file_name()?;
|
||||||
|
let mut matches = self.matches.get_or_default().borrow_mut();
|
||||||
|
self.set.matches_into(file_name, &mut *matches);
|
||||||
|
matches
|
||||||
|
.last()
|
||||||
|
.copied()
|
||||||
|
.map(|i| self.glob_to_name[i].as_str())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Generates a test that builds a matcher from `$types` and checks which
    /// file type `$path` resolves to.
    macro_rules! matched {
        ($name:ident, $types:expr, $path:expr, $matched:expr) => {
            #[test]
            fn $name() {
                let mut btypes = TypesBuilder::new();
                for (name, globs) in $types {
                    for glob in *globs {
                        btypes.add(*name, *glob);
                    }
                }
                let types = btypes.build().unwrap();
                let actual = types.file_matched(std::path::Path::new($path));
                let expected: Option<&str> = $matched.into();
                assert_eq!(expected, actual, "{}", $path);
            }
        };
    }

    /// Shared fixture; `package-lock.json` is matched by both `json` and
    /// `lock`.
    fn types() -> &'static [(&'static str, &'static [&'static str])] {
        &[
            ("html", &["*.html", "*.htm"]),
            ("js", &["*.js"]),
            ("json", &["*.json"]),
            ("lock", &["package-lock.json", "*.lock"]),
        ]
    }

    matched!(basic_match, types(), "leftpad.js", "js");
    matched!(multi_def_1, types(), "index.html", "html");
    matched!(multi_def_2, types(), "index.htm", "html");
    matched!(no_match, types(), "leftpad.ada", None);
    matched!(more_specific, types(), "package-lock.json", "lock");

    /// Generates a test that sorts `$actual` by `sort_key` and compares the
    /// result with `$expected`.
    macro_rules! sort {
        ($name:ident, $actual:expr, $expected:expr) => {
            #[test]
            fn $name() {
                let expected = $expected.into_iter().collect::<Vec<&str>>();

                let mut actual = $actual.into_iter().collect::<Vec<&str>>();
                actual.sort_by_key(|s| sort_key(s));

                assert_eq!(expected, actual);
            }
        };
    }

    sort!(literal_sort, ["b", "c", "a"], ["a", "b", "c"]);
    sort!(
        basic_glob_sort,
        ["a_specific", "z_partial*"],
        ["z_partial*", "a_specific"]
    );
    sort!(
        nested_glob_sort,
        ["a.specific", "z*.partial", "z.partial*"],
        ["z.partial*", "z*.partial", "a.specific"]
    );
    sort!(most_specific, ["*.txt.in", "*.in"], ["*.in", "*.txt.in"]);
}
|
|
@ -13,3 +13,4 @@ pub mod policy;
|
||||||
pub mod report;
|
pub mod report;
|
||||||
|
|
||||||
mod default_types;
|
mod default_types;
|
||||||
|
mod file_type;
|
||||||
|
|
|
@ -76,7 +76,10 @@ impl<'s> ConfigEngine<'s> {
|
||||||
self.get_walk(dir)
|
self.get_walk(dir)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn file_types(&self, cwd: &std::path::Path) -> &[ignore::types::FileTypeDef] {
|
pub fn file_types(
|
||||||
|
&self,
|
||||||
|
cwd: &std::path::Path,
|
||||||
|
) -> &std::collections::BTreeMap<kstring::KString, Vec<kstring::KString>> {
|
||||||
debug_assert!(cwd.is_absolute(), "{} is not absolute", cwd.display());
|
debug_assert!(cwd.is_absolute(), "{} is not absolute", cwd.display());
|
||||||
let dir = self
|
let dir = self
|
||||||
.configs
|
.configs
|
||||||
|
@ -176,25 +179,17 @@ impl<'s> ConfigEngine<'s> {
|
||||||
|
|
||||||
let walk = self.walk.intern(files);
|
let walk = self.walk.intern(files);
|
||||||
|
|
||||||
let mut type_matcher = ignore::types::TypesBuilder::new();
|
let mut type_matcher = crate::file_type::TypesBuilder::new();
|
||||||
for &(name, exts) in crate::default_types::DEFAULT_TYPES {
|
type_matcher.add_defaults();
|
||||||
for ext in exts {
|
|
||||||
type_matcher.add(name, ext).expect("all defaults are valid");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let mut types: std::collections::HashMap<_, _> = Default::default();
|
let mut types: std::collections::HashMap<_, _> = Default::default();
|
||||||
for (type_name, type_engine) in type_.patterns() {
|
for (type_name, type_engine) in type_.patterns() {
|
||||||
if type_engine.extend_glob.is_empty() {
|
if type_engine.extend_glob.is_empty() {
|
||||||
if type_matcher
|
if !type_matcher.contains_name(&type_name) {
|
||||||
.definitions()
|
|
||||||
.iter()
|
|
||||||
.all(|def| def.name() != type_name.as_str())
|
|
||||||
{
|
|
||||||
anyhow::bail!("Unknown type definition `{}`, pass `--type-list` to see valid names or set `extend_glob` to add a new one.", type_name);
|
anyhow::bail!("Unknown type definition `{}`, pass `--type-list` to see valid names or set `extend_glob` to add a new one.", type_name);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for glob in type_engine.extend_glob.iter() {
|
for glob in type_engine.extend_glob.iter() {
|
||||||
type_matcher.add(type_name.as_str(), glob.as_str())?;
|
type_matcher.add(type_name.as_ref(), glob.as_ref());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -208,8 +203,6 @@ impl<'s> ConfigEngine<'s> {
|
||||||
default.update(&overrides);
|
default.update(&overrides);
|
||||||
let default = self.init_file_config(default);
|
let default = self.init_file_config(default);
|
||||||
|
|
||||||
type_matcher.select("all");
|
|
||||||
|
|
||||||
let dir = DirConfig {
|
let dir = DirConfig {
|
||||||
walk,
|
walk,
|
||||||
default,
|
default,
|
||||||
|
@ -302,16 +295,12 @@ struct DirConfig {
|
||||||
walk: usize,
|
walk: usize,
|
||||||
default: FileConfig,
|
default: FileConfig,
|
||||||
types: std::collections::HashMap<kstring::KString, FileConfig>,
|
types: std::collections::HashMap<kstring::KString, FileConfig>,
|
||||||
type_matcher: ignore::types::Types,
|
type_matcher: crate::file_type::Types,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DirConfig {
|
impl DirConfig {
|
||||||
fn get_file_config(&self, path: &std::path::Path) -> FileConfig {
|
fn get_file_config(&self, path: &std::path::Path) -> FileConfig {
|
||||||
let match_ = self.type_matcher.matched(path, false);
|
let name = self.type_matcher.file_matched(path);
|
||||||
let name = match_
|
|
||||||
.inner()
|
|
||||||
.and_then(|g| g.file_type_def())
|
|
||||||
.map(|f| f.name());
|
|
||||||
|
|
||||||
name.and_then(|name| {
|
name.and_then(|name| {
|
||||||
log::debug!("{}: `{}` policy", path.display(), name);
|
log::debug!("{}: `{}` policy", path.display(), name);
|
||||||
|
|
Loading…
Reference in a new issue