From 8cb2d442848a90d5cf14f91e38e974dd81434ca4 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 9 Mar 2022 08:28:09 -0600 Subject: [PATCH 1/3] refactor: Pull out custom default types This is https://github.com/BurntSushi/ripgrep/blob/master/crates/ignore/src/default_types.rs at 418d048. --- src/default_types.rs | 276 +++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 + src/policy.rs | 6 +- 3 files changed, 283 insertions(+), 1 deletion(-) create mode 100644 src/default_types.rs diff --git a/src/default_types.rs b/src/default_types.rs new file mode 100644 index 0000000..519bb4c --- /dev/null +++ b/src/default_types.rs @@ -0,0 +1,276 @@ +/// This list represents the default file types that ripgrep ships with. In +/// general, any file format is fair game, although it should generally be +/// limited to reasonably popular open formats. For other cases, you can add +/// types to each invocation of ripgrep with the '--type-add' flag. +/// +/// If you would like to add or improve this list, please file a PR: +/// <https://github.com/BurntSushi/ripgrep>. +/// +/// Please try to keep this list sorted lexicographically and wrapped to 79 +/// columns (inclusive). 
+#[rustfmt::skip] +pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[ + ("agda", &["*.agda", "*.lagda"]), + ("aidl", &["*.aidl"]), + ("amake", &["*.mk", "*.bp"]), + ("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]), + ("asm", &["*.asm", "*.s", "*.S"]), + ("asp", &[ + "*.aspx", "*.aspx.cs", "*.aspx.vb", "*.ascx", "*.ascx.cs", "*.ascx.vb", + ]), + ("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]), + ("avro", &["*.avdl", "*.avpr", "*.avsc"]), + ("awk", &["*.awk"]), + ("bazel", &[ + "*.bazel", "*.bzl", "*.BUILD", "*.bazelrc", "BUILD", "WORKSPACE", + ]), + ("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]), + ("brotli", &["*.br"]), + ("buildstream", &["*.bst"]), + ("bzip2", &["*.bz2", "*.tbz2"]), + ("c", &["*.[chH]", "*.[chH].in", "*.cats"]), + ("cabal", &["*.cabal"]), + ("cbor", &["*.cbor"]), + ("ceylon", &["*.ceylon"]), + ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]), + ("cmake", &["*.cmake", "CMakeLists.txt"]), + ("coffeescript", &["*.coffee"]), + ("config", &["*.cfg", "*.conf", "*.config", "*.ini"]), + ("coq", &["*.v"]), + ("cpp", &[ + "*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl", + "*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in", + ]), + ("creole", &["*.creole"]), + ("crystal", &["Projectfile", "*.cr", "*.ecr", "shard.yml"]), + ("cs", &["*.cs"]), + ("csharp", &["*.cs"]), + ("cshtml", &["*.cshtml"]), + ("css", &["*.css", "*.scss"]), + ("csv", &["*.csv"]), + ("cuda", &["*.cu", "*.cuh"]), + ("cython", &["*.pyx", "*.pxi", "*.pxd"]), + ("d", &["*.d"]), + ("dart", &["*.dart"]), + ("dhall", &["*.dhall"]), + ("diff", &["*.patch", "*.diff"]), + ("docker", &["*Dockerfile*"]), + ("dvc", &["Dvcfile", "*.dvc"]), + ("ebuild", &["*.ebuild"]), + ("edn", &["*.edn"]), + ("elisp", &["*.el"]), + ("elixir", &["*.ex", "*.eex", "*.exs"]), + ("elm", &["*.elm"]), + ("erb", &["*.erb"]), + ("erlang", &["*.erl", "*.hrl"]), + ("fennel", &["*.fnl"]), + ("fidl", &["*.fidl"]), + ("fish", &["*.fish"]), + ("flatbuffers", &["*.fbs"]), + 
("fortran", &[ + "*.f", "*.F", "*.f77", "*.F77", "*.pfo", + "*.f90", "*.F90", "*.f95", "*.F95", + ]), + ("fsharp", &["*.fs", "*.fsx", "*.fsi"]), + ("fut", &["*.fut"]), + ("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]), + ("gn", &["*.gn", "*.gni"]), + ("go", &["*.go"]), + ("gradle", &["*.gradle"]), + ("groovy", &["*.groovy", "*.gradle"]), + ("gzip", &["*.gz", "*.tgz"]), + ("h", &["*.h", "*.hpp"]), + ("haml", &["*.haml"]), + ("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]), + ("hbs", &["*.hbs"]), + ("hs", &["*.hs", "*.lhs"]), + ("html", &["*.htm", "*.html", "*.ejs"]), + ("hy", &["*.hy"]), + ("idris", &["*.idr", "*.lidr"]), + ("janet", &["*.janet"]), + ("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]), + ("jinja", &["*.j2", "*.jinja", "*.jinja2"]), + ("jl", &["*.jl"]), + ("js", &["*.js", "*.jsx", "*.vue"]), + ("json", &["*.json", "composer.lock"]), + ("jsonl", &["*.jsonl"]), + ("julia", &["*.jl"]), + ("jupyter", &["*.ipynb", "*.jpynb"]), + ("k", &["*.k"]), + ("kotlin", &["*.kt", "*.kts"]), + ("less", &["*.less"]), + ("license", &[ + // General + "COPYING", "COPYING[.-]*", + "COPYRIGHT", "COPYRIGHT[.-]*", + "EULA", "EULA[.-]*", + "licen[cs]e", "licen[cs]e.*", + "LICEN[CS]E", "LICEN[CS]E[.-]*", "*[.-]LICEN[CS]E*", + "NOTICE", "NOTICE[.-]*", + "PATENTS", "PATENTS[.-]*", + "UNLICEN[CS]E", "UNLICEN[CS]E[.-]*", + // GPL (gpl.txt, etc.) + "agpl[.-]*", + "gpl[.-]*", + "lgpl[.-]*", + // Other license-specific (APACHE-2.0.txt, etc.) 
+ "AGPL-*[0-9]*", + "APACHE-*[0-9]*", + "BSD-*[0-9]*", + "CC-BY-*", + "GFDL-*[0-9]*", + "GNU-*[0-9]*", + "GPL-*[0-9]*", + "LGPL-*[0-9]*", + "MIT-*[0-9]*", + "MPL-*[0-9]*", + "OFL-*[0-9]*", + ]), + ("lilypond", &["*.ly", "*.ily"]), + ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]), + ("lock", &["*.lock", "package-lock.json"]), + ("log", &["*.log"]), + ("lua", &["*.lua"]), + ("lz4", &["*.lz4"]), + ("lzma", &["*.lzma"]), + ("m4", &["*.ac", "*.m4"]), + ("make", &[ + "[Gg][Nn][Uu]makefile", "[Mm]akefile", + "[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am", + "[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in", + "*.mk", "*.mak" + ]), + ("mako", &["*.mako", "*.mao"]), + ("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]), + ("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]), + ("matlab", &["*.m"]), + ("md", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]), + ("meson", &["meson.build", "meson_options.txt"]), + ("minified", &["*.min.html", "*.min.css", "*.min.js"]), + ("mint", &["*.mint"]), + ("mk", &["mkfile"]), + ("ml", &["*.ml"]), + ("msbuild", &[ + "*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets", + ]), + ("nim", &["*.nim", "*.nimf", "*.nimble", "*.nims"]), + ("nix", &["*.nix"]), + ("objc", &["*.h", "*.m"]), + ("objcpp", &["*.h", "*.mm"]), + ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]), + ("org", &["*.org", "*.org_archive"]), + ("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]), + ("pdf", &["*.pdf"]), + ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]), + ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]), + ("po", &["*.po"]), + ("pod", &["*.pod"]), + ("postscript", &["*.eps", "*.ps"]), + ("protobuf", &["*.proto"]), + ("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]), + ("puppet", &["*.erb", "*.pp", "*.rb"]), + ("purs", &["*.purs"]), + ("py", &["*.py"]), + ("qmake", &["*.pro", "*.pri", "*.prf"]), + ("qml", &["*.qml"]), + ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]), + ("racket", &["*.rkt"]), + 
("rdoc", &["*.rdoc"]), + ("readme", &["README*", "*README"]), + ("red", &["*.r", "*.red", "*.reds"]), + ("robot", &["*.robot"]), + ("rst", &["*.rst"]), + ("ruby", &[ + // Idiomatic files + "config.ru", "Gemfile", ".irbrc", "Rakefile", + // Extensions + "*.gemspec", "*.rb", "*.rbw" + ]), + ("rust", &["*.rs"]), + ("sass", &["*.sass", "*.scss"]), + ("scala", &["*.scala", "*.sbt"]), + ("sh", &[ + // Portable/misc. init files + ".login", ".logout", ".profile", "profile", + // bash-specific init files + ".bash_login", "bash_login", + ".bash_logout", "bash_logout", + ".bash_profile", "bash_profile", + ".bashrc", "bashrc", "*.bashrc", + // csh-specific init files + ".cshrc", "*.cshrc", + // ksh-specific init files + ".kshrc", "*.kshrc", + // tcsh-specific init files + ".tcshrc", + // zsh-specific init files + ".zshenv", "zshenv", + ".zlogin", "zlogin", + ".zlogout", "zlogout", + ".zprofile", "zprofile", + ".zshrc", "zshrc", + // Extensions + "*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh", + ]), + ("slim", &["*.skim", "*.slim", "*.slime"]), + ("smarty", &["*.tpl"]), + ("sml", &["*.sml", "*.sig"]), + ("soy", &["*.soy"]), + ("spark", &["*.spark"]), + ("spec", &["*.spec"]), + ("sql", &["*.sql", "*.psql"]), + ("stylus", &["*.styl"]), + ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]), + ("svg", &["*.svg"]), + ("swift", &["*.swift"]), + ("swig", &["*.def", "*.i"]), + ("systemd", &[ + "*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path", + "*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target", + "*.timer", + ]), + ("taskpaper", &["*.taskpaper"]), + ("tcl", &["*.tcl"]), + ("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]), + ("texinfo", &["*.texi"]), + ("textile", &["*.textile"]), + ("tf", &["*.tf"]), + ("thrift", &["*.thrift"]), + ("toml", &["*.toml", "Cargo.lock"]), + ("ts", &["*.ts", "*.tsx"]), + ("twig", &["*.twig"]), + ("txt", &["*.txt"]), + ("typoscript", &["*.typoscript", "*.ts"]), + ("vala", &["*.vala"]), + ("vb", 
&["*.vb"]), + ("vcl", &["*.vcl"]), + ("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]), + ("vhdl", &["*.vhd", "*.vhdl"]), + ("vim", &[ + "*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc", + ]), + ("vimscript", &[ + "*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc", + ]), + ("webidl", &["*.idl", "*.webidl", "*.widl"]), + ("wiki", &["*.mediawiki", "*.wiki"]), + ("xml", &[ + "*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb", + "*.rng", "*.sch", "*.xhtml", + ]), + ("xz", &["*.xz", "*.txz"]), + ("yacc", &["*.y"]), + ("yaml", &["*.yaml", "*.yml"]), + ("yang", &["*.yang"]), + ("z", &["*.Z"]), + ("zig", &["*.zig"]), + ("zsh", &[ + ".zshenv", "zshenv", + ".zlogin", "zlogin", + ".zlogout", "zlogout", + ".zprofile", "zprofile", + ".zshrc", "zshrc", + "*.zsh", + ]), + ("zstd", &["*.zst", "*.zstd"]), +]; diff --git a/src/lib.rs b/src/lib.rs index 3a7b687..c836c0d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,3 +11,5 @@ pub mod file; pub mod policy; #[doc(hidden)] pub mod report; + +mod default_types; diff --git a/src/policy.rs b/src/policy.rs index 87787c8..cda4010 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -177,7 +177,11 @@ impl<'s> ConfigEngine<'s> { let walk = self.walk.intern(files); let mut type_matcher = ignore::types::TypesBuilder::new(); - type_matcher.add_defaults(); + for &(name, exts) in crate::default_types::DEFAULT_TYPES { + for ext in exts { + type_matcher.add(name, ext).expect("all defaults are valid"); + } + } let mut types: std::collections::HashMap<_, _> = Default::default(); for (type_name, type_engine) in type_.patterns() { if type_engine.extend_glob.is_empty() { From 560de68f5810748ce5635794d8b007648583521f Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 9 Mar 2022 08:37:22 -0600 Subject: [PATCH 2/3] refactor: Move default config to default types --- src/config.rs | 35 ++++------------------------------- src/default_types.rs | 30 ++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 
33 deletions(-) diff --git a/src/config.rs b/src/config.rs index f7b64a1..3094eea 100644 --- a/src/config.rs +++ b/src/config.rs @@ -182,8 +182,7 @@ impl TypeEngineConfig { patterns .entry("rust".into()) .or_insert_with(|| GlobEngineConfig { - // From a spell-check perspective, these are more closely related to Rust than Toml - extend_glob: vec!["Cargo.toml".into()], + extend_glob: Vec::new(), engine: EngineConfig { dict: Some(DictConfig { extend_words: maplit::hashmap! { @@ -196,40 +195,14 @@ impl TypeEngineConfig { }, }); patterns - .entry("python".into()) + .entry("cert".into()) .or_insert_with(|| GlobEngineConfig { - // From a spell-check perspective, these are more closely related to Python than Toml - extend_glob: vec!["pyproject.toml".into()], - engine: EngineConfig { - ..Default::default() - }, - }); - patterns.entry("cert".into()).or_insert_with(|| { - GlobEngineConfig { - extend_glob: vec![ - // Certificate files: - "*.crt".into(), - "*.cer".into(), - "*.ca-bundle".into(), - "*.p7b".into(), - "*.p7c".into(), - "*.p7s".into(), - "*.pem".into(), - // Keystore Files: - "*.key".into(), - "*.keystore".into(), - "*.jks".into(), - // Combined certificate and key files: - "*.p12".into(), - "*.pfx".into(), - "*.pem".into(), - ], + extend_glob: Vec::new(), engine: EngineConfig { check_file: Some(false), ..Default::default() }, - } - }); + }); patterns.into_iter() } } diff --git a/src/default_types.rs b/src/default_types.rs index 519bb4c..eaaa57e 100644 --- a/src/default_types.rs +++ b/src/default_types.rs @@ -31,6 +31,24 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[ ("c", &["*.[chH]", "*.[chH].in", "*.cats"]), ("cabal", &["*.cabal"]), ("cbor", &["*.cbor"]), + ("cert", &[ + // Certificate files: + "*.crt", + "*.cer", + "*.ca-bundle", + "*.p7b", + "*.p7c", + "*.p7s", + "*.pem", + // Keystore Files: + "*.key", + "*.keystore", + "*.jks", + // Combined certificate and key files: + "*.p12", + "*.pfx", + "*.pem", + ]), ("ceylon", &["*.ceylon"]), ("clojure", 
&["*.clj", "*.cljc", "*.cljs", "*.cljx"]), ("cmake", &["*.cmake", "CMakeLists.txt"]), @@ -170,7 +188,11 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[ ("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]), ("puppet", &["*.erb", "*.pp", "*.rb"]), ("purs", &["*.purs"]), - ("py", &["*.py"]), + ("py", &[ + "*.py", + // From a spell-check perspective, this is more like Python than toml + "pyproject.toml", + ]), ("qmake", &["*.pro", "*.pri", "*.prf"]), ("qml", &["*.qml"]), ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]), @@ -186,7 +208,11 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[ // Extensions "*.gemspec", "*.rb", "*.rbw" ]), - ("rust", &["*.rs"]), + ("rust", &[ + "*.rs", + // From a spell-check perspective, this is more like Rust than toml + "Cargo.toml", + ]), ("sass", &["*.sass", "*.scss"]), ("scala", &["*.scala", "*.sbt"]), ("sh", &[ From e70667ebae57264b13f184ac06124034f9eb52a9 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 9 Mar 2022 08:42:36 -0600 Subject: [PATCH 3/3] fix: Ignore lock files by default First, this centralizes the concept of lock files, focusing on intent, rather than syntax. We are assuming `requirements.txt` for Python is being used like a regular lock file and not as a dependency specification. Second, we then ignore the content. Though a lock file will generally contain things that could show up in a dependency specification, the large dependency trees make that harder to manage. We still have the dependency specification file which will match with the users code. 
Fixes #445 --- src/config.rs | 9 +++++++++ src/default_types.rs | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/config.rs b/src/config.rs index 3094eea..c1cc1f7 100644 --- a/src/config.rs +++ b/src/config.rs @@ -203,6 +203,15 @@ impl TypeEngineConfig { ..Default::default() }, }); + patterns + .entry("lock".into()) + .or_insert_with(|| GlobEngineConfig { + extend_glob: Vec::new(), + engine: EngineConfig { + check_file: Some(false), + ..Default::default() + }, + }); patterns.into_iter() } } diff --git a/src/default_types.rs b/src/default_types.rs index eaaa57e..f296412 100644 --- a/src/default_types.rs +++ b/src/default_types.rs @@ -110,7 +110,7 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[ ("jinja", &["*.j2", "*.jinja", "*.jinja2"]), ("jl", &["*.jl"]), ("js", &["*.js", "*.jsx", "*.vue"]), - ("json", &["*.json", "composer.lock"]), + ("json", &["*.json"]), ("jsonl", &["*.jsonl"]), ("julia", &["*.jl"]), ("jupyter", &["*.ipynb", "*.jpynb"]), @@ -146,7 +146,7 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[ ]), ("lilypond", &["*.ly", "*.ily"]), ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]), - ("lock", &["*.lock", "package-lock.json"]), + ("lock", &["*.lock", "package-lock.json", "requirements.txt"]), ("log", &["*.log"]), ("lua", &["*.lua"]), ("lz4", &["*.lz4"]), @@ -262,7 +262,7 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[ ("textile", &["*.textile"]), ("tf", &["*.tf"]), ("thrift", &["*.thrift"]), - ("toml", &["*.toml", "Cargo.lock"]), + ("toml", &["*.toml"]), ("ts", &["*.ts", "*.tsx"]), ("twig", &["*.twig"]), ("txt", &["*.txt"]),