mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-25 17:12:16 -05:00
commit
5253e5589b
10 changed files with 154 additions and 6 deletions
|
@ -156,6 +156,7 @@ impl FileArgs {
|
|||
locale: self.locale,
|
||||
..Default::default()
|
||||
}),
|
||||
extend_ignore_re: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -268,7 +268,7 @@ impl GlobEngineConfig {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
|
||||
//#[serde(deny_unknown_fields)] // Doesn't work with `flatten`
|
||||
#[serde(default)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
|
@ -283,6 +283,8 @@ pub struct EngineConfig {
|
|||
pub tokenizer: Option<TokenizerConfig>,
|
||||
#[serde(flatten)]
|
||||
pub dict: Option<DictConfig>,
|
||||
#[serde(with = "serde_regex")]
|
||||
pub extend_ignore_re: Vec<regex::Regex>,
|
||||
}
|
||||
|
||||
impl EngineConfig {
|
||||
|
@ -298,6 +300,7 @@ impl EngineConfig {
|
|||
.unwrap_or_else(TokenizerConfig::from_defaults),
|
||||
),
|
||||
dict: Some(empty.dict.unwrap_or_else(DictConfig::from_defaults)),
|
||||
extend_ignore_re: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -327,6 +330,8 @@ impl EngineConfig {
|
|||
let mut dict = Some(dict);
|
||||
std::mem::swap(&mut dict, &mut self.dict);
|
||||
}
|
||||
self.extend_ignore_re
|
||||
.extend(source.extend_ignore_re.iter().cloned());
|
||||
}
|
||||
|
||||
pub fn binary(&self) -> bool {
|
||||
|
@ -340,8 +345,29 @@ impl EngineConfig {
|
|||
pub fn check_file(&self) -> bool {
|
||||
self.check_file.unwrap_or(true)
|
||||
}
|
||||
|
||||
pub fn extend_ignore_re(&self) -> Box<dyn Iterator<Item = ®ex::Regex> + '_> {
|
||||
Box::new(self.extend_ignore_re.iter())
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for EngineConfig {
|
||||
fn eq(&self, rhs: &Self) -> bool {
|
||||
self.binary == rhs.binary
|
||||
&& self.check_filename == rhs.check_filename
|
||||
&& self.check_file == rhs.check_file
|
||||
&& self.tokenizer == rhs.tokenizer
|
||||
&& self.dict == rhs.dict
|
||||
&& self
|
||||
.extend_ignore_re
|
||||
.iter()
|
||||
.map(|r| r.as_str())
|
||||
.eq(rhs.extend_ignore_re.iter().map(|r| r.as_str()))
|
||||
}
|
||||
}
|
||||
|
||||
// Marker impl: `EngineConfig` equality comes from the hand-written `PartialEq` impl.
impl Eq for EngineConfig {}
|
||||
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(default)]
|
||||
|
|
|
@ -48,7 +48,14 @@ impl FileChecker for Typos {
|
|||
reporter.report(msg.into())?;
|
||||
} else {
|
||||
let mut accum_line_num = AccumulateLineNum::new();
|
||||
let mut ignores: Option<Ignores> = None;
|
||||
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
|
||||
if ignores
|
||||
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
|
||||
.is_ignored(typo.span())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
|
||||
let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
|
||||
let msg = report::Typo {
|
||||
|
@ -86,7 +93,14 @@ impl FileChecker for FixTypos {
|
|||
} else {
|
||||
let mut fixes = Vec::new();
|
||||
let mut accum_line_num = AccumulateLineNum::new();
|
||||
let mut ignores: Option<Ignores> = None;
|
||||
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
|
||||
if ignores
|
||||
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
|
||||
.is_ignored(typo.span())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if is_fixable(&typo) {
|
||||
fixes.push(typo.into_owned());
|
||||
} else {
|
||||
|
@ -163,7 +177,14 @@ impl FileChecker for DiffTypos {
|
|||
} else {
|
||||
let mut fixes = Vec::new();
|
||||
let mut accum_line_num = AccumulateLineNum::new();
|
||||
let mut ignores: Option<Ignores> = None;
|
||||
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
|
||||
if ignores
|
||||
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
|
||||
.is_ignored(typo.span())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if is_fixable(&typo) {
|
||||
fixes.push(typo.into_owned());
|
||||
} else {
|
||||
|
@ -276,7 +297,14 @@ impl FileChecker for Identifiers {
|
|||
let msg = report::BinaryFile { path };
|
||||
reporter.report(msg.into())?;
|
||||
} else {
|
||||
let mut ignores: Option<Ignores> = None;
|
||||
for word in policy.tokenizer.parse_bytes(&buffer) {
|
||||
if ignores
|
||||
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
|
||||
.is_ignored(word.span())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// HACK: Don't look up the line_num per entry to better match the performance
|
||||
// of Typos for comparison purposes. We don't really get much out of it
|
||||
// anyway.
|
||||
|
@ -329,11 +357,18 @@ impl FileChecker for Words {
|
|||
let msg = report::BinaryFile { path };
|
||||
reporter.report(msg.into())?;
|
||||
} else {
|
||||
let mut ignores: Option<Ignores> = None;
|
||||
for word in policy
|
||||
.tokenizer
|
||||
.parse_bytes(&buffer)
|
||||
.flat_map(|i| i.split())
|
||||
{
|
||||
if ignores
|
||||
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
|
||||
.is_ignored(word.span())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// HACK: Don't look up the line_num per entry to better match the performance
|
||||
// of Typos for comparison purposes. We don't really get much out of it
|
||||
// anyway.
|
||||
|
@ -644,6 +679,33 @@ fn walk_entry(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct Ignores {
|
||||
blocks: Vec<std::ops::Range<usize>>,
|
||||
}
|
||||
|
||||
impl Ignores {
|
||||
fn new(content: &[u8], ignores: &[regex::Regex]) -> Self {
|
||||
let mut blocks = Vec::new();
|
||||
if let Ok(content) = std::str::from_utf8(content) {
|
||||
for ignore in ignores {
|
||||
for mat in ignore.find_iter(content) {
|
||||
blocks.push(mat.range());
|
||||
}
|
||||
}
|
||||
}
|
||||
Self { blocks }
|
||||
}
|
||||
|
||||
fn is_ignored(&self, span: std::ops::Range<usize>) -> bool {
|
||||
let start = span.start;
|
||||
let end = span.end.saturating_sub(1);
|
||||
self.blocks
|
||||
.iter()
|
||||
.any(|block| block.contains(&start) || block.contains(&end))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
|
|
@ -42,6 +42,7 @@ pub struct ConfigEngine<'s> {
|
|||
walk: Intern<crate::config::Walk>,
|
||||
tokenizer: Intern<typos::tokens::Tokenizer>,
|
||||
dict: Intern<crate::dict::Override<'s, 's, crate::dict::BuiltIn>>,
|
||||
ignore: Intern<Vec<regex::Regex>>,
|
||||
}
|
||||
|
||||
impl<'s> ConfigEngine<'s> {
|
||||
|
@ -54,6 +55,7 @@ impl<'s> ConfigEngine<'s> {
|
|||
walk: Default::default(),
|
||||
tokenizer: Default::default(),
|
||||
dict: Default::default(),
|
||||
ignore: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -88,7 +90,7 @@ impl<'s> ConfigEngine<'s> {
|
|||
dir.type_matcher.definitions()
|
||||
}
|
||||
|
||||
pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_> {
|
||||
pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_, '_> {
|
||||
debug_assert!(path.is_absolute(), "{} is not absolute", path.display());
|
||||
let dir = self.get_dir(path).expect("`walk()` should be called first");
|
||||
let (file_type, file_config) = dir.get_file_config(path);
|
||||
|
@ -99,6 +101,7 @@ impl<'s> ConfigEngine<'s> {
|
|||
binary: file_config.binary,
|
||||
tokenizer: self.get_tokenizer(&file_config),
|
||||
dict: self.get_dict(&file_config),
|
||||
ignore: self.get_ignore(&file_config),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -114,6 +117,10 @@ impl<'s> ConfigEngine<'s> {
|
|||
self.dict.get(file.dict)
|
||||
}
|
||||
|
||||
/// Fetches the ignore patterns for `file` from the `ignore` store, using the key kept in
/// `file.ignore`.
fn get_ignore(&self, file: &FileConfig) -> &[regex::Regex] {
    let key = file.ignore;
    self.ignore.get(key)
}
|
||||
|
||||
fn get_dir(&self, path: &std::path::Path) -> Option<&DirConfig> {
|
||||
for path in path.ancestors() {
|
||||
if let Some(dir) = self.configs.get(path) {
|
||||
|
@ -220,7 +227,10 @@ impl<'s> ConfigEngine<'s> {
|
|||
let check_filename = engine.check_filename();
|
||||
let check_file = engine.check_file();
|
||||
let crate::config::EngineConfig {
|
||||
tokenizer, dict, ..
|
||||
tokenizer,
|
||||
dict,
|
||||
extend_ignore_re,
|
||||
..
|
||||
} = engine;
|
||||
let tokenizer_config =
|
||||
tokenizer.unwrap_or_else(crate::config::TokenizerConfig::from_defaults);
|
||||
|
@ -254,12 +264,15 @@ impl<'s> ConfigEngine<'s> {
|
|||
let dict = self.dict.intern(dict);
|
||||
let tokenizer = self.tokenizer.intern(tokenizer);
|
||||
|
||||
let ignore = self.ignore.intern(extend_ignore_re);
|
||||
|
||||
FileConfig {
|
||||
check_filenames: check_filename,
|
||||
check_files: check_file,
|
||||
binary,
|
||||
tokenizer,
|
||||
dict,
|
||||
ignore,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -328,20 +341,22 @@ struct FileConfig {
|
|||
check_filenames: bool,
|
||||
check_files: bool,
|
||||
binary: bool,
|
||||
ignore: usize,
|
||||
}
|
||||
|
||||
#[non_exhaustive]
|
||||
#[derive(derive_setters::Setters)]
|
||||
pub struct Policy<'t, 'd> {
|
||||
pub struct Policy<'t, 'd, 'i> {
|
||||
pub check_filenames: bool,
|
||||
pub check_files: bool,
|
||||
pub file_type: Option<&'d str>,
|
||||
pub binary: bool,
|
||||
pub tokenizer: &'t typos::tokens::Tokenizer,
|
||||
pub dict: &'d dyn typos::Dictionary,
|
||||
pub ignore: &'i [regex::Regex],
|
||||
}
|
||||
|
||||
impl<'t, 'd> Policy<'t, 'd> {
|
||||
impl<'t, 'd, 'i> Policy<'t, 'd, 'i> {
|
||||
pub fn new() -> Self {
|
||||
Default::default()
|
||||
}
|
||||
|
@ -350,8 +365,9 @@ impl<'t, 'd> Policy<'t, 'd> {
|
|||
static DEFAULT_TOKENIZER: once_cell::sync::Lazy<typos::tokens::Tokenizer> =
|
||||
once_cell::sync::Lazy::new(typos::tokens::Tokenizer::new);
|
||||
static DEFAULT_DICT: crate::dict::BuiltIn = crate::dict::BuiltIn::new(crate::config::Locale::En);
|
||||
static DEFAULT_IGNORE: &[regex::Regex] = &[];
|
||||
|
||||
impl<'t, 'd> Default for Policy<'t, 'd> {
|
||||
impl<'t, 'd, 'i> Default for Policy<'t, 'd, 'i> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
check_filenames: true,
|
||||
|
@ -360,6 +376,7 @@ impl<'t, 'd> Default for Policy<'t, 'd> {
|
|||
binary: false,
|
||||
tokenizer: &DEFAULT_TOKENIZER,
|
||||
dict: &DEFAULT_DICT,
|
||||
ignore: DEFAULT_IGNORE,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
[files]
|
||||
extend-exclude = ["_typos.toml"]
|
||||
|
||||
[default]
|
||||
extend-ignore-re = ["`.*`"]
|
||||
|
||||
[default.extend-identifiers]
|
||||
hello = "goodbye"
|
|
@ -0,0 +1 @@
|
|||
hello `hello`
|
12
crates/typos-cli/tests/cmd/extend-ignore-re.toml
Normal file
12
crates/typos-cli/tests/cmd/extend-ignore-re.toml
Normal file
|
@ -0,0 +1,12 @@
|
|||
bin.name = "typos"
|
||||
stdin = ""
|
||||
stdout = """
|
||||
error: `hello` should be `goodbye`
|
||||
--> ./file.ignore:1:1
|
||||
|
|
||||
1 | hello `hello`
|
||||
| ^^^^^
|
||||
|
|
||||
"""
|
||||
stderr = ""
|
||||
status.code = 2
|
|
@ -86,6 +86,12 @@ impl<'m> Typo<'m> {
|
|||
corrections: self.corrections.borrow(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn span(&self) -> std::ops::Range<usize> {
|
||||
let start = self.byte_offset;
|
||||
let end = start + self.typo.len();
|
||||
start..end
|
||||
}
|
||||
}
|
||||
|
||||
impl<'m> Default for Typo<'m> {
|
||||
|
|
|
@ -634,6 +634,13 @@ impl<'t> Identifier<'t> {
|
|||
self.offset
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn span(&self) -> std::ops::Range<usize> {
|
||||
let start = self.offset;
|
||||
let end = start + self.token.len();
|
||||
start..end
|
||||
}
|
||||
|
||||
/// Split into individual Words.
|
||||
#[inline]
|
||||
pub fn split(&self) -> impl Iterator<Item = Word<'t>> {
|
||||
|
@ -702,6 +709,13 @@ impl<'t> Word<'t> {
|
|||
pub fn offset(&self) -> usize {
|
||||
self.offset
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn span(&self) -> std::ops::Range<usize> {
|
||||
let start = self.offset;
|
||||
let end = start + self.token.len();
|
||||
start..end
|
||||
}
|
||||
}
|
||||
|
||||
struct SplitIdent<'s> {
|
||||
|
|
|
@ -27,6 +27,7 @@ Configuration is read from the following (in precedence order)
|
|||
| default.check-file | \- | bool | Verifying spelling in files. |
|
||||
| default.unicode | --unicode | bool | Allow unicode characters in identifiers (and not just ASCII) |
|
||||
| default.locale | --locale | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
|
||||
| default.extend-ignore-re | \- | list of [regexes](https://docs.rs/regex/latest/regex/index.html#syntax) | Custom uncorrectable sections (e.g. markdown code fences, PGP signatures, etc) |
|
||||
| default.extend-identifiers | \- | table of strings | Corrections for [identifiers](./design.md#identifiers-and-words). When the correction is blank, the identifier is never valid. When the correction is the key, the identifier is always valid. |
|
||||
| default.extend-ignore-identifiers-re | \- | list of [regexes](https://docs.rs/regex/latest/regex/index.html#syntax) | Pattern-match always-valid identifiers |
|
||||
| default.extend-words | \- | table of strings | Corrections for [words](./design.md#identifiers-and-words). When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. |
|
||||
|
|
Loading…
Reference in a new issue