mirror of
https://github.com/crate-ci/typos.git
synced 2025-01-24 07:28:57 -05:00
Merge pull request #218 from epage/types
refactor(cli): Prepare for more advanced config
This commit is contained in:
commit
a148054a49
11 changed files with 579 additions and 301 deletions
18
Cargo.lock
generated
18
Cargo.lock
generated
|
@ -724,6 +724,15 @@ dependencies = [
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "kstring"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1167388385b43067bd74f967def6c93b969284f14f41e2ab6035b715d9343215"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lazy_static"
|
name = "lazy_static"
|
||||||
version = "1.4.0"
|
version = "1.4.0"
|
||||||
|
@ -1465,6 +1474,12 @@ version = "0.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
|
checksum = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typed-arena"
|
||||||
|
version = "2.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0685c84d5d54d1c26f7d3eb96cd41550adb97baed141a761cf335d3d33bcd0ae"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "typos"
|
name = "typos"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
|
@ -1499,7 +1514,9 @@ dependencies = [
|
||||||
"human-panic",
|
"human-panic",
|
||||||
"ignore",
|
"ignore",
|
||||||
"itertools 0.10.0",
|
"itertools 0.10.0",
|
||||||
|
"kstring",
|
||||||
"log",
|
"log",
|
||||||
|
"once_cell",
|
||||||
"phf",
|
"phf",
|
||||||
"predicates",
|
"predicates",
|
||||||
"proc-exit",
|
"proc-exit",
|
||||||
|
@ -1507,6 +1524,7 @@ dependencies = [
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"structopt",
|
"structopt",
|
||||||
"toml",
|
"toml",
|
||||||
|
"typed-arena",
|
||||||
"typos",
|
"typos",
|
||||||
"typos-dict",
|
"typos-dict",
|
||||||
"typos-vars",
|
"typos-vars",
|
||||||
|
|
|
@ -46,6 +46,7 @@ toml = "0.5"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
env_logger = "0.8"
|
env_logger = "0.8"
|
||||||
bstr = "0.2"
|
bstr = "0.2"
|
||||||
|
once_cell = "1.2.0"
|
||||||
ahash = "0.7"
|
ahash = "0.7"
|
||||||
difflib = "0.4"
|
difflib = "0.4"
|
||||||
proc-exit = "1.0"
|
proc-exit = "1.0"
|
||||||
|
@ -57,6 +58,8 @@ derive_setters = "0.1"
|
||||||
itertools = "0.10"
|
itertools = "0.10"
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
encoding = "0.2"
|
encoding = "0.2"
|
||||||
|
kstring = "1.0"
|
||||||
|
typed-arena = "2.0.1"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
assert_fs = "1.0"
|
assert_fs = "1.0"
|
||||||
|
|
|
@ -5,6 +5,12 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||||
use typos_cli::file::FileChecker;
|
use typos_cli::file::FileChecker;
|
||||||
|
|
||||||
fn bench_checks(c: &mut Criterion) {
|
fn bench_checks(c: &mut Criterion) {
|
||||||
|
let dict = typos_cli::dict::BuiltIn::new(Default::default());
|
||||||
|
let tokenizer = typos::tokens::Tokenizer::new();
|
||||||
|
let policy = typos_cli::policy::Policy::new()
|
||||||
|
.dict(&dict)
|
||||||
|
.tokenizer(&tokenizer);
|
||||||
|
|
||||||
let mut group = c.benchmark_group("checks");
|
let mut group = c.benchmark_group("checks");
|
||||||
for (name, sample) in data::DATA {
|
for (name, sample) in data::DATA {
|
||||||
let len = sample.len();
|
let len = sample.len();
|
||||||
|
@ -13,16 +19,11 @@ fn bench_checks(c: &mut Criterion) {
|
||||||
let sample_path = temp.child("sample");
|
let sample_path = temp.child("sample");
|
||||||
sample_path.write_str(sample).unwrap();
|
sample_path.write_str(sample).unwrap();
|
||||||
|
|
||||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
|
||||||
let parser = typos::tokens::Tokenizer::new();
|
|
||||||
let settings = typos_cli::file::CheckSettings::new();
|
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
typos_cli::file::FoundFiles.check_file(
|
typos_cli::file::FoundFiles.check_file(
|
||||||
sample_path.path(),
|
sample_path.path(),
|
||||||
true,
|
true,
|
||||||
&settings,
|
&policy,
|
||||||
&parser,
|
|
||||||
&corrections,
|
|
||||||
&typos_cli::report::PrintSilent,
|
&typos_cli::report::PrintSilent,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
@ -34,16 +35,11 @@ fn bench_checks(c: &mut Criterion) {
|
||||||
let sample_path = temp.child("sample");
|
let sample_path = temp.child("sample");
|
||||||
sample_path.write_str(sample).unwrap();
|
sample_path.write_str(sample).unwrap();
|
||||||
|
|
||||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
|
||||||
let parser = typos::tokens::Tokenizer::new();
|
|
||||||
let settings = typos_cli::file::CheckSettings::new();
|
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
typos_cli::file::Identifiers.check_file(
|
typos_cli::file::Identifiers.check_file(
|
||||||
sample_path.path(),
|
sample_path.path(),
|
||||||
true,
|
true,
|
||||||
&settings,
|
&policy,
|
||||||
&parser,
|
|
||||||
&corrections,
|
|
||||||
&typos_cli::report::PrintSilent,
|
&typos_cli::report::PrintSilent,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
@ -55,16 +51,11 @@ fn bench_checks(c: &mut Criterion) {
|
||||||
let sample_path = temp.child("sample");
|
let sample_path = temp.child("sample");
|
||||||
sample_path.write_str(sample).unwrap();
|
sample_path.write_str(sample).unwrap();
|
||||||
|
|
||||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
|
||||||
let parser = typos::tokens::Tokenizer::new();
|
|
||||||
let settings = typos_cli::file::CheckSettings::new();
|
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
typos_cli::file::Words.check_file(
|
typos_cli::file::Words.check_file(
|
||||||
sample_path.path(),
|
sample_path.path(),
|
||||||
true,
|
true,
|
||||||
&settings,
|
&policy,
|
||||||
&parser,
|
|
||||||
&corrections,
|
|
||||||
&typos_cli::report::PrintSilent,
|
&typos_cli::report::PrintSilent,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
@ -76,16 +67,11 @@ fn bench_checks(c: &mut Criterion) {
|
||||||
let sample_path = temp.child("sample");
|
let sample_path = temp.child("sample");
|
||||||
sample_path.write_str(sample).unwrap();
|
sample_path.write_str(sample).unwrap();
|
||||||
|
|
||||||
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
|
|
||||||
let parser = typos::tokens::Tokenizer::new();
|
|
||||||
let settings = typos_cli::file::CheckSettings::new();
|
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
typos_cli::file::Typos.check_file(
|
typos_cli::file::Typos.check_file(
|
||||||
sample_path.path(),
|
sample_path.path(),
|
||||||
true,
|
true,
|
||||||
&settings,
|
&policy,
|
||||||
&parser,
|
|
||||||
&corrections,
|
|
||||||
&typos_cli::report::PrintSilent,
|
&typos_cli::report::PrintSilent,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
|
|
@ -28,6 +28,6 @@ Configuration is read from the following (in precedence order)
|
||||||
| default.identifier-include-digits | \- | bool | Allow identifiers to include digits, in addition to letters. |
|
| default.identifier-include-digits | \- | bool | Allow identifiers to include digits, in addition to letters. |
|
||||||
| default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. |
|
| default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. |
|
||||||
| default.identifier-include-chars | \- | string | Allow identifiers to include these characters. |
|
| default.identifier-include-chars | \- | string | Allow identifiers to include these characters. |
|
||||||
| default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
|
| default.locale | --locale | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
|
||||||
| default.extend-identifiers | \- | table of strings | Corrections for identifiers. When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. |
|
| default.extend-identifiers | \- | table of strings | Corrections for identifiers. When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. |
|
||||||
| default.extend-words | \- | table of strings | Corrections for words. When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. |
|
| default.extend-words | \- | table of strings | Corrections for words. When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. |
|
||||||
|
|
21
src/args.rs
21
src/args.rs
|
@ -101,7 +101,7 @@ pub(crate) struct Args {
|
||||||
pub(crate) verbose: clap_verbosity_flag::Verbosity,
|
pub(crate) verbose: clap_verbosity_flag::Verbosity,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, Clone, StructOpt)]
|
||||||
#[structopt(rename_all = "kebab-case")]
|
#[structopt(rename_all = "kebab-case")]
|
||||||
pub(crate) struct FileArgs {
|
pub(crate) struct FileArgs {
|
||||||
#[structopt(long, overrides_with("no-binary"))]
|
#[structopt(long, overrides_with("no-binary"))]
|
||||||
|
@ -122,12 +122,6 @@ pub(crate) struct FileArgs {
|
||||||
#[structopt(long, overrides_with("no-check-files"), hidden(true))]
|
#[structopt(long, overrides_with("no-check-files"), hidden(true))]
|
||||||
check_files: bool,
|
check_files: bool,
|
||||||
|
|
||||||
#[structopt(long, overrides_with("hex"))]
|
|
||||||
/// Don't try to detect that an identifier looks like hex
|
|
||||||
no_hex: bool,
|
|
||||||
#[structopt(long, overrides_with("no-hex"), hidden(true))]
|
|
||||||
hex: bool,
|
|
||||||
|
|
||||||
#[structopt(
|
#[structopt(
|
||||||
long,
|
long,
|
||||||
possible_values(&config::Locale::variants()),
|
possible_values(&config::Locale::variants()),
|
||||||
|
@ -135,7 +129,7 @@ pub(crate) struct FileArgs {
|
||||||
pub(crate) locale: Option<config::Locale>,
|
pub(crate) locale: Option<config::Locale>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl config::FileSource for FileArgs {
|
impl config::EngineSource for FileArgs {
|
||||||
fn binary(&self) -> Option<bool> {
|
fn binary(&self) -> Option<bool> {
|
||||||
match (self.binary, self.no_binary) {
|
match (self.binary, self.no_binary) {
|
||||||
(true, false) => Some(true),
|
(true, false) => Some(true),
|
||||||
|
@ -163,15 +157,12 @@ impl config::FileSource for FileArgs {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ignore_hex(&self) -> Option<bool> {
|
fn dict(&self) -> Option<&dyn config::DictSource> {
|
||||||
match (self.hex, self.no_hex) {
|
Some(self)
|
||||||
(true, false) => Some(true),
|
|
||||||
(false, true) => Some(false),
|
|
||||||
(false, false) => None,
|
|
||||||
(_, _) => unreachable!("StructOpt should make this impossible"),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl config::DictSource for FileArgs {
|
||||||
fn locale(&self) -> Option<config::Locale> {
|
fn locale(&self) -> Option<config::Locale> {
|
||||||
self.locale
|
self.locale
|
||||||
}
|
}
|
||||||
|
|
296
src/config.rs
296
src/config.rs
|
@ -1,12 +1,11 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::Read;
|
|
||||||
|
|
||||||
pub trait ConfigSource {
|
pub trait ConfigSource {
|
||||||
fn walk(&self) -> Option<&dyn WalkSource> {
|
fn walk(&self) -> Option<&dyn WalkSource> {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default(&self) -> Option<&dyn FileSource> {
|
fn default(&self) -> Option<&dyn EngineSource> {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -43,7 +42,7 @@ pub trait WalkSource {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait FileSource {
|
pub trait EngineSource {
|
||||||
/// Check binary files.
|
/// Check binary files.
|
||||||
fn binary(&self) -> Option<bool> {
|
fn binary(&self) -> Option<bool> {
|
||||||
None
|
None
|
||||||
|
@ -59,6 +58,16 @@ pub trait FileSource {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn tokenizer(&self) -> Option<&dyn TokenizerSource> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dict(&self) -> Option<&dyn DictSource> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait TokenizerSource {
|
||||||
/// Do not check identifiers that appear to be hexadecimal values.
|
/// Do not check identifiers that appear to be hexadecimal values.
|
||||||
fn ignore_hex(&self) -> Option<bool> {
|
fn ignore_hex(&self) -> Option<bool> {
|
||||||
None
|
None
|
||||||
|
@ -83,7 +92,9 @@ pub trait FileSource {
|
||||||
fn identifier_include_chars(&self) -> Option<&str> {
|
fn identifier_include_chars(&self) -> Option<&str> {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait DictSource {
|
||||||
fn locale(&self) -> Option<Locale> {
|
fn locale(&self) -> Option<Locale> {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
@ -102,14 +113,23 @@ pub trait FileSource {
|
||||||
#[serde(rename_all = "kebab-case")]
|
#[serde(rename_all = "kebab-case")]
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
pub files: Walk,
|
pub files: Walk,
|
||||||
pub default: FileConfig,
|
pub default: EngineConfig,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Config {
|
impl Config {
|
||||||
|
pub fn from_dir(cwd: &std::path::Path) -> Result<Option<Self>, anyhow::Error> {
|
||||||
|
let config = if let Some(path) =
|
||||||
|
find_project_file(cwd, &["typos.toml", "_typos.toml", ".typos.toml"])
|
||||||
|
{
|
||||||
|
Some(Self::from_file(&path)?)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
Ok(config)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn from_file(path: &std::path::Path) -> Result<Self, anyhow::Error> {
|
pub fn from_file(path: &std::path::Path) -> Result<Self, anyhow::Error> {
|
||||||
let mut file = std::fs::File::open(path)?;
|
let s = std::fs::read_to_string(path)?;
|
||||||
let mut s = String::new();
|
|
||||||
file.read_to_string(&mut s)?;
|
|
||||||
Self::from_toml(&s)
|
Self::from_toml(&s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -121,15 +141,7 @@ impl Config {
|
||||||
pub fn from_defaults() -> Self {
|
pub fn from_defaults() -> Self {
|
||||||
Self {
|
Self {
|
||||||
files: Walk::from_defaults(),
|
files: Walk::from_defaults(),
|
||||||
default: FileConfig::from_defaults(),
|
default: EngineConfig::from_defaults(),
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn derive(cwd: &std::path::Path) -> Result<Self, anyhow::Error> {
|
|
||||||
if let Some(path) = find_project_file(cwd, &["typos.toml", "_typos.toml", ".typos.toml"]) {
|
|
||||||
Self::from_file(&path)
|
|
||||||
} else {
|
|
||||||
Ok(Default::default())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -148,7 +160,7 @@ impl ConfigSource for Config {
|
||||||
Some(&self.files)
|
Some(&self.files)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default(&self) -> Option<&dyn FileSource> {
|
fn default(&self) -> Option<&dyn EngineSource> {
|
||||||
Some(&self.default)
|
Some(&self.default)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -257,39 +269,33 @@ impl WalkSource for Walk {
|
||||||
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
|
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
|
||||||
#[serde(deny_unknown_fields, default)]
|
#[serde(deny_unknown_fields, default)]
|
||||||
#[serde(rename_all = "kebab-case")]
|
#[serde(rename_all = "kebab-case")]
|
||||||
pub struct FileConfig {
|
pub struct EngineConfig {
|
||||||
pub binary: Option<bool>,
|
pub binary: Option<bool>,
|
||||||
pub check_filename: Option<bool>,
|
pub check_filename: Option<bool>,
|
||||||
pub check_file: Option<bool>,
|
pub check_file: Option<bool>,
|
||||||
pub ignore_hex: Option<bool>,
|
#[serde(flatten)]
|
||||||
pub identifier_leading_digits: Option<bool>,
|
pub tokenizer: Option<TokenizerConfig>,
|
||||||
pub identifier_leading_chars: Option<String>,
|
#[serde(flatten)]
|
||||||
pub identifier_include_digits: Option<bool>,
|
pub dict: Option<DictConfig>,
|
||||||
pub identifier_include_chars: Option<String>,
|
|
||||||
pub locale: Option<Locale>,
|
|
||||||
pub extend_identifiers: HashMap<String, String>,
|
|
||||||
pub extend_words: HashMap<String, String>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FileConfig {
|
impl EngineConfig {
|
||||||
pub fn from_defaults() -> Self {
|
pub fn from_defaults() -> Self {
|
||||||
let empty = Self::default();
|
let empty = Self::default();
|
||||||
FileConfig {
|
EngineConfig {
|
||||||
binary: Some(empty.binary()),
|
binary: Some(empty.binary()),
|
||||||
check_filename: Some(empty.check_filename()),
|
check_filename: Some(empty.check_filename()),
|
||||||
check_file: Some(empty.check_file()),
|
check_file: Some(empty.check_file()),
|
||||||
ignore_hex: Some(empty.ignore_hex()),
|
tokenizer: Some(
|
||||||
identifier_leading_digits: Some(empty.identifier_leading_digits()),
|
empty
|
||||||
identifier_leading_chars: Some(empty.identifier_leading_chars().to_owned()),
|
.tokenizer
|
||||||
identifier_include_digits: Some(empty.identifier_include_digits()),
|
.unwrap_or_else(TokenizerConfig::from_defaults),
|
||||||
identifier_include_chars: Some(empty.identifier_include_chars().to_owned()),
|
),
|
||||||
locale: Some(empty.locale()),
|
dict: Some(empty.dict.unwrap_or_else(DictConfig::from_defaults)),
|
||||||
extend_identifiers: Default::default(),
|
|
||||||
extend_words: Default::default(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn update(&mut self, source: &dyn FileSource) {
|
pub fn update(&mut self, source: &dyn EngineSource) {
|
||||||
if let Some(source) = source.binary() {
|
if let Some(source) = source.binary() {
|
||||||
self.binary = Some(source);
|
self.binary = Some(source);
|
||||||
}
|
}
|
||||||
|
@ -299,34 +305,22 @@ impl FileConfig {
|
||||||
if let Some(source) = source.check_file() {
|
if let Some(source) = source.check_file() {
|
||||||
self.check_file = Some(source);
|
self.check_file = Some(source);
|
||||||
}
|
}
|
||||||
if let Some(source) = source.ignore_hex() {
|
if let Some(source) = source.tokenizer() {
|
||||||
self.ignore_hex = Some(source);
|
let mut tokenizer = None;
|
||||||
|
std::mem::swap(&mut tokenizer, &mut self.tokenizer);
|
||||||
|
let mut tokenizer = tokenizer.unwrap_or_default();
|
||||||
|
tokenizer.update(source);
|
||||||
|
let mut tokenizer = Some(tokenizer);
|
||||||
|
std::mem::swap(&mut tokenizer, &mut self.tokenizer);
|
||||||
}
|
}
|
||||||
if let Some(source) = source.identifier_leading_digits() {
|
if let Some(source) = source.dict() {
|
||||||
self.identifier_leading_digits = Some(source);
|
let mut dict = None;
|
||||||
|
std::mem::swap(&mut dict, &mut self.dict);
|
||||||
|
let mut dict = dict.unwrap_or_default();
|
||||||
|
dict.update(source);
|
||||||
|
let mut dict = Some(dict);
|
||||||
|
std::mem::swap(&mut dict, &mut self.dict);
|
||||||
}
|
}
|
||||||
if let Some(source) = source.identifier_leading_chars() {
|
|
||||||
self.identifier_leading_chars = Some(source.to_owned());
|
|
||||||
}
|
|
||||||
if let Some(source) = source.identifier_include_digits() {
|
|
||||||
self.identifier_include_digits = Some(source);
|
|
||||||
}
|
|
||||||
if let Some(source) = source.identifier_include_chars() {
|
|
||||||
self.identifier_include_chars = Some(source.to_owned());
|
|
||||||
}
|
|
||||||
if let Some(source) = source.locale() {
|
|
||||||
self.locale = Some(source);
|
|
||||||
}
|
|
||||||
self.extend_identifiers.extend(
|
|
||||||
source
|
|
||||||
.extend_identifiers()
|
|
||||||
.map(|(k, v)| (k.to_owned(), v.to_owned())),
|
|
||||||
);
|
|
||||||
self.extend_words.extend(
|
|
||||||
source
|
|
||||||
.extend_words()
|
|
||||||
.map(|(k, v)| (k.to_owned(), v.to_owned())),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn binary(&self) -> bool {
|
pub fn binary(&self) -> bool {
|
||||||
|
@ -340,6 +334,74 @@ impl FileConfig {
|
||||||
pub fn check_file(&self) -> bool {
|
pub fn check_file(&self) -> bool {
|
||||||
self.check_file.unwrap_or(true)
|
self.check_file.unwrap_or(true)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EngineSource for EngineConfig {
|
||||||
|
fn binary(&self) -> Option<bool> {
|
||||||
|
self.binary
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_filename(&self) -> Option<bool> {
|
||||||
|
self.check_filename
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_file(&self) -> Option<bool> {
|
||||||
|
self.check_file
|
||||||
|
}
|
||||||
|
|
||||||
|
fn tokenizer(&self) -> Option<&dyn TokenizerSource> {
|
||||||
|
self.tokenizer.as_ref().map(|t| t as &dyn TokenizerSource)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dict(&self) -> Option<&dyn DictSource> {
|
||||||
|
self.dict.as_ref().map(|d| d as &dyn DictSource)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
|
||||||
|
#[serde(deny_unknown_fields, default)]
|
||||||
|
#[serde(rename_all = "kebab-case")]
|
||||||
|
pub struct TokenizerConfig {
|
||||||
|
pub ignore_hex: Option<bool>,
|
||||||
|
pub identifier_leading_digits: Option<bool>,
|
||||||
|
pub identifier_leading_chars: Option<kstring::KString>,
|
||||||
|
pub identifier_include_digits: Option<bool>,
|
||||||
|
pub identifier_include_chars: Option<kstring::KString>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TokenizerConfig {
|
||||||
|
pub fn from_defaults() -> Self {
|
||||||
|
let empty = Self::default();
|
||||||
|
Self {
|
||||||
|
ignore_hex: Some(empty.ignore_hex()),
|
||||||
|
identifier_leading_digits: Some(empty.identifier_leading_digits()),
|
||||||
|
identifier_leading_chars: Some(kstring::KString::from_ref(
|
||||||
|
empty.identifier_leading_chars(),
|
||||||
|
)),
|
||||||
|
identifier_include_digits: Some(empty.identifier_include_digits()),
|
||||||
|
identifier_include_chars: Some(kstring::KString::from_ref(
|
||||||
|
empty.identifier_include_chars(),
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn update(&mut self, source: &dyn TokenizerSource) {
|
||||||
|
if let Some(source) = source.ignore_hex() {
|
||||||
|
self.ignore_hex = Some(source);
|
||||||
|
}
|
||||||
|
if let Some(source) = source.identifier_leading_digits() {
|
||||||
|
self.identifier_leading_digits = Some(source);
|
||||||
|
}
|
||||||
|
if let Some(source) = source.identifier_leading_chars() {
|
||||||
|
self.identifier_leading_chars = Some(kstring::KString::from_ref(source));
|
||||||
|
}
|
||||||
|
if let Some(source) = source.identifier_include_digits() {
|
||||||
|
self.identifier_include_digits = Some(source);
|
||||||
|
}
|
||||||
|
if let Some(source) = source.identifier_include_chars() {
|
||||||
|
self.identifier_include_chars = Some(kstring::KString::from_ref(source));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn ignore_hex(&self) -> bool {
|
pub fn ignore_hex(&self) -> bool {
|
||||||
self.ignore_hex.unwrap_or(true)
|
self.ignore_hex.unwrap_or(true)
|
||||||
|
@ -360,6 +422,64 @@ impl FileConfig {
|
||||||
pub fn identifier_include_chars(&self) -> &str {
|
pub fn identifier_include_chars(&self) -> &str {
|
||||||
self.identifier_include_chars.as_deref().unwrap_or("_'")
|
self.identifier_include_chars.as_deref().unwrap_or("_'")
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TokenizerSource for TokenizerConfig {
|
||||||
|
fn ignore_hex(&self) -> Option<bool> {
|
||||||
|
self.ignore_hex
|
||||||
|
}
|
||||||
|
|
||||||
|
fn identifier_leading_digits(&self) -> Option<bool> {
|
||||||
|
self.identifier_leading_digits
|
||||||
|
}
|
||||||
|
|
||||||
|
fn identifier_leading_chars(&self) -> Option<&str> {
|
||||||
|
self.identifier_leading_chars.as_deref()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn identifier_include_digits(&self) -> Option<bool> {
|
||||||
|
self.identifier_include_digits
|
||||||
|
}
|
||||||
|
|
||||||
|
fn identifier_include_chars(&self) -> Option<&str> {
|
||||||
|
self.identifier_include_chars.as_deref()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
|
||||||
|
#[serde(deny_unknown_fields, default)]
|
||||||
|
#[serde(rename_all = "kebab-case")]
|
||||||
|
pub struct DictConfig {
|
||||||
|
pub locale: Option<Locale>,
|
||||||
|
pub extend_identifiers: HashMap<kstring::KString, kstring::KString>,
|
||||||
|
pub extend_words: HashMap<kstring::KString, kstring::KString>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DictConfig {
|
||||||
|
pub fn from_defaults() -> Self {
|
||||||
|
let empty = Self::default();
|
||||||
|
Self {
|
||||||
|
locale: Some(empty.locale()),
|
||||||
|
extend_identifiers: Default::default(),
|
||||||
|
extend_words: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn update(&mut self, source: &dyn DictSource) {
|
||||||
|
if let Some(source) = source.locale() {
|
||||||
|
self.locale = Some(source);
|
||||||
|
}
|
||||||
|
self.extend_identifiers.extend(
|
||||||
|
source
|
||||||
|
.extend_identifiers()
|
||||||
|
.map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))),
|
||||||
|
);
|
||||||
|
self.extend_words.extend(
|
||||||
|
source
|
||||||
|
.extend_words()
|
||||||
|
.map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
pub fn locale(&self) -> Locale {
|
pub fn locale(&self) -> Locale {
|
||||||
self.locale.unwrap_or_default()
|
self.locale.unwrap_or_default()
|
||||||
|
@ -382,39 +502,7 @@ impl FileConfig {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FileSource for FileConfig {
|
impl DictSource for DictConfig {
|
||||||
fn binary(&self) -> Option<bool> {
|
|
||||||
self.binary
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_filename(&self) -> Option<bool> {
|
|
||||||
self.check_filename
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_file(&self) -> Option<bool> {
|
|
||||||
self.check_file
|
|
||||||
}
|
|
||||||
|
|
||||||
fn ignore_hex(&self) -> Option<bool> {
|
|
||||||
self.ignore_hex
|
|
||||||
}
|
|
||||||
|
|
||||||
fn identifier_leading_digits(&self) -> Option<bool> {
|
|
||||||
self.identifier_leading_digits
|
|
||||||
}
|
|
||||||
|
|
||||||
fn identifier_leading_chars(&self) -> Option<&str> {
|
|
||||||
self.identifier_leading_chars.as_deref()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn identifier_include_digits(&self) -> Option<bool> {
|
|
||||||
self.identifier_include_digits
|
|
||||||
}
|
|
||||||
|
|
||||||
fn identifier_include_chars(&self) -> Option<&str> {
|
|
||||||
self.identifier_include_chars.as_deref()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn locale(&self) -> Option<Locale> {
|
fn locale(&self) -> Option<Locale> {
|
||||||
self.locale
|
self.locale
|
||||||
}
|
}
|
||||||
|
@ -437,13 +525,11 @@ impl FileSource for FileConfig {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn find_project_file(dir: &std::path::Path, names: &[&str]) -> Option<std::path::PathBuf> {
|
fn find_project_file(dir: &std::path::Path, names: &[&str]) -> Option<std::path::PathBuf> {
|
||||||
for ancestor in dir.ancestors() {
|
let mut file_path = dir.join("placeholder");
|
||||||
let mut file_path = ancestor.join("placeholder");
|
for name in names {
|
||||||
for name in names {
|
file_path.set_file_name(name);
|
||||||
file_path.set_file_name(name);
|
if file_path.exists() {
|
||||||
if file_path.exists() {
|
return Some(file_path);
|
||||||
return Some(file_path);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
|
@ -460,7 +546,7 @@ pub enum Locale {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Locale {
|
impl Locale {
|
||||||
pub fn category(self) -> Option<typos_vars::Category> {
|
pub const fn category(self) -> Option<typos_vars::Category> {
|
||||||
match self {
|
match self {
|
||||||
Locale::En => None,
|
Locale::En => None,
|
||||||
Locale::EnUs => Some(typos_vars::Category::American),
|
Locale::EnUs => Some(typos_vars::Category::American),
|
||||||
|
@ -470,7 +556,7 @@ impl Locale {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn variants() -> [&'static str; 5] {
|
pub const fn variants() -> [&'static str; 5] {
|
||||||
["en", "en-us", "en-gb", "en-ca", "en-au"]
|
["en", "en-us", "en-gb", "en-ca", "en-au"]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,7 +12,7 @@ pub struct BuiltIn {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BuiltIn {
|
impl BuiltIn {
|
||||||
pub fn new(locale: crate::config::Locale) -> Self {
|
pub const fn new(locale: crate::config::Locale) -> Self {
|
||||||
Self {
|
Self {
|
||||||
locale: locale.category(),
|
locale: locale.category(),
|
||||||
}
|
}
|
||||||
|
|
149
src/file.rs
149
src/file.rs
|
@ -4,59 +4,17 @@ use std::io::Read;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
|
|
||||||
use crate::report;
|
use crate::report;
|
||||||
use typos::tokens;
|
|
||||||
use typos::Dictionary;
|
|
||||||
|
|
||||||
pub trait FileChecker: Send + Sync {
|
pub trait FileChecker: Send + Sync {
|
||||||
fn check_file(
|
fn check_file(
|
||||||
&self,
|
&self,
|
||||||
path: &std::path::Path,
|
path: &std::path::Path,
|
||||||
explicit: bool,
|
explicit: bool,
|
||||||
settings: &CheckSettings,
|
policy: &crate::policy::Policy,
|
||||||
tokenizer: &tokens::Tokenizer,
|
|
||||||
dictionary: &dyn Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), std::io::Error>;
|
) -> Result<(), std::io::Error>;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
||||||
pub struct CheckSettings {
|
|
||||||
check_filenames: bool,
|
|
||||||
check_files: bool,
|
|
||||||
binary: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl CheckSettings {
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Default::default()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn check_filenames(&mut self, yes: bool) -> &mut Self {
|
|
||||||
self.check_filenames = yes;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn check_files(&mut self, yes: bool) -> &mut Self {
|
|
||||||
self.check_files = yes;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn binary(&mut self, yes: bool) -> &mut Self {
|
|
||||||
self.binary = yes;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for CheckSettings {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self {
|
|
||||||
check_filenames: true,
|
|
||||||
check_files: true,
|
|
||||||
binary: false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
pub struct Typos;
|
pub struct Typos;
|
||||||
|
|
||||||
|
@ -65,14 +23,12 @@ impl FileChecker for Typos {
|
||||||
&self,
|
&self,
|
||||||
path: &std::path::Path,
|
path: &std::path::Path,
|
||||||
explicit: bool,
|
explicit: bool,
|
||||||
settings: &CheckSettings,
|
policy: &crate::policy::Policy,
|
||||||
tokenizer: &tokens::Tokenizer,
|
|
||||||
dictionary: &dyn Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), std::io::Error> {
|
) -> Result<(), std::io::Error> {
|
||||||
if settings.check_filenames {
|
if policy.check_filenames {
|
||||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||||
for typo in typos::check_str(file_name, tokenizer, dictionary) {
|
for typo in typos::check_str(file_name, policy.tokenizer, policy.dict) {
|
||||||
let msg = report::Typo {
|
let msg = report::Typo {
|
||||||
context: Some(report::PathContext { path }.into()),
|
context: Some(report::PathContext { path }.into()),
|
||||||
buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
|
buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
|
||||||
|
@ -85,14 +41,14 @@ impl FileChecker for Typos {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if settings.check_files {
|
if policy.check_files {
|
||||||
let (buffer, content_type) = read_file(path, reporter)?;
|
let (buffer, content_type) = read_file(path, reporter)?;
|
||||||
if !explicit && !settings.binary && content_type.is_binary() {
|
if !explicit && !policy.binary && content_type.is_binary() {
|
||||||
let msg = report::BinaryFile { path };
|
let msg = report::BinaryFile { path };
|
||||||
reporter.report(msg.into())?;
|
reporter.report(msg.into())?;
|
||||||
} else {
|
} else {
|
||||||
let mut accum_line_num = AccumulateLineNum::new();
|
let mut accum_line_num = AccumulateLineNum::new();
|
||||||
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
|
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
|
||||||
let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
|
let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
|
||||||
let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
|
let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
|
||||||
let msg = report::Typo {
|
let msg = report::Typo {
|
||||||
|
@ -119,20 +75,18 @@ impl FileChecker for FixTypos {
|
||||||
&self,
|
&self,
|
||||||
path: &std::path::Path,
|
path: &std::path::Path,
|
||||||
explicit: bool,
|
explicit: bool,
|
||||||
settings: &CheckSettings,
|
policy: &crate::policy::Policy,
|
||||||
tokenizer: &tokens::Tokenizer,
|
|
||||||
dictionary: &dyn Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), std::io::Error> {
|
) -> Result<(), std::io::Error> {
|
||||||
if settings.check_files {
|
if policy.check_files {
|
||||||
let (buffer, content_type) = read_file(path, reporter)?;
|
let (buffer, content_type) = read_file(path, reporter)?;
|
||||||
if !explicit && !settings.binary && content_type.is_binary() {
|
if !explicit && !policy.binary && content_type.is_binary() {
|
||||||
let msg = report::BinaryFile { path };
|
let msg = report::BinaryFile { path };
|
||||||
reporter.report(msg.into())?;
|
reporter.report(msg.into())?;
|
||||||
} else {
|
} else {
|
||||||
let mut fixes = Vec::new();
|
let mut fixes = Vec::new();
|
||||||
let mut accum_line_num = AccumulateLineNum::new();
|
let mut accum_line_num = AccumulateLineNum::new();
|
||||||
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
|
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
|
||||||
if is_fixable(&typo) {
|
if is_fixable(&typo) {
|
||||||
fixes.push(typo.into_owned());
|
fixes.push(typo.into_owned());
|
||||||
} else {
|
} else {
|
||||||
|
@ -156,10 +110,10 @@ impl FileChecker for FixTypos {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure the above write can happen before renaming the file.
|
// Ensure the above write can happen before renaming the file.
|
||||||
if settings.check_filenames {
|
if policy.check_filenames {
|
||||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||||
let mut fixes = Vec::new();
|
let mut fixes = Vec::new();
|
||||||
for typo in typos::check_str(file_name, tokenizer, dictionary) {
|
for typo in typos::check_str(file_name, policy.tokenizer, policy.dict) {
|
||||||
if is_fixable(&typo) {
|
if is_fixable(&typo) {
|
||||||
fixes.push(typo.into_owned());
|
fixes.push(typo.into_owned());
|
||||||
} else {
|
} else {
|
||||||
|
@ -196,22 +150,20 @@ impl FileChecker for DiffTypos {
|
||||||
&self,
|
&self,
|
||||||
path: &std::path::Path,
|
path: &std::path::Path,
|
||||||
explicit: bool,
|
explicit: bool,
|
||||||
settings: &CheckSettings,
|
policy: &crate::policy::Policy,
|
||||||
tokenizer: &tokens::Tokenizer,
|
|
||||||
dictionary: &dyn Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), std::io::Error> {
|
) -> Result<(), std::io::Error> {
|
||||||
let mut content = Vec::new();
|
let mut content = Vec::new();
|
||||||
let mut new_content = Vec::new();
|
let mut new_content = Vec::new();
|
||||||
if settings.check_files {
|
if policy.check_files {
|
||||||
let (buffer, content_type) = read_file(path, reporter)?;
|
let (buffer, content_type) = read_file(path, reporter)?;
|
||||||
if !explicit && !settings.binary && content_type.is_binary() {
|
if !explicit && !policy.binary && content_type.is_binary() {
|
||||||
let msg = report::BinaryFile { path };
|
let msg = report::BinaryFile { path };
|
||||||
reporter.report(msg.into())?;
|
reporter.report(msg.into())?;
|
||||||
} else {
|
} else {
|
||||||
let mut fixes = Vec::new();
|
let mut fixes = Vec::new();
|
||||||
let mut accum_line_num = AccumulateLineNum::new();
|
let mut accum_line_num = AccumulateLineNum::new();
|
||||||
for typo in typos::check_bytes(&buffer, tokenizer, dictionary) {
|
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
|
||||||
if is_fixable(&typo) {
|
if is_fixable(&typo) {
|
||||||
fixes.push(typo.into_owned());
|
fixes.push(typo.into_owned());
|
||||||
} else {
|
} else {
|
||||||
|
@ -236,10 +188,10 @@ impl FileChecker for DiffTypos {
|
||||||
|
|
||||||
// Match FixTypos ordering for easy diffing.
|
// Match FixTypos ordering for easy diffing.
|
||||||
let mut new_path = None;
|
let mut new_path = None;
|
||||||
if settings.check_filenames {
|
if policy.check_filenames {
|
||||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||||
let mut fixes = Vec::new();
|
let mut fixes = Vec::new();
|
||||||
for typo in typos::check_str(file_name, tokenizer, dictionary) {
|
for typo in typos::check_str(file_name, policy.tokenizer, policy.dict) {
|
||||||
if is_fixable(&typo) {
|
if is_fixable(&typo) {
|
||||||
fixes.push(typo.into_owned());
|
fixes.push(typo.into_owned());
|
||||||
} else {
|
} else {
|
||||||
|
@ -300,14 +252,12 @@ impl FileChecker for Identifiers {
|
||||||
&self,
|
&self,
|
||||||
path: &std::path::Path,
|
path: &std::path::Path,
|
||||||
explicit: bool,
|
explicit: bool,
|
||||||
settings: &CheckSettings,
|
policy: &crate::policy::Policy,
|
||||||
tokenizer: &tokens::Tokenizer,
|
|
||||||
_dictionary: &dyn Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), std::io::Error> {
|
) -> Result<(), std::io::Error> {
|
||||||
if settings.check_filenames {
|
if policy.check_filenames {
|
||||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||||
for word in tokenizer.parse_str(file_name) {
|
for word in policy.tokenizer.parse_str(file_name) {
|
||||||
let msg = report::Parse {
|
let msg = report::Parse {
|
||||||
context: Some(report::PathContext { path }.into()),
|
context: Some(report::PathContext { path }.into()),
|
||||||
kind: report::ParseKind::Identifier,
|
kind: report::ParseKind::Identifier,
|
||||||
|
@ -318,13 +268,13 @@ impl FileChecker for Identifiers {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if settings.check_files {
|
if policy.check_files {
|
||||||
let (buffer, content_type) = read_file(path, reporter)?;
|
let (buffer, content_type) = read_file(path, reporter)?;
|
||||||
if !explicit && !settings.binary && content_type.is_binary() {
|
if !explicit && !policy.binary && content_type.is_binary() {
|
||||||
let msg = report::BinaryFile { path };
|
let msg = report::BinaryFile { path };
|
||||||
reporter.report(msg.into())?;
|
reporter.report(msg.into())?;
|
||||||
} else {
|
} else {
|
||||||
for word in tokenizer.parse_bytes(&buffer) {
|
for word in policy.tokenizer.parse_bytes(&buffer) {
|
||||||
// HACK: Don't look up the line_num per entry to better match the performance
|
// HACK: Don't look up the line_num per entry to better match the performance
|
||||||
// of Typos for comparison purposes. We don't really get much out of it
|
// of Typos for comparison purposes. We don't really get much out of it
|
||||||
// anyway.
|
// anyway.
|
||||||
|
@ -351,14 +301,16 @@ impl FileChecker for Words {
|
||||||
&self,
|
&self,
|
||||||
path: &std::path::Path,
|
path: &std::path::Path,
|
||||||
explicit: bool,
|
explicit: bool,
|
||||||
settings: &CheckSettings,
|
policy: &crate::policy::Policy,
|
||||||
tokenizer: &tokens::Tokenizer,
|
|
||||||
_dictionary: &dyn Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), std::io::Error> {
|
) -> Result<(), std::io::Error> {
|
||||||
if settings.check_filenames {
|
if policy.check_filenames {
|
||||||
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
|
||||||
for word in tokenizer.parse_str(file_name).flat_map(|i| i.split()) {
|
for word in policy
|
||||||
|
.tokenizer
|
||||||
|
.parse_str(file_name)
|
||||||
|
.flat_map(|i| i.split())
|
||||||
|
{
|
||||||
let msg = report::Parse {
|
let msg = report::Parse {
|
||||||
context: Some(report::PathContext { path }.into()),
|
context: Some(report::PathContext { path }.into()),
|
||||||
kind: report::ParseKind::Word,
|
kind: report::ParseKind::Word,
|
||||||
|
@ -369,13 +321,17 @@ impl FileChecker for Words {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if settings.check_files {
|
if policy.check_files {
|
||||||
let (buffer, content_type) = read_file(path, reporter)?;
|
let (buffer, content_type) = read_file(path, reporter)?;
|
||||||
if !explicit && !settings.binary && content_type.is_binary() {
|
if !explicit && !policy.binary && content_type.is_binary() {
|
||||||
let msg = report::BinaryFile { path };
|
let msg = report::BinaryFile { path };
|
||||||
reporter.report(msg.into())?;
|
reporter.report(msg.into())?;
|
||||||
} else {
|
} else {
|
||||||
for word in tokenizer.parse_bytes(&buffer).flat_map(|i| i.split()) {
|
for word in policy
|
||||||
|
.tokenizer
|
||||||
|
.parse_bytes(&buffer)
|
||||||
|
.flat_map(|i| i.split())
|
||||||
|
{
|
||||||
// HACK: Don't look up the line_num per entry to better match the performance
|
// HACK: Don't look up the line_num per entry to better match the performance
|
||||||
// of Typos for comparison purposes. We don't really get much out of it
|
// of Typos for comparison purposes. We don't really get much out of it
|
||||||
// anyway.
|
// anyway.
|
||||||
|
@ -402,13 +358,11 @@ impl FileChecker for FoundFiles {
|
||||||
&self,
|
&self,
|
||||||
path: &std::path::Path,
|
path: &std::path::Path,
|
||||||
explicit: bool,
|
explicit: bool,
|
||||||
settings: &CheckSettings,
|
policy: &crate::policy::Policy,
|
||||||
_parser: &tokens::Tokenizer,
|
|
||||||
_dictionary: &dyn Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), std::io::Error> {
|
) -> Result<(), std::io::Error> {
|
||||||
// Check `settings.binary` first so we can easily check performance of walking vs reading
|
// Check `policy.binary` first so we can easily check performance of walking vs reading
|
||||||
if settings.binary {
|
if policy.binary {
|
||||||
let msg = report::File::new(path);
|
let msg = report::File::new(path);
|
||||||
reporter.report(msg.into())?;
|
reporter.report(msg.into())?;
|
||||||
} else {
|
} else {
|
||||||
|
@ -598,13 +552,11 @@ fn fix_buffer(mut buffer: Vec<u8>, typos: impl Iterator<Item = typos::Typo<'stat
|
||||||
pub fn walk_path(
|
pub fn walk_path(
|
||||||
walk: ignore::Walk,
|
walk: ignore::Walk,
|
||||||
checks: &dyn FileChecker,
|
checks: &dyn FileChecker,
|
||||||
settings: &CheckSettings,
|
engine: &crate::policy::ConfigEngine,
|
||||||
tokenizer: &typos::tokens::Tokenizer,
|
|
||||||
dictionary: &dyn typos::Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), ignore::Error> {
|
) -> Result<(), ignore::Error> {
|
||||||
for entry in walk {
|
for entry in walk {
|
||||||
walk_entry(entry, checks, settings, tokenizer, dictionary, reporter)?;
|
walk_entry(entry, checks, engine, reporter)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -612,15 +564,13 @@ pub fn walk_path(
|
||||||
pub fn walk_path_parallel(
|
pub fn walk_path_parallel(
|
||||||
walk: ignore::WalkParallel,
|
walk: ignore::WalkParallel,
|
||||||
checks: &dyn FileChecker,
|
checks: &dyn FileChecker,
|
||||||
settings: &CheckSettings,
|
engine: &crate::policy::ConfigEngine,
|
||||||
tokenizer: &typos::tokens::Tokenizer,
|
|
||||||
dictionary: &dyn typos::Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), ignore::Error> {
|
) -> Result<(), ignore::Error> {
|
||||||
let error: std::sync::Mutex<Result<(), ignore::Error>> = std::sync::Mutex::new(Ok(()));
|
let error: std::sync::Mutex<Result<(), ignore::Error>> = std::sync::Mutex::new(Ok(()));
|
||||||
walk.run(|| {
|
walk.run(|| {
|
||||||
Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
|
Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
|
||||||
match walk_entry(entry, checks, settings, tokenizer, dictionary, reporter) {
|
match walk_entry(entry, checks, engine, reporter) {
|
||||||
Ok(()) => ignore::WalkState::Continue,
|
Ok(()) => ignore::WalkState::Continue,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
*error.lock().unwrap() = Err(err);
|
*error.lock().unwrap() = Err(err);
|
||||||
|
@ -636,9 +586,7 @@ pub fn walk_path_parallel(
|
||||||
fn walk_entry(
|
fn walk_entry(
|
||||||
entry: Result<ignore::DirEntry, ignore::Error>,
|
entry: Result<ignore::DirEntry, ignore::Error>,
|
||||||
checks: &dyn FileChecker,
|
checks: &dyn FileChecker,
|
||||||
settings: &CheckSettings,
|
engine: &crate::policy::ConfigEngine,
|
||||||
tokenizer: &typos::tokens::Tokenizer,
|
|
||||||
dictionary: &dyn typos::Dictionary,
|
|
||||||
reporter: &dyn report::Report,
|
reporter: &dyn report::Report,
|
||||||
) -> Result<(), ignore::Error> {
|
) -> Result<(), ignore::Error> {
|
||||||
let entry = match entry {
|
let entry = match entry {
|
||||||
|
@ -655,7 +603,8 @@ fn walk_entry(
|
||||||
} else {
|
} else {
|
||||||
entry.path()
|
entry.path()
|
||||||
};
|
};
|
||||||
checks.check_file(path, explicit, settings, tokenizer, dictionary, reporter)?;
|
let policy = engine.policy(path);
|
||||||
|
checks.check_file(path, explicit, &policy, reporter)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
pub mod config;
|
pub mod config;
|
||||||
pub mod dict;
|
pub mod dict;
|
||||||
pub mod file;
|
pub mod file;
|
||||||
|
pub mod policy;
|
||||||
pub mod report;
|
pub mod report;
|
||||||
|
|
90
src/main.rs
90
src/main.rs
|
@ -8,7 +8,6 @@ use structopt::StructOpt;
|
||||||
|
|
||||||
mod args;
|
mod args;
|
||||||
use typos_cli::config;
|
use typos_cli::config;
|
||||||
use typos_cli::dict;
|
|
||||||
use typos_cli::report;
|
use typos_cli::report;
|
||||||
|
|
||||||
use proc_exit::WithCodeResultExt;
|
use proc_exit::WithCodeResultExt;
|
||||||
|
@ -58,7 +57,19 @@ fn run_dump_config(args: &args::Args, output_path: &std::path::Path) -> proc_exi
|
||||||
path.as_path()
|
path.as_path()
|
||||||
};
|
};
|
||||||
|
|
||||||
let config = load_config(cwd, &args).with_code(proc_exit::Code::CONFIG_ERR)?;
|
let storage = typos_cli::policy::ConfigStorage::new();
|
||||||
|
let mut overrides = config::EngineConfig::default();
|
||||||
|
overrides.update(&args.overrides);
|
||||||
|
let mut engine = typos_cli::policy::ConfigEngine::new(&storage);
|
||||||
|
engine.set_isolated(args.isolated).set_overrides(overrides);
|
||||||
|
if let Some(path) = args.custom_config.as_ref() {
|
||||||
|
let custom = config::Config::from_file(path).with_code(proc_exit::Code::CONFIG_ERR)?;
|
||||||
|
engine.set_custom_config(custom);
|
||||||
|
}
|
||||||
|
let config = engine
|
||||||
|
.load_config(cwd)
|
||||||
|
.with_code(proc_exit::Code::CONFIG_ERR)?;
|
||||||
|
|
||||||
let mut defaulted_config = config::Config::from_defaults();
|
let mut defaulted_config = config::Config::from_defaults();
|
||||||
defaulted_config.update(&config);
|
defaulted_config.update(&config);
|
||||||
let output = toml::to_string_pretty(&defaulted_config).with_code(proc_exit::Code::FAILURE)?;
|
let output = toml::to_string_pretty(&defaulted_config).with_code(proc_exit::Code::FAILURE)?;
|
||||||
|
@ -74,6 +85,16 @@ fn run_dump_config(args: &args::Args, output_path: &std::path::Path) -> proc_exi
|
||||||
fn run_checks(args: &args::Args) -> proc_exit::ExitResult {
|
fn run_checks(args: &args::Args) -> proc_exit::ExitResult {
|
||||||
let global_cwd = std::env::current_dir()?;
|
let global_cwd = std::env::current_dir()?;
|
||||||
|
|
||||||
|
let storage = typos_cli::policy::ConfigStorage::new();
|
||||||
|
let mut overrides = config::EngineConfig::default();
|
||||||
|
overrides.update(&args.overrides);
|
||||||
|
let mut engine = typos_cli::policy::ConfigEngine::new(&storage);
|
||||||
|
engine.set_isolated(args.isolated).set_overrides(overrides);
|
||||||
|
if let Some(path) = args.custom_config.as_ref() {
|
||||||
|
let custom = config::Config::from_file(path).with_code(proc_exit::Code::CONFIG_ERR)?;
|
||||||
|
engine.set_custom_config(custom);
|
||||||
|
}
|
||||||
|
|
||||||
let mut typos_found = false;
|
let mut typos_found = false;
|
||||||
let mut errors_found = false;
|
let mut errors_found = false;
|
||||||
for path in args.path.iter() {
|
for path in args.path.iter() {
|
||||||
|
@ -89,38 +110,23 @@ fn run_checks(args: &args::Args) -> proc_exit::ExitResult {
|
||||||
} else {
|
} else {
|
||||||
path.as_path()
|
path.as_path()
|
||||||
};
|
};
|
||||||
let config = load_config(cwd, &args).with_code(proc_exit::Code::CONFIG_ERR)?;
|
|
||||||
|
|
||||||
let tokenizer = typos::tokens::TokenizerBuilder::new()
|
engine
|
||||||
.ignore_hex(config.default.ignore_hex())
|
.init_dir(cwd)
|
||||||
.leading_digits(config.default.identifier_leading_digits())
|
.with_code(proc_exit::Code::CONFIG_ERR)?;
|
||||||
.leading_chars(config.default.identifier_leading_chars().to_owned())
|
let files = engine.files(cwd);
|
||||||
.include_digits(config.default.identifier_include_digits())
|
|
||||||
.include_chars(config.default.identifier_include_chars().to_owned())
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let dictionary = crate::dict::BuiltIn::new(config.default.locale());
|
|
||||||
let mut dictionary = crate::dict::Override::new(dictionary);
|
|
||||||
dictionary.identifiers(config.default.extend_identifiers());
|
|
||||||
dictionary.words(config.default.extend_words());
|
|
||||||
|
|
||||||
let mut settings = typos_cli::file::CheckSettings::new();
|
|
||||||
settings
|
|
||||||
.check_filenames(config.default.check_filename())
|
|
||||||
.check_files(config.default.check_file())
|
|
||||||
.binary(config.default.binary());
|
|
||||||
|
|
||||||
let threads = if path.is_file() { 1 } else { args.threads };
|
let threads = if path.is_file() { 1 } else { args.threads };
|
||||||
let single_threaded = threads == 1;
|
let single_threaded = threads == 1;
|
||||||
|
|
||||||
let mut walk = ignore::WalkBuilder::new(path);
|
let mut walk = ignore::WalkBuilder::new(path);
|
||||||
walk.threads(args.threads)
|
walk.threads(args.threads)
|
||||||
.hidden(config.files.ignore_hidden())
|
.hidden(files.ignore_hidden())
|
||||||
.ignore(config.files.ignore_dot())
|
.ignore(files.ignore_dot())
|
||||||
.git_global(config.files.ignore_global())
|
.git_global(files.ignore_global())
|
||||||
.git_ignore(config.files.ignore_vcs())
|
.git_ignore(files.ignore_vcs())
|
||||||
.git_exclude(config.files.ignore_vcs())
|
.git_exclude(files.ignore_vcs())
|
||||||
.parents(config.files.ignore_parent());
|
.parents(files.ignore_parent());
|
||||||
|
|
||||||
// HACK: Diff doesn't handle mixing content
|
// HACK: Diff doesn't handle mixing content
|
||||||
let output_reporter = if args.diff {
|
let output_reporter = if args.diff {
|
||||||
|
@ -146,21 +152,12 @@ fn run_checks(args: &args::Args) -> proc_exit::ExitResult {
|
||||||
};
|
};
|
||||||
|
|
||||||
if single_threaded {
|
if single_threaded {
|
||||||
typos_cli::file::walk_path(
|
typos_cli::file::walk_path(walk.build(), selected_checks, &engine, reporter)
|
||||||
walk.build(),
|
|
||||||
selected_checks,
|
|
||||||
&settings,
|
|
||||||
&tokenizer,
|
|
||||||
&dictionary,
|
|
||||||
reporter,
|
|
||||||
)
|
|
||||||
} else {
|
} else {
|
||||||
typos_cli::file::walk_path_parallel(
|
typos_cli::file::walk_path_parallel(
|
||||||
walk.build_parallel(),
|
walk.build_parallel(),
|
||||||
selected_checks,
|
selected_checks,
|
||||||
&settings,
|
&engine,
|
||||||
&tokenizer,
|
|
||||||
&dictionary,
|
|
||||||
reporter,
|
reporter,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -213,20 +210,3 @@ fn init_logging(level: Option<log::Level>) {
|
||||||
builder.init();
|
builder.init();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Builds the effective configuration for `cwd` by layering sources, later ones winning.
///
/// Order of application:
/// 1. `Config::default()`
/// 2. the config derived from `cwd` (skipped when `args.isolated` is set)
/// 3. the file named by `args.custom_config`, if any
/// 4. `args.config`
/// 5. `args.overrides`, applied to the `default` section only
///
/// # Errors
///
/// Propagates failures from deriving the directory config or reading the custom config file.
fn load_config(cwd: &std::path::Path, args: &args::Args) -> Result<config::Config, anyhow::Error> {
    let mut merged = config::Config::default();

    if !args.isolated {
        merged.update(&config::Config::derive(cwd)?);
    }
    if let Some(custom_path) = args.custom_config.as_ref() {
        let custom = config::Config::from_file(custom_path)?;
        merged.update(&custom);
    }

    merged.update(&args.config);
    merged.default.update(&args.overrides);

    Ok(merged)
}
|
|
||||||
|
|
264
src/policy.rs
Normal file
264
src/policy.rs
Normal file
|
@ -0,0 +1,264 @@
|
||||||
|
pub struct ConfigStorage {
|
||||||
|
arena: std::sync::Mutex<typed_arena::Arena<kstring::KString>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ConfigStorage {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
arena: std::sync::Mutex::new(typed_arena::Arena::new()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get<'s>(&'s self, other: &str) -> &'s str {
|
||||||
|
// Safe because we the references are stable once created.
|
||||||
|
//
|
||||||
|
// Trying to get this handled inside of `typed_arena` directly, see
|
||||||
|
// https://github.com/SimonSapin/rust-typed-arena/issues/49#issuecomment-809517312
|
||||||
|
unsafe {
|
||||||
|
std::mem::transmute::<&str, &str>(
|
||||||
|
self.arena
|
||||||
|
.lock()
|
||||||
|
.unwrap()
|
||||||
|
.alloc(kstring::KString::from_ref(other))
|
||||||
|
.as_str(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for ConfigStorage {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct ConfigEngine<'s> {
|
||||||
|
storage: &'s ConfigStorage,
|
||||||
|
|
||||||
|
overrides: Option<crate::config::EngineConfig>,
|
||||||
|
custom: Option<crate::config::Config>,
|
||||||
|
isolated: bool,
|
||||||
|
|
||||||
|
configs: std::collections::HashMap<std::path::PathBuf, DirConfig>,
|
||||||
|
files: Intern<crate::config::Walk>,
|
||||||
|
tokenizer: Intern<typos::tokens::Tokenizer>,
|
||||||
|
dict: Intern<crate::dict::Override<'s, 's, crate::dict::BuiltIn>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> ConfigEngine<'s> {
|
||||||
|
pub fn new(storage: &'s ConfigStorage) -> Self {
|
||||||
|
Self {
|
||||||
|
storage,
|
||||||
|
overrides: Default::default(),
|
||||||
|
custom: Default::default(),
|
||||||
|
configs: Default::default(),
|
||||||
|
isolated: false,
|
||||||
|
files: Default::default(),
|
||||||
|
tokenizer: Default::default(),
|
||||||
|
dict: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_overrides(&mut self, overrides: crate::config::EngineConfig) -> &mut Self {
|
||||||
|
self.overrides = Some(overrides);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_custom_config(&mut self, custom: crate::config::Config) -> &mut Self {
|
||||||
|
self.custom = Some(custom);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_isolated(&mut self, isolated: bool) -> &mut Self {
|
||||||
|
self.isolated = isolated;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn files(&mut self, cwd: &std::path::Path) -> &crate::config::Walk {
|
||||||
|
let dir = self
|
||||||
|
.configs
|
||||||
|
.get(cwd)
|
||||||
|
.expect("`init_dir` must be called first");
|
||||||
|
self.get_files(dir)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_> {
|
||||||
|
let dir = self
|
||||||
|
.get_dir(path)
|
||||||
|
.expect("`files()` should be called first");
|
||||||
|
Policy {
|
||||||
|
check_filenames: dir.check_filenames,
|
||||||
|
check_files: dir.check_files,
|
||||||
|
binary: dir.binary,
|
||||||
|
tokenizer: self.get_tokenizer(dir),
|
||||||
|
dict: self.get_dict(dir),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_files(&self, dir: &DirConfig) -> &crate::config::Walk {
|
||||||
|
self.files.get(dir.files)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_tokenizer(&self, dir: &DirConfig) -> &typos::tokens::Tokenizer {
|
||||||
|
self.tokenizer.get(dir.tokenizer)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_dict(&self, dir: &DirConfig) -> &dyn typos::Dictionary {
|
||||||
|
self.dict.get(dir.dict)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_dir(&self, path: &std::path::Path) -> Option<&DirConfig> {
|
||||||
|
for path in path.ancestors() {
|
||||||
|
if let Some(dir) = self.configs.get(path) {
|
||||||
|
return Some(dir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load_config(
|
||||||
|
&self,
|
||||||
|
cwd: &std::path::Path,
|
||||||
|
) -> Result<crate::config::Config, anyhow::Error> {
|
||||||
|
let mut config = crate::config::Config::default();
|
||||||
|
|
||||||
|
if !self.isolated {
|
||||||
|
if let Some(derived) = crate::config::Config::from_dir(cwd)? {
|
||||||
|
config.update(&derived);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(custom) = self.custom.as_ref() {
|
||||||
|
config.update(custom);
|
||||||
|
}
|
||||||
|
if let Some(overrides) = self.overrides.as_ref() {
|
||||||
|
config.default.update(overrides);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(config)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn init_dir(&mut self, cwd: &std::path::Path) -> Result<(), anyhow::Error> {
|
||||||
|
if self.configs.contains_key(cwd) {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let config = self.load_config(cwd)?;
|
||||||
|
|
||||||
|
let crate::config::Config { files, default } = config;
|
||||||
|
let binary = default.binary();
|
||||||
|
let check_filename = default.check_filename();
|
||||||
|
let check_file = default.check_file();
|
||||||
|
let crate::config::EngineConfig {
|
||||||
|
tokenizer, dict, ..
|
||||||
|
} = default;
|
||||||
|
let tokenizer_config =
|
||||||
|
tokenizer.unwrap_or_else(crate::config::TokenizerConfig::from_defaults);
|
||||||
|
let dict_config = dict.unwrap_or_else(crate::config::DictConfig::from_defaults);
|
||||||
|
|
||||||
|
let tokenizer = typos::tokens::TokenizerBuilder::new()
|
||||||
|
.ignore_hex(tokenizer_config.ignore_hex())
|
||||||
|
.leading_digits(tokenizer_config.identifier_leading_digits())
|
||||||
|
.leading_chars(tokenizer_config.identifier_leading_chars().to_owned())
|
||||||
|
.include_digits(tokenizer_config.identifier_include_digits())
|
||||||
|
.include_chars(tokenizer_config.identifier_include_chars().to_owned())
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let dict = crate::dict::BuiltIn::new(dict_config.locale());
|
||||||
|
let mut dict = crate::dict::Override::new(dict);
|
||||||
|
dict.identifiers(
|
||||||
|
dict_config
|
||||||
|
.extend_identifiers()
|
||||||
|
.map(|(k, v)| (self.storage.get(k), self.storage.get(v))),
|
||||||
|
);
|
||||||
|
dict.words(
|
||||||
|
dict_config
|
||||||
|
.extend_words()
|
||||||
|
.map(|(k, v)| (self.storage.get(k), self.storage.get(v))),
|
||||||
|
);
|
||||||
|
|
||||||
|
let dict = self.dict.intern(dict);
|
||||||
|
let files = self.files.intern(files);
|
||||||
|
let tokenizer = self.tokenizer.intern(tokenizer);
|
||||||
|
|
||||||
|
let dir = DirConfig {
|
||||||
|
files,
|
||||||
|
check_filenames: check_filename,
|
||||||
|
check_files: check_file,
|
||||||
|
binary,
|
||||||
|
tokenizer,
|
||||||
|
dict,
|
||||||
|
};
|
||||||
|
|
||||||
|
self.configs.insert(cwd.to_owned(), dir);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Append-only table that hands out the position of each stored value as its symbol.
///
/// Values are never compared: storing an equal value twice yields two distinct symbols.
struct Intern<T> {
    data: Vec<T>,
}

impl<T> Intern<T> {
    /// Creates an empty table.
    pub fn new() -> Self {
        Self { data: Vec::new() }
    }

    /// Stores `value` and returns the symbol that `get` accepts to retrieve it.
    pub fn intern(&mut self, value: T) -> usize {
        self.data.push(value);
        self.data.len() - 1
    }

    /// Returns the value stored under `symbol`.
    ///
    /// # Panics
    ///
    /// Panics if `symbol` was not returned by `intern` on this table.
    pub fn get(&self, symbol: usize) -> &T {
        &self.data[symbol]
    }
}

impl<T> Default for Intern<T> {
    fn default() -> Self {
        Intern::new()
    }
}
|
||||||
|
|
||||||
|
/// What `ConfigEngine` remembers for one registered directory.
struct DirConfig {
    /// Symbol of the walk settings in the engine's `files` table.
    files: usize,
    /// Symbol of the tokenizer in the engine's `tokenizer` table.
    tokenizer: usize,
    /// Symbol of the dictionary in the engine's `dict` table.
    dict: usize,
    /// Flags copied verbatim into the `Policy` handed to checkers.
    check_filenames: bool,
    check_files: bool,
    binary: bool,
}
|
||||||
|
|
||||||
|
#[non_exhaustive]
|
||||||
|
#[derive(derive_setters::Setters)]
|
||||||
|
pub struct Policy<'t, 'd> {
|
||||||
|
pub check_filenames: bool,
|
||||||
|
pub check_files: bool,
|
||||||
|
pub binary: bool,
|
||||||
|
pub tokenizer: &'t typos::tokens::Tokenizer,
|
||||||
|
pub dict: &'d dyn typos::Dictionary,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t, 'd> Policy<'t, 'd> {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Default::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static DEFAULT_TOKENIZER: once_cell::sync::Lazy<typos::tokens::Tokenizer> =
|
||||||
|
once_cell::sync::Lazy::new(typos::tokens::Tokenizer::new);
|
||||||
|
static DEFAULT_DICT: crate::dict::BuiltIn = crate::dict::BuiltIn::new(crate::config::Locale::En);
|
||||||
|
|
||||||
|
impl<'t, 'd> Default for Policy<'t, 'd> {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
check_filenames: true,
|
||||||
|
check_files: true,
|
||||||
|
binary: false,
|
||||||
|
tokenizer: &DEFAULT_TOKENIZER,
|
||||||
|
dict: &DEFAULT_DICT,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Reference in a new issue