2021-02-12 19:43:12 -05:00
|
|
|
pub struct ConfigStorage {
|
2021-03-29 14:39:48 -04:00
|
|
|
arena: std::sync::Mutex<typed_arena::Arena<kstring::KString>>,
|
2021-02-12 19:43:12 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
impl ConfigStorage {
|
|
|
|
pub fn new() -> Self {
|
|
|
|
Self {
|
2021-03-29 14:39:48 -04:00
|
|
|
arena: std::sync::Mutex::new(typed_arena::Arena::new()),
|
2021-02-12 19:43:12 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn get<'s>(&'s self, other: &str) -> &'s str {
|
2021-03-29 14:39:48 -04:00
|
|
|
// Safe because we the references are stable once created.
|
|
|
|
//
|
|
|
|
// Trying to get this handled inside of `typed_arena` directly, see
|
|
|
|
// https://github.com/SimonSapin/rust-typed-arena/issues/49#issuecomment-809517312
|
|
|
|
unsafe {
|
|
|
|
std::mem::transmute::<&str, &str>(
|
|
|
|
self.arena
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
2021-03-29 21:28:01 -04:00
|
|
|
.alloc(kstring::KString::from_ref(other))
|
|
|
|
.as_str(),
|
2021-03-29 14:39:48 -04:00
|
|
|
)
|
|
|
|
}
|
2021-02-12 19:43:12 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-29 21:28:01 -04:00
|
|
|
impl Default for ConfigStorage {
|
|
|
|
fn default() -> Self {
|
|
|
|
Self::new()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-12 19:43:12 -05:00
|
|
|
pub struct ConfigEngine<'s> {
|
2021-03-29 14:39:48 -04:00
|
|
|
storage: &'s ConfigStorage,
|
|
|
|
|
2021-04-05 08:34:05 -04:00
|
|
|
overrides: Option<crate::config::Config>,
|
2021-03-29 14:39:48 -04:00
|
|
|
isolated: bool,
|
|
|
|
|
|
|
|
configs: std::collections::HashMap<std::path::PathBuf, DirConfig>,
|
2021-03-31 21:06:33 -04:00
|
|
|
walk: Intern<crate::config::Walk>,
|
2021-03-29 14:39:48 -04:00
|
|
|
tokenizer: Intern<typos::tokens::Tokenizer>,
|
|
|
|
dict: Intern<crate::dict::Override<'s, 's, crate::dict::BuiltIn>>,
|
2021-02-12 19:43:12 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
impl<'s> ConfigEngine<'s> {
|
2021-03-29 14:39:48 -04:00
|
|
|
pub fn new(storage: &'s ConfigStorage) -> Self {
|
|
|
|
Self {
|
|
|
|
storage,
|
|
|
|
overrides: Default::default(),
|
|
|
|
configs: Default::default(),
|
|
|
|
isolated: false,
|
2021-03-31 21:06:33 -04:00
|
|
|
walk: Default::default(),
|
2021-03-29 14:39:48 -04:00
|
|
|
tokenizer: Default::default(),
|
|
|
|
dict: Default::default(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-05 08:34:05 -04:00
|
|
|
pub fn set_overrides(&mut self, overrides: crate::config::Config) -> &mut Self {
|
2021-03-29 14:39:48 -04:00
|
|
|
self.overrides = Some(overrides);
|
|
|
|
self
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn set_isolated(&mut self, isolated: bool) -> &mut Self {
|
|
|
|
self.isolated = isolated;
|
|
|
|
self
|
|
|
|
}
|
|
|
|
|
2021-03-31 21:06:33 -04:00
|
|
|
pub fn walk(&mut self, cwd: &std::path::Path) -> &crate::config::Walk {
|
2021-03-29 14:39:48 -04:00
|
|
|
let dir = self
|
|
|
|
.configs
|
|
|
|
.get(cwd)
|
|
|
|
.expect("`init_dir` must be called first");
|
2021-03-31 21:06:33 -04:00
|
|
|
self.get_walk(dir)
|
2021-03-29 14:39:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_> {
|
2021-03-31 21:06:33 -04:00
|
|
|
let dir = self.get_dir(path).expect("`walk()` should be called first");
|
2021-03-29 14:39:48 -04:00
|
|
|
Policy {
|
2021-03-31 21:19:52 -04:00
|
|
|
check_filenames: dir.default.check_filenames,
|
|
|
|
check_files: dir.default.check_files,
|
|
|
|
binary: dir.default.binary,
|
|
|
|
tokenizer: self.get_tokenizer(&dir.default),
|
|
|
|
dict: self.get_dict(&dir.default),
|
2021-03-29 14:39:48 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-31 21:06:33 -04:00
|
|
|
fn get_walk(&self, dir: &DirConfig) -> &crate::config::Walk {
|
|
|
|
self.walk.get(dir.walk)
|
2021-03-29 14:39:48 -04:00
|
|
|
}
|
|
|
|
|
2021-03-31 21:19:52 -04:00
|
|
|
fn get_tokenizer(&self, file: &FileConfig) -> &typos::tokens::Tokenizer {
|
|
|
|
self.tokenizer.get(file.tokenizer)
|
2021-03-29 14:39:48 -04:00
|
|
|
}
|
|
|
|
|
2021-03-31 21:19:52 -04:00
|
|
|
fn get_dict(&self, file: &FileConfig) -> &dyn typos::Dictionary {
|
|
|
|
self.dict.get(file.dict)
|
2021-03-29 14:39:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
fn get_dir(&self, path: &std::path::Path) -> Option<&DirConfig> {
|
|
|
|
for path in path.ancestors() {
|
|
|
|
if let Some(dir) = self.configs.get(path) {
|
|
|
|
return Some(dir);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
None
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn load_config(
|
|
|
|
&self,
|
|
|
|
cwd: &std::path::Path,
|
|
|
|
) -> Result<crate::config::Config, anyhow::Error> {
|
|
|
|
let mut config = crate::config::Config::default();
|
|
|
|
|
|
|
|
if !self.isolated {
|
|
|
|
if let Some(derived) = crate::config::Config::from_dir(cwd)? {
|
|
|
|
config.update(&derived);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if let Some(overrides) = self.overrides.as_ref() {
|
2021-04-05 08:34:05 -04:00
|
|
|
config.update(overrides);
|
2021-03-29 14:39:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
Ok(config)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn init_dir(&mut self, cwd: &std::path::Path) -> Result<(), anyhow::Error> {
|
|
|
|
if self.configs.contains_key(cwd) {
|
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
|
|
|
|
let config = self.load_config(cwd)?;
|
2021-04-05 08:34:05 -04:00
|
|
|
let crate::config::Config {
|
|
|
|
files,
|
|
|
|
mut default,
|
|
|
|
overrides,
|
|
|
|
} = config;
|
|
|
|
if let Some(overrides) = overrides {
|
|
|
|
default.update(&overrides);
|
|
|
|
}
|
2021-03-31 21:19:52 -04:00
|
|
|
|
|
|
|
let walk = self.walk.intern(files);
|
|
|
|
let default = self.init_file_config(default)?;
|
|
|
|
|
|
|
|
let dir = DirConfig { walk, default };
|
|
|
|
|
|
|
|
self.configs.insert(cwd.to_owned(), dir);
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
fn init_file_config(
|
|
|
|
&mut self,
|
|
|
|
engine: crate::config::EngineConfig,
|
|
|
|
) -> Result<FileConfig, anyhow::Error> {
|
|
|
|
let binary = engine.binary();
|
|
|
|
let check_filename = engine.check_filename();
|
|
|
|
let check_file = engine.check_file();
|
2021-03-01 21:37:05 -05:00
|
|
|
let crate::config::EngineConfig {
|
|
|
|
tokenizer, dict, ..
|
2021-03-31 21:19:52 -04:00
|
|
|
} = engine;
|
2021-03-01 21:37:05 -05:00
|
|
|
let tokenizer_config =
|
2021-03-29 21:28:01 -04:00
|
|
|
tokenizer.unwrap_or_else(crate::config::TokenizerConfig::from_defaults);
|
|
|
|
let dict_config = dict.unwrap_or_else(crate::config::DictConfig::from_defaults);
|
2021-02-12 19:43:12 -05:00
|
|
|
|
|
|
|
let tokenizer = typos::tokens::TokenizerBuilder::new()
|
2021-03-01 21:37:05 -05:00
|
|
|
.ignore_hex(tokenizer_config.ignore_hex())
|
|
|
|
.leading_digits(tokenizer_config.identifier_leading_digits())
|
|
|
|
.leading_chars(tokenizer_config.identifier_leading_chars().to_owned())
|
|
|
|
.include_digits(tokenizer_config.identifier_include_digits())
|
|
|
|
.include_chars(tokenizer_config.identifier_include_chars().to_owned())
|
2021-02-12 19:43:12 -05:00
|
|
|
.build();
|
|
|
|
|
2021-03-01 21:37:05 -05:00
|
|
|
let dict = crate::dict::BuiltIn::new(dict_config.locale());
|
|
|
|
let mut dict = crate::dict::Override::new(dict);
|
|
|
|
dict.identifiers(
|
|
|
|
dict_config
|
2021-02-12 19:43:12 -05:00
|
|
|
.extend_identifiers()
|
2021-03-29 14:39:48 -04:00
|
|
|
.map(|(k, v)| (self.storage.get(k), self.storage.get(v))),
|
2021-02-12 19:43:12 -05:00
|
|
|
);
|
2021-03-01 21:37:05 -05:00
|
|
|
dict.words(
|
|
|
|
dict_config
|
2021-02-12 19:43:12 -05:00
|
|
|
.extend_words()
|
2021-03-29 14:39:48 -04:00
|
|
|
.map(|(k, v)| (self.storage.get(k), self.storage.get(v))),
|
2021-02-12 19:43:12 -05:00
|
|
|
);
|
|
|
|
|
2021-03-29 14:39:48 -04:00
|
|
|
let dict = self.dict.intern(dict);
|
|
|
|
let tokenizer = self.tokenizer.intern(tokenizer);
|
|
|
|
|
2021-03-31 21:19:52 -04:00
|
|
|
let file = FileConfig {
|
2021-03-01 21:37:05 -05:00
|
|
|
check_filenames: check_filename,
|
|
|
|
check_files: check_file,
|
2021-03-29 21:28:01 -04:00
|
|
|
binary,
|
2021-02-12 19:43:12 -05:00
|
|
|
tokenizer,
|
2021-03-01 21:37:05 -05:00
|
|
|
dict,
|
2021-03-29 14:39:48 -04:00
|
|
|
};
|
2021-03-31 21:19:52 -04:00
|
|
|
Ok(file)
|
2021-03-29 14:39:48 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Append-only table that hands out an index ("symbol") for every stored value.
///
/// Values are never compared or deduplicated: each call to `intern` stores a
/// new entry and returns a new symbol.
struct Intern<T> {
    /// Stored values; a symbol is the position of its value in this vector.
    data: Vec<T>,
}

impl<T> Intern<T> {
    /// Creates an empty table.
    pub fn new() -> Self {
        Self { data: Vec::new() }
    }

    /// Stores `value` and returns the symbol that retrieves it through `get`.
    pub fn intern(&mut self, value: T) -> usize {
        self.data.push(value);
        self.data.len() - 1
    }

    /// Returns the value stored under `symbol`.
    ///
    /// # Panics
    ///
    /// Panics if `symbol` was not returned by `intern` on this table.
    pub fn get(&self, symbol: usize) -> &T {
        &self.data[symbol]
    }
}

// Implemented by hand so that `T` does not need to implement `Default`.
impl<T> Default for Intern<T> {
    fn default() -> Self {
        Intern::new()
    }
}
|
|
|
|
|
|
|
|
struct DirConfig {
|
2021-03-31 21:06:33 -04:00
|
|
|
walk: usize,
|
2021-03-31 21:19:52 -04:00
|
|
|
default: FileConfig,
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Resolved per-file settings; the tokenizer and dictionary are stored as
/// symbols into the owning `ConfigEngine`'s tables.
struct FileConfig {
    /// Symbol of the tokenizer in `ConfigEngine::tokenizer`.
    tokenizer: usize,
    /// Symbol of the dictionary in `ConfigEngine::dict`.
    dict: usize,
    /// Copied into `Policy::check_filenames`.
    check_filenames: bool,
    /// Copied into `Policy::check_files`.
    check_files: bool,
    /// Copied into `Policy::binary`.
    binary: bool,
}
|
|
|
|
|
2021-02-12 19:43:12 -05:00
|
|
|
#[non_exhaustive]
|
|
|
|
#[derive(derive_setters::Setters)]
|
|
|
|
pub struct Policy<'t, 'd> {
|
|
|
|
pub check_filenames: bool,
|
|
|
|
pub check_files: bool,
|
|
|
|
pub binary: bool,
|
|
|
|
pub tokenizer: &'t typos::tokens::Tokenizer,
|
2021-03-01 21:37:05 -05:00
|
|
|
pub dict: &'d dyn typos::Dictionary,
|
2021-02-12 19:43:12 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
impl<'t, 'd> Policy<'t, 'd> {
|
|
|
|
pub fn new() -> Self {
|
|
|
|
Default::default()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static DEFAULT_TOKENIZER: once_cell::sync::Lazy<typos::tokens::Tokenizer> =
|
|
|
|
once_cell::sync::Lazy::new(typos::tokens::Tokenizer::new);
|
|
|
|
static DEFAULT_DICT: crate::dict::BuiltIn = crate::dict::BuiltIn::new(crate::config::Locale::En);
|
|
|
|
|
|
|
|
impl<'t, 'd> Default for Policy<'t, 'd> {
|
|
|
|
fn default() -> Self {
|
|
|
|
Self {
|
|
|
|
check_filenames: true,
|
|
|
|
check_files: true,
|
|
|
|
binary: false,
|
|
|
|
tokenizer: &DEFAULT_TOKENIZER,
|
2021-03-01 21:37:05 -05:00
|
|
|
dict: &DEFAULT_DICT,
|
2021-02-12 19:43:12 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|