mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-25 02:20:58 -05:00
feat(parse): Make identifier symbols configurable
This commit is contained in:
parent
e093135ac1
commit
3419a8df85
3 changed files with 19 additions and 4 deletions
7
Cargo.lock
generated
7
Cargo.lock
generated
|
@ -271,7 +271,7 @@ name = "heck"
|
|||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -811,6 +811,7 @@ dependencies = [
|
|||
"serde_json 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"structopt 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -828,7 +829,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "unicode-segmentation"
|
||||
version = "1.2.1"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
|
@ -1013,7 +1014,7 @@ dependencies = [
|
|||
"checksum treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
|
||||
"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
|
||||
"checksum unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7f4765f83163b74f957c797ad9253caf97f103fb064d3999aea9568d09fc8a33"
|
||||
"checksum unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1"
|
||||
"checksum unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1967f4cdfc355b37fd76d2a954fb2ed3871034eb4f26d60537d88795cfc332a9"
|
||||
"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
|
||||
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
||||
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
|
||||
|
|
|
@ -36,6 +36,7 @@ unicase = "1.1"
|
|||
bstr = "0.2"
|
||||
log = "0.4"
|
||||
env_logger = "0.6"
|
||||
unicode-segmentation = "1.3.0"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_fs = "0.10"
|
||||
|
|
|
@ -10,6 +10,7 @@ pub enum Case {
|
|||
pub struct ParserBuilder {
|
||||
ignore_hex: bool,
|
||||
include_digits: bool,
|
||||
include_chars: String,
|
||||
}
|
||||
|
||||
impl ParserBuilder {
|
||||
|
@ -27,11 +28,22 @@ impl ParserBuilder {
|
|||
self
|
||||
}
|
||||
|
||||
pub fn include_chars(&mut self, chars: String) -> &mut Self {
|
||||
self.include_chars = chars;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn build(&self) -> Parser {
|
||||
let mut pattern = r#"\b(\p{Alphabetic}|_|'"#.to_owned();
|
||||
let mut pattern = r#"\b(\p{Alphabetic}"#.to_owned();
|
||||
if self.include_digits {
|
||||
pattern.push_str(r#"|\d"#);
|
||||
}
|
||||
for grapheme in
|
||||
unicode_segmentation::UnicodeSegmentation::graphemes(self.include_chars.as_str(), true)
|
||||
{
|
||||
let escaped = regex::escape(&grapheme);
|
||||
pattern.push_str(&format!("|{}", escaped));
|
||||
}
|
||||
pattern.push_str(r#")+\b"#);
|
||||
let words_str = regex::Regex::new(&pattern).unwrap();
|
||||
let words_bytes = regex::bytes::Regex::new(&pattern).unwrap();
|
||||
|
@ -48,6 +60,7 @@ impl Default for ParserBuilder {
|
|||
Self {
|
||||
ignore_hex: true,
|
||||
include_digits: true,
|
||||
include_chars: "_'".to_owned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue