mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-21 16:41:01 -05:00
feat(dict): Preserve correction order
We want to be able to recommend more likely corrections first, e.g. for "poped" we want to recommend "popped" before "pooped".
This commit is contained in:
parent
357aa55c6c
commit
8d026ac23e
3 changed files with 40 additions and 8 deletions
29
Cargo.lock
generated
29
Cargo.lock
generated
|
@ -233,7 +233,7 @@ dependencies = [
|
|||
"bitflags",
|
||||
"clap_derive 3.2.18",
|
||||
"clap_lex 0.2.4",
|
||||
"indexmap",
|
||||
"indexmap 1.9.2",
|
||||
"once_cell",
|
||||
"strsim 0.10.0",
|
||||
"termcolor",
|
||||
|
@ -615,6 +615,12 @@ dependencies = [
|
|||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1"
|
||||
|
||||
[[package]]
|
||||
name = "errno"
|
||||
version = "0.2.8"
|
||||
|
@ -733,6 +739,12 @@ version = "0.12.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.1"
|
||||
|
@ -826,7 +838,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"hashbrown",
|
||||
"hashbrown 0.12.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown 0.14.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1584,7 +1606,7 @@ version = "0.19.10"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"indexmap 1.9.2",
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
|
@ -1685,6 +1707,7 @@ dependencies = [
|
|||
"csv",
|
||||
"dictgen",
|
||||
"edit-distance",
|
||||
"indexmap 2.0.0",
|
||||
"itertools",
|
||||
"snapbox",
|
||||
"unicase",
|
||||
|
|
|
@ -24,3 +24,4 @@ codegenrs = "2.0"
|
|||
dictgen = { version = "^0.2", path = "../dictgen", features = ["codegen"] }
|
||||
varcon = { version = "^0.6", path = "../varcon" }
|
||||
snapbox = { version = "0.4.11", features = ["path"] }
|
||||
indexmap = "2.0.0"
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
use indexmap::IndexSet;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::BTreeSet;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use unicase::UniCase;
|
||||
|
||||
type Dict = BTreeMap<UniCase<String>, BTreeSet<String>>;
|
||||
type Dict = BTreeMap<UniCase<String>, IndexSet<String>>;
|
||||
|
||||
#[test]
|
||||
fn verify() {
|
||||
|
@ -62,7 +62,7 @@ fn dict_from_iter<S: Into<String>>(
|
|||
|
||||
// duplicate entries are merged
|
||||
dict.entry(typo)
|
||||
.or_insert_with(BTreeSet::new)
|
||||
.or_default()
|
||||
.extend(corrections.into_iter().map(|c| {
|
||||
let mut c = c.into();
|
||||
c.make_ascii_lowercase();
|
||||
|
@ -82,7 +82,7 @@ fn process<S: Into<String>>(
|
|||
.into_iter()
|
||||
.filter(|(t, _)| is_word(t))
|
||||
.filter_map(|(t, c)| {
|
||||
let new_c: BTreeSet<_> = c.into_iter().filter(|c| is_word(c)).collect();
|
||||
let new_c: IndexSet<_> = c.into_iter().filter(|c| is_word(c)).collect();
|
||||
if new_c.is_empty() {
|
||||
None
|
||||
} else {
|
||||
|
@ -112,7 +112,7 @@ fn process<S: Into<String>>(
|
|||
}
|
||||
})
|
||||
.map(|(typo, corrections)| {
|
||||
let mut new_corrections = BTreeSet::new();
|
||||
let mut new_corrections = IndexSet::new();
|
||||
for correction in corrections {
|
||||
let correction = word_variants
|
||||
.get(correction.as_str())
|
||||
|
@ -145,6 +145,14 @@ fn process<S: Into<String>>(
|
|||
.collect()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_preserve_correction_order() {
|
||||
let dict = process([("foo", ["xyz", "abc"])]);
|
||||
let mut corrections = dict.get(&UniCase::new("foo".into())).unwrap().iter();
|
||||
assert_eq!(corrections.next().unwrap(), "xyz");
|
||||
assert_eq!(corrections.next().unwrap(), "abc");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_duplicates() {
|
||||
assert_eq!(
|
||||
|
|
Loading…
Reference in a new issue