mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-22 00:51:11 -05:00
feat(dict): Preserve correction order
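We want to be able to recommend more likely corrections first, e.g. for "poped" we want to recommend "popped" before "pooped". To make that possible, each typo's corrections are now stored in an insertion-ordered `IndexSet` instead of a `BTreeSet`, which always iterated them in sorted order regardless of how they were listed in the dictionary.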
This commit is contained in:
parent
357aa55c6c
commit
8d026ac23e
3 changed files with 40 additions and 8 deletions
29
Cargo.lock
generated
29
Cargo.lock
generated
|
@ -233,7 +233,7 @@ dependencies = [
|
||||||
"bitflags",
|
"bitflags",
|
||||||
"clap_derive 3.2.18",
|
"clap_derive 3.2.18",
|
||||||
"clap_lex 0.2.4",
|
"clap_lex 0.2.4",
|
||||||
"indexmap",
|
"indexmap 1.9.2",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"strsim 0.10.0",
|
"strsim 0.10.0",
|
||||||
"termcolor",
|
"termcolor",
|
||||||
|
@ -615,6 +615,12 @@ dependencies = [
|
||||||
"termcolor",
|
"termcolor",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "equivalent"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "errno"
|
name = "errno"
|
||||||
version = "0.2.8"
|
version = "0.2.8"
|
||||||
|
@ -733,6 +739,12 @@ version = "0.12.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.14.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heck"
|
name = "heck"
|
||||||
version = "0.4.1"
|
version = "0.4.1"
|
||||||
|
@ -826,7 +838,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399"
|
checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
"hashbrown",
|
"hashbrown 0.12.3",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "indexmap"
|
||||||
|
version = "2.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
|
||||||
|
dependencies = [
|
||||||
|
"equivalent",
|
||||||
|
"hashbrown 0.14.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1584,7 +1606,7 @@ version = "0.19.10"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739"
|
checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"indexmap",
|
"indexmap 1.9.2",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_spanned",
|
"serde_spanned",
|
||||||
"toml_datetime",
|
"toml_datetime",
|
||||||
|
@ -1685,6 +1707,7 @@ dependencies = [
|
||||||
"csv",
|
"csv",
|
||||||
"dictgen",
|
"dictgen",
|
||||||
"edit-distance",
|
"edit-distance",
|
||||||
|
"indexmap 2.0.0",
|
||||||
"itertools",
|
"itertools",
|
||||||
"snapbox",
|
"snapbox",
|
||||||
"unicase",
|
"unicase",
|
||||||
|
|
|
@ -24,3 +24,4 @@ codegenrs = "2.0"
|
||||||
dictgen = { version = "^0.2", path = "../dictgen", features = ["codegen"] }
|
dictgen = { version = "^0.2", path = "../dictgen", features = ["codegen"] }
|
||||||
varcon = { version = "^0.6", path = "../varcon" }
|
varcon = { version = "^0.6", path = "../varcon" }
|
||||||
snapbox = { version = "0.4.11", features = ["path"] }
|
snapbox = { version = "0.4.11", features = ["path"] }
|
||||||
|
indexmap = "2.0.0"
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
|
use indexmap::IndexSet;
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::collections::BTreeSet;
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use unicase::UniCase;
|
use unicase::UniCase;
|
||||||
|
|
||||||
type Dict = BTreeMap<UniCase<String>, BTreeSet<String>>;
|
type Dict = BTreeMap<UniCase<String>, IndexSet<String>>;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn verify() {
|
fn verify() {
|
||||||
|
@ -62,7 +62,7 @@ fn dict_from_iter<S: Into<String>>(
|
||||||
|
|
||||||
// duplicate entries are merged
|
// duplicate entries are merged
|
||||||
dict.entry(typo)
|
dict.entry(typo)
|
||||||
.or_insert_with(BTreeSet::new)
|
.or_default()
|
||||||
.extend(corrections.into_iter().map(|c| {
|
.extend(corrections.into_iter().map(|c| {
|
||||||
let mut c = c.into();
|
let mut c = c.into();
|
||||||
c.make_ascii_lowercase();
|
c.make_ascii_lowercase();
|
||||||
|
@ -82,7 +82,7 @@ fn process<S: Into<String>>(
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter(|(t, _)| is_word(t))
|
.filter(|(t, _)| is_word(t))
|
||||||
.filter_map(|(t, c)| {
|
.filter_map(|(t, c)| {
|
||||||
let new_c: BTreeSet<_> = c.into_iter().filter(|c| is_word(c)).collect();
|
let new_c: IndexSet<_> = c.into_iter().filter(|c| is_word(c)).collect();
|
||||||
if new_c.is_empty() {
|
if new_c.is_empty() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
|
@ -112,7 +112,7 @@ fn process<S: Into<String>>(
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.map(|(typo, corrections)| {
|
.map(|(typo, corrections)| {
|
||||||
let mut new_corrections = BTreeSet::new();
|
let mut new_corrections = IndexSet::new();
|
||||||
for correction in corrections {
|
for correction in corrections {
|
||||||
let correction = word_variants
|
let correction = word_variants
|
||||||
.get(correction.as_str())
|
.get(correction.as_str())
|
||||||
|
@ -145,6 +145,14 @@ fn process<S: Into<String>>(
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_preserve_correction_order() {
|
||||||
|
let dict = process([("foo", ["xyz", "abc"])]);
|
||||||
|
let mut corrections = dict.get(&UniCase::new("foo".into())).unwrap().iter();
|
||||||
|
assert_eq!(corrections.next().unwrap(), "xyz");
|
||||||
|
assert_eq!(corrections.next().unwrap(), "abc");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_merge_duplicates() {
|
fn test_merge_duplicates() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|
Loading…
Reference in a new issue