From 8d026ac23e682d1cfb9d6e4aa334dd423be0679d Mon Sep 17 00:00:00 2001 From: Martin Fischer Date: Mon, 26 Jun 2023 21:33:59 +0200 Subject: [PATCH] feat(dict): Preserve correction order We want to be able to recommend more likely corrections first, e.g. for "poped" we want to recommend "popped" before "pooped". --- Cargo.lock | 29 ++++++++++++++++++++++++++--- crates/typos-dict/Cargo.toml | 1 + crates/typos-dict/tests/verify.rs | 18 +++++++++++++----- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cddc161..656bc30 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -233,7 +233,7 @@ dependencies = [ "bitflags", "clap_derive 3.2.18", "clap_lex 0.2.4", - "indexmap", + "indexmap 1.9.2", "once_cell", "strsim 0.10.0", "termcolor", @@ -615,6 +615,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1" + [[package]] name = "errno" version = "0.2.8" @@ -733,6 +739,12 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + [[package]] name = "heck" version = "0.4.1" @@ -826,7 +838,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown 0.14.0", ] [[package]] @@ 
-1584,7 +1606,7 @@ version = "0.19.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739" dependencies = [ - "indexmap", + "indexmap 1.9.2", "serde", "serde_spanned", "toml_datetime", @@ -1685,6 +1707,7 @@ dependencies = [ "csv", "dictgen", "edit-distance", + "indexmap 2.0.0", "itertools", "snapbox", "unicase", diff --git a/crates/typos-dict/Cargo.toml b/crates/typos-dict/Cargo.toml index 54f16fa..b6a2292 100644 --- a/crates/typos-dict/Cargo.toml +++ b/crates/typos-dict/Cargo.toml @@ -24,3 +24,4 @@ codegenrs = "2.0" dictgen = { version = "^0.2", path = "../dictgen", features = ["codegen"] } varcon = { version = "^0.6", path = "../varcon" } snapbox = { version = "0.4.11", features = ["path"] } +indexmap = "2.0.0" diff --git a/crates/typos-dict/tests/verify.rs b/crates/typos-dict/tests/verify.rs index b824a85..24f63bc 100644 --- a/crates/typos-dict/tests/verify.rs +++ b/crates/typos-dict/tests/verify.rs @@ -1,10 +1,10 @@ +use indexmap::IndexSet; use std::collections::BTreeMap; -use std::collections::BTreeSet; use std::collections::HashMap; use std::collections::HashSet; use unicase::UniCase; -type Dict = BTreeMap<UniCase<String>, BTreeSet<String>>; +type Dict = BTreeMap<UniCase<String>, IndexSet<String>>; #[test] fn verify() { @@ -62,7 +62,7 @@ fn dict_from_iter>( // duplicate entries are merged dict.entry(typo) - .or_insert_with(BTreeSet::new) + .or_default() .extend(corrections.into_iter().map(|c| { let mut c = c.into(); c.make_ascii_lowercase(); @@ -82,7 +82,7 @@ fn process>( .into_iter() .filter(|(t, _)| is_word(t)) .filter_map(|(t, c)| { - let new_c: BTreeSet<_> = c.into_iter().filter(|c| is_word(c)).collect(); + let new_c: IndexSet<_> = c.into_iter().filter(|c| is_word(c)).collect(); if new_c.is_empty() { None } else { @@ -112,7 +112,7 @@ fn process>( } }) .map(|(typo, corrections)| { - let mut new_corrections = BTreeSet::new(); + let mut new_corrections = IndexSet::new(); for correction in corrections { let 
correction = word_variants .get(correction.as_str()) @@ -145,6 +145,14 @@ fn process>( .collect() } +#[test] +fn test_preserve_correction_order() { + let dict = process([("foo", ["xyz", "abc"])]); + let mut corrections = dict.get(&UniCase::new("foo".into())).unwrap().iter(); + assert_eq!(corrections.next().unwrap(), "xyz"); + assert_eq!(corrections.next().unwrap(), "abc"); +} + #[test] fn test_merge_duplicates() { assert_eq!(