mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-25 02:20:58 -05:00
feat(dict): varcon dict
This commit is contained in:
parent
814ff82aff
commit
7f983992bd
15 changed files with 484863 additions and 3 deletions
93
Cargo.lock
generated
93
Cargo.lock
generated
|
@ -24,6 +24,15 @@ version = "1.0.28"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d9a60d744a80c30fcb657dfe2c1b22bcb3e814c1a1e3674f32bf5820b570fbff"
|
checksum = "d9a60d744a80c30fcb657dfe2c1b22bcb3e814c1a1e3674f32bf5820b570fbff"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "arrayvec"
|
||||||
|
version = "0.4.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
|
||||||
|
dependencies = [
|
||||||
|
"nodrop",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "assert_fs"
|
name = "assert_fs"
|
||||||
version = "1.0.0"
|
version = "1.0.0"
|
||||||
|
@ -81,9 +90,9 @@ checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cfg-if"
|
name = "cfg-if"
|
||||||
version = "0.1.10"
|
version = "0.1.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
checksum = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
|
@ -276,6 +285,26 @@ version = "1.5.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
|
checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "enumflags2"
|
||||||
|
version = "0.6.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "83c8d82922337cd23a15f88b70d8e4ef5f11da38dd7cdb55e84dd5de99695da0"
|
||||||
|
dependencies = [
|
||||||
|
"enumflags2_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "enumflags2_derive"
|
||||||
|
version = "0.6.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "946ee94e3dbf58fdd324f9ce245c7b238d46a66f00e86a020b71996349e46cce"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2 1.0.12",
|
||||||
|
"quote 1.0.4",
|
||||||
|
"syn 1.0.19",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "env_logger"
|
name = "env_logger"
|
||||||
version = "0.7.1"
|
version = "0.7.1"
|
||||||
|
@ -411,6 +440,20 @@ version = "1.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lexical-core"
|
||||||
|
version = "0.6.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f86d66d380c9c5a685aaac7a11818bdfa1f733198dfd9ec09c70b762cd12ad6f"
|
||||||
|
dependencies = [
|
||||||
|
"arrayvec",
|
||||||
|
"bitflags",
|
||||||
|
"cfg-if",
|
||||||
|
"rustc_version",
|
||||||
|
"ryu",
|
||||||
|
"static_assertions",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libc"
|
name = "libc"
|
||||||
version = "0.2.69"
|
version = "0.2.69"
|
||||||
|
@ -460,6 +503,23 @@ dependencies = [
|
||||||
"unicase",
|
"unicase",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nodrop"
|
||||||
|
version = "0.1.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nom"
|
||||||
|
version = "5.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0b471253da97532da4b61552249c521e01e736071f71c1a4f7ebbfbf0a06aad6"
|
||||||
|
dependencies = [
|
||||||
|
"lexical-core",
|
||||||
|
"memchr",
|
||||||
|
"version_check",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "normalize-line-endings"
|
name = "normalize-line-endings"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
|
@ -786,6 +846,12 @@ version = "0.3.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
|
checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "static_assertions"
|
||||||
|
version = "0.3.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
|
@ -1025,6 +1091,29 @@ version = "0.2.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
|
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "varcon-codegen"
|
||||||
|
version = "1.0.2"
|
||||||
|
dependencies = [
|
||||||
|
"codegenrs",
|
||||||
|
"structopt",
|
||||||
|
"varcon-parser",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "varcon-dict"
|
||||||
|
version = "0.2.1"
|
||||||
|
dependencies = [
|
||||||
|
"enumflags2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "varcon-parser"
|
||||||
|
version = "1.0.0"
|
||||||
|
dependencies = [
|
||||||
|
"nom",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "vec_map"
|
name = "vec_map"
|
||||||
version = "0.8.2"
|
version = "0.8.2"
|
||||||
|
|
|
@ -5,6 +5,7 @@ members = [
|
||||||
"crates/codespell-dict", "crates/codespell-dict/codegen",
|
"crates/codespell-dict", "crates/codespell-dict/codegen",
|
||||||
"crates/misspell-dict", "crates/misspell-dict/codegen",
|
"crates/misspell-dict", "crates/misspell-dict/codegen",
|
||||||
"crates/wikipedia-dict", "crates/wikipedia-dict/codegen",
|
"crates/wikipedia-dict", "crates/wikipedia-dict/codegen",
|
||||||
|
"crates/varcon", "crates/varcon/codegen", "crates/varcon-parser",
|
||||||
]
|
]
|
||||||
|
|
||||||
[package]
|
[package]
|
||||||
|
|
|
@ -39,6 +39,9 @@ stages:
|
||||||
- script: |
|
- script: |
|
||||||
cargo run --package wikipedia-codegen -- --output crates/wikipedia-dict/src/dict_codegen.rs --check
|
cargo run --package wikipedia-codegen -- --output crates/wikipedia-dict/src/dict_codegen.rs --check
|
||||||
displayName: Verify wikipedia-dict
|
displayName: Verify wikipedia-dict
|
||||||
|
- script: |
|
||||||
|
cargo run --package varcon-codegen -- --output crates/varcon/src/codegen.rs --check
|
||||||
|
displayName: Verify varcon-dict
|
||||||
- stage: committed
|
- stage: committed
|
||||||
displayName: Lint History
|
displayName: Lint History
|
||||||
dependsOn: []
|
dependsOn: []
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
pub const DICT: &[u8] = include_bytes!("../../assets/words.csv");
|
const DICT: &[u8] = include_bytes!("../../assets/words.csv");
|
||||||
|
|
||||||
fn generate<W: std::io::Write>(file: &mut W) {
|
fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
writeln!(
|
writeln!(
|
||||||
|
|
17
crates/varcon-parser/Cargo.toml
Normal file
17
crates/varcon-parser/Cargo.toml
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
[package]
|
||||||
|
name = "varcon-parser"
|
||||||
|
version = "1.0.0"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Parse varcon.txt file"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "../../README.md"
|
||||||
|
categories = ["text-processing"]
|
||||||
|
license = "MIT"
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
nom = "5.1.1"
|
107
crates/varcon-parser/src/lib.rs
Normal file
107
crates/varcon-parser/src/lib.rs
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
mod parser;
|
||||||
|
|
||||||
|
pub use parser::ClusterIter;
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||||
|
pub struct Cluster {
|
||||||
|
pub header: Option<String>,
|
||||||
|
pub entries: Vec<Entry>,
|
||||||
|
pub notes: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Cluster {
|
||||||
|
pub fn infer(&mut self) {
|
||||||
|
for entry in self.entries.iter_mut() {
|
||||||
|
entry.infer();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||||
|
pub struct Entry {
|
||||||
|
pub variants: Vec<Variant>,
|
||||||
|
pub pos: Option<Pos>,
|
||||||
|
pub archaic: bool,
|
||||||
|
pub note: bool,
|
||||||
|
pub description: Option<String>,
|
||||||
|
pub comment: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Entry {
|
||||||
|
pub fn infer(&mut self) {
|
||||||
|
imply(
|
||||||
|
&mut self.variants,
|
||||||
|
Category::BritishIse,
|
||||||
|
Category::BritishIze,
|
||||||
|
);
|
||||||
|
imply(&mut self.variants, Category::BritishIze, Category::Canadian);
|
||||||
|
imply(
|
||||||
|
&mut self.variants,
|
||||||
|
Category::BritishIse,
|
||||||
|
Category::Australian,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn imply(variants: &mut Vec<Variant>, required: Category, missing: Category) {
|
||||||
|
let missing_exists = variants
|
||||||
|
.iter()
|
||||||
|
.any(|v| v.types.iter().any(|t| t.category == missing));
|
||||||
|
if missing_exists {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for variant in variants.iter_mut() {
|
||||||
|
let types: Vec<_> = variant
|
||||||
|
.types
|
||||||
|
.iter()
|
||||||
|
.filter(|t| t.category == required)
|
||||||
|
.cloned()
|
||||||
|
.map(|mut t| {
|
||||||
|
t.category = missing;
|
||||||
|
t
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
variant.types.extend(types);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||||
|
pub struct Variant {
|
||||||
|
pub types: Vec<Type>,
|
||||||
|
pub word: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||||
|
pub struct Type {
|
||||||
|
pub category: Category,
|
||||||
|
pub tag: Option<Tag>,
|
||||||
|
pub num: Option<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Spelling category of a variant. Each value corresponds to one symbol of the
/// varcon format, noted on the variant.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum Category {
    /// Symbol `A`.
    American,
    /// Symbol `B`.
    BritishIse,
    /// Symbol `Z`.
    BritishIze,
    /// Symbol `C`.
    Canadian,
    /// Symbol `D`.
    Australian,
    /// Symbol `_`.
    Other,
}
|
||||||
|
|
||||||
|
/// Tag attached to a category marker. Each value corresponds to one symbol of
/// the varcon format, noted on the variant.
///
/// The derived ordering follows declaration order, so the variants must not
/// be reordered.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum Tag {
    /// Symbol `.`.
    Eq,
    /// Symbol `v`.
    Variant,
    /// Symbol `V`.
    Seldom,
    /// Symbol `-`.
    Possible,
    /// Symbol `x`.
    Improper,
}
|
||||||
|
|
||||||
|
/// Part-of-speech marker of an entry. Each value corresponds to one marker of
/// the varcon format, noted on the variant.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum Pos {
    /// Marker `<N>`.
    Noun,
    /// Marker `<V>`.
    Verb,
    /// Marker `<Adj>`.
    Adjective,
    /// Marker `<Adv>`.
    Adverb,
}
|
568
crates/varcon-parser/src/parser.rs
Normal file
568
crates/varcon-parser/src/parser.rs
Normal file
|
@ -0,0 +1,568 @@
|
||||||
|
use nom::IResult;
|
||||||
|
use nom::InputTakeAtPosition;
|
||||||
|
|
||||||
|
use crate::*;
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||||
|
pub struct ClusterIter<'i> {
|
||||||
|
input: &'i str,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'i> ClusterIter<'i> {
|
||||||
|
pub fn new(input: &'i str) -> Self {
|
||||||
|
Self { input }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'i> Iterator for ClusterIter<'i> {
|
||||||
|
type Item = Cluster;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Cluster> {
|
||||||
|
let i = self.input.trim_start();
|
||||||
|
let (i, c) = Cluster::parse(i).ok()?;
|
||||||
|
self.input = i;
|
||||||
|
Some(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test_cluster_iter {
    use super::*;

    /// A buffer holding one cluster followed by a blank line yields one item.
    #[test]
    fn test_single() {
        let text = "# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's

";
        assert_eq!(ClusterIter::new(text).count(), 1);
    }

    /// Two blank-line separated clusters yield two items.
    #[test]
    fn test_multiple() {
        let text = "# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's

# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's

";
        assert_eq!(ClusterIter::new(text).count(), 2);
    }
}
|
||||||
|
|
||||||
|
impl Cluster {
|
||||||
|
pub fn parse(input: &str) -> IResult<&str, Self> {
|
||||||
|
let header = nom::sequence::tuple((
|
||||||
|
nom::bytes::streaming::tag("#"),
|
||||||
|
nom::character::streaming::space0,
|
||||||
|
nom::character::streaming::not_line_ending,
|
||||||
|
nom::character::streaming::line_ending,
|
||||||
|
));
|
||||||
|
let note = nom::sequence::preceded(
|
||||||
|
nom::sequence::pair(
|
||||||
|
nom::bytes::streaming::tag("##"),
|
||||||
|
nom::character::streaming::space0,
|
||||||
|
),
|
||||||
|
nom::sequence::terminated(
|
||||||
|
nom::character::streaming::not_line_ending,
|
||||||
|
nom::character::streaming::line_ending,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
let cluster = nom::sequence::tuple((
|
||||||
|
nom::combinator::opt(header),
|
||||||
|
nom::multi::many1(nom::sequence::terminated(
|
||||||
|
Entry::parse,
|
||||||
|
nom::character::streaming::line_ending,
|
||||||
|
)),
|
||||||
|
nom::multi::many0(note),
|
||||||
|
));
|
||||||
|
let (input, (header, entries, notes)) = (cluster)(input)?;
|
||||||
|
|
||||||
|
let header = header.map(|s| s.2.to_owned());
|
||||||
|
let notes = notes.into_iter().map(|s| s.to_owned()).collect();
|
||||||
|
let c = Self {
|
||||||
|
header,
|
||||||
|
entries,
|
||||||
|
notes,
|
||||||
|
};
|
||||||
|
Ok((input, c))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test_cluster {
    use super::*;

    /// Header plus three entries; the blank line after them is left unparsed.
    #[test]
    fn test_basic() {
        let text = "# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's

";
        let (rest, cluster) = Cluster::parse(text).unwrap();

        assert_eq!(rest, "\n");
        let expected_header = "acknowledgment <verified> (level 35)".to_owned();
        assert_eq!(cluster.header, Some(expected_header));
        assert_eq!(cluster.entries.len(), 3);
        assert_eq!(cluster.notes.len(), 0);
    }

    /// `##` lines after the entries are collected as notes.
    #[test]
    fn test_notes() {
        let text = "# coloration <verified> (level 50)
A B C: coloration / B. Cv: colouration
A B C: colorations / B. Cv: colourations
A B C: coloration's / B. Cv: colouration's
## OED has coloration as the preferred spelling and discolouration as a
## variant for British Engl or some reason

";
        let (rest, cluster) = Cluster::parse(text).unwrap();

        assert_eq!(rest, "\n");
        let expected_header = "coloration <verified> (level 50)".to_owned();
        assert_eq!(cluster.header, Some(expected_header));
        assert_eq!(cluster.entries.len(), 3);
        assert_eq!(cluster.notes.len(), 2);
    }
}
|
||||||
|
|
||||||
|
impl Entry {
|
||||||
|
pub fn parse(input: &str) -> IResult<&str, Self> {
|
||||||
|
let var_sep = nom::sequence::tuple((
|
||||||
|
nom::character::streaming::space0,
|
||||||
|
nom::bytes::streaming::tag("/"),
|
||||||
|
nom::character::streaming::space0,
|
||||||
|
));
|
||||||
|
let (input, variants) =
|
||||||
|
nom::multi::separated_nonempty_list(var_sep, Variant::parse)(input)?;
|
||||||
|
|
||||||
|
let desc_sep = nom::sequence::tuple((
|
||||||
|
nom::character::streaming::space0,
|
||||||
|
nom::bytes::streaming::tag("|"),
|
||||||
|
));
|
||||||
|
let (input, description) =
|
||||||
|
nom::combinator::opt(nom::sequence::tuple((desc_sep, Self::parse_description)))(input)?;
|
||||||
|
|
||||||
|
let comment_sep = nom::sequence::tuple((
|
||||||
|
nom::character::streaming::space0,
|
||||||
|
nom::bytes::streaming::tag("#"),
|
||||||
|
));
|
||||||
|
let (input, comment) = nom::combinator::opt(nom::sequence::tuple((
|
||||||
|
comment_sep,
|
||||||
|
nom::character::streaming::space1,
|
||||||
|
nom::character::streaming::not_line_ending,
|
||||||
|
)))(input)?;
|
||||||
|
|
||||||
|
let mut e = match description {
|
||||||
|
Some((_, description)) => description,
|
||||||
|
None => Self {
|
||||||
|
variants: Vec::new(),
|
||||||
|
pos: None,
|
||||||
|
archaic: false,
|
||||||
|
note: false,
|
||||||
|
description: None,
|
||||||
|
comment: None,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
e.variants = variants;
|
||||||
|
e.comment = comment.map(|c| c.2.to_owned());
|
||||||
|
Ok((input, e))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_description(input: &str) -> IResult<&str, Self> {
|
||||||
|
let (input, (pos, archaic, note, description)) = nom::sequence::tuple((
|
||||||
|
nom::combinator::opt(nom::sequence::tuple((
|
||||||
|
nom::character::streaming::space1,
|
||||||
|
Pos::parse,
|
||||||
|
))),
|
||||||
|
nom::combinator::opt(nom::sequence::tuple((
|
||||||
|
nom::character::streaming::space1,
|
||||||
|
nom::bytes::streaming::tag("(-)"),
|
||||||
|
))),
|
||||||
|
nom::combinator::opt(nom::sequence::tuple((
|
||||||
|
nom::character::streaming::space1,
|
||||||
|
nom::bytes::streaming::tag("--"),
|
||||||
|
))),
|
||||||
|
nom::combinator::opt(nom::sequence::tuple((
|
||||||
|
nom::character::streaming::space1,
|
||||||
|
nom::bytes::streaming::take_till(|c| c == '\n' || c == '\r' || c == '#'),
|
||||||
|
))),
|
||||||
|
))(input)?;
|
||||||
|
|
||||||
|
let variants = Vec::new();
|
||||||
|
let pos = pos.map(|(_, p)| p);
|
||||||
|
let archaic = archaic.is_some();
|
||||||
|
let note = note.is_some();
|
||||||
|
let description = description.map(|(_, d)| d.to_owned());
|
||||||
|
let e = Self {
|
||||||
|
variants,
|
||||||
|
pos,
|
||||||
|
archaic,
|
||||||
|
note,
|
||||||
|
description,
|
||||||
|
comment: None,
|
||||||
|
};
|
||||||
|
Ok((input, e))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test_entry {
    use super::*;

    // Every input ends in `\n`, and each test checks that this line ending is
    // left unconsumed by `Entry::parse`.

    /// Variants only: no description section and no metadata.
    #[test]
    fn test_variant_only() {
        let (rest, entry) =
            Entry::parse("A Cv: acknowledgment's / Av B C: acknowledgement's\n").unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(entry.variants.len(), 2);
        assert!(entry.pos.is_none());
        assert!(!entry.archaic);
        assert!(!entry.note);
        assert!(entry.description.is_none());
    }

    /// Free text after `|` becomes the description.
    #[test]
    fn test_description() {
        let (rest, entry) = Entry::parse("A C: prize / B: prise | otherwise\n").unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(entry.variants.len(), 2);
        assert!(entry.pos.is_none());
        assert!(!entry.archaic);
        assert!(!entry.note);
        assert_eq!(entry.description, Some("otherwise".to_owned()));
    }

    /// A part-of-speech marker after `|` is recognised.
    #[test]
    fn test_pos() {
        let (rest, entry) = Entry::parse("A B C: practice / AV Cv: practise | <N>\n").unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(entry.variants.len(), 2);
        assert_eq!(entry.pos, Some(Pos::Noun));
        assert!(!entry.archaic);
        assert!(!entry.note);
        assert!(entry.description.is_none());
    }

    /// `(-)` sets `archaic`; the text after it is the description.
    #[test]
    fn test_archaic() {
        let (rest, entry) = Entry::parse("A: bark / Av B: barque | (-) ship\n").unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(entry.variants.len(), 2);
        assert!(entry.pos.is_none());
        assert!(entry.archaic);
        assert!(!entry.note);
        assert_eq!(entry.description, Some("ship".to_owned()));
    }

    /// `--` sets `note`; the text after it is the description.
    #[test]
    fn test_note() {
        let (rest, entry) = Entry::parse("_: cabbies | -- plural\n").unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(entry.variants.len(), 1);
        assert!(entry.pos.is_none());
        assert!(!entry.archaic);
        assert!(entry.note);
        assert_eq!(entry.description, Some("plural".to_owned()));
    }

    /// A trailing `#` comment is captured without a description section.
    #[test]
    fn test_trailing_comment() {
        let line = "A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n";
        let (rest, entry) = Entry::parse(line).unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(entry.variants.len(), 2);
        assert!(entry.pos.is_none());
        assert!(!entry.archaic);
        assert!(!entry.note);
        assert!(entry.description.is_none());
        let expected = "ODE: archaic, M-W: 'or' but can find little evidence of use".to_owned();
        assert_eq!(entry.comment, Some(expected));
    }
}
|
||||||
|
|
||||||
|
impl Variant {
|
||||||
|
pub fn parse(input: &str) -> IResult<&str, Self> {
|
||||||
|
let types =
|
||||||
|
nom::multi::separated_nonempty_list(nom::character::streaming::space1, Type::parse);
|
||||||
|
let sep = nom::sequence::tuple((
|
||||||
|
nom::bytes::streaming::tag(":"),
|
||||||
|
nom::character::streaming::space0,
|
||||||
|
));
|
||||||
|
let (input, (types, word)) = nom::sequence::separated_pair(types, sep, word)(input)?;
|
||||||
|
let v = Self { types, word };
|
||||||
|
Ok((input, v))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn word(input: &str) -> IResult<&str, String> {
|
||||||
|
input
|
||||||
|
.split_at_position1(
|
||||||
|
|item| item.is_ascii_whitespace(),
|
||||||
|
nom::error::ErrorKind::Alpha,
|
||||||
|
)
|
||||||
|
.map(|(i, s)| (i, s.to_owned().replace('_', " ")))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test_variant {
    use super::*;

    /// Builds a `Type` without a number.
    fn ty(category: Category, tag: Option<Tag>) -> Type {
        Type {
            category,
            tag,
            num: None,
        }
    }

    /// Types expected for the `A Cv` prefix used by the tests below.
    fn american_and_canadian_variant() -> Vec<Type> {
        vec![
            ty(Category::American, None),
            ty(Category::Canadian, Some(Tag::Variant)),
        ]
    }

    /// The trailing space matters: the word parser needs to see where the
    /// word ends, otherwise the parse is incomplete.
    #[test]
    fn test_valid() {
        let (rest, variant) = Variant::parse("A Cv: acknowledgment ").unwrap();
        assert_eq!(rest, " ");
        assert_eq!(variant.types, american_and_canadian_variant());
        assert_eq!(variant.word, "acknowledgment");
    }

    /// Parsing stops after the first variant and leaves the rest untouched.
    #[test]
    fn test_extra() {
        let (rest, variant) =
            Variant::parse("A Cv: acknowledgment's / Av B C: acknowledgement's").unwrap();
        assert_eq!(rest, " / Av B C: acknowledgement's");
        assert_eq!(variant.types, american_and_canadian_variant());
        assert_eq!(variant.word, "acknowledgment's");
    }

    /// Underscores in the word are turned into spaces.
    #[test]
    fn test_underscore() {
        let (rest, variant) = Variant::parse("_: air_gun\n").unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(variant.types, vec![ty(Category::Other, None)]);
        assert_eq!(variant.word, "air gun");
    }
}
|
||||||
|
|
||||||
|
impl Type {
|
||||||
|
pub fn parse(input: &str) -> IResult<&str, Type> {
|
||||||
|
let (input, category) = Category::parse(input)?;
|
||||||
|
let (input, tag) = nom::combinator::opt(Tag::parse)(input)?;
|
||||||
|
let (input, num) = nom::combinator::opt(nom::character::streaming::digit1)(input)?;
|
||||||
|
let num = num.map(|s| s.parse().expect("parser ensured its a number"));
|
||||||
|
let t = Type { category, tag, num };
|
||||||
|
Ok((input, t))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test_type {
    use super::*;

    /// The trailing space matters: with nothing after the category letter the
    /// parse is incomplete.
    #[test]
    fn test_valid() {
        let (rest, parsed) = Type::parse("A ").unwrap();
        assert_eq!(rest, " ");
        assert_eq!(
            parsed,
            Type {
                category: Category::American,
                tag: None,
                num: None,
            }
        );

        let (rest, parsed) = Type::parse("Bv ").unwrap();
        assert_eq!(rest, " ");
        assert_eq!(
            parsed,
            Type {
                category: Category::BritishIse,
                tag: Some(Tag::Variant),
                num: None,
            }
        );
    }

    /// Input after the type is returned untouched.
    #[test]
    fn test_extra() {
        let (rest, parsed) = Type::parse("Z foobar").unwrap();
        assert_eq!(rest, " foobar");
        assert_eq!(
            parsed,
            Type {
                category: Category::BritishIze,
                tag: None,
                num: None,
            }
        );

        let (rest, parsed) = Type::parse("C- foobar").unwrap();
        assert_eq!(rest, " foobar");
        assert_eq!(
            parsed,
            Type {
                category: Category::Canadian,
                tag: Some(Tag::Possible),
                num: None,
            }
        );
    }

    /// A number directly after the tag is parsed into `num`.
    #[test]
    fn test_num() {
        let (rest, parsed) = Type::parse("Av1 ").unwrap();
        assert_eq!(rest, " ");
        assert_eq!(
            parsed,
            Type {
                category: Category::American,
                tag: Some(Tag::Variant),
                num: Some(1),
            }
        );
    }
}
|
||||||
|
|
||||||
|
impl Category {
|
||||||
|
pub fn parse(input: &str) -> IResult<&str, Category> {
|
||||||
|
let symbols = nom::character::streaming::one_of("ABZCD_");
|
||||||
|
nom::combinator::map(symbols, |c| match c {
|
||||||
|
'A' => Category::American,
|
||||||
|
'B' => Category::BritishIse,
|
||||||
|
'Z' => Category::BritishIze,
|
||||||
|
'C' => Category::Canadian,
|
||||||
|
'D' => Category::Australian,
|
||||||
|
'_' => Category::Other,
|
||||||
|
_ => unreachable!("parser won't select this option"),
|
||||||
|
})(input)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test_category {
    use super::*;

    /// A lone category letter parses and consumes the whole input.
    #[test]
    fn test_valid() {
        let (rest, category) = Category::parse("A").unwrap();
        assert!(rest.is_empty());
        assert_eq!(category, Category::American);
    }

    /// Only the first character is consumed.
    #[test]
    fn test_extra() {
        let (rest, category) = Category::parse("_ foobar").unwrap();
        assert_eq!(rest, " foobar");
        assert_eq!(category, Category::Other);
    }
}
|
||||||
|
|
||||||
|
impl Tag {
|
||||||
|
pub fn parse(input: &str) -> IResult<&str, Tag> {
|
||||||
|
let symbols = nom::character::streaming::one_of(".vV-x");
|
||||||
|
nom::combinator::map(symbols, |c| match c {
|
||||||
|
'.' => Tag::Eq,
|
||||||
|
'v' => Tag::Variant,
|
||||||
|
'V' => Tag::Seldom,
|
||||||
|
'-' => Tag::Possible,
|
||||||
|
'x' => Tag::Improper,
|
||||||
|
_ => unreachable!("parser won't select this option"),
|
||||||
|
})(input)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test_tag {
    use super::*;

    /// A lone tag symbol parses and consumes the whole input.
    #[test]
    fn test_valid() {
        let (rest, tag) = Tag::parse(".").unwrap();
        assert!(rest.is_empty());
        assert_eq!(tag, Tag::Eq);
    }

    /// Only the first character is consumed.
    #[test]
    fn test_extra() {
        let (rest, tag) = Tag::parse("x foobar").unwrap();
        assert_eq!(rest, " foobar");
        assert_eq!(tag, Tag::Improper);
    }
}
|
||||||
|
|
||||||
|
impl Pos {
|
||||||
|
pub fn parse(input: &str) -> IResult<&str, Pos> {
|
||||||
|
use nom::bytes::streaming::tag;
|
||||||
|
let noun = tag("<N>");
|
||||||
|
let verb = tag("<V>");
|
||||||
|
let adjective = tag("<Adj>");
|
||||||
|
let adverb = tag("<Adv>");
|
||||||
|
nom::alt!(input,
|
||||||
|
noun => {|_| Pos::Noun } |
|
||||||
|
verb => {|_| Pos::Verb } |
|
||||||
|
adjective => {|_| Pos::Adjective } |
|
||||||
|
adverb => {|_| Pos::Adverb }
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test_pos {
    use super::*;

    /// A lone marker parses and consumes the whole input.
    #[test]
    fn test_valid() {
        let (rest, pos) = Pos::parse("<N>").unwrap();
        assert!(rest.is_empty());
        assert_eq!(pos, Pos::Noun);
    }

    /// Only the marker itself is consumed.
    #[test]
    fn test_extra() {
        let (rest, pos) = Pos::parse("<Adj> foobar").unwrap();
        assert_eq!(rest, " foobar");
        assert_eq!(pos, Pos::Adjective);
    }
}
|
23
crates/varcon/Cargo.toml
Normal file
23
crates/varcon/Cargo.toml
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
[package]
|
||||||
|
name = "varcon-dict"
|
||||||
|
version = "0.2.1"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Source Code Spelling Correction"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "../../README.md"
|
||||||
|
categories = ["development-tools", "text-processing"]
|
||||||
|
keywords = ["development", "spelling"]
|
||||||
|
license = "MIT"
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = ["all"]
|
||||||
|
all = ["flags"]
|
||||||
|
flags = ["enumflags2"]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
enumflags2 = { version = "0.6", optional = true }
|
1
crates/varcon/assets/.gitattributes
vendored
Normal file
1
crates/varcon/assets/.gitattributes
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
* linguist-vendored
|
481
crates/varcon/assets/README
vendored
Normal file
481
crates/varcon/assets/README
vendored
Normal file
|
@ -0,0 +1,481 @@
|
||||||
|
Variant Conversion Info (VarCon)
|
||||||
|
|
||||||
|
Version 2019.10.06
|
||||||
|
|
||||||
|
Copyright 2000-2016 by Kevin Atkinson (kevina@gnu.org) and Benjamin
|
||||||
|
Titze (btitze@protonmail.ch).
|
||||||
|
|
||||||
|
This package contains information to convert between American,
|
||||||
|
British, Canadian, and Australian spellings and vocabulary as well as
|
||||||
|
other variant information.
|
||||||
|
|
||||||
|
The latest version can be found at http://wordlist.aspell.net/.
|
||||||
|
|
||||||
|
The main data file is varcon.txt. It contains information on the
|
||||||
|
preferred American, British, and Canadian spelling of a word as well
|
||||||
|
as other variant information.
|
||||||
|
|
||||||
|
Each line contains a mapping between the various spellings of a word.
|
||||||
|
Words are tagged to indicate where the spelling is used, and each
|
||||||
|
word/tag pair is separated with a " / ". For example in the line:
|
||||||
|
A Cv: acknowledgment / Av B C: acknowledgement
|
||||||
|
"acknowledgment" and "acknowledgement" are two spellings of the same
|
||||||
|
word and "A", "Cv", "B", etc are the tags. Tags are separated by
|
||||||
|
spaces and the group of tags is separated from the word with a ": ".
|
||||||
|
Here, "acknowledgment" is the preferred American spelling (as
|
||||||
|
indicated by the "A") of the word, and "acknowledgement" is the
|
||||||
|
preferred Canadian and British spelling ("B" and "C"). However the
|
||||||
|
American spelling is sometimes used in Canada (as indicated by "Cv",
|
||||||
|
where the lowercase "v" indicates a variant form) and the British
|
||||||
|
spelling is sometimes used in America (as indicated by the "Av").
|
||||||
|
|
||||||
|
More generally each tag consists of a spelling category (for example
|
||||||
|
"A") possibly followed by a variant indicator. The spelling
|
||||||
|
categories are as follows:
|
||||||
|
A: American
|
||||||
|
B: British "ise" spelling
|
||||||
|
Z: British "ize" spelling or OED preferred Spelling
|
||||||
|
C: Canadian
|
||||||
|
D: Australian
|
||||||
|
_: Other (Variant info based on American dictionaries, never used
|
||||||
|
with any of the above).
|
||||||
|
and the variants tags are as follows:
|
||||||
|
.: equal
|
||||||
|
v: variant
|
||||||
|
V: seldom used variant
|
||||||
|
-: possible variant, should generally not be used
|
||||||
|
x: improper variant (should not use)
|
||||||
|
|
||||||
|
The "." or equal variant tags are reserved for special cases when
|
||||||
|
there is little agreement between dictionaries or when I think the
|
||||||
|
dictionary is wrong. The "v" indicator is used for most words marked
|
||||||
|
as variants in the dictionary. However, some variants will be demoted
|
||||||
|
to a "V". For example, if the variant is marked as "also" by
|
||||||
|
Merriam-Webster, or also if only some dictionaries acknowledge the
|
||||||
|
existence of the variant. "-" is used when the variant is generally not
|
||||||
|
listed in the dictionary but I could find some evidence of its use, or
|
||||||
|
when it is marked as an archaic spelling for the word. The "x"
|
||||||
|
is used when the spelling is almost generally considered a
|
||||||
|
misspelling, and is only included for completeness.
|
||||||
|
|
||||||
|
For Australian English "v" was used for variants that are widely used,
|
||||||
|
but not preferred, and "V" for all "-or" (vs. "-our") variants and
|
||||||
|
variants considered "chiefly US".
|
||||||
|
|
||||||
|
If there are no tags with the 'Z' spelling category on the line then
|
||||||
|
'B' implies 'Z'. Similarly if there are no 'C' tags then 'Z' implies
|
||||||
|
'C'. If there are no 'D' tags then 'B' implies 'D'.
|
||||||
|
|
||||||
|
For ease of reading and maintaining the data file, each line is
|
||||||
|
grouped in a cluster of closely related words. Each cluster is
|
||||||
|
uniquely identified by a headword, which is generally the American
|
||||||
|
spelling of the word on the first line of the cluster. Each cluster is
|
||||||
|
started with a '#' and is followed by the headword with some
|
||||||
|
additional information after it. For example the cluster for
|
||||||
|
acknowledgment is:
|
||||||
|
# acknowledgment <verified> (level 35)
|
||||||
|
A Cv: acknowledgment / Av B C: acknowledgement
|
||||||
|
A Cv: acknowledgments / Av B C: acknowledgements
|
||||||
|
A Cv: acknowledgment's / Av B C: acknowledgement's
|
||||||
|
The "<verified>" tag will be explained later, and "(level 35)"
|
||||||
|
indicates what level in SCOWL (see http://wordlist.sourceforge.net)
|
||||||
|
the headword is found in. The levels generally mean the following:
|
||||||
|
<= 35: Very common word
|
||||||
|
<= 70: Can be found in the dictionary
|
||||||
|
80: Likely a valid word, can likely be found in an
|
||||||
|
unabridged dictionary
|
||||||
|
> 80: May not even be a legal word
|
||||||
|
|
||||||
|
Sometimes the spelling of a word depends on the usage. If so the word
|
||||||
|
is listed more than once within a cluster, with any usage information
|
||||||
|
being indicated after a " | ". For example here is part of the cluster
|
||||||
|
for prize:
|
||||||
|
A B: prize | reward
|
||||||
|
A B: prizes | reward
|
||||||
|
A C: prize / B: prise | otherwise
|
||||||
|
A C: prizes / B: prises | otherwise
|
||||||
|
which indicates that the preferred spelling of prize is always with a
|
||||||
|
"z" when meaning a reward, but otherwise is spelled with an "s" in
|
||||||
|
British English. In the example above a brief definition of the word
|
||||||
|
is given, but often no such attempt is made, and the definition simply
|
||||||
|
consists of a number, for example:
|
||||||
|
A B: sake | :1
|
||||||
|
A C: sake / Av B Cv: saki | :2
|
||||||
|
|
||||||
|
Sometimes part-of-speech (POS) info is given to help distinguish which
|
||||||
|
form is used. For example:
|
||||||
|
A B C: practice / AV Cv: practise | <N>
|
||||||
|
A Cv: practice / AV B C: practise | <V>
|
||||||
|
POS info is always given in the form "<POS>" and if a definition
|
||||||
|
is also given the POS info is always first. The POS tags used are as
|
||||||
|
follows:
|
||||||
|
<N>: Noun
|
||||||
|
<V>: Verb
|
||||||
|
<Adj>: Adjective
|
||||||
|
<Adv>: Adverb
|
||||||
|
|
||||||
|
A "(-)" before the definition indicates a rarely used or archaic form
|
||||||
|
of a word, for example:
|
||||||
|
A B: bark | :1
|
||||||
|
A: bark / Av B: barque | (-) ship
|
||||||
|
|
||||||
|
A "--" indicates a note rather than definition. This is generally
|
||||||
|
used to indicate that the spelling of the plural form does not depend on
|
||||||
|
the spelling of the root word, for example:
|
||||||
|
_: cabby / _.: cabbie
|
||||||
|
_: cabbies | -- plural
|
||||||
|
|
||||||
|
Misc. notes on a particular form of a word are given after a "#" on
|
||||||
|
the same line. Misc. notes for the cluster are given at the end of
|
||||||
|
the cluster and are prefixed with "##", for example:
|
||||||
|
# coloration <verified> (level 50)
|
||||||
|
A B C: coloration / B. Cv: colouration
|
||||||
|
A B C: colorations / B. Cv: colourations
|
||||||
|
A B C: coloration's / B. Cv: colouration's
|
||||||
|
## OED has coloration as the preferred spelling and discolouration as a
|
||||||
|
## variant for British Engl or some reason
|
||||||
|
In the notes ODE (not to be confused with OED) stands for Oxford
|
||||||
|
Dictionary of English, "Ox" is used for any Oxford dictionary, and
|
||||||
|
"M-W" for Merriam-Webster.
|
||||||
|
|
||||||
|
Earlier versions of varcon contained numerous errors. With version
|
||||||
|
5.0 massive effort has been made to correct many of these errors.
|
||||||
|
Clusters that have undergone some form of verification (and likely
|
||||||
|
correction) are marked with "<verified>". As of version 5.0, most
|
||||||
|
clusters with headwords in common usage (SCOWL level 35 and
|
||||||
|
below) should now be checked, as well as many others. No effort was
|
||||||
|
made to check clusters with headwords in SCOWL level 80 and above;
|
||||||
|
many of those entries are unlikely to be in the dictionary anyway.
|
||||||
|
|
||||||
|
The file variant-also.tab contains additional mappings between various
|
||||||
|
spellings of a word which are not yet in varcon.txt. No attempt is
|
||||||
|
made to distinguish the primary form of a word. The file
|
||||||
|
variant-infl.tab is like variant-also.tab except that it is created
|
||||||
|
automatically from the AGID inflection database. The file
|
||||||
|
variant-wroot.tab is like variant-infl.tab except that it also
|
||||||
|
includes the root form of the word.
|
||||||
|
|
||||||
|
The file voc.tab is similar to varcon.txt but converts between
|
||||||
|
vocabulary instead of spelling. Unlike varcon.txt it is a simple tab
|
||||||
|
separated file with the fields corresponding to the American, British,
|
||||||
|
and Canadian words. If more than one word is often used to describe
|
||||||
|
the same thing the words are separated with commas. The last column
|
||||||
|
contains additional notes on when the word is used. Unlike varcon.txt
|
||||||
|
it is generally not suitable for automatic conversion.
|
||||||
|
|
||||||
|
The "make-variant" Perl script will combine varcon.txt,
|
||||||
|
variant-also.tab, and variant-infl.tab into one huge mapping and will
|
||||||
|
output the result to "variant.tab". If the "no-infl" option is given
|
||||||
|
then variant-infl.tab will not be included.
|
||||||
|
|
||||||
|
The "split" script will split out the information in varcon.txt into
|
||||||
|
several word lists named as follows:
|
||||||
|
<spelling>[-v<variant level>][-uncommon].lst
|
||||||
|
where <spelling> is one of: american, british, british_z, canadian,
|
||||||
|
common, or other. "common" is used for words which appear in
|
||||||
|
varcon.txt, yet are used in all versions of English, such as "prize",
|
||||||
|
and "other" is used for the "_" spelling category. The mapping from
|
||||||
|
the variant indicators in varcon.txt to the numeric variant level is
|
||||||
|
as follows:
|
||||||
|
v => 0
|
||||||
|
V => 1
|
||||||
|
- => 2
|
||||||
|
"-uncommon" is used for forms marked with "(-)" as already described.
|
||||||
|
|
||||||
|
The "translate" Perl script will translate a text file from one
|
||||||
|
spelling to another. Its usage is:
|
||||||
|
|
||||||
|
translate <options> [<translation array>] <from> <to>
|
||||||
|
<options> is any of
|
||||||
|
-?,-h,--help this screen
|
||||||
|
-m,--mark mark words where the translation is questionable
|
||||||
|
-i,--include include words where the translation is questionable
|
||||||
|
<translation array> is the file name of the translation array,
|
||||||
|
defaults to "abbc.tab".
|
||||||
|
<from> and <to> are one of: american, british, british_z, or canadian.
|
||||||
|
british-ise and british-ize can also be used.
|
||||||
|
|
||||||
|
Text is read in from standard input and is outputted to standard out.
|
||||||
|
Words are marked with a '?' before and after the questionable word
|
||||||
|
when the option is enabled.
|
||||||
|
|
||||||
|
The file varcon.pm contains some library routines for parsing
|
||||||
|
varcon.txt and is used by many of the scripts above.
|
||||||
|
|
||||||
|
If you discover any errors in these mappings or have suggestions for
|
||||||
|
additions please file a bug report at
|
||||||
|
https://github.com/kevina/wordlist/issues, or alternatively email me
|
||||||
|
directly at kevina@gnu.org, but I will likely tell you to file a bug
|
||||||
|
report so that I don't forget about it.
|
||||||
|
|
||||||
|
SOURCE:
|
||||||
|
|
||||||
|
These mappings were compiled from numerous sources.
|
||||||
|
|
||||||
|
The abc.tab was originally created from the American and British word
|
||||||
|
lists found in the Ispell distribution and the Canadian word list
|
||||||
|
created by Garst R. Reese <reese@isn.net>:
|
||||||
|
|
||||||
|
What I have discovered is that Canadian is a modification of British.
|
||||||
|
Canadians use ize ization, izing izable like Americans, and gram instead
|
||||||
|
of gramme. The one exception I found was practise. It does not go to
|
||||||
|
practize. Otherwise they use British spelling. So, what I am currently
|
||||||
|
checking books with is a an edited version of British, where I have
|
||||||
|
changed all occurrences of ise to ize, isab to izab, isation to ization,
|
||||||
|
ising to izing, and gramme to gram except I allow programme, which is
|
||||||
|
sometimes proper unless you are talking about a computer program. I did
|
||||||
|
bunches of greps to be sure these substitutions would work as expected.
|
||||||
|
|
||||||
|
Many other words have been added to abc.tab which were not in the
|
||||||
|
original Ispell word lists.
|
||||||
|
|
||||||
|
Many different web sources were consulted when creating the tables. They
|
||||||
|
include:
|
||||||
|
|
||||||
|
The American-British British-American Dictionary
|
||||||
|
http://www.peak.org/~jeremy/dictionary/dictionary.html
|
||||||
|
American and British Spelling Differences
|
||||||
|
http://www.peak.org/~jeremy/dictionary/spellcat.html
|
||||||
|
Dave (VE7CNV)'s Truly Canadian Dictionary of Canadian Spelling
|
||||||
|
http://www.luther.bc.ca/~dave7cnv/cdnspelling/cdnspelling.html
|
||||||
|
Canadian Spelling Convention
|
||||||
|
http://imej.wfu.edu/articles/1999/1/02/demo/tutorial/canas.html
|
||||||
|
Cornerstone's Canadian English Page
|
||||||
|
http://www.web.net/cornerstone/cdneng.htm
|
||||||
|
Inter-Play Translation: British/Canadian/American Spelling
|
||||||
|
http://www.inter-play.com/translation/spel-ukus.htm
|
||||||
|
Inter-Play Translation: British/Canadian/American Vocabulary
|
||||||
|
http://www.inter-play.com/translation/voc-ukus.htm
|
||||||
|
|
||||||
|
As well as several online dictionaries:
|
||||||
|
|
||||||
|
Merriam-Webster: http://www.m-w.com/
|
||||||
|
American Heritage: http://www.bartleby.com/61/
|
||||||
|
Cambridge (ESL): http://dictionary.cambridge.org/
|
||||||
|
|
||||||
|
In version 5.0 a massive effort to correct the numerous errors in
|
||||||
|
VarCon was done. The primary sources used for verification were:
|
||||||
|
|
||||||
|
Merriam-Webster: http://www.m-w.com/
|
||||||
|
Free version of Oxford Dictionaries Online:
|
||||||
|
http://www.oxforddictionaries.com/
|
||||||
|
Oxford dictionaries available via Oxford Reference Online
|
||||||
|
(subscription service, http://www.oxfordreference.com/):
|
||||||
|
The New Oxford American Dictionary (2nd edition, 2006)
|
||||||
|
and sometimes: The Oxford American Dictionary of Current English (2002)
|
||||||
|
The Concise Oxford English Dictionary (11th edition revised, 2008)
|
||||||
|
and sometimes: The Oxford Dictionary of English (2nd edition revised, 2005)
|
||||||
|
The Canadian Oxford Dictionary (2004)
|
||||||
|
|
||||||
|
I also used Tysto UK vs US spelling list available at:
|
||||||
|
http://www.tysto.com/articles05/q1/20050324uk-us.shtml
|
||||||
|
to make sure I didn't leave out any information in VarCon, however any
|
||||||
|
additions from his lists were verified using the dictionaries
|
||||||
|
mentioned above as his lists contained numerous errors (such as
|
||||||
|
including archaic spellings of words)
|
||||||
|
|
||||||
|
I also made indirect use of Luke's Canadian, British and American
|
||||||
|
Spelling page available at:
|
||||||
|
http://www.lukemastin.com/testing/spelling/cgi-bin/database.cgi?database=spelling
|
||||||
|
but only to perform some initial verification, in the end I rechecked
|
||||||
|
his data using the dictionaries above. (However, his data is, by far,
|
||||||
|
more accurate than Tysto's)
|
||||||
|
|
||||||
|
In Version 2016.11.20 Benjamin Titze added support for Australian English.
|
||||||
|
The primary sources for this addition were:
|
||||||
|
|
||||||
|
The Macquarie Dictionary: https://www.macquariedictionary.com.au/
|
||||||
|
Style Manual: For Authors, Editors and Printers, 6th Edition. DCITA.
|
||||||
|
University of Technology Sydney Publications Style Guide:
|
||||||
|
http://www.gsu.uts.edu.au/publications/styleguide/spelling.html
|
||||||
|
Style Manual, Department of Treasury and Finance, Tasmania:
|
||||||
|
http://conference.tasa.org.au/wp-content/uploads/2015/03/Style-Manual.pdf
|
||||||
|
Editor Australia - Style Guide:
|
||||||
|
http://www.editoraustralia.com/styleguide_spelling.html
|
||||||
|
Webster in Australia (history of "our"/"or" spelling variants):
|
||||||
|
http://blogs.usyd.edu.au/elac/2008/01/webster_in_australia.html
|
||||||
|
|
||||||
|
|
||||||
|
CHANGELOG:
|
||||||
|
|
||||||
|
From 2017.08.24 to 2018.10.06
|
||||||
|
|
||||||
|
- Added entries for: eukaryote, prokaryote, virtualization, volcanism
|
||||||
|
|
||||||
|
From 2016.11.20 to 2017.08.24
|
||||||
|
|
||||||
|
- Typo fixes thanks to Jakub Wilk
|
||||||
|
|
||||||
|
From 2016.06.26 to 2016.11.20
|
||||||
|
|
||||||
|
- New Australian spelling category thanks to the work of Benjamin
|
||||||
|
Titze.
|
||||||
|
|
||||||
|
- Various other fixes.
|
||||||
|
|
||||||
|
From 2016.01.19 to 2016.06.26
|
||||||
|
|
||||||
|
- Fix plural of "bus".
|
||||||
|
|
||||||
|
From 2015.08.24 to 2016.01.19
|
||||||
|
|
||||||
|
- Undo the effects of PERL_UNICODE in the translate script.
|
||||||
|
|
||||||
|
- Other minor fixes and new entries.
|
||||||
|
|
||||||
|
From 2014.02.15 to 2015.08.24 (Aug 24, 2015)
|
||||||
|
|
||||||
|
- Added entry for Koran/Koranic.
|
||||||
|
|
||||||
|
- Tweaked "adviser" cluster.
|
||||||
|
|
||||||
|
- Fix formatting problems.
|
||||||
|
|
||||||
|
From 2015.01.28 to 2014.02.15 (February 15, 2015)
|
||||||
|
|
||||||
|
- Various new entries
|
||||||
|
|
||||||
|
From 2014.11.17 to 2015.01.28 (January 28, 2015)
|
||||||
|
|
||||||
|
- Minor adjustments to a few entries (analytic, amid)
|
||||||
|
|
||||||
|
- Added entry for shareable
|
||||||
|
|
||||||
|
- Remove a junk entry (ted/taed).
|
||||||
|
|
||||||
|
From 2014.08.11 to 2014.11.17 (November 17, 2014)
|
||||||
|
|
||||||
|
- Fix typos in README
|
||||||
|
|
||||||
|
- Enhancement to VarCon translate script. It will now, by default,
|
||||||
|
filter clusters with a SCOWL level > 80. This behavior can be
|
||||||
|
controlled with the new "--thresh" option.
|
||||||
|
|
||||||
|
- Remove a few junk entries.
|
||||||
|
|
||||||
|
From Revision 5.1 to Version 2014.08.11 (August 8, 2014)
|
||||||
|
|
||||||
|
- Various corrections. Most of them minor. Two notable exceptions:
|
||||||
|
|
||||||
|
- Added an entry for furor as the correct British spelling is furore
|
||||||
|
|
||||||
|
- Fixed racket entries as Canadians still use racquet even
|
||||||
|
though it is a British English (at least according to the
|
||||||
|
Oxford dictionaries)
|
||||||
|
|
||||||
|
- Other minor changes.
|
||||||
|
|
||||||
|
From Revision 5.0 to Revision 5.1 (January 6, 2010)
|
||||||
|
|
||||||
|
- Corrected numerous errors after running various forms
|
||||||
|
of verification on varcon.txt.
|
||||||
|
|
||||||
|
- Reordered the clusters in varcon.txt so that they are
|
||||||
|
mostly in alphabetic order based on the headword.
|
||||||
|
|
||||||
|
From Revision 4.1 to Revision 5.0 (December 27, 2010)
|
||||||
|
|
||||||
|
- Completely new format for the main table which, in addition to
|
||||||
|
providing the preferred spelling of a word for various forms of
|
||||||
|
English, also records variant and other information. To reflect
|
||||||
|
this change, the name of the file was renamed from abbc.tab to
|
||||||
|
varcon.txt.
|
||||||
|
|
||||||
|
- Massive effort to verify the variant information against
|
||||||
|
authoritative sources (mainly Oxford dictionaries). Most entries
|
||||||
|
for common words (SCOWL level 35 and below) have been checked
|
||||||
|
against at least a British and Canadian dictionary.
|
||||||
|
|
||||||
|
- Added variant information for numerous other words, even when
|
||||||
|
there is no difference between the various forms of English.
|
||||||
|
|
||||||
|
- Other changes corresponding to the new format.
|
||||||
|
|
||||||
|
From Revision 4 to Revision 4.1 (August 10, 2004)
|
||||||
|
|
||||||
|
- Fixed various errors in abbc.tab
|
||||||
|
|
||||||
|
- Removed clause 4 from the Ispell copyright with permission of Geoff
|
||||||
|
Kuenning.
|
||||||
|
|
||||||
|
From Revision 3 to Revision 4 (August 7, 2004)
|
||||||
|
|
||||||
|
- Added a column to "abc.tab" for the British "ize" spelling and
|
||||||
|
renamed the file to abbc.tab.
|
||||||
|
- Added verb forms of prize/prise to abbc.tab, removed from
|
||||||
|
variant-also.tab
|
||||||
|
|
||||||
|
From Revision 2 to Revision 3 (January 2, 2003)
|
||||||
|
|
||||||
|
- Added an option for not including variant-infl.tab for the
|
||||||
|
make-variant perl script
|
||||||
|
- Added the file variant-wroot.tab
|
||||||
|
- Added a few entries given to me by Francis Bond and Edward Betts
|
||||||
|
|
||||||
|
From Revision 1 to Revision 2 (January 27, 2001)
|
||||||
|
|
||||||
|
- Removed all "B" markers because I could not find any evidence for
|
||||||
|
them
|
||||||
|
- Corrected a few Canadian entries, especially those with the "B"
|
||||||
|
markers
|
||||||
|
- Added some more entries by trying fixed changes (such as ize to
|
||||||
|
ise) to words in SCOWL and hand-checking over the ones with semi-common
|
||||||
|
words in them.
|
||||||
|
- Added variant-infl.tab
|
||||||
|
|
||||||
|
COPYRIGHT:
|
||||||
|
|
||||||
|
Copyright 2000-2018 by Kevin Atkinson
|
||||||
|
|
||||||
|
Permission to use, copy, modify, distribute and sell this array, the
|
||||||
|
associated software, and its documentation for any purpose is hereby
|
||||||
|
granted without fee, provided that the above copyright notice appears
|
||||||
|
in all copies and that both that copyright notice and this permission
|
||||||
|
notice appear in supporting documentation. Kevin Atkinson makes no
|
||||||
|
representations about the suitability of this array for any
|
||||||
|
purpose. It is provided "as is" without express or implied warranty.
|
||||||
|
|
||||||
|
Copyright 2016 by Benjamin Titze
|
||||||
|
|
||||||
|
Permission to use, copy, modify, distribute and sell this array, the
|
||||||
|
associated software, and its documentation for any purpose is hereby
|
||||||
|
granted without fee, provided that the above copyright notice appears
|
||||||
|
in all copies and that both that copyright notice and this permission
|
||||||
|
notice appear in supporting documentation. Benjamin Titze makes no
|
||||||
|
representations about the suitability of this array for any
|
||||||
|
purpose. It is provided "as is" without express or implied warranty.
|
||||||
|
|
||||||
|
Since the original words lists come from the Ispell distribution:
|
||||||
|
|
||||||
|
Copyright 1993, Geoff Kuenning, Granada Hills, CA
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
3. All modifications to the source code must be clearly marked as
|
||||||
|
such. Binary redistributions based on modified source code
|
||||||
|
must be clearly marked as modified versions in the documentation
|
||||||
|
and/or other materials provided with the distribution.
|
||||||
|
(clause 4 removed with permission from Geoff Kuenning)
|
||||||
|
5. The name of Geoff Kuenning may not be used to endorse or promote
|
||||||
|
products derived from this software without specific prior
|
||||||
|
written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
SUCH DAMAGE.
|
32432
crates/varcon/assets/varcon.txt
vendored
Normal file
32432
crates/varcon/assets/varcon.txt
vendored
Normal file
File diff suppressed because it is too large
Load diff
20
crates/varcon/codegen/Cargo.toml
Normal file
20
crates/varcon/codegen/Cargo.toml
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
[package]
|
||||||
|
name = "varcon-codegen"
|
||||||
|
version = "1.0.2"
|
||||||
|
authors = ["Ed Page <eopage@gmail.com>"]
|
||||||
|
description = "Source Code Spelling Correction"
|
||||||
|
repository = "https://github.com/crate-ci/typos"
|
||||||
|
readme = "../../../README.md"
|
||||||
|
categories = ["text-processing"]
|
||||||
|
license = "MIT"
|
||||||
|
edition = "2018"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
azure-devops = { project = "crate-ci", pipeline = "typos" }
|
||||||
|
codecov = { repository = "crate-ci/typos" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
codegenrs = "0.1"
|
||||||
|
structopt = "0.3"
|
||||||
|
varcon-parser = { version = "1.0", path = "../../varcon-parser" }
|
99
crates/varcon/codegen/src/main.rs
Normal file
99
crates/varcon/codegen/src/main.rs
Normal file
|
@ -0,0 +1,99 @@
|
||||||
|
use structopt::StructOpt;
|
||||||
|
|
||||||
|
const DICT: &[u8] = include_bytes!("../../assets/varcon.txt");
|
||||||
|
|
||||||
|
fn generate<W: std::io::Write>(file: &mut W) {
|
||||||
|
let dict = String::from_utf8_lossy(DICT);
|
||||||
|
let clusters = varcon_parser::ClusterIter::new(&dict);
|
||||||
|
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"// This file is code-genned by {}",
|
||||||
|
env!("CARGO_PKG_NAME")
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
writeln!(file, "use crate::*;").unwrap();
|
||||||
|
writeln!(file).unwrap();
|
||||||
|
|
||||||
|
writeln!(file, "pub static VARCON: &'static [Cluster] = &[").unwrap();
|
||||||
|
for mut cluster in clusters {
|
||||||
|
cluster.infer();
|
||||||
|
writeln!(file, "Cluster {{").unwrap();
|
||||||
|
writeln!(file, " header: {:?},", cluster.header).unwrap();
|
||||||
|
writeln!(file, " entries: &[").unwrap();
|
||||||
|
for entry in &cluster.entries {
|
||||||
|
writeln!(file, " Entry {{").unwrap();
|
||||||
|
writeln!(file, " variants: &[").unwrap();
|
||||||
|
for variant in &entry.variants {
|
||||||
|
writeln!(file, " Variant {{").unwrap();
|
||||||
|
writeln!(file, " word: {:?},", variant.word).unwrap();
|
||||||
|
writeln!(file, " types: &[").unwrap();
|
||||||
|
for t in &variant.types {
|
||||||
|
write!(file, " Type {{").unwrap();
|
||||||
|
write!(file, "category: Category::{:?}, ", t.category).unwrap();
|
||||||
|
if let Some(tag) = t.tag {
|
||||||
|
write!(file, "tag: Some(Tag::{:?}), ", tag).unwrap();
|
||||||
|
} else {
|
||||||
|
write!(file, "tag: {:?}, ", t.tag).unwrap();
|
||||||
|
}
|
||||||
|
write!(file, "num: {:?},", t.num).unwrap();
|
||||||
|
writeln!(file, "}},").unwrap();
|
||||||
|
}
|
||||||
|
writeln!(file, " ],").unwrap();
|
||||||
|
writeln!(file, " }},").unwrap();
|
||||||
|
}
|
||||||
|
writeln!(file, " ],").unwrap();
|
||||||
|
if let Some(pos) = entry.pos {
|
||||||
|
write!(file, " pos: Some(Pos::{:?}),", pos).unwrap();
|
||||||
|
} else {
|
||||||
|
write!(file, " pos: {:?},", entry.pos).unwrap();
|
||||||
|
}
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
" archaic: {:?}, note: {:?},",
|
||||||
|
entry.archaic, entry.note
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(file, " description: {:?},", entry.description).unwrap();
|
||||||
|
writeln!(file, " comment: {:?},", entry.comment).unwrap();
|
||||||
|
writeln!(file, " }},").unwrap();
|
||||||
|
}
|
||||||
|
writeln!(file, " ],").unwrap();
|
||||||
|
writeln!(file, " notes: &[").unwrap();
|
||||||
|
for note in &cluster.notes {
|
||||||
|
writeln!(file, " {:?},", note).unwrap();
|
||||||
|
}
|
||||||
|
writeln!(file, " ],").unwrap();
|
||||||
|
writeln!(file, " }},").unwrap();
|
||||||
|
}
|
||||||
|
writeln!(file, "];").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, StructOpt)]
|
||||||
|
#[structopt(rename_all = "kebab-case")]
|
||||||
|
struct Options {
|
||||||
|
#[structopt(flatten)]
|
||||||
|
codegen: codegenrs::CodeGenArgs,
|
||||||
|
#[structopt(flatten)]
|
||||||
|
rustmft: codegenrs::RustfmtArgs,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run() -> Result<i32, Box<dyn std::error::Error>> {
|
||||||
|
let options = Options::from_args();
|
||||||
|
|
||||||
|
let mut content = vec![];
|
||||||
|
generate(&mut content);
|
||||||
|
|
||||||
|
let content = String::from_utf8(content)?;
|
||||||
|
let content = options.rustmft.reformat(&content)?;
|
||||||
|
options.codegen.write_str(&content)?;
|
||||||
|
|
||||||
|
Ok(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let code = run().unwrap();
|
||||||
|
std::process::exit(code);
|
||||||
|
}
|
450944
crates/varcon/src/codegen.rs
Normal file
450944
crates/varcon/src/codegen.rs
Normal file
File diff suppressed because it is too large
Load diff
75
crates/varcon/src/lib.rs
Normal file
75
crates/varcon/src/lib.rs
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
mod codegen;
|
||||||
|
|
||||||
|
pub use codegen::*;
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||||
|
pub struct Cluster {
|
||||||
|
pub header: Option<&'static str>,
|
||||||
|
pub entries: &'static [Entry],
|
||||||
|
pub notes: &'static [&'static str],
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||||
|
pub struct Entry {
|
||||||
|
pub variants: &'static [Variant],
|
||||||
|
pub pos: Option<Pos>,
|
||||||
|
pub archaic: bool,
|
||||||
|
pub note: bool,
|
||||||
|
pub description: Option<&'static str>,
|
||||||
|
pub comment: Option<&'static str>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||||
|
pub struct Variant {
|
||||||
|
pub types: &'static [Type],
|
||||||
|
pub word: &'static str,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||||
|
pub struct Type {
|
||||||
|
pub category: Category,
|
||||||
|
pub tag: Option<Tag>,
|
||||||
|
pub num: Option<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Category of a [`Type`].
///
/// Every variant occupies its own bit of a `u8`, so that (with the `flags`
/// feature enabled) several categories can be combined in a `CategorySet`.
// NOTE(review): the variant names presumably mirror the spelling categories of
// the upstream word list — confirm against its documentation.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
#[cfg_attr(feature = "flags", derive(enumflags2::BitFlags))]
#[repr(u8)]
pub enum Category {
    American = 0b0000_0001,
    BritishIse = 0b0000_0010,
    BritishIze = 0b0000_0100,
    Canadian = 0b0000_1000,
    Australian = 0b0001_0000,
    Other = 0b0010_0000,
}

/// A set of [`Category`] flags (only available with the `flags` feature).
#[cfg(feature = "flags")]
pub type CategorySet = enumflags2::BitFlags<Category>;
|
||||||
|
|
||||||
|
/// Optional tag attached to a [`Type`].
///
/// Every variant occupies its own bit of a `u8`, so that (with the `flags`
/// feature enabled) several tags can be combined in a `TagSet`. The derived
/// ordering follows declaration order: `Eq` is the smallest, `Improper` the
/// largest.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
#[cfg_attr(feature = "flags", derive(enumflags2::BitFlags))]
#[repr(u8)]
pub enum Tag {
    Eq = 0b0000_0001,
    Variant = 0b0000_0010,
    Seldom = 0b0000_0100,
    Possible = 0b0000_1000,
    Improper = 0b0001_0000,
}

/// A set of [`Tag`] flags (only available with the `flags` feature).
#[cfg(feature = "flags")]
pub type TagSet = enumflags2::BitFlags<Tag>;
|
||||||
|
|
||||||
|
/// Part of speech recorded on an [`Entry`].
///
/// Every variant occupies its own bit of a `u8`, so that (with the `flags`
/// feature enabled) several values can be combined in a `PosSet`.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
#[cfg_attr(feature = "flags", derive(enumflags2::BitFlags))]
#[repr(u8)]
pub enum Pos {
    Noun = 0b0000_0001,
    Verb = 0b0000_0010,
    Adjective = 0b0000_0100,
    Adverb = 0b0000_1000,
}

/// A set of [`Pos`] flags (only available with the `flags` feature).
#[cfg(feature = "flags")]
pub type PosSet = enumflags2::BitFlags<Pos>;
|
Loading…
Reference in a new issue