mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-24 10:00:59 -05:00
Merge pull request #1084 from epage/update
feat(varcon): Update to Version 2020.12.07
This commit is contained in:
commit
19ed24a45f
10 changed files with 64099 additions and 60443 deletions
66
Cargo.lock
generated
66
Cargo.lock
generated
|
@ -1134,9 +1134,9 @@ checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "snapbox"
|
name = "snapbox"
|
||||||
version = "0.6.16"
|
version = "0.6.17"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "027c936207f85d10d015e21faf5c676c7e08c453ed371adf55c0874c443ca77a"
|
checksum = "840b73eb3148bc3cbc10ebe00ec9bc6d96033e658d022c4adcbf3f35596fd64a"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anstream",
|
"anstream",
|
||||||
"anstyle",
|
"anstyle",
|
||||||
|
@ -1151,7 +1151,7 @@ dependencies = [
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"wait-timeout",
|
"wait-timeout",
|
||||||
"walkdir",
|
"walkdir",
|
||||||
"windows-sys 0.52.0",
|
"windows-sys 0.59.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1504,6 +1504,7 @@ name = "varcon-core"
|
||||||
version = "4.0.10"
|
version = "4.0.10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"enumflags2",
|
"enumflags2",
|
||||||
|
"snapbox",
|
||||||
"winnow",
|
"winnow",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -1591,11 +1592,11 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-sys"
|
name = "windows-sys"
|
||||||
version = "0.52.0"
|
version = "0.59.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
|
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"windows-targets 0.52.0",
|
"windows-targets 0.52.6",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1615,17 +1616,18 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-targets"
|
name = "windows-targets"
|
||||||
version = "0.52.0"
|
version = "0.52.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
|
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"windows_aarch64_gnullvm 0.52.0",
|
"windows_aarch64_gnullvm 0.52.6",
|
||||||
"windows_aarch64_msvc 0.52.0",
|
"windows_aarch64_msvc 0.52.6",
|
||||||
"windows_i686_gnu 0.52.0",
|
"windows_i686_gnu 0.52.6",
|
||||||
"windows_i686_msvc 0.52.0",
|
"windows_i686_gnullvm",
|
||||||
"windows_x86_64_gnu 0.52.0",
|
"windows_i686_msvc 0.52.6",
|
||||||
"windows_x86_64_gnullvm 0.52.0",
|
"windows_x86_64_gnu 0.52.6",
|
||||||
"windows_x86_64_msvc 0.52.0",
|
"windows_x86_64_gnullvm 0.52.6",
|
||||||
|
"windows_x86_64_msvc 0.52.6",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1636,9 +1638,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_aarch64_gnullvm"
|
name = "windows_aarch64_gnullvm"
|
||||||
version = "0.52.0"
|
version = "0.52.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
|
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_aarch64_msvc"
|
name = "windows_aarch64_msvc"
|
||||||
|
@ -1648,9 +1650,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_aarch64_msvc"
|
name = "windows_aarch64_msvc"
|
||||||
version = "0.52.0"
|
version = "0.52.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
|
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_i686_gnu"
|
name = "windows_i686_gnu"
|
||||||
|
@ -1660,9 +1662,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_i686_gnu"
|
name = "windows_i686_gnu"
|
||||||
version = "0.52.0"
|
version = "0.52.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
|
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_i686_msvc"
|
name = "windows_i686_msvc"
|
||||||
|
@ -1672,9 +1680,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_i686_msvc"
|
name = "windows_i686_msvc"
|
||||||
version = "0.52.0"
|
version = "0.52.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
|
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_x86_64_gnu"
|
name = "windows_x86_64_gnu"
|
||||||
|
@ -1684,9 +1692,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_x86_64_gnu"
|
name = "windows_x86_64_gnu"
|
||||||
version = "0.52.0"
|
version = "0.52.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
|
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_x86_64_gnullvm"
|
name = "windows_x86_64_gnullvm"
|
||||||
|
@ -1696,9 +1704,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_x86_64_gnullvm"
|
name = "windows_x86_64_gnullvm"
|
||||||
version = "0.52.0"
|
version = "0.52.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
|
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_x86_64_msvc"
|
name = "windows_x86_64_msvc"
|
||||||
|
@ -1708,9 +1716,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_x86_64_msvc"
|
name = "windows_x86_64_msvc"
|
||||||
version = "0.52.0"
|
version = "0.52.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
|
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winnow"
|
name = "winnow"
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -207,7 +207,7 @@ mod parser {
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
alt((
|
alt((
|
||||||
one_of(|c| !is_xid_continue(c)).recognize(),
|
one_of(|c| !is_xid_continue(c)).take(),
|
||||||
eof.map(|_| <T as Stream>::Slice::default()),
|
eof.map(|_| <T as Stream>::Slice::default()),
|
||||||
))
|
))
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
|
@ -225,7 +225,7 @@ mod parser {
|
||||||
one_of(|c| !is_xid_continue(c)),
|
one_of(|c| !is_xid_continue(c)),
|
||||||
take_while(0.., is_ignore_char),
|
take_while(0.., is_ignore_char),
|
||||||
)
|
)
|
||||||
.recognize(),
|
.take(),
|
||||||
)
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
@ -251,7 +251,7 @@ mod parser {
|
||||||
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
|
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
|
||||||
take_while(0.., is_sep),
|
take_while(0.., is_sep),
|
||||||
)
|
)
|
||||||
.recognize(),
|
.take(),
|
||||||
)
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
@ -273,7 +273,7 @@ mod parser {
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
('0', alt(('x', 'X')), take_while(1.., is_hex_digit_with_sep))
|
('0', alt(('x', 'X')), take_while(1.., is_hex_digit_with_sep))
|
||||||
.recognize()
|
.take()
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -293,7 +293,7 @@ mod parser {
|
||||||
(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
|
(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
|
||||||
)),
|
)),
|
||||||
)
|
)
|
||||||
.recognize(),
|
.take(),
|
||||||
)
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
@ -318,7 +318,7 @@ mod parser {
|
||||||
'.',
|
'.',
|
||||||
take_while(20.., is_jwt_token),
|
take_while(20.., is_jwt_token),
|
||||||
)
|
)
|
||||||
.recognize(),
|
.take(),
|
||||||
)
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
@ -366,7 +366,7 @@ mod parser {
|
||||||
take_while(12, is_upper_hex_digit),
|
take_while(12, is_upper_hex_digit),
|
||||||
),
|
),
|
||||||
))
|
))
|
||||||
.recognize(),
|
.take(),
|
||||||
)
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
@ -450,7 +450,7 @@ mod parser {
|
||||||
'@',
|
'@',
|
||||||
take_while(1.., is_domain_char),
|
take_while(1.., is_domain_char),
|
||||||
)
|
)
|
||||||
.recognize(),
|
.take(),
|
||||||
)
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
@ -480,7 +480,7 @@ mod parser {
|
||||||
// HACK: Too lazy to enumerate
|
// HACK: Too lazy to enumerate
|
||||||
take_while(0.., is_path_query_fragment),
|
take_while(0.., is_path_query_fragment),
|
||||||
)
|
)
|
||||||
.recognize(),
|
.take(),
|
||||||
)
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
@ -498,7 +498,7 @@ mod parser {
|
||||||
take_while(1.., is_localport_char),
|
take_while(1.., is_localport_char),
|
||||||
opt((':', take_while(0.., is_localport_char))),
|
opt((':', take_while(0.., is_localport_char))),
|
||||||
)
|
)
|
||||||
.recognize(),
|
.take(),
|
||||||
)
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
@ -515,7 +515,7 @@ mod parser {
|
||||||
// incorrectly, we opt for just not evaluating it at all.
|
// incorrectly, we opt for just not evaluating it at all.
|
||||||
trace(
|
trace(
|
||||||
"escape",
|
"escape",
|
||||||
(take_while(1.., is_escape), take_while(0.., is_xid_continue)).recognize(),
|
(take_while(1.., is_escape), take_while(0.., is_xid_continue)).take(),
|
||||||
)
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
@ -527,11 +527,7 @@ mod parser {
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
trace(
|
trace("printf", ('%', take_while(1.., is_xid_continue)).take()).parse_next(input)
|
||||||
"printf",
|
|
||||||
('%', take_while(1.., is_xid_continue)).recognize(),
|
|
||||||
)
|
|
||||||
.parse_next(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn take_many0<I, E, F>(mut f: F) -> impl Parser<I, <I as Stream>::Slice, E>
|
fn take_many0<I, E, F>(mut f: F) -> impl Parser<I, <I as Stream>::Slice, E>
|
||||||
|
@ -540,12 +536,7 @@ mod parser {
|
||||||
F: Parser<I, <I as Stream>::Slice, E>,
|
F: Parser<I, <I as Stream>::Slice, E>,
|
||||||
E: ParserError<I>,
|
E: ParserError<I>,
|
||||||
{
|
{
|
||||||
move |i: &mut I| {
|
move |i: &mut I| repeat(0.., f.by_ref()).map(|()| ()).take().parse_next(i)
|
||||||
repeat(0.., f.by_ref())
|
|
||||||
.map(|()| ())
|
|
||||||
.recognize()
|
|
||||||
.parse_next(i)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
|
|
@ -25,3 +25,6 @@ enumflags2 = { version = "0.7", optional = true }
|
||||||
|
|
||||||
[lints]
|
[lints]
|
||||||
workspace = true
|
workspace = true
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
snapbox = "0.6.17"
|
||||||
|
|
|
@ -20,8 +20,8 @@ pub struct Entry {
|
||||||
pub variants: &'static [Variant],
|
pub variants: &'static [Variant],
|
||||||
pub pos: Option<crate::Pos>,
|
pub pos: Option<crate::Pos>,
|
||||||
pub archaic: bool,
|
pub archaic: bool,
|
||||||
pub note: bool,
|
|
||||||
pub description: Option<&'static str>,
|
pub description: Option<&'static str>,
|
||||||
|
pub note: Option<&'static str>,
|
||||||
pub comment: Option<&'static str>,
|
pub comment: Option<&'static str>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -31,8 +31,8 @@ impl Entry {
|
||||||
variants: self.variants.iter().map(|v| v.into_owned()).collect(),
|
variants: self.variants.iter().map(|v| v.into_owned()).collect(),
|
||||||
pos: self.pos,
|
pos: self.pos,
|
||||||
archaic: self.archaic,
|
archaic: self.archaic,
|
||||||
note: self.note,
|
|
||||||
description: self.description.map(|s| s.to_owned()),
|
description: self.description.map(|s| s.to_owned()),
|
||||||
|
note: self.note.map(|s| s.to_owned()),
|
||||||
comment: self.comment.map(|s| s.to_owned()),
|
comment: self.comment.map(|s| s.to_owned()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,8 +30,8 @@ pub struct Entry {
|
||||||
pub variants: Vec<Variant>,
|
pub variants: Vec<Variant>,
|
||||||
pub pos: Option<Pos>,
|
pub pos: Option<Pos>,
|
||||||
pub archaic: bool,
|
pub archaic: bool,
|
||||||
pub note: bool,
|
|
||||||
pub description: Option<String>,
|
pub description: Option<String>,
|
||||||
|
pub note: Option<String>,
|
||||||
pub comment: Option<String>,
|
pub comment: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,6 +124,9 @@ pub enum Pos {
|
||||||
Verb = 0x02,
|
Verb = 0x02,
|
||||||
Adjective = 0x04,
|
Adjective = 0x04,
|
||||||
Adverb = 0x08,
|
Adverb = 0x08,
|
||||||
|
AdjectiveOrAdverb = 0x10,
|
||||||
|
Interjection = 0x20,
|
||||||
|
Preposition = 0x40,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "flags")]
|
#[cfg(feature = "flags")]
|
||||||
|
|
File diff suppressed because it is too large
Load diff
167
crates/varcon/assets/README
vendored
167
crates/varcon/assets/README
vendored
|
@ -1,8 +1,9 @@
|
||||||
Variant Conversion Info (VarCon)
|
Variant Conversion Info (VarCon)
|
||||||
|
********************************
|
||||||
|
|
||||||
Version 2019.10.06
|
Version 2020.12.07
|
||||||
|
|
||||||
Copyright 2000-2016 by Kevin Atkinson (kevina@gnu.org) and Benjamin
|
Copyright 2000-2020 by Kevin Atkinson (kevina@gnu.org) and Benjamin
|
||||||
Titze (btitze@protonmail.ch).
|
Titze (btitze@protonmail.ch).
|
||||||
|
|
||||||
This package contains information to convert between American,
|
This package contains information to convert between American,
|
||||||
|
@ -11,9 +12,17 @@ other variant information.
|
||||||
|
|
||||||
The latest version can be found at http://wordlist.aspell.net/.
|
The latest version can be found at http://wordlist.aspell.net/.
|
||||||
|
|
||||||
|
|
||||||
|
File Format
|
||||||
|
===========
|
||||||
|
|
||||||
The main data file is varcon.txt. It contains information on the
|
The main data file is varcon.txt. It contains information on the
|
||||||
preferred American, British, and Canadian spelling of a word as well
|
preferred American, British, Canadian and Australian spelling of a
|
||||||
as other variant information.
|
word as well as other variant information.
|
||||||
|
|
||||||
|
|
||||||
|
Varcon Lines
|
||||||
|
------------
|
||||||
|
|
||||||
Each line contains a mapping between the various spellings of a word.
|
Each line contains a mapping between the various spellings of a word.
|
||||||
Words are tagged to indicate where the spelling is used, and each
|
Words are tagged to indicate where the spelling is used, and each
|
||||||
|
@ -32,6 +41,7 @@ spelling is sometimes used in America (as indicated the "Av").
|
||||||
More generally each tag consists of a spelling category (for example
|
More generally each tag consists of a spelling category (for example
|
||||||
"A") followed possible by a variant indicator. The spelling
|
"A") followed possible by a variant indicator. The spelling
|
||||||
categories are as follows:
|
categories are as follows:
|
||||||
|
|
||||||
A: American
|
A: American
|
||||||
B: British "ise" spelling
|
B: British "ise" spelling
|
||||||
Z: British "ize" spelling or OED preferred Spelling
|
Z: British "ize" spelling or OED preferred Spelling
|
||||||
|
@ -39,7 +49,9 @@ categories are as follows:
|
||||||
D: Australian
|
D: Australian
|
||||||
_: Other (Variant info based on American dictionaries, never used
|
_: Other (Variant info based on American dictionaries, never used
|
||||||
with any of the above).
|
with any of the above).
|
||||||
|
|
||||||
and the variants tags are as follows:
|
and the variants tags are as follows:
|
||||||
|
|
||||||
.: equal
|
.: equal
|
||||||
v: variant
|
v: variant
|
||||||
V: seldom used variant
|
V: seldom used variant
|
||||||
|
@ -66,6 +78,13 @@ If there are no tags with the 'Z' spelling category on the line then
|
||||||
'B' implies 'Z'. Similarly if there are no 'C' tags then 'Z' implies
|
'B' implies 'Z'. Similarly if there are no 'C' tags then 'Z' implies
|
||||||
'C'. If there are no 'D' tags then 'B' implies 'D'.
|
'C'. If there are no 'D' tags then 'B' implies 'D'.
|
||||||
|
|
||||||
|
Some entries may have a number after the tags, this is a column
|
||||||
|
number and will be explained later.
|
||||||
|
|
||||||
|
|
||||||
|
Varcon Clusters
|
||||||
|
---------------
|
||||||
|
|
||||||
For ease of reading and maintaining the data file, each line is
|
For ease of reading and maintaining the data file, each line is
|
||||||
grouped in a cluster of closely related words. Each cluster is
|
grouped in a cluster of closely related words. Each cluster is
|
||||||
uniquely identified by a headword, which is generally the American
|
uniquely identified by a headword, which is generally the American
|
||||||
|
@ -86,10 +105,26 @@ the headword is found in. The levels generally mean the following:
|
||||||
unabridged dictionary
|
unabridged dictionary
|
||||||
> 80: May not even be a legal word
|
> 80: May not even be a legal word
|
||||||
|
|
||||||
Sometimes the spelling of a word depends on the usage. If so the word
|
Earlier versions of varcon contained numerous errors. With version
|
||||||
is listed more than once within a cluster, with any usage information
|
5.0 massive effort has been made to correct many of these errors.
|
||||||
being indicated after a " | ". For example here is part of the cluster
|
Clusters that have undergone some form of verification (and likely
|
||||||
for prize:
|
correction) are marked with "<verified>". As of version 5.0, most
|
||||||
|
clusters with headwords word in common usage (SCOWL level 35 and
|
||||||
|
below) should now be checked, as well as many others. No effort was
|
||||||
|
made to check clusters with headwords in SCOWL level 80 and above;
|
||||||
|
many of those entries are unlikely to be in the dictionary anyway.
|
||||||
|
|
||||||
|
|
||||||
|
Varcon Groups
|
||||||
|
-------------
|
||||||
|
|
||||||
|
Sometimes the spelling of a word depends on the usage in which case a
|
||||||
|
cluster is split into multiple groups with each group representing one
|
||||||
|
usage of a word. Usage annotations and/or pos tags are used to
|
||||||
|
distinguish one group from another.
|
||||||
|
|
||||||
|
Usage information is given after a " | ". For example here is part of
|
||||||
|
the cluster for prize:
|
||||||
A B: prize | reward
|
A B: prize | reward
|
||||||
A B: prizes | reward
|
A B: prizes | reward
|
||||||
A C: prize / B: prise | otherwise
|
A C: prize / B: prise | otherwise
|
||||||
|
@ -102,50 +137,90 @@ consists of a number, for example:
|
||||||
A B: sake | :1
|
A B: sake | :1
|
||||||
A C: sake / Av B Cv: saki | :2
|
A C: sake / Av B Cv: saki | :2
|
||||||
|
|
||||||
Sometimes part-of-speech (POS) info is given to help distinguish which
|
A part-of-speech (POS) tag may also be given after a " | ", for example:
|
||||||
form is used. For example:
|
|
||||||
A B C: practice / AV Cv: practise | <N>
|
A B C: practice / AV Cv: practise | <N>
|
||||||
A Cv: practice / AV B C: practise | <V>
|
A Cv: practice / AV B C: practise | <V>
|
||||||
POS info is always given in the form "<POS>" and if a definition
|
POS tags are always given in the form "<POS>" and if a definition
|
||||||
is also given the POS info is always first. The POS tags used are as
|
is also given the POS info is always first. The POS tags used are as
|
||||||
follows:
|
follows:
|
||||||
<N>: Noun
|
<N>: Noun
|
||||||
<V>: Verb
|
<V>: Verb
|
||||||
<Adj>: Adjective
|
<Adj>: Adjective
|
||||||
<Adv>: Adverb
|
<Adv>: Adverb
|
||||||
|
<A>: Adjective or Adverb
|
||||||
|
<Inj>
|
||||||
|
<Prep>
|
||||||
|
<abbr>
|
||||||
|
|
||||||
|
|
||||||
|
Additional Annotations
|
||||||
|
----------------------
|
||||||
|
|
||||||
A "(-)" before the definition indicated a rarely used or archaic form
|
A "(-)" before the definition indicated a rarely used or archaic form
|
||||||
of a word, for example:
|
of a word, for example:
|
||||||
A B: bark | :1
|
A B: bark | :1
|
||||||
A: bark / Av B: barque | (-) ship
|
A: bark / Av B: barque | (-) ship
|
||||||
|
|
||||||
A "--" indicates a note rather than definition. This is generally
|
A "| -- pl: someword" indicates that the word is a plural and the root
|
||||||
used to indicate that the spelling of the plural form not depend on
|
is someword.
|
||||||
the spelling of the root word, for example:
|
|
||||||
_: cabby / _.: cabbie
|
|
||||||
_: cabbies | -- plural
|
|
||||||
|
|
||||||
Misc. notes on a particular form of a word are given after a "#" on
|
A plain "| -- pl" indicates that the word is a plural and the root is
|
||||||
the same line. Misc. notes for the cluster are given at the end of
|
elsewhere within the group. It is used when one form of the plural is
|
||||||
the cluster and are prefixed with "##", for example:
|
the same as the root word, for example:
|
||||||
|
_1: yak | :1
|
||||||
|
_ 1: yaks / _V 1: yak | :1 | -- pl
|
||||||
|
_ 1: yak's | :1
|
||||||
|
|
||||||
|
A "| --" otherwise indicates a note which gives additional context but
|
||||||
|
does not create its own group like a definition does.
|
||||||
|
|
||||||
|
A "#" after a line indicates a comment that is often used to indicate
|
||||||
|
why. A "##" after a cluster indicates that the comment applies to the
|
||||||
|
entire cluster, for example:
|
||||||
# coloration <verified> (level 50)
|
# coloration <verified> (level 50)
|
||||||
A B C: coloration / B. Cv: colouration
|
A B C: coloration / B. Cv: colouration
|
||||||
A B C: colorations / B. Cv: colourations
|
A B C: colorations / B. Cv: colourations
|
||||||
A B C: coloration's / B. Cv: colouration's
|
A B C: coloration's / B. Cv: colouration's
|
||||||
## OED has coloration as the preferred spelling and discolouration as a
|
## OED has coloration as the preferred spelling and discolouration as a
|
||||||
## variant for British Engl or some reason
|
## variant for British Engl or some reason
|
||||||
In the notes ODE (not to be confused with OED) stands for Oxford
|
In the comments ODE (not to be confused with OED) stands for Oxford
|
||||||
Dictionary of English, "Ox" is used for any Oxford dictionary, and
|
Dictionary of English, "Ox" is used for any Oxford dictionary, and
|
||||||
"M-W" for Merriam-Webster.
|
"M-W" for Merriam-Webster.
|
||||||
|
|
||||||
Earlier versions of varcon contained numerous errors. With version
|
|
||||||
5.0 massive effort has been made to correct many of these errors.
|
Varcon Columns
|
||||||
Clusters that have undergone some form of verification (and likely
|
--------------
|
||||||
correction) are marked with "<verified>". As of version 5.0, most
|
|
||||||
clusters with headwords word in common usage (SCOWL level 35 and
|
Varcon does not directly express the relation of words within a
|
||||||
below) should now be checked, as well as many others. No effort was
|
group as it is normally easy to derive. For example given a simple
|
||||||
made to check clusters with headwords in SCOWL level 80 and above;
|
group of:
|
||||||
many of those entries are unlikely to be in the dictionary anyway.
|
A: acknowledgment / B: acknowledgement
|
||||||
|
A: acknowledgments / B: acknowledgements
|
||||||
|
A: acknowledgment's / B: acknowledgement's
|
||||||
|
it is clear that acknowledgments is the plural form of acknowledgment
|
||||||
|
since they are both the American spelling of a word. While
|
||||||
|
acknowledgEments is the plural form of acknowledgEment since they are
|
||||||
|
both the British forms of a word. Within a group each varcon line
|
||||||
|
is considered a row in a table and each entry within a line is considered
|
||||||
|
a column. Within this group the first column is the American spelling
|
||||||
|
and the second is the British.
|
||||||
|
|
||||||
|
Sometimes the column assignment is unclear; when it is, explicit column
|
||||||
|
numbers may be given. For example:
|
||||||
|
A B: caulk / Av: calk / AV Bv 1: caulking / AV 2: calking | <N> :3
|
||||||
|
A B: caulks / Av: calks / AV Bv 1: caulkings / AV 2: calkings | <N> :3
|
||||||
|
A B: caulk's / Av: calk's / AV Bv 1: caulking's / AV 2: calking's | <N> :3
|
||||||
|
|
||||||
|
Each column must contain exactly one spelling of the base form of a
|
||||||
|
word, however a column may contain multiple derived forms for a single
|
||||||
|
spelling of the base form, for example:
|
||||||
|
A B D 1: amoeba / Av Dv 2: ameba
|
||||||
|
A B D 1: amoebas / Av Bv Dv 1: amoebae / Av Dv 2: amebas / Av Dv 2: amebae
|
||||||
|
A B D 1: amoeba's / Av Dv 2: ameba's
|
||||||
|
|
||||||
|
|
||||||
|
Additional Files
|
||||||
|
================
|
||||||
|
|
||||||
The file variant-also.tab contains additional mappings between various
|
The file variant-also.tab contains additional mappings between various
|
||||||
spellings of a word which are not yet in varcon.txt. No attempt is
|
spellings of a word which are not yet in varcon.txt. No attempt is
|
||||||
|
@ -155,6 +230,7 @@ automatically from the AGID inflection database. The file
|
||||||
variant-wroot.tab is like variant-infl.tab except that it also
|
variant-wroot.tab is like variant-infl.tab except that it also
|
||||||
included the root form of the word.
|
included the root form of the word.
|
||||||
|
|
||||||
|
|
||||||
The file voc.tab is similar to varcon.txt but converts between
|
The file voc.tab is similar to varcon.txt but converts between
|
||||||
vocabulary instead of spelling. Unlike varcon.tab it is a simple tab
|
vocabulary instead of spelling. Unlike varcon.tab it is a simple tab
|
||||||
separated file with the fields corresponding to the American, British,
|
separated file with the fields corresponding to the American, British,
|
||||||
|
@ -163,11 +239,13 @@ the same thing the words are separated with commas. The last column
|
||||||
contains additional notes on when the word is used. Unlike varcon.txt
|
contains additional notes on when the word is used. Unlike varcon.txt
|
||||||
it is generally not suitable for automatic conversion.
|
it is generally not suitable for automatic conversion.
|
||||||
|
|
||||||
|
|
||||||
The "make-variant" Perl script will combine varcon.txt,
|
The "make-variant" Perl script will combine varcon.txt,
|
||||||
variant-also.tab, and variant-infl.tab into one huge mapping and will
|
variant-also.tab, and variant-infl.tab into one huge mapping and will
|
||||||
output the result to "variant.tab". If the "no-infl" option is given
|
output the result to "variant.tab". If the "no-infl" option is given
|
||||||
than variant-infl.tab will not be included.
|
than variant-infl.tab will not be included.
|
||||||
|
|
||||||
|
|
||||||
The "split" script will split out the information in varcon.txt into
|
The "split" script will split out the information in varcon.txt into
|
||||||
several word lists named as follows:
|
several word lists named as follows:
|
||||||
<spelling>[-v<variant level>][-uncommon].lst
|
<spelling>[-v<variant level>][-uncommon].lst
|
||||||
|
@ -182,6 +260,7 @@ as follows:
|
||||||
- => 2
|
- => 2
|
||||||
"-uncommon" is used for forms marked with "(-)" as already described.
|
"-uncommon" is used for forms marked with "(-)" as already described.
|
||||||
|
|
||||||
|
|
||||||
The "translate" Perl script will translate a text file from one
|
The "translate" Perl script will translate a text file from one
|
||||||
spelling to another. Its usage is:
|
spelling to another. Its usage is:
|
||||||
|
|
||||||
|
@ -199,16 +278,23 @@ Text is read in from standard input and is outputted to standard out.
|
||||||
Words are marked with a '?' before and after the questionable word
|
Words are marked with a '?' before and after the questionable word
|
||||||
when the option is enabled.
|
when the option is enabled.
|
||||||
|
|
||||||
|
|
||||||
The file varcon.pm contains some library routines for parsing
|
The file varcon.pm contains some library routines for parsing
|
||||||
varcon.txt and is used by many of the scripts above.
|
varcon.txt and is used by many of the scripts above.
|
||||||
|
|
||||||
|
|
||||||
|
Feedback
|
||||||
|
========
|
||||||
|
|
||||||
If you discover any errors in these mappings or have suggestions for
|
If you discover any errors in these mappings or have suggestions for
|
||||||
additions please file a bug report at
|
additions please file a bug report at
|
||||||
https://github.com/kevina/wordlist/issues, or alternatively email me
|
https://github.com/kevina/wordlist/issues, or alternatively email me
|
||||||
directly at kevina@gnu.org, but I will likely tell you to file a bug
|
directly at kevina@gnu.org, but I will likely tell you to file a bug
|
||||||
report so that I don't forget about it.
|
report so that I don't forget about it.
|
||||||
|
|
||||||
SOURCE:
|
|
||||||
|
Sources
|
||||||
|
=======
|
||||||
|
|
||||||
These mappings were compiled from numerous sources.
|
These mappings were compiled from numerous sources.
|
||||||
|
|
||||||
|
@ -296,9 +382,22 @@ The primary sources for this addition were:
|
||||||
http://blogs.usyd.edu.au/elac/2008/01/webster_in_australia.html
|
http://blogs.usyd.edu.au/elac/2008/01/webster_in_australia.html
|
||||||
|
|
||||||
|
|
||||||
CHANGELOG:
|
Changelog
|
||||||
|
=========
|
||||||
|
|
||||||
From 2017.08.24 to 2018.10.06
|
From 2018.10.06 to 2020.12.07
|
||||||
|
|
||||||
|
- Additional documentation on file format
|
||||||
|
|
||||||
|
- Minor change in file format
|
||||||
|
|
||||||
|
- Fix scripts to work with modern versions of Perl.
|
||||||
|
|
||||||
|
- Various new entries
|
||||||
|
|
||||||
|
- Additional cleanups
|
||||||
|
|
||||||
|
From 2017.08.24 to 2019.10.06
|
||||||
|
|
||||||
- Added entries for: eukaryote, prokaryote, virtualization, volcanism
|
- Added entries for: eukaryote, prokaryote, virtualization, volcanism
|
||||||
|
|
||||||
|
@ -423,9 +522,11 @@ From Revision 1 to Revision 2 (January 27, 2001)
|
||||||
words in them.
|
words in them.
|
||||||
- Added variant-infl.tab
|
- Added variant-infl.tab
|
||||||
|
|
||||||
COPYRIGHT:
|
|
||||||
|
|
||||||
Copyright 2000-2018 by Kevin Atkinson
|
Copyright
|
||||||
|
=========
|
||||||
|
|
||||||
|
Copyright 2000-2019 by Kevin Atkinson
|
||||||
|
|
||||||
Permission to use, copy, modify, distribute and sell this array, the
|
Permission to use, copy, modify, distribute and sell this array, the
|
||||||
associated software, and its documentation for any purpose is hereby
|
associated software, and its documentation for any purpose is hereby
|
||||||
|
|
783
crates/varcon/assets/varcon.txt
vendored
783
crates/varcon/assets/varcon.txt
vendored
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue