mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-24 10:00:59 -05:00
Merge pull request #1084 from epage/update
feat(varcon): Update to Version 2020.12.07
This commit is contained in:
commit
19ed24a45f
10 changed files with 64099 additions and 60443 deletions
66
Cargo.lock
generated
66
Cargo.lock
generated
|
@ -1134,9 +1134,9 @@ checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
|
|||
|
||||
[[package]]
|
||||
name = "snapbox"
|
||||
version = "0.6.16"
|
||||
version = "0.6.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "027c936207f85d10d015e21faf5c676c7e08c453ed371adf55c0874c443ca77a"
|
||||
checksum = "840b73eb3148bc3cbc10ebe00ec9bc6d96033e658d022c4adcbf3f35596fd64a"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
|
@ -1151,7 +1151,7 @@ dependencies = [
|
|||
"tempfile",
|
||||
"wait-timeout",
|
||||
"walkdir",
|
||||
"windows-sys 0.52.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1504,6 +1504,7 @@ name = "varcon-core"
|
|||
version = "4.0.10"
|
||||
dependencies = [
|
||||
"enumflags2",
|
||||
"snapbox",
|
||||
"winnow",
|
||||
]
|
||||
|
||||
|
@ -1591,11 +1592,11 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.52.0"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
|
||||
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||
dependencies = [
|
||||
"windows-targets 0.52.0",
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1615,17 +1616,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.52.0"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
|
||||
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm 0.52.0",
|
||||
"windows_aarch64_msvc 0.52.0",
|
||||
"windows_i686_gnu 0.52.0",
|
||||
"windows_i686_msvc 0.52.0",
|
||||
"windows_x86_64_gnu 0.52.0",
|
||||
"windows_x86_64_gnullvm 0.52.0",
|
||||
"windows_x86_64_msvc 0.52.0",
|
||||
"windows_aarch64_gnullvm 0.52.6",
|
||||
"windows_aarch64_msvc 0.52.6",
|
||||
"windows_i686_gnu 0.52.6",
|
||||
"windows_i686_gnullvm",
|
||||
"windows_i686_msvc 0.52.6",
|
||||
"windows_x86_64_gnu 0.52.6",
|
||||
"windows_x86_64_gnullvm 0.52.6",
|
||||
"windows_x86_64_msvc 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1636,9 +1638,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
|
|||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.52.0"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
|
||||
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
|
@ -1648,9 +1650,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
|
|||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.52.0"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
|
||||
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
|
@ -1660,9 +1662,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
|
|||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.52.0"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
|
||||
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
|
@ -1672,9 +1680,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
|
|||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.52.0"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
|
||||
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
|
@ -1684,9 +1692,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
|
|||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.52.0"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
|
||||
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
|
@ -1696,9 +1704,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
|
|||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.52.0"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
|
||||
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
|
@ -1708,9 +1716,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
|
|||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.52.0"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
|
||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -207,7 +207,7 @@ mod parser {
|
|||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
alt((
|
||||
one_of(|c| !is_xid_continue(c)).recognize(),
|
||||
one_of(|c| !is_xid_continue(c)).take(),
|
||||
eof.map(|_| <T as Stream>::Slice::default()),
|
||||
))
|
||||
.parse_next(input)
|
||||
|
@ -225,7 +225,7 @@ mod parser {
|
|||
one_of(|c| !is_xid_continue(c)),
|
||||
take_while(0.., is_ignore_char),
|
||||
)
|
||||
.recognize(),
|
||||
.take(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
@ -251,7 +251,7 @@ mod parser {
|
|||
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
|
||||
take_while(0.., is_sep),
|
||||
)
|
||||
.recognize(),
|
||||
.take(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
@ -273,7 +273,7 @@ mod parser {
|
|||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
('0', alt(('x', 'X')), take_while(1.., is_hex_digit_with_sep))
|
||||
.recognize()
|
||||
.take()
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
|
@ -293,7 +293,7 @@ mod parser {
|
|||
(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
|
||||
)),
|
||||
)
|
||||
.recognize(),
|
||||
.take(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
@ -318,7 +318,7 @@ mod parser {
|
|||
'.',
|
||||
take_while(20.., is_jwt_token),
|
||||
)
|
||||
.recognize(),
|
||||
.take(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
@ -366,7 +366,7 @@ mod parser {
|
|||
take_while(12, is_upper_hex_digit),
|
||||
),
|
||||
))
|
||||
.recognize(),
|
||||
.take(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
@ -450,7 +450,7 @@ mod parser {
|
|||
'@',
|
||||
take_while(1.., is_domain_char),
|
||||
)
|
||||
.recognize(),
|
||||
.take(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
@ -480,7 +480,7 @@ mod parser {
|
|||
// HACK: Too lazy to enumerate
|
||||
take_while(0.., is_path_query_fragment),
|
||||
)
|
||||
.recognize(),
|
||||
.take(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
@ -498,7 +498,7 @@ mod parser {
|
|||
take_while(1.., is_localport_char),
|
||||
opt((':', take_while(0.., is_localport_char))),
|
||||
)
|
||||
.recognize(),
|
||||
.take(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
@ -515,7 +515,7 @@ mod parser {
|
|||
// incorrectly, we opt for just not evaluating it at all.
|
||||
trace(
|
||||
"escape",
|
||||
(take_while(1.., is_escape), take_while(0.., is_xid_continue)).recognize(),
|
||||
(take_while(1.., is_escape), take_while(0.., is_xid_continue)).take(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
@ -527,11 +527,7 @@ mod parser {
|
|||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
trace(
|
||||
"printf",
|
||||
('%', take_while(1.., is_xid_continue)).recognize(),
|
||||
)
|
||||
.parse_next(input)
|
||||
trace("printf", ('%', take_while(1.., is_xid_continue)).take()).parse_next(input)
|
||||
}
|
||||
|
||||
fn take_many0<I, E, F>(mut f: F) -> impl Parser<I, <I as Stream>::Slice, E>
|
||||
|
@ -540,12 +536,7 @@ mod parser {
|
|||
F: Parser<I, <I as Stream>::Slice, E>,
|
||||
E: ParserError<I>,
|
||||
{
|
||||
move |i: &mut I| {
|
||||
repeat(0.., f.by_ref())
|
||||
.map(|()| ())
|
||||
.recognize()
|
||||
.parse_next(i)
|
||||
}
|
||||
move |i: &mut I| repeat(0.., f.by_ref()).map(|()| ()).take().parse_next(i)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
|
|
@ -25,3 +25,6 @@ enumflags2 = { version = "0.7", optional = true }
|
|||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
snapbox = "0.6.17"
|
||||
|
|
|
@ -20,8 +20,8 @@ pub struct Entry {
|
|||
pub variants: &'static [Variant],
|
||||
pub pos: Option<crate::Pos>,
|
||||
pub archaic: bool,
|
||||
pub note: bool,
|
||||
pub description: Option<&'static str>,
|
||||
pub note: Option<&'static str>,
|
||||
pub comment: Option<&'static str>,
|
||||
}
|
||||
|
||||
|
@ -31,8 +31,8 @@ impl Entry {
|
|||
variants: self.variants.iter().map(|v| v.into_owned()).collect(),
|
||||
pos: self.pos,
|
||||
archaic: self.archaic,
|
||||
note: self.note,
|
||||
description: self.description.map(|s| s.to_owned()),
|
||||
note: self.note.map(|s| s.to_owned()),
|
||||
comment: self.comment.map(|s| s.to_owned()),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,8 +30,8 @@ pub struct Entry {
|
|||
pub variants: Vec<Variant>,
|
||||
pub pos: Option<Pos>,
|
||||
pub archaic: bool,
|
||||
pub note: bool,
|
||||
pub description: Option<String>,
|
||||
pub note: Option<String>,
|
||||
pub comment: Option<String>,
|
||||
}
|
||||
|
||||
|
@ -124,6 +124,9 @@ pub enum Pos {
|
|||
Verb = 0x02,
|
||||
Adjective = 0x04,
|
||||
Adverb = 0x08,
|
||||
AdjectiveOrAdverb = 0x10,
|
||||
Interjection = 0x20,
|
||||
Preposition = 0x40,
|
||||
}
|
||||
|
||||
#[cfg(feature = "flags")]
|
||||
|
|
File diff suppressed because it is too large
Load diff
167
crates/varcon/assets/README
vendored
167
crates/varcon/assets/README
vendored
|
@ -1,8 +1,9 @@
|
|||
Variant Conversion Info (VarCon)
|
||||
********************************
|
||||
|
||||
Version 2019.10.06
|
||||
Version 2020.12.07
|
||||
|
||||
Copyright 2000-2016 by Kevin Atkinson (kevina@gnu.org) and Benjamin
|
||||
Copyright 2000-2020 by Kevin Atkinson (kevina@gnu.org) and Benjamin
|
||||
Titze (btitze@protonmail.ch).
|
||||
|
||||
This package contains information to convert between American,
|
||||
|
@ -11,9 +12,17 @@ other variant information.
|
|||
|
||||
The latest version can be found at http://wordlist.aspell.net/.
|
||||
|
||||
|
||||
File Format
|
||||
===========
|
||||
|
||||
The main data file is varcon.txt. It contains information on the
|
||||
preferred American, British, and Canadian spelling of a word as well
|
||||
as other variant information.
|
||||
preferred American, British, Canadian and Australian spelling of a
|
||||
word as well as other variant information.
|
||||
|
||||
|
||||
Varcon Lines
|
||||
------------
|
||||
|
||||
Each line contains a mapping between the various spellings of a word.
|
||||
Words are tagged to indicate where the spelling is used, and each
|
||||
|
@ -32,6 +41,7 @@ spelling is sometimes used in America (as indicated the "Av").
|
|||
More generally each tag consists of a spelling category (for example
|
||||
"A") followed possibly by a variant indicator. The spelling
|
||||
categories are as follows:
|
||||
|
||||
A: American
|
||||
B: British "ise" spelling
|
||||
Z: British "ize" spelling or OED preferred Spelling
|
||||
|
@ -39,7 +49,9 @@ categories are as follows:
|
|||
D: Australian
|
||||
_: Other (Variant info based on American dictionaries, never used
|
||||
with any of the above).
|
||||
|
||||
and the variants tags are as follows:
|
||||
|
||||
.: equal
|
||||
v: variant
|
||||
V: seldom used variant
|
||||
|
@ -66,6 +78,13 @@ If there are no tags with the 'Z' spelling category on the line then
|
|||
'B' implies 'Z'. Similarly if there are no 'C' tags then 'Z' implies
|
||||
'C'. If there are no 'D' tags then 'B' implies 'D'.
|
||||
|
||||
Some entries may have a number after the tags, this is a column
|
||||
number and will be explained later.
|
||||
|
||||
|
||||
Varcon Clusters
|
||||
---------------
|
||||
|
||||
For ease of reading and maintaining the data file, each line is
|
||||
grouped in a cluster of closely related words. Each cluster is
|
||||
uniquely identified by a headword, which is generally the American
|
||||
|
@ -86,10 +105,26 @@ the headword is found in. The levels generally mean the following:
|
|||
unabridged dictionary
|
||||
> 80: May not even be a legal word
|
||||
|
||||
Sometimes the spelling of a word depends on the usage. If so the word
|
||||
is listed more than once within a cluster, with any usage information
|
||||
being indicated after a " | ". For example here is part of the cluster
|
||||
for prize:
|
||||
Earlier versions of varcon contained numerous errors. With version
|
||||
5.0 massive effort has been made to correct many of these errors.
|
||||
Clusters that have undergone some form of verification (and likely
|
||||
correction) are marked with "<verified>". As of version 5.0, most
|
||||
clusters with headwords in common usage (SCOWL level 35 and
|
||||
below) should now be checked, as well as many others. No effort was
|
||||
made to check clusters with headwords in SCOWL level 80 and above;
|
||||
many of those entries are unlikely to be in the dictionary anyway.
|
||||
|
||||
|
||||
Varcon Groups
|
||||
-------------
|
||||
|
||||
Sometimes the spelling of a word depends on the usage in which case a
|
||||
cluster is split into multiple groups with each group representing one
|
||||
usage of a word. Usage annotations and/or pos tags are used to
|
||||
distinguish one group from another.
|
||||
|
||||
Usage information is given after a " | ". For example here is part of
|
||||
the cluster for prize:
|
||||
A B: prize | reward
|
||||
A B: prizes | reward
|
||||
A C: prize / B: prise | otherwise
|
||||
|
@ -102,50 +137,90 @@ consists of a number, for example:
|
|||
A B: sake | :1
|
||||
A C: sake / Av B Cv: saki | :2
|
||||
|
||||
Sometimes part-of-speech (POS) info is given to help distinguish which
|
||||
form is used. For example:
|
||||
A part-of-speech (POS) tag may also be given after a " | ", for example:
|
||||
A B C: practice / AV Cv: practise | <N>
|
||||
A Cv: practice / AV B C: practise | <V>
|
||||
POS info is always given in the form "<POS>" and if a definition
|
||||
POS tags are always given in the form "<POS>" and if a definition
|
||||
is also given the POS info is always first. The POS tags used are as
|
||||
follows:
|
||||
<N>: Noun
|
||||
<V>: Verb
|
||||
<Adj>: Adjective
|
||||
<Adv>: Adverb
|
||||
<A>: Adjective or Adverb
|
||||
<Inj>
|
||||
<Prep>
|
||||
<abbr>
|
||||
|
||||
|
||||
Additional Annotations
|
||||
----------------------
|
||||
|
||||
A "(-)" before the definition indicates a rarely used or archaic form
|
||||
of a word, for example:
|
||||
A B: bark | :1
|
||||
A: bark / Av B: barque | (-) ship
|
||||
|
||||
A "--" indicates a note rather than definition. This is generally
|
||||
used to indicate that the spelling of the plural form does not depend on
|
||||
the spelling of the root word, for example:
|
||||
_: cabby / _.: cabbie
|
||||
_: cabbies | -- plural
|
||||
A "| -- pl: someword" indicates that the word is a plural and the root
|
||||
is someword.
|
||||
|
||||
Misc. notes on a particular form of a word are given after a "#" on
|
||||
the same line. Misc. notes for the cluster are given at the end of
|
||||
the cluster and are prefixed with "##", for example:
|
||||
A plain "| -- pl" indicates that the word is a plural and the root is
|
||||
elsewhere within the group. It is used when one form of the plural is
|
||||
the same as the root word, for example:
|
||||
_1: yak | :1
|
||||
_ 1: yaks / _V 1: yak | :1 | -- pl
|
||||
_ 1: yak's | :1
|
||||
|
||||
A "| --" otherwise indicates a note which gives additional context but
|
||||
does not create its own group like a definition does.
|
||||
|
||||
A "#" after a line indicates a comment that is often used to indicate
|
||||
why. A "##" after a cluster indicates that the comment applies to the
|
||||
entire cluster, for example:
|
||||
# coloration <verified> (level 50)
|
||||
A B C: coloration / B. Cv: colouration
|
||||
A B C: colorations / B. Cv: colourations
|
||||
A B C: coloration's / B. Cv: colouration's
|
||||
## OED has coloration as the preferred spelling and discolouration as a
|
||||
## variant for British Engl or some reason
|
||||
In the notes ODE (not to be confused with OED) stands for Oxford
|
||||
In the comments ODE (not to be confused with OED) stands for Oxford
|
||||
Dictionary of English, "Ox" is used for any Oxford dictionary, and
|
||||
"M-W" for Merriam-Webster.
|
||||
|
||||
Earlier versions of varcon contained numerous errors. With version
|
||||
5.0 massive effort has been made to correct many of these errors.
|
||||
Clusters that have undergone some form of verification (and likely
|
||||
correction) are marked with "<verified>". As of version 5.0, most
|
||||
clusters with headwords in common usage (SCOWL level 35 and
|
||||
below) should now be checked, as well as many others. No effort was
|
||||
made to check clusters with headwords in SCOWL level 80 and above;
|
||||
many of those entries are unlikely to be in the dictionary anyway.
|
||||
|
||||
Varcon Columns
|
||||
--------------
|
||||
|
||||
Varcon does not directly express the relation of words within a
|
||||
group as it is normally easy to derive. For example given a simple
|
||||
group of:
|
||||
A: acknowledgment / B: acknowledgement
|
||||
A: acknowledgments / B: acknowledgements
|
||||
A: acknowledgment's / B: acknowledgement's
|
||||
it is clear that acknowledgments is the plural form of acknowledgment
|
||||
since they are both the American spelling of a word. While
|
||||
acknowledgEments is the plural form of acknowledgEment since they are
|
||||
both the British forms of a word. Within a group each varcon line
|
||||
is considered a row in a table and each entry within a line is considered
|
||||
a column. Within this group the first column is the American spelling
|
||||
and the second is the British.
|
||||
|
||||
Sometimes the column assignment is unclear; when it is, explicit column
|
||||
numbers may be given. For example:
|
||||
A B: caulk / Av: calk / AV Bv 1: caulking / AV 2: calking | <N> :3
|
||||
A B: caulks / Av: calks / AV Bv 1: caulkings / AV 2: calkings | <N> :3
|
||||
A B: caulk's / Av: calk's / AV Bv 1: caulking's / AV 2: calking's | <N> :3
|
||||
|
||||
Each column must contain exactly one spelling of the base form of a
|
||||
word, however a column may contain multiple derived forms for a single
|
||||
spelling of the base form, for example:
|
||||
A B D 1: amoeba / Av Dv 2: ameba
|
||||
A B D 1: amoebas / Av Bv Dv 1: amoebae / Av Dv 2: amebas / Av Dv 2: amebae
|
||||
A B D 1: amoeba's / Av Dv 2: ameba's
|
||||
|
||||
|
||||
Additional Files
|
||||
================
|
||||
|
||||
The file variant-also.tab contains additional mappings between various
|
||||
spellings of a word which are not yet in varcon.txt. No attempt is
|
||||
|
@ -155,6 +230,7 @@ automatically from the AGID inflection database. The file
|
|||
variant-wroot.tab is like variant-infl.tab except that it also
|
||||
includes the root form of the word.
|
||||
|
||||
|
||||
The file voc.tab is similar to varcon.txt but converts between
|
||||
vocabulary instead of spelling. Unlike varcon.tab it is a simple tab
|
||||
separated file with the fields corresponding to the American, British,
|
||||
|
@ -163,11 +239,13 @@ the same thing the words are separated with commas. The last column
|
|||
contains additional notes on when the word is used. Unlike varcon.txt
|
||||
it is generally not suitable for automatic conversion.
|
||||
|
||||
|
||||
The "make-variant" Perl script will combine varcon.txt,
|
||||
variant-also.tab, and variant-infl.tab into one huge mapping and will
|
||||
output the result to "variant.tab". If the "no-infl" option is given
|
||||
then variant-infl.tab will not be included.
|
||||
|
||||
|
||||
The "split" script will split out the information in varcon.txt into
|
||||
several word lists named as follows:
|
||||
<spelling>[-v<variant level>][-uncommon].lst
|
||||
|
@ -182,6 +260,7 @@ as follows:
|
|||
- => 2
|
||||
"-uncommon" is used for forms marked with "(-)" as already described.
|
||||
|
||||
|
||||
The "translate" Perl script will translate a text file from one
|
||||
spelling to another. Its usage is:
|
||||
|
||||
|
@ -199,16 +278,23 @@ Text is read in from standard input and is outputted to standard out.
|
|||
Words are marked with a '?' before and after the questionable word
|
||||
when the option is enabled.
|
||||
|
||||
|
||||
The file varcon.pm contains some library routines for parsing
|
||||
varcon.txt and is used by many of the scripts above.
|
||||
|
||||
|
||||
Feedback
|
||||
========
|
||||
|
||||
If you discover any errors in these mappings or have suggestions for
|
||||
additions please file a bug report at
|
||||
https://github.com/kevina/wordlist/issues, or alternatively email me
|
||||
directly at kevina@gnu.org, but I will likely tell you to file a bug
|
||||
report so that I don't forget about it.
|
||||
|
||||
SOURCE:
|
||||
|
||||
Sources
|
||||
=======
|
||||
|
||||
These mappings were compiled from numerous sources.
|
||||
|
||||
|
@ -296,9 +382,22 @@ The primary sources for this addition were:
|
|||
http://blogs.usyd.edu.au/elac/2008/01/webster_in_australia.html
|
||||
|
||||
|
||||
CHANGELOG:
|
||||
Changelog
|
||||
=========
|
||||
|
||||
From 2017.08.24 to 2018.10.06
|
||||
From 2018.10.06 to 2020.12.07
|
||||
|
||||
- Additional documentation on file format
|
||||
|
||||
- Minor change in file format
|
||||
|
||||
- Fix scripts to work with modern versions of Perl.
|
||||
|
||||
- Various new entries
|
||||
|
||||
- Additional cleanups
|
||||
|
||||
From 2017.08.24 to 2019.10.06
|
||||
|
||||
- Added entries for: eukaryote, prokaryote, virtualization, volcanism
|
||||
|
||||
|
@ -423,9 +522,11 @@ From Revision 1 to Revision 2 (January 27, 2001)
|
|||
words in them.
|
||||
- Added variant-infl.tab
|
||||
|
||||
COPYRIGHT:
|
||||
|
||||
Copyright 2000-2018 by Kevin Atkinson
|
||||
Copyright
|
||||
=========
|
||||
|
||||
Copyright 2000-2019 by Kevin Atkinson
|
||||
|
||||
Permission to use, copy, modify, distribute and sell this array, the
|
||||
associated software, and its documentation for any purpose is hereby
|
||||
|
|
759
crates/varcon/assets/varcon.txt
vendored
759
crates/varcon/assets/varcon.txt
vendored
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue