docs(typos): Clarify intent

This commit is contained in:
Ed Page 2021-01-01 21:35:49 -06:00
parent 5f82dd6017
commit aba85df435
3 changed files with 49 additions and 22 deletions

View file

@ -1,5 +1,34 @@
use std::borrow::Cow;
/// Look up the validity of a term.
///
/// Implementors must be `Send + Sync`. The `Status` returned by either method
/// is tied to the borrow of the dictionary (`'s`), not to the lifetime of the
/// term being looked up (`'w`).
pub trait Dictionary: Send + Sync {
/// Look up the validity of an Identifier.
///
/// `None` if the status is unknown.
fn correct_ident<'s, 'w>(&'s self, ident: crate::tokens::Identifier<'w>) -> Option<Status<'s>>;
/// Look up the validity of a Word.
///
/// `None` if the status is unknown.
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
}
/// A `Dictionary` without any entries: every lookup reports an unknown status.
pub(crate) struct NullDictionary;

impl Dictionary for NullDictionary {
    /// Always `None`; the identifier is never inspected.
    fn correct_ident<'s, 'w>(&'s self, _: crate::tokens::Identifier<'w>) -> Option<Status<'s>> {
        None
    }

    /// Always `None`; the word is never inspected.
    fn correct_word<'s, 'w>(
        &'s self,
        _: crate::tokens::Word<'w>,
    ) -> Option<Status<'s>> {
        None
    }
}
/// Validity of a term in a Dictionary.
#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)]
#[serde(rename_all = "snake_case")]
#[serde(untagged)]
@ -54,25 +83,3 @@ impl<'c> Status<'c> {
}
}
}
/// Look up the validity of a term.
pub trait Dictionary: Send + Sync {
    /// Look up the validity of an Identifier.
    ///
    /// `None` if the status is unknown.
    // The parameter is named `ident` (not `_ident`): an underscore prefix
    // marks an unused binding, which has no meaning on a bodiless declaration.
    fn correct_ident<'s, 'w>(&'s self, ident: crate::tokens::Identifier<'w>) -> Option<Status<'s>>;

    /// Look up the validity of a Word.
    ///
    /// `None` if the status is unknown.
    fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
}
/// Placeholder `Dictionary` that has no opinion on any term.
pub(crate) struct NullDictionary;

impl Dictionary for NullDictionary {
    /// Reports every identifier as unknown by returning `None`.
    fn correct_ident<'s, 'w>(&'s self, _: crate::tokens::Identifier<'w>) -> Option<Status<'s>> {
        None
    }

    /// Reports every word as unknown by returning `None`.
    fn correct_word<'s, 'w>(
        &'s self,
        _: crate::tokens::Word<'w>,
    ) -> Option<Status<'s>> {
        None
    }
}

View file

@ -15,11 +15,13 @@ impl<'p> ParserBuilder<'p, 'static> {
}
impl<'p, 'd> ParserBuilder<'p, 'd> {
/// Set the Tokenizer used when parsing.
pub fn tokenizer(mut self, tokenizer: &'p tokens::Tokenizer) -> Self {
self.tokenizer = Some(tokenizer);
self
}
/// Set the dictionary used when parsing.
pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> {
ParserBuilder {
tokenizer: self.tokenizer,
@ -27,6 +29,7 @@ impl<'p, 'd> ParserBuilder<'p, 'd> {
}
}
/// Extract typos from the buffer.
pub fn typos(&self) -> TyposParser<'p, 'd> {
TyposParser {
tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
@ -34,12 +37,14 @@ impl<'p, 'd> ParserBuilder<'p, 'd> {
}
}
/// Parse for Identifiers.
///
/// The parser borrows the tokenizer configured on this builder, or
/// `DEFAULT_TOKENIZER` when none was set.
pub fn identifiers(&self) -> IdentifiersParser<'p> {
    let tokenizer = match self.tokenizer {
        Some(configured) => configured,
        // Only dereference the lazily-built default when it is actually needed.
        None => &*DEFAULT_TOKENIZER,
    };
    IdentifiersParser { tokenizer }
}
/// Parse for Words.
pub fn words(&self) -> WordsParser<'p> {
WordsParser {
tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
@ -59,6 +64,7 @@ impl<'p> Default for ParserBuilder<'p, 'static> {
/// Fallback tokenizer for parsers whose builder was not given one.
///
/// Constructed with `Tokenizer::new` the first time it is dereferenced and
/// shared from then on.
static DEFAULT_TOKENIZER: once_cell::sync::Lazy<tokens::Tokenizer> =
    once_cell::sync::Lazy::new(tokens::Tokenizer::new);
/// Extract typos from the buffer.
#[derive(Clone)]
pub struct TyposParser<'p, 'd> {
tokenizer: &'p tokens::Tokenizer,
@ -116,6 +122,7 @@ impl<'p, 'd> TyposParser<'p, 'd> {
}
}
/// An invalid term found in the buffer.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Typo<'m> {
@ -152,6 +159,7 @@ impl<'m> Default for Typo<'m> {
}
}
/// Parse for Identifiers.
#[derive(Debug, Clone)]
pub struct IdentifiersParser<'p> {
tokenizer: &'p tokens::Tokenizer,
@ -167,6 +175,7 @@ impl<'p> IdentifiersParser<'p> {
}
}
/// Parse for Words.
#[derive(Debug, Clone)]
pub struct WordsParser<'p> {
tokenizer: &'p tokens::Tokenizer,

View file

@ -1,3 +1,4 @@
/// Define rules for tokenizing a buffer.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TokenizerBuilder {
ignore_hex: bool,
@ -12,26 +13,31 @@ impl TokenizerBuilder {
Default::default()
}
/// Specify that hexadecimal numbers should be ignored.
///
/// Stores `yes` in the builder's `ignore_hex` flag and returns the builder so
/// further setters can be chained.
pub fn ignore_hex(&mut self, yes: bool) -> &mut Self {
self.ignore_hex = yes;
self
}
/// Specify that leading digits are allowed for Identifiers.
///
/// Stores `yes` in the builder's `leading_digits` flag and returns the builder
/// so further setters can be chained.
pub fn leading_digits(&mut self, yes: bool) -> &mut Self {
self.leading_digits = yes;
self
}
/// Extend accepted leading characters for Identifiers.
///
/// Overwrites the builder's `leading_chars` with `chars`; a later call
/// replaces the value from an earlier one rather than appending to it.
/// Returns the builder so further setters can be chained.
pub fn leading_chars(&mut self, chars: String) -> &mut Self {
self.leading_chars = chars;
self
}
/// Specify that digits can be included in Identifiers.
///
/// Stores `yes` in the builder's `include_digits` flag and returns the builder
/// so further setters can be chained.
pub fn include_digits(&mut self, yes: bool) -> &mut Self {
self.include_digits = yes;
self
}
/// Extend accepted characters for Identifiers.
pub fn include_chars(&mut self, chars: String) -> &mut Self {
self.include_chars = chars;
self
@ -81,6 +87,7 @@ impl Default for TokenizerBuilder {
}
}
/// Extract Identifiers from a buffer.
#[derive(Debug, Clone)]
pub struct Tokenizer {
words_str: regex::Regex,
@ -148,6 +155,7 @@ fn is_hex(ident: &[u8]) -> bool {
HEX.is_match(ident)
}
/// A term composed of Words.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Identifier<'t> {
token: &'t str,
@ -171,11 +179,13 @@ impl<'t> Identifier<'t> {
self.offset
}
/// Split into individual Words.
///
/// Delegates to `split_ident`, passing this identifier's `token` and `offset`.
pub fn split(&self) -> impl Iterator<Item = Word<'t>> {
    let (text, start) = (self.token, self.offset);
    split_ident(text, start)
}
}
/// An indivisible term.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Word<'t> {
token: &'t str,
@ -325,6 +335,7 @@ impl<'s> Iterator for SplitIdent<'s> {
}
}
/// Format of the term.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Case {
Title,