docs(typos): Clarify intent

This commit is contained in:
Ed Page 2021-01-01 21:35:49 -06:00
parent 5f82dd6017
commit aba85df435
3 changed files with 49 additions and 22 deletions

View file

@ -1,5 +1,34 @@
use std::borrow::Cow;
/// Look up the validity of a term.
///
/// Implementations must be `Send + Sync` so a single dictionary
/// instance can be shared across parsing threads.
pub trait Dictionary: Send + Sync {
/// Look up the validity of an Identifier.
///
/// `None` if the status is unknown.
fn correct_ident<'s, 'w>(&'s self, ident: crate::tokens::Identifier<'w>) -> Option<Status<'s>>;
/// Look up the validity of a Word.
///
/// `None` if the status is unknown.
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
}
/// Fallback dictionary with no entries: every lookup reports "unknown".
pub(crate) struct NullDictionary;

impl Dictionary for NullDictionary {
    /// Always `None`: no identifier has a known status here.
    fn correct_ident<'s, 'w>(
        &'s self,
        _ident: crate::tokens::Identifier<'w>,
    ) -> Option<Status<'s>> {
        None
    }

    /// Always `None`: no word has a known status here.
    fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option<Status<'s>> {
        None
    }
}
/// Validity of a term in a Dictionary.
#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)]
#[serde(rename_all = "snake_case")]
#[serde(untagged)]
@ -54,25 +83,3 @@ impl<'c> Status<'c> {
}
}
}
/// Look up the validity of a term.
pub trait Dictionary: Send + Sync {
/// Look up the validity of an Identifier; `None` if the status is unknown.
fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>)
-> Option<Status<'s>>;
/// Look up the validity of a Word; `None` if the status is unknown.
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
}
/// Dictionary with no entries: every lookup's status is unknown.
pub(crate) struct NullDictionary;
impl Dictionary for NullDictionary {
fn correct_ident<'s, 'w>(
&'s self,
_ident: crate::tokens::Identifier<'w>,
) -> Option<Status<'s>> {
// Unknown status for every identifier.
None
}
fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option<Status<'s>> {
// Unknown status for every word.
None
}
}

View file

@ -15,11 +15,13 @@ impl<'p> ParserBuilder<'p, 'static> {
}
impl<'p, 'd> ParserBuilder<'p, 'd> {
/// Set the Tokenizer used when parsing.
pub fn tokenizer(mut self, tokenizer: &'p tokens::Tokenizer) -> Self {
self.tokenizer = Some(tokenizer);
self
}
/// Set the dictionary used when parsing.
pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> {
ParserBuilder {
tokenizer: self.tokenizer,
@ -27,6 +29,7 @@ impl<'p, 'd> ParserBuilder<'p, 'd> {
}
}
/// Extract typos from the buffer.
pub fn typos(&self) -> TyposParser<'p, 'd> {
TyposParser {
tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
@ -34,12 +37,14 @@ impl<'p, 'd> ParserBuilder<'p, 'd> {
}
}
/// Parse for Identifiers.
pub fn identifiers(&self) -> IdentifiersParser<'p> {
IdentifiersParser {
tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
}
}
/// Parse for Words.
pub fn words(&self) -> WordsParser<'p> {
WordsParser {
tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
@ -59,6 +64,7 @@ impl<'p> Default for ParserBuilder<'p, 'static> {
static DEFAULT_TOKENIZER: once_cell::sync::Lazy<tokens::Tokenizer> =
once_cell::sync::Lazy::new(|| tokens::Tokenizer::new());
/// Extract typos from the buffer.
#[derive(Clone)]
pub struct TyposParser<'p, 'd> {
tokenizer: &'p tokens::Tokenizer,
@ -116,6 +122,7 @@ impl<'p, 'd> TyposParser<'p, 'd> {
}
}
/// An invalid term found in the buffer.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Typo<'m> {
@ -152,6 +159,7 @@ impl<'m> Default for Typo<'m> {
}
}
/// Parse for Identifiers.
#[derive(Debug, Clone)]
pub struct IdentifiersParser<'p> {
tokenizer: &'p tokens::Tokenizer,
@ -167,6 +175,7 @@ impl<'p> IdentifiersParser<'p> {
}
}
/// Parse for Words.
#[derive(Debug, Clone)]
pub struct WordsParser<'p> {
tokenizer: &'p tokens::Tokenizer,

View file

@ -1,3 +1,4 @@
/// Define rules for tokenizaing a buffer.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TokenizerBuilder {
ignore_hex: bool,
@ -12,26 +13,31 @@ impl TokenizerBuilder {
Default::default()
}
/// Specify that hexadecimal numbers should be ignored.
pub fn ignore_hex(&mut self, yes: bool) -> &mut Self {
self.ignore_hex = yes;
self
}
/// Specify that leading digits are allowed for Identifiers.
pub fn leading_digits(&mut self, yes: bool) -> &mut Self {
self.leading_digits = yes;
self
}
/// Extend accepted leading characters for Identifiers.
pub fn leading_chars(&mut self, chars: String) -> &mut Self {
self.leading_chars = chars;
self
}
/// Specify that digits can be included in Identifiers.
pub fn include_digits(&mut self, yes: bool) -> &mut Self {
self.include_digits = yes;
self
}
/// Extend accepted characters for Identifiers.
pub fn include_chars(&mut self, chars: String) -> &mut Self {
self.include_chars = chars;
self
@ -81,6 +87,7 @@ impl Default for TokenizerBuilder {
}
}
/// Extract Identifiers from a buffer.
#[derive(Debug, Clone)]
pub struct Tokenizer {
words_str: regex::Regex,
@ -148,6 +155,7 @@ fn is_hex(ident: &[u8]) -> bool {
HEX.is_match(ident)
}
/// A term composed of Words.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Identifier<'t> {
token: &'t str,
@ -171,11 +179,13 @@ impl<'t> Identifier<'t> {
self.offset
}
/// Split into individual Words.
pub fn split(&self) -> impl Iterator<Item = Word<'t>> {
split_ident(self.token, self.offset)
}
}
/// An indivisible term.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Word<'t> {
token: &'t str,
@ -325,6 +335,7 @@ impl<'s> Iterator for SplitIdent<'s> {
}
}
/// Format of the term.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Case {
Title,