docs(typos): Clarify intent

This commit is contained in:
Ed Page 2021-01-01 21:35:49 -06:00
parent 5f82dd6017
commit aba85df435
3 changed files with 49 additions and 22 deletions

View file

@ -1,5 +1,34 @@
use std::borrow::Cow;
/// Look up the validity of a term.
///
/// Implementations must be `Send + Sync` so a single dictionary
/// instance can be shared across parsing threads.
pub trait Dictionary: Send + Sync {
/// Look up the validity of an Identifier.
///
/// `None` if the status is unknown.
fn correct_ident<'s, 'w>(&'s self, ident: crate::tokens::Identifier<'w>) -> Option<Status<'s>>;
/// Look up the validity of a Word.
///
/// `None` if the status is unknown.
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
}
/// Fallback dictionary with no entries: every lookup reports "unknown".
pub(crate) struct NullDictionary;

impl Dictionary for NullDictionary {
    /// Always `None`: no identifier has a known status here.
    fn correct_ident<'s, 'w>(
        &'s self,
        _ident: crate::tokens::Identifier<'w>,
    ) -> Option<Status<'s>> {
        None
    }

    /// Always `None`: no word has a known status here.
    fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option<Status<'s>> {
        None
    }
}
/// Validity of a term in a Dictionary.
#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)]
#[serde(rename_all = "snake_case")]
#[serde(untagged)]
@ -54,25 +83,3 @@ impl<'c> Status<'c> {
}
}
}
/// Look up the validity of a term.
pub trait Dictionary: Send + Sync {
/// Look up the validity of an Identifier; `None` if the status is unknown.
fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>)
-> Option<Status<'s>>;
/// Look up the validity of a Word; `None` if the status is unknown.
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
}
/// Dictionary with no entries: every lookup's status is unknown.
pub(crate) struct NullDictionary;
impl Dictionary for NullDictionary {
fn correct_ident<'s, 'w>(
&'s self,
_ident: crate::tokens::Identifier<'w>,
) -> Option<Status<'s>> {
// Unknown status for every identifier.
None
}
fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option<Status<'s>> {
// Unknown status for every word.
None
}
}

View file

@ -15,11 +15,13 @@ impl<'p> ParserBuilder<'p, 'static> {
}
impl<'p, 'd> ParserBuilder<'p, 'd> {
/// Set the Tokenizer used when parsing.
pub fn tokenizer(mut self, tokenizer: &'p tokens::Tokenizer) -> Self {
self.tokenizer = Some(tokenizer);
self
}
/// Set the dictionary used when parsing.
pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> {
ParserBuilder {
tokenizer: self.tokenizer,
@ -27,6 +29,7 @@ impl<'p, 'd> ParserBuilder<'p, 'd> {
}
}
/// Extract typos from the buffer.
pub fn typos(&self) -> TyposParser<'p, 'd> {
TyposParser {
tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
@ -34,12 +37,14 @@ impl<'p, 'd> ParserBuilder<'p, 'd> {
}
}
/// Parse for Identifiers.
pub fn identifiers(&self) -> IdentifiersParser<'p> {
IdentifiersParser {
tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
}
}
/// Parse for Words.
pub fn words(&self) -> WordsParser<'p> {
WordsParser {
tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
@ -59,6 +64,7 @@ impl<'p> Default for ParserBuilder<'p, 'static> {
static DEFAULT_TOKENIZER: once_cell::sync::Lazy<tokens::Tokenizer> =
once_cell::sync::Lazy::new(|| tokens::Tokenizer::new());
/// Extract typos from the buffer.
#[derive(Clone)]
pub struct TyposParser<'p, 'd> {
tokenizer: &'p tokens::Tokenizer,
@ -116,6 +122,7 @@ impl<'p, 'd> TyposParser<'p, 'd> {
}
}
/// An invalid term found in the buffer.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Typo<'m> {
@ -152,6 +159,7 @@ impl<'m> Default for Typo<'m> {
}
}
/// Parse for Identifiers.
#[derive(Debug, Clone)]
pub struct IdentifiersParser<'p> {
tokenizer: &'p tokens::Tokenizer,
@ -167,6 +175,7 @@ impl<'p> IdentifiersParser<'p> {
}
}
/// Parse for Words.
#[derive(Debug, Clone)]
pub struct WordsParser<'p> {
tokenizer: &'p tokens::Tokenizer,

View file

@ -1,3 +1,4 @@
/// Define rules for tokenizaing a buffer.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TokenizerBuilder {
ignore_hex: bool,
@ -12,26 +13,31 @@ impl TokenizerBuilder {
Default::default()
}
/// Specify that hexadecimal numbers should be ignored.
pub fn ignore_hex(&mut self, yes: bool) -> &mut Self {
self.ignore_hex = yes;
self
}
/// Specify that leading digits are allowed for Identifiers.
pub fn leading_digits(&mut self, yes: bool) -> &mut Self {
self.leading_digits = yes;
self
}
/// Extend accepted leading characters for Identifiers.
pub fn leading_chars(&mut self, chars: String) -> &mut Self {
self.leading_chars = chars;
self
}
/// Specify that digits can be included in Identifiers.
pub fn include_digits(&mut self, yes: bool) -> &mut Self {
self.include_digits = yes;
self
}
/// Extend accepted characters for Identifiers.
pub fn include_chars(&mut self, chars: String) -> &mut Self {
self.include_chars = chars;
self
@ -81,6 +87,7 @@ impl Default for TokenizerBuilder {
}
}
/// Extract Identifiers from a buffer.
#[derive(Debug, Clone)]
pub struct Tokenizer {
words_str: regex::Regex,
@ -148,6 +155,7 @@ fn is_hex(ident: &[u8]) -> bool {
HEX.is_match(ident)
}
/// A term composed of Words.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Identifier<'t> {
token: &'t str,
@ -171,11 +179,13 @@ impl<'t> Identifier<'t> {
self.offset
}
/// Split into individual Words.
pub fn split(&self) -> impl Iterator<Item = Word<'t>> {
split_ident(self.token, self.offset)
}
}
/// An indivisible term.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Word<'t> {
token: &'t str,
@ -325,6 +335,7 @@ impl<'s> Iterator for SplitIdent<'s> {
}
}
/// Format of the term.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Case {
Title,