mirror of https://github.com/crate-ci/typos.git
synced 2024-11-22 09:01:04 -05:00
parent b673b81146
commit c83f655109

1 changed file with 73 additions and 4 deletions
@@ -186,6 +186,7 @@ mod parser {
             terminated(dec_literal, sep1),
             terminated(base64_literal, sep1),
             terminated(email_literal, sep1),
+            terminated(url_literal, sep1),
             sep1,
         )))(input)
     }
@@ -321,9 +322,41 @@ mod parser {
         <T as nom::InputIter>::Item: AsChar + Copy,
     {
         recognize(tuple((
-            take_while1(is_email_localport_char),
+            take_while1(is_localport_char),
             char('@'),
-            take_while1(is_email_domain_char),
+            take_while1(is_domain_char),
+        )))(input)
+    }
+
+    fn url_literal<T>(input: T) -> IResult<T, T>
+    where
+        T: nom::InputTakeAtPosition
+            + nom::InputTake
+            + nom::InputIter
+            + nom::InputLength
+            + nom::Offset
+            + nom::Slice<std::ops::RangeTo<usize>>
+            + nom::Slice<std::ops::RangeFrom<usize>>
+            + std::fmt::Debug
+            + Clone,
+        <T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
+        <T as nom::InputIter>::Item: AsChar + Copy,
+    {
+        recognize(tuple((
+            opt(terminated(
+                take_while1(is_scheme_char),
+                // HACK: Technically you can skip `//` if you don't have a domain but that would
+                // get messy to support.
+                tuple((char(':'), char('/'), char('/'))),
+            )),
+            tuple((
+                opt(terminated(take_while1(is_localport_char), char('@'))),
+                take_while1(is_domain_char),
+                opt(preceded(char(':'), take_while1(AsChar::is_dec_digit))),
+            )),
+            char('/'),
+            // HACK: Too lazy to enumerate
+            take_while(is_localport_char),
         )))(input)
     }
 
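For orientation, here is a self-contained sketch of the URL shape the new parser accepts: an optional scheme://, an optional user@ part, a required domain, an optional :port, a required /, and a loosely matched remainder. This is not the crate's code; it assumes nom 7's function-style combinators, it is specialized to &str input, and the character predicates are simplified stand-ins for the ones in this diff.

use nom::{
    bytes::complete::{take_while, take_while1},
    character::complete::char,
    combinator::{opt, recognize},
    sequence::{preceded, terminated, tuple},
    IResult,
};

// Simplified, illustrative character classes (not the crate's exact sets).
fn is_localport_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || "!#$%&'*+-/=?^_`{|}~().".contains(c)
}

fn is_domain_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || "-().".contains(c)
}

fn is_scheme_char(c: char) -> bool {
    c.is_ascii_lowercase() || c.is_ascii_digit() || "+.-".contains(c)
}

fn url_literal(input: &str) -> IResult<&str, &str> {
    recognize(tuple((
        // Optional `scheme://` prefix.
        opt(terminated(
            take_while1(is_scheme_char),
            tuple((char(':'), char('/'), char('/'))),
        )),
        // Optional `user@`, required domain, optional `:port`.
        tuple((
            opt(terminated(take_while1(is_localport_char), char('@'))),
            take_while1(is_domain_char),
            opt(preceded(char(':'), take_while1(|c: char| c.is_ascii_digit()))),
        )),
        // The `/` is mandatory, so a bare word is not mistaken for a URL.
        char('/'),
        take_while(is_localport_char),
    )))(input)
}

fn main() {
    assert!(url_literal("example.com/hello").is_ok());
    assert!(url_literal("http://user@example.com:3142/hello?query=value").is_ok());
    assert!(url_literal("example.com").is_err()); // no path separator
}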
@@ -393,7 +426,7 @@ mod parser {
     }
 
     #[inline]
-    fn is_email_localport_char(i: impl AsChar + Copy) -> bool {
+    fn is_localport_char(i: impl AsChar + Copy) -> bool {
         let c = i.as_char();
         ('a'..='z').contains(&c)
             || ('A'..='Z').contains(&c)
@@ -402,7 +435,7 @@ mod parser {
     }
 
     #[inline]
-    fn is_email_domain_char(i: impl AsChar + Copy) -> bool {
+    fn is_domain_char(i: impl AsChar + Copy) -> bool {
         let c = i.as_char();
         ('a'..='z').contains(&c)
             || ('A'..='Z').contains(&c)
@@ -410,6 +443,12 @@ mod parser {
             || "-().".find(c).is_some()
     }
 
+    #[inline]
+    fn is_scheme_char(i: impl AsChar + Copy) -> bool {
+        let c = i.as_char();
+        ('a'..='z').contains(&c) || ('0'..='9').contains(&c) || "+.-".find(c).is_some()
+    }
+
     #[inline]
     fn is_xid_continue(i: impl AsChar + Copy) -> bool {
         let c = i.as_char();
@@ -860,6 +899,36 @@ mod test {
         assert_eq!(expected, actual);
     }
 
+    #[test]
+    fn tokenize_ignore_min_url() {
+        let parser = TokenizerBuilder::new().build();
+
+        let input = "Good example.com/hello Bye";
+        let expected: Vec<Identifier> = vec![
+            Identifier::new_unchecked("Good", Case::None, 0),
+            Identifier::new_unchecked("Bye", Case::None, 23),
+        ];
+        let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
+        assert_eq!(expected, actual);
+        let actual: Vec<_> = parser.parse_str(input).collect();
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn tokenize_ignore_max_url() {
+        let parser = TokenizerBuilder::new().build();
+
+        let input = "Good http://user@example.com:3142/hello?query=value&extra=two#fragment Bye";
+        let expected: Vec<Identifier> = vec![
+            Identifier::new_unchecked("Good", Case::None, 0),
+            Identifier::new_unchecked("Bye", Case::None, 71),
+        ];
+        let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
+        assert_eq!(expected, actual);
+        let actual: Vec<_> = parser.parse_str(input).collect();
+        assert_eq!(expected, actual);
+    }
+
     #[test]
     fn tokenize_leading_digits() {
         let parser = TokenizerBuilder::new().build();
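As an end-to-end illustration of what the two new tests pin down, a minimal usage sketch of the public tokenizer follows. The typos::tokens module path is an assumption (it is not shown in this diff), and only the token count is asserted so the sketch does not depend on Identifier's accessors.

// Sketch only: the import path is assumed, not taken from this diff.
use typos::tokens::TokenizerBuilder;

fn main() {
    let tokenizer = TokenizerBuilder::new().build();

    // The URL is skipped wholesale; only the surrounding words are tokenized.
    let count = tokenizer
        .parse_str("Good http://example.com/hello Bye")
        .count();
    assert_eq!(count, 2);
}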