mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-22 23:52:12 -05:00
Merge pull request #434 from epage/url
fix(parser): Allow commas in urls
This commit is contained in:
commit
3b2bcf267a
1 changed file with 52 additions and 4 deletions
|
@ -422,13 +422,33 @@ mod parser {
|
|||
tuple((char(':'), char('/'), char('/'))),
|
||||
)),
|
||||
tuple((
|
||||
opt(terminated(take_while1(is_localport_char), char('@'))),
|
||||
opt(terminated(url_userinfo, char('@'))),
|
||||
take_while1(is_domain_char),
|
||||
opt(preceded(char(':'), take_while1(AsChar::is_dec_digit))),
|
||||
)),
|
||||
char('/'),
|
||||
// HACK: Too lazy to enumerate
|
||||
take_while(is_localport_char),
|
||||
take_while(is_path_query_fragment),
|
||||
)))(input)
|
||||
}
|
||||
|
||||
fn url_userinfo<T>(input: T) -> IResult<T, T>
|
||||
where
|
||||
T: nom::InputTakeAtPosition
|
||||
+ nom::InputTake
|
||||
+ nom::InputIter
|
||||
+ nom::InputLength
|
||||
+ nom::Offset
|
||||
+ nom::Slice<std::ops::RangeTo<usize>>
|
||||
+ nom::Slice<std::ops::RangeFrom<usize>>
|
||||
+ Clone
|
||||
+ std::fmt::Debug,
|
||||
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
|
||||
<T as nom::InputIter>::Item: AsChar + Copy,
|
||||
{
|
||||
recognize(tuple((
|
||||
take_while1(is_localport_char),
|
||||
opt(preceded(char(':'), take_while(is_localport_char))),
|
||||
)))(input)
|
||||
}
|
||||
|
||||
|
@ -564,6 +584,33 @@ mod parser {
|
|||
|| "-().".find(c).is_some()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_path_query_fragment(i: impl AsChar + Copy) -> bool {
|
||||
let c = i.as_char();
|
||||
is_pchar(c) || "/?#".find(c).is_some()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_pchar(i: impl AsChar + Copy) -> bool {
|
||||
let c = i.as_char();
|
||||
is_uri_unreserved(c) || is_uri_sub_delims(c) || "%:@".find(c).is_some()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_uri_unreserved(i: impl AsChar + Copy) -> bool {
|
||||
let c = i.as_char();
|
||||
('a'..='z').contains(&c)
|
||||
|| ('A'..='Z').contains(&c)
|
||||
|| ('0'..='9').contains(&c)
|
||||
|| "-._~".find(c).is_some()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_uri_sub_delims(i: impl AsChar + Copy) -> bool {
|
||||
let c = i.as_char();
|
||||
"!$&'()*+,;=".find(c).is_some()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_scheme_char(i: impl AsChar + Copy) -> bool {
|
||||
let c = i.as_char();
|
||||
|
@ -1113,10 +1160,11 @@ mod test {
|
|||
fn tokenize_ignore_max_url() {
|
||||
let parser = TokenizerBuilder::new().build();
|
||||
|
||||
let input = "Good http://user@example.com:3142/hello?query=value&extra=two#fragment Bye";
|
||||
let input =
|
||||
"Good http://user:password@example.com:3142/hello?query=value&extra=two#fragment,split Bye";
|
||||
let expected: Vec<Identifier> = vec![
|
||||
Identifier::new_unchecked("Good", Case::None, 0),
|
||||
Identifier::new_unchecked("Bye", Case::None, 71),
|
||||
Identifier::new_unchecked("Bye", Case::None, 86),
|
||||
];
|
||||
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
|
||||
assert_eq!(expected, actual);
|
||||
|
|
Loading…
Reference in a new issue