fix(parser): Allow commas in urls

Brings URL parsing closer to RFC 3986: https://www.ietf.org/rfc/rfc3986.txt

Fixes #433
This commit is contained in:
Ed Page 2022-02-14 08:49:53 -06:00
parent 09203fd592
commit c3bb4adfa1

View file

@ -428,7 +428,7 @@ mod parser {
)), )),
char('/'), char('/'),
// HACK: Too lazy to enumerate // HACK: Too lazy to enumerate
take_while(is_localport_char), take_while(is_path_query_fragment),
)))(input) )))(input)
} }
@ -584,6 +584,33 @@ mod parser {
|| "-().".find(c).is_some() || "-().".find(c).is_some()
} }
/// Returns `true` when `i` may appear in the trailing part of a URL, i.e. the
/// path, query, or fragment.
///
/// The accepted set is every `pchar` (see `is_pchar`) plus the three
/// characters `/`, `?` and `#`. No attempt is made to tell the path, query and
/// fragment apart; they are treated as one run of characters.
#[inline]
fn is_path_query_fragment(i: impl AsChar + Copy) -> bool {
    let ch = i.as_char();
    // Separator characters that are legal here but are not themselves `pchar`s.
    let is_separator = "/?#".contains(ch);
    is_separator || is_pchar(ch)
}
/// Returns `true` when `i` is acceptable as an RFC 3986 `pchar`
/// (`unreserved / pct-encoded / sub-delims / ":" / "@"`).
///
/// Percent-encoding is handled loosely: a bare `%` is accepted on its own and
/// the two hex digits that should follow it are not checked here.
#[inline]
fn is_pchar(i: impl AsChar + Copy) -> bool {
    let ch = i.as_char();
    if "%:@".contains(ch) {
        return true;
    }
    is_uri_unreserved(ch) || is_uri_sub_delims(ch)
}
/// Returns `true` when `i` is an RFC 3986 `unreserved` character:
/// an ASCII letter, an ASCII digit, or one of `-`, `.`, `_`, `~`.
#[inline]
fn is_uri_unreserved(i: impl AsChar + Copy) -> bool {
    let ch = i.as_char();
    // `is_ascii_alphanumeric` covers exactly 'a'..='z', 'A'..='Z' and '0'..='9'.
    ch.is_ascii_alphanumeric() || "-._~".contains(ch)
}
/// Returns `true` when `i` is an RFC 3986 `sub-delims` character, i.e. one of
/// `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;` or `=`.
#[inline]
fn is_uri_sub_delims(i: impl AsChar + Copy) -> bool {
    let ch = i.as_char();
    "!$&'()*+,;=".contains(ch)
}
#[inline] #[inline]
fn is_scheme_char(i: impl AsChar + Copy) -> bool { fn is_scheme_char(i: impl AsChar + Copy) -> bool {
let c = i.as_char(); let c = i.as_char();
@ -1134,10 +1161,10 @@ mod test {
let parser = TokenizerBuilder::new().build(); let parser = TokenizerBuilder::new().build();
let input = let input =
"Good http://user:password@example.com:3142/hello?query=value&extra=two#fragment Bye"; "Good http://user:password@example.com:3142/hello?query=value&extra=two#fragment,split Bye";
let expected: Vec<Identifier> = vec![ let expected: Vec<Identifier> = vec![
Identifier::new_unchecked("Good", Case::None, 0), Identifier::new_unchecked("Good", Case::None, 0),
Identifier::new_unchecked("Bye", Case::None, 80), Identifier::new_unchecked("Bye", Case::None, 86),
]; ];
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect(); let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
assert_eq!(expected, actual); assert_eq!(expected, actual);