mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-24 10:00:59 -05:00
perf(token): Don't allow unbounded backtrackable parsing
In some test data for rinja, they check some parsing corner cases. Unfortunately for us, it also hit a performance corner case. The entire file was a valid email username but without an `@`. This meant that for every byte, we checked that every byte after it was a valid username but then backtracked at the end, repeating this until the whole file was read. Fixes #1088
This commit is contained in:
parent
773e4aaa23
commit
bf98193204
1 changed files with 18 additions and 7 deletions
|
@ -141,6 +141,10 @@ mod parser {
|
||||||
use winnow::stream::StreamIsPartial;
|
use winnow::stream::StreamIsPartial;
|
||||||
use winnow::token::{one_of, take_while};
|
use winnow::token::{one_of, take_while};
|
||||||
|
|
||||||
|
/// Avoid worst-case parse times by limiting how much a `take_while` can take if something
|
||||||
|
/// later may cause it to fail.
|
||||||
|
const NON_TERMINATING_CAP: usize = 1024;
|
||||||
|
|
||||||
pub(crate) fn next_identifier<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
pub(crate) fn next_identifier<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||||
where
|
where
|
||||||
T: Compare<char>,
|
T: Compare<char>,
|
||||||
|
@ -446,7 +450,7 @@ mod parser {
|
||||||
trace(
|
trace(
|
||||||
"email",
|
"email",
|
||||||
(
|
(
|
||||||
take_while(1.., is_localport_char),
|
take_while(1..NON_TERMINATING_CAP, is_localport_char),
|
||||||
'@',
|
'@',
|
||||||
take_while(1.., is_domain_char),
|
take_while(1.., is_domain_char),
|
||||||
)
|
)
|
||||||
|
@ -466,15 +470,18 @@ mod parser {
|
||||||
"url",
|
"url",
|
||||||
(
|
(
|
||||||
opt((
|
opt((
|
||||||
take_while(1.., is_scheme_char),
|
take_while(1..NON_TERMINATING_CAP, is_scheme_char),
|
||||||
// HACK: Technically you can skip `//` if you don't have a domain but that would
|
// HACK: Technically you can skip `//` if you don't have a domain but that would
|
||||||
// get messy to support.
|
// get messy to support.
|
||||||
(':', '/', '/'),
|
(':', '/', '/'),
|
||||||
)),
|
)),
|
||||||
(
|
(
|
||||||
opt((url_userinfo, '@')),
|
opt((url_userinfo, '@')),
|
||||||
take_while(1.., is_domain_char),
|
take_while(1..NON_TERMINATING_CAP, is_domain_char),
|
||||||
opt((':', take_while(1.., AsChar::is_dec_digit))),
|
opt((
|
||||||
|
':',
|
||||||
|
take_while(1..NON_TERMINATING_CAP, AsChar::is_dec_digit),
|
||||||
|
)),
|
||||||
),
|
),
|
||||||
'/',
|
'/',
|
||||||
// HACK: Too lazy to enumerate
|
// HACK: Too lazy to enumerate
|
||||||
|
@ -495,8 +502,8 @@ mod parser {
|
||||||
trace(
|
trace(
|
||||||
"userinfo",
|
"userinfo",
|
||||||
(
|
(
|
||||||
take_while(1.., is_localport_char),
|
take_while(1..NON_TERMINATING_CAP, is_localport_char),
|
||||||
opt((':', take_while(0.., is_localport_char))),
|
opt((':', take_while(0..NON_TERMINATING_CAP, is_localport_char))),
|
||||||
)
|
)
|
||||||
.take(),
|
.take(),
|
||||||
)
|
)
|
||||||
|
@ -515,7 +522,11 @@ mod parser {
|
||||||
// incorrectly, we opt for just not evaluating it at all.
|
// incorrectly, we opt for just not evaluating it at all.
|
||||||
trace(
|
trace(
|
||||||
"escape",
|
"escape",
|
||||||
(take_while(1.., is_escape), take_while(0.., is_xid_continue)).take(),
|
(
|
||||||
|
take_while(1..NON_TERMINATING_CAP, is_escape),
|
||||||
|
take_while(0.., is_xid_continue),
|
||||||
|
)
|
||||||
|
.take(),
|
||||||
)
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue