From bf9819320408daf9685a13bc9f88ff4b510afc9e Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 30 Aug 2024 14:52:13 -0500 Subject: [PATCH] perf(token): Don't allow unbounded backtrackable parsing In some test data for rinja, they check some parsing corner cases. Unfortunately for us, also hit a performance corner case. The entire file was a valid email username but without an `@`. This mean for every byte, we checked that every byte after it was a valid username but then backtracked at the end, repeating this until the whole file was read. Fixes #1088 --- crates/typos/src/tokens.rs | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs index ad901eb..72bfb3a 100644 --- a/crates/typos/src/tokens.rs +++ b/crates/typos/src/tokens.rs @@ -141,6 +141,10 @@ mod parser { use winnow::stream::StreamIsPartial; use winnow::token::{one_of, take_while}; + /// Avoid worst-case parse times by limiting how much a `take_while` can take if something + /// later may cause it to fail. + const NON_TERMINATING_CAP: usize = 1024; + pub(crate) fn next_identifier(input: &mut T) -> PResult<::Slice, ()> where T: Compare, @@ -446,7 +450,7 @@ mod parser { trace( "email", ( - take_while(1.., is_localport_char), + take_while(1..NON_TERMINATING_CAP, is_localport_char), '@', take_while(1.., is_domain_char), ) @@ -466,15 +470,18 @@ mod parser { "url", ( opt(( - take_while(1.., is_scheme_char), + take_while(1..NON_TERMINATING_CAP, is_scheme_char), // HACK: Technically you can skip `//` if you don't have a domain but that would // get messy to support. (':', '/', '/'), )), ( opt((url_userinfo, '@')), - take_while(1.., is_domain_char), - opt((':', take_while(1.., AsChar::is_dec_digit))), + take_while(1..NON_TERMINATING_CAP, is_domain_char), + opt(( + ':', + take_while(1..NON_TERMINATING_CAP, AsChar::is_dec_digit), + )), ), '/', // HACK: Too lazy to enumerate @@ -495,8 +502,8 @@ mod parser { trace( "userinfo", ( - take_while(1.., is_localport_char), - opt((':', take_while(0.., is_localport_char))), + take_while(1..NON_TERMINATING_CAP, is_localport_char), + opt((':', take_while(0..NON_TERMINATING_CAP, is_localport_char))), ) .take(), ) @@ -515,7 +522,11 @@ mod parser { // incorrectly, we opt for just not evaluating it at all. trace( "escape", - (take_while(1.., is_escape), take_while(0.., is_xid_continue)).take(), + ( + take_while(1..NON_TERMINATING_CAP, is_escape), + take_while(0.., is_xid_continue), + ) + .take(), ) .parse_next(input) }