mirror of
https://github.com/crate-ci/typos.git
synced 2025-01-10 08:44:47 -05:00
chore(typos): Add parse tracing
This commit is contained in:
parent
a1ad167632
commit
e98fc52b0d
1 changed files with 152 additions and 117 deletions
|
@ -133,6 +133,7 @@ mod parser {
|
|||
use winnow::stream::Stream;
|
||||
use winnow::stream::StreamIsPartial;
|
||||
use winnow::token::*;
|
||||
use winnow::trace::trace;
|
||||
|
||||
pub(crate) fn next_identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
where
|
||||
|
@ -153,7 +154,7 @@ mod parser {
|
|||
// `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
|
||||
// or unexpected cases than strip off start characters to a word since we aren't doing a
|
||||
// proper word boundary parse
|
||||
take_while(1.., is_xid_continue).parse_next(input)
|
||||
trace("identifier", take_while(1.., is_xid_continue)).parse_next(input)
|
||||
}
|
||||
|
||||
fn ignore<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
|
@ -162,23 +163,26 @@ mod parser {
|
|||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
take_many0(alt((
|
||||
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
|
||||
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
|
||||
// - Make sure you always consume it
|
||||
terminated(uuid_literal, peek(sep1)),
|
||||
terminated(hash_literal, peek(sep1)),
|
||||
terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
|
||||
terminated(ordinal_literal, peek(sep1)),
|
||||
terminated(hex_literal, peek(sep1)),
|
||||
terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
|
||||
terminated(email_literal, peek(sep1)),
|
||||
terminated(url_literal, peek(sep1)),
|
||||
terminated(css_color, peek(sep1)),
|
||||
c_escape,
|
||||
printf,
|
||||
other,
|
||||
)))
|
||||
trace(
|
||||
"ignore",
|
||||
take_many0(alt((
|
||||
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
|
||||
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
|
||||
// - Make sure you always consume it
|
||||
terminated(uuid_literal, peek(sep1)),
|
||||
terminated(hash_literal, peek(sep1)),
|
||||
terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
|
||||
terminated(ordinal_literal, peek(sep1)),
|
||||
terminated(hex_literal, peek(sep1)),
|
||||
terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
|
||||
terminated(email_literal, peek(sep1)),
|
||||
terminated(url_literal, peek(sep1)),
|
||||
terminated(css_color, peek(sep1)),
|
||||
c_escape,
|
||||
printf,
|
||||
other,
|
||||
))),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
|
@ -201,12 +205,15 @@ mod parser {
|
|||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
(
|
||||
one_of(|c| !is_xid_continue(c)),
|
||||
take_while(0.., is_ignore_char),
|
||||
trace(
|
||||
"other",
|
||||
(
|
||||
one_of(|c| !is_xid_continue(c)),
|
||||
take_while(0.., is_ignore_char),
|
||||
)
|
||||
.recognize(),
|
||||
)
|
||||
.recognize()
|
||||
.parse_next(input)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn ordinal_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
|
@ -221,14 +228,17 @@ mod parser {
|
|||
['_'].contains(&c)
|
||||
}
|
||||
|
||||
(
|
||||
take_while(0.., is_sep),
|
||||
take_while(1.., is_dec_digit),
|
||||
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
|
||||
take_while(0.., is_sep),
|
||||
trace(
|
||||
"ordinal_literal",
|
||||
(
|
||||
take_while(0.., is_sep),
|
||||
take_while(1.., is_dec_digit),
|
||||
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
|
||||
take_while(0.., is_sep),
|
||||
)
|
||||
.recognize(),
|
||||
)
|
||||
.recognize()
|
||||
.parse_next(input)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn dec_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
|
@ -237,7 +247,7 @@ mod parser {
|
|||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
take_while(1.., is_dec_digit_with_sep).parse_next(input)
|
||||
trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input)
|
||||
}
|
||||
|
||||
fn hex_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
|
@ -259,12 +269,15 @@ mod parser {
|
|||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
preceded(
|
||||
'#',
|
||||
alt((
|
||||
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
|
||||
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
|
||||
)),
|
||||
trace(
|
||||
"color",
|
||||
preceded(
|
||||
'#',
|
||||
alt((
|
||||
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
|
||||
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
|
||||
)),
|
||||
),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
@ -275,31 +288,34 @@ mod parser {
|
|||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
alt((
|
||||
(
|
||||
take_while(8, is_lower_hex_digit),
|
||||
'-',
|
||||
take_while(4, is_lower_hex_digit),
|
||||
'-',
|
||||
take_while(4, is_lower_hex_digit),
|
||||
'-',
|
||||
take_while(4, is_lower_hex_digit),
|
||||
'-',
|
||||
take_while(12, is_lower_hex_digit),
|
||||
),
|
||||
(
|
||||
take_while(8, is_upper_hex_digit),
|
||||
'-',
|
||||
take_while(4, is_upper_hex_digit),
|
||||
'-',
|
||||
take_while(4, is_upper_hex_digit),
|
||||
'-',
|
||||
take_while(4, is_upper_hex_digit),
|
||||
'-',
|
||||
take_while(12, is_upper_hex_digit),
|
||||
),
|
||||
))
|
||||
.recognize()
|
||||
trace(
|
||||
"uuid",
|
||||
alt((
|
||||
(
|
||||
take_while(8, is_lower_hex_digit),
|
||||
'-',
|
||||
take_while(4, is_lower_hex_digit),
|
||||
'-',
|
||||
take_while(4, is_lower_hex_digit),
|
||||
'-',
|
||||
take_while(4, is_lower_hex_digit),
|
||||
'-',
|
||||
take_while(12, is_lower_hex_digit),
|
||||
),
|
||||
(
|
||||
take_while(8, is_upper_hex_digit),
|
||||
'-',
|
||||
take_while(4, is_upper_hex_digit),
|
||||
'-',
|
||||
take_while(4, is_upper_hex_digit),
|
||||
'-',
|
||||
take_while(4, is_upper_hex_digit),
|
||||
'-',
|
||||
take_while(12, is_upper_hex_digit),
|
||||
),
|
||||
))
|
||||
.recognize(),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
|
@ -319,10 +335,13 @@ mod parser {
|
|||
// or more.
|
||||
|
||||
const IGNORE_HEX_MIN: usize = 32;
|
||||
alt((
|
||||
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
|
||||
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
|
||||
))
|
||||
trace(
|
||||
"hash",
|
||||
alt((
|
||||
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
|
||||
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
|
||||
)),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
|
@ -332,32 +351,35 @@ mod parser {
|
|||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?;
|
||||
trace("base64", move |input: T| {
|
||||
let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?;
|
||||
|
||||
const CHUNK: usize = 4;
|
||||
let padding_offset = input.offset_to(&padding);
|
||||
let mut padding_len = CHUNK - padding_offset % CHUNK;
|
||||
if padding_len == CHUNK {
|
||||
padding_len = 0;
|
||||
}
|
||||
const CHUNK: usize = 4;
|
||||
let padding_offset = input.offset_to(&padding);
|
||||
let mut padding_len = CHUNK - padding_offset % CHUNK;
|
||||
if padding_len == CHUNK {
|
||||
padding_len = 0;
|
||||
}
|
||||
|
||||
if captured.slice_len() < 90
|
||||
&& padding_len == 0
|
||||
&& captured
|
||||
.as_bstr()
|
||||
.iter()
|
||||
.all(|c| !['/', '+'].contains(&c.as_char()))
|
||||
{
|
||||
return Err(winnow::error::ErrMode::Backtrack(
|
||||
winnow::error::Error::new(input, winnow::error::ErrorKind::Slice),
|
||||
));
|
||||
}
|
||||
if captured.slice_len() < 90
|
||||
&& padding_len == 0
|
||||
&& captured
|
||||
.as_bstr()
|
||||
.iter()
|
||||
.all(|c| !['/', '+'].contains(&c.as_char()))
|
||||
{
|
||||
return Err(winnow::error::ErrMode::Backtrack(
|
||||
winnow::error::Error::new(input, winnow::error::ErrorKind::Slice),
|
||||
));
|
||||
}
|
||||
|
||||
let (after, _) =
|
||||
take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
|
||||
let (after, _) =
|
||||
take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
|
||||
|
||||
let after_offset = input.offset_to(&after);
|
||||
Ok(input.next_slice(after_offset))
|
||||
let after_offset = input.offset_to(&after);
|
||||
Ok(input.next_slice(after_offset))
|
||||
})
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn email_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
|
@ -366,13 +388,16 @@ mod parser {
|
|||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
(
|
||||
take_while(1.., is_localport_char),
|
||||
'@',
|
||||
take_while(1.., is_domain_char),
|
||||
trace(
|
||||
"email",
|
||||
(
|
||||
take_while(1.., is_localport_char),
|
||||
'@',
|
||||
take_while(1.., is_domain_char),
|
||||
)
|
||||
.recognize(),
|
||||
)
|
||||
.recognize()
|
||||
.parse_next(input)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn url_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
|
@ -381,24 +406,27 @@ mod parser {
|
|||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
(
|
||||
opt(terminated(
|
||||
take_while(1.., is_scheme_char),
|
||||
// HACK: Technically you can skip `//` if you don't have a domain but that would
|
||||
// get messy to support.
|
||||
(':', '/', '/'),
|
||||
)),
|
||||
trace(
|
||||
"url",
|
||||
(
|
||||
opt(terminated(url_userinfo, '@')),
|
||||
take_while(1.., is_domain_char),
|
||||
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
|
||||
),
|
||||
'/',
|
||||
// HACK: Too lazy to enumerate
|
||||
take_while(0.., is_path_query_fragment),
|
||||
opt(terminated(
|
||||
take_while(1.., is_scheme_char),
|
||||
// HACK: Technically you can skip `//` if you don't have a domain but that would
|
||||
// get messy to support.
|
||||
(':', '/', '/'),
|
||||
)),
|
||||
(
|
||||
opt(terminated(url_userinfo, '@')),
|
||||
take_while(1.., is_domain_char),
|
||||
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
|
||||
),
|
||||
'/',
|
||||
// HACK: Too lazy to enumerate
|
||||
take_while(0.., is_path_query_fragment),
|
||||
)
|
||||
.recognize(),
|
||||
)
|
||||
.recognize()
|
||||
.parse_next(input)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn url_userinfo<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
|
@ -407,12 +435,15 @@ mod parser {
|
|||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
(
|
||||
take_while(1.., is_localport_char),
|
||||
opt(preceded(':', take_while(0.., is_localport_char))),
|
||||
trace(
|
||||
"userinfo",
|
||||
(
|
||||
take_while(1.., is_localport_char),
|
||||
opt(preceded(':', take_while(0.., is_localport_char))),
|
||||
)
|
||||
.recognize(),
|
||||
)
|
||||
.recognize()
|
||||
.parse_next(input)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn c_escape<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
|
@ -425,7 +456,11 @@ mod parser {
|
|||
// regular string that does escaping. The escaped letter might be part of a word, or it
|
||||
// might not be. Rather than guess and be wrong part of the time and correct people's words
|
||||
// incorrectly, we opt for just not evaluating it at all.
|
||||
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)).parse_next(input)
|
||||
trace(
|
||||
"escape",
|
||||
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn printf<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||
|
@ -434,7 +469,7 @@ mod parser {
|
|||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||
<T as Stream>::Token: AsChar + Copy,
|
||||
{
|
||||
preceded('%', take_while(1.., is_xid_continue)).parse_next(input)
|
||||
trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input)
|
||||
}
|
||||
|
||||
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, <I as Stream>::Slice, E>
|
||||
|
|
Loading…
Reference in a new issue