mirror of
https://github.com/crate-ci/typos.git
synced 2025-01-10 08:44:47 -05:00
chore(typos): Add parse tracing
This commit is contained in:
parent
a1ad167632
commit
e98fc52b0d
1 changed file with 152 additions and 117 deletions
|
@ -133,6 +133,7 @@ mod parser {
|
||||||
use winnow::stream::Stream;
|
use winnow::stream::Stream;
|
||||||
use winnow::stream::StreamIsPartial;
|
use winnow::stream::StreamIsPartial;
|
||||||
use winnow::token::*;
|
use winnow::token::*;
|
||||||
|
use winnow::trace::trace;
|
||||||
|
|
||||||
pub(crate) fn next_identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
pub(crate) fn next_identifier<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||||
where
|
where
|
||||||
|
@ -153,7 +154,7 @@ mod parser {
|
||||||
// `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
|
// `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
|
||||||
// or unexpected cases than strip off start characters to a word since we aren't doing a
|
// or unexpected cases than strip off start characters to a word since we aren't doing a
|
||||||
// proper word boundary parse
|
// proper word boundary parse
|
||||||
take_while(1.., is_xid_continue).parse_next(input)
|
trace("identifier", take_while(1.., is_xid_continue)).parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ignore<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn ignore<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||||
|
@ -162,23 +163,26 @@ mod parser {
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
take_many0(alt((
|
trace(
|
||||||
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
|
"ignore",
|
||||||
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
|
take_many0(alt((
|
||||||
// - Make sure you always consume it
|
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
|
||||||
terminated(uuid_literal, peek(sep1)),
|
// - Update `is_ignore_char` to make sure `sep1` doesn't eat it all up
|
||||||
terminated(hash_literal, peek(sep1)),
|
// - Make sure you always consume it
|
||||||
terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
|
terminated(uuid_literal, peek(sep1)),
|
||||||
terminated(ordinal_literal, peek(sep1)),
|
terminated(hash_literal, peek(sep1)),
|
||||||
terminated(hex_literal, peek(sep1)),
|
terminated(base64_literal, peek(sep1)), // base64 should be quoted or something
|
||||||
terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
|
terminated(ordinal_literal, peek(sep1)),
|
||||||
terminated(email_literal, peek(sep1)),
|
terminated(hex_literal, peek(sep1)),
|
||||||
terminated(url_literal, peek(sep1)),
|
terminated(dec_literal, peek(sep1)), // Allow digit-prefixed words
|
||||||
terminated(css_color, peek(sep1)),
|
terminated(email_literal, peek(sep1)),
|
||||||
c_escape,
|
terminated(url_literal, peek(sep1)),
|
||||||
printf,
|
terminated(css_color, peek(sep1)),
|
||||||
other,
|
c_escape,
|
||||||
)))
|
printf,
|
||||||
|
other,
|
||||||
|
))),
|
||||||
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -201,12 +205,15 @@ mod parser {
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
(
|
trace(
|
||||||
one_of(|c| !is_xid_continue(c)),
|
"other",
|
||||||
take_while(0.., is_ignore_char),
|
(
|
||||||
|
one_of(|c| !is_xid_continue(c)),
|
||||||
|
take_while(0.., is_ignore_char),
|
||||||
|
)
|
||||||
|
.recognize(),
|
||||||
)
|
)
|
||||||
.recognize()
|
.parse_next(input)
|
||||||
.parse_next(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ordinal_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn ordinal_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||||
|
@ -221,14 +228,17 @@ mod parser {
|
||||||
['_'].contains(&c)
|
['_'].contains(&c)
|
||||||
}
|
}
|
||||||
|
|
||||||
(
|
trace(
|
||||||
take_while(0.., is_sep),
|
"ordinal_literal",
|
||||||
take_while(1.., is_dec_digit),
|
(
|
||||||
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
|
take_while(0.., is_sep),
|
||||||
take_while(0.., is_sep),
|
take_while(1.., is_dec_digit),
|
||||||
|
alt((('s', 't'), ('n', 'd'), ('r', 'd'), ('t', 'h'))),
|
||||||
|
take_while(0.., is_sep),
|
||||||
|
)
|
||||||
|
.recognize(),
|
||||||
)
|
)
|
||||||
.recognize()
|
.parse_next(input)
|
||||||
.parse_next(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn dec_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn dec_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||||
|
@ -237,7 +247,7 @@ mod parser {
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
take_while(1.., is_dec_digit_with_sep).parse_next(input)
|
trace("dec_literal", take_while(1.., is_dec_digit_with_sep)).parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn hex_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn hex_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||||
|
@ -259,12 +269,15 @@ mod parser {
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
preceded(
|
trace(
|
||||||
'#',
|
"color",
|
||||||
alt((
|
preceded(
|
||||||
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
|
'#',
|
||||||
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
|
alt((
|
||||||
)),
|
terminated(take_while(3..=8, is_lower_hex_digit), peek(sep1)),
|
||||||
|
terminated(take_while(3..=8, is_upper_hex_digit), peek(sep1)),
|
||||||
|
)),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
@ -275,31 +288,34 @@ mod parser {
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
alt((
|
trace(
|
||||||
(
|
"uuid",
|
||||||
take_while(8, is_lower_hex_digit),
|
alt((
|
||||||
'-',
|
(
|
||||||
take_while(4, is_lower_hex_digit),
|
take_while(8, is_lower_hex_digit),
|
||||||
'-',
|
'-',
|
||||||
take_while(4, is_lower_hex_digit),
|
take_while(4, is_lower_hex_digit),
|
||||||
'-',
|
'-',
|
||||||
take_while(4, is_lower_hex_digit),
|
take_while(4, is_lower_hex_digit),
|
||||||
'-',
|
'-',
|
||||||
take_while(12, is_lower_hex_digit),
|
take_while(4, is_lower_hex_digit),
|
||||||
),
|
'-',
|
||||||
(
|
take_while(12, is_lower_hex_digit),
|
||||||
take_while(8, is_upper_hex_digit),
|
),
|
||||||
'-',
|
(
|
||||||
take_while(4, is_upper_hex_digit),
|
take_while(8, is_upper_hex_digit),
|
||||||
'-',
|
'-',
|
||||||
take_while(4, is_upper_hex_digit),
|
take_while(4, is_upper_hex_digit),
|
||||||
'-',
|
'-',
|
||||||
take_while(4, is_upper_hex_digit),
|
take_while(4, is_upper_hex_digit),
|
||||||
'-',
|
'-',
|
||||||
take_while(12, is_upper_hex_digit),
|
take_while(4, is_upper_hex_digit),
|
||||||
),
|
'-',
|
||||||
))
|
take_while(12, is_upper_hex_digit),
|
||||||
.recognize()
|
),
|
||||||
|
))
|
||||||
|
.recognize(),
|
||||||
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -319,10 +335,13 @@ mod parser {
|
||||||
// or more.
|
// or more.
|
||||||
|
|
||||||
const IGNORE_HEX_MIN: usize = 32;
|
const IGNORE_HEX_MIN: usize = 32;
|
||||||
alt((
|
trace(
|
||||||
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
|
"hash",
|
||||||
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
|
alt((
|
||||||
))
|
take_while(IGNORE_HEX_MIN.., is_lower_hex_digit),
|
||||||
|
take_while(IGNORE_HEX_MIN.., is_upper_hex_digit),
|
||||||
|
)),
|
||||||
|
)
|
||||||
.parse_next(input)
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -332,32 +351,35 @@ mod parser {
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?;
|
trace("base64", move |input: T| {
|
||||||
|
let (padding, captured) = take_while(1.., is_base64_digit).parse_next(input.clone())?;
|
||||||
|
|
||||||
const CHUNK: usize = 4;
|
const CHUNK: usize = 4;
|
||||||
let padding_offset = input.offset_to(&padding);
|
let padding_offset = input.offset_to(&padding);
|
||||||
let mut padding_len = CHUNK - padding_offset % CHUNK;
|
let mut padding_len = CHUNK - padding_offset % CHUNK;
|
||||||
if padding_len == CHUNK {
|
if padding_len == CHUNK {
|
||||||
padding_len = 0;
|
padding_len = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if captured.slice_len() < 90
|
if captured.slice_len() < 90
|
||||||
&& padding_len == 0
|
&& padding_len == 0
|
||||||
&& captured
|
&& captured
|
||||||
.as_bstr()
|
.as_bstr()
|
||||||
.iter()
|
.iter()
|
||||||
.all(|c| !['/', '+'].contains(&c.as_char()))
|
.all(|c| !['/', '+'].contains(&c.as_char()))
|
||||||
{
|
{
|
||||||
return Err(winnow::error::ErrMode::Backtrack(
|
return Err(winnow::error::ErrMode::Backtrack(
|
||||||
winnow::error::Error::new(input, winnow::error::ErrorKind::Slice),
|
winnow::error::Error::new(input, winnow::error::ErrorKind::Slice),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
let (after, _) =
|
let (after, _) =
|
||||||
take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
|
take_while(padding_len..=padding_len, is_base64_padding).parse_next(padding)?;
|
||||||
|
|
||||||
let after_offset = input.offset_to(&after);
|
let after_offset = input.offset_to(&after);
|
||||||
Ok(input.next_slice(after_offset))
|
Ok(input.next_slice(after_offset))
|
||||||
|
})
|
||||||
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn email_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn email_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||||
|
@ -366,13 +388,16 @@ mod parser {
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
(
|
trace(
|
||||||
take_while(1.., is_localport_char),
|
"email",
|
||||||
'@',
|
(
|
||||||
take_while(1.., is_domain_char),
|
take_while(1.., is_localport_char),
|
||||||
|
'@',
|
||||||
|
take_while(1.., is_domain_char),
|
||||||
|
)
|
||||||
|
.recognize(),
|
||||||
)
|
)
|
||||||
.recognize()
|
.parse_next(input)
|
||||||
.parse_next(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn url_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn url_literal<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||||
|
@ -381,24 +406,27 @@ mod parser {
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
(
|
trace(
|
||||||
opt(terminated(
|
"url",
|
||||||
take_while(1.., is_scheme_char),
|
|
||||||
// HACK: Technically you can skip `//` if you don't have a domain but that would
|
|
||||||
// get messy to support.
|
|
||||||
(':', '/', '/'),
|
|
||||||
)),
|
|
||||||
(
|
(
|
||||||
opt(terminated(url_userinfo, '@')),
|
opt(terminated(
|
||||||
take_while(1.., is_domain_char),
|
take_while(1.., is_scheme_char),
|
||||||
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
|
// HACK: Technically you can skip `//` if you don't have a domain but that would
|
||||||
),
|
// get messy to support.
|
||||||
'/',
|
(':', '/', '/'),
|
||||||
// HACK: Too lazy to enumerate
|
)),
|
||||||
take_while(0.., is_path_query_fragment),
|
(
|
||||||
|
opt(terminated(url_userinfo, '@')),
|
||||||
|
take_while(1.., is_domain_char),
|
||||||
|
opt(preceded(':', take_while(1.., AsChar::is_dec_digit))),
|
||||||
|
),
|
||||||
|
'/',
|
||||||
|
// HACK: Too lazy to enumerate
|
||||||
|
take_while(0.., is_path_query_fragment),
|
||||||
|
)
|
||||||
|
.recognize(),
|
||||||
)
|
)
|
||||||
.recognize()
|
.parse_next(input)
|
||||||
.parse_next(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn url_userinfo<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn url_userinfo<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||||
|
@ -407,12 +435,15 @@ mod parser {
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
(
|
trace(
|
||||||
take_while(1.., is_localport_char),
|
"userinfo",
|
||||||
opt(preceded(':', take_while(0.., is_localport_char))),
|
(
|
||||||
|
take_while(1.., is_localport_char),
|
||||||
|
opt(preceded(':', take_while(0.., is_localport_char))),
|
||||||
|
)
|
||||||
|
.recognize(),
|
||||||
)
|
)
|
||||||
.recognize()
|
.parse_next(input)
|
||||||
.parse_next(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn c_escape<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn c_escape<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||||
|
@ -425,7 +456,11 @@ mod parser {
|
||||||
// regular string that does escaping. The escaped letter might be part of a word, or it
|
// regular string that does escaping. The escaped letter might be part of a word, or it
|
||||||
// might not be. Rather than guess and be wrong part of the time and correct people's words
|
// might not be. Rather than guess and be wrong part of the time and correct people's words
|
||||||
// incorrectly, we opt for just not evaluating it at all.
|
// incorrectly, we opt for just not evaluating it at all.
|
||||||
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)).parse_next(input)
|
trace(
|
||||||
|
"escape",
|
||||||
|
preceded(take_while(1.., is_escape), take_while(0.., is_xid_continue)),
|
||||||
|
)
|
||||||
|
.parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn printf<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
fn printf<T>(input: T) -> IResult<T, <T as Stream>::Slice>
|
||||||
|
@ -434,7 +469,7 @@ mod parser {
|
||||||
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
<T as Stream>::Slice: AsBStr + SliceLen + Default,
|
||||||
<T as Stream>::Token: AsChar + Copy,
|
<T as Stream>::Token: AsChar + Copy,
|
||||||
{
|
{
|
||||||
preceded('%', take_while(1.., is_xid_continue)).parse_next(input)
|
trace("printf", preceded('%', take_while(1.., is_xid_continue))).parse_next(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, <I as Stream>::Slice, E>
|
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, <I as Stream>::Slice, E>
|
||||||
|
|
Loading…
Reference in a new issue