mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-22 09:01:04 -05:00
Merge pull request #319 from epage/escape
fix(parser): Handle c-escape/printf
This commit is contained in:
commit
c329225e38
2 changed files with 80 additions and 1 deletions
|
@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
|
||||||
<!-- next-header -->
|
<!-- next-header -->
|
||||||
## [Unreleased] - ReleaseDate
|
## [Unreleased] - ReleaseDate
|
||||||
|
|
||||||
|
#### Bug Fixes
|
||||||
|
|
||||||
|
- Reduce false positives by ignoring words following possible c-escape sequences or printf patterns.
|
||||||
|
|
||||||
## [1.1.2] - 2021-07-30
|
## [1.1.2] - 2021-07-30
|
||||||
|
|
||||||
#### Bug Fixes
|
#### Bug Fixes
|
||||||
|
|
|
@ -175,6 +175,8 @@ mod parser {
|
||||||
<T as nom::InputIter>::Item: AsChar + Copy,
|
<T as nom::InputIter>::Item: AsChar + Copy,
|
||||||
{
|
{
|
||||||
take_many0(alt((
|
take_many0(alt((
|
||||||
|
// CAUTION: If adding an ignorable literal, if it doesn't start with `is_xid_continue`,
|
||||||
|
// then you need to update `is_ignore_char` to make sure `sep1` doesn't eat it all up.
|
||||||
terminated(uuid_literal, sep1),
|
terminated(uuid_literal, sep1),
|
||||||
terminated(hash_literal, sep1),
|
terminated(hash_literal, sep1),
|
||||||
terminated(hex_literal, sep1),
|
terminated(hex_literal, sep1),
|
||||||
|
@ -182,6 +184,8 @@ mod parser {
|
||||||
terminated(base64_literal, sep1),
|
terminated(base64_literal, sep1),
|
||||||
terminated(email_literal, sep1),
|
terminated(email_literal, sep1),
|
||||||
terminated(url_literal, sep1),
|
terminated(url_literal, sep1),
|
||||||
|
terminated(c_escape, sep1),
|
||||||
|
terminated(printf, sep1),
|
||||||
sep1,
|
sep1,
|
||||||
)))(input)
|
)))(input)
|
||||||
}
|
}
|
||||||
|
@ -191,7 +195,7 @@ mod parser {
|
||||||
T: nom::InputTakeAtPosition,
|
T: nom::InputTakeAtPosition,
|
||||||
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
|
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
|
||||||
{
|
{
|
||||||
take_till1(is_xid_continue)(input)
|
take_while1(is_ignore_char)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn dec_literal<T>(input: T) -> IResult<T, T>
|
fn dec_literal<T>(input: T) -> IResult<T, T>
|
||||||
|
@ -355,6 +359,40 @@ mod parser {
|
||||||
)))(input)
|
)))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn c_escape<T>(input: T) -> IResult<T, T>
|
||||||
|
where
|
||||||
|
T: nom::InputTakeAtPosition
|
||||||
|
+ nom::InputTake
|
||||||
|
+ nom::InputIter
|
||||||
|
+ nom::InputLength
|
||||||
|
+ nom::Offset
|
||||||
|
+ nom::Slice<std::ops::RangeTo<usize>>
|
||||||
|
+ nom::Slice<std::ops::RangeFrom<usize>>
|
||||||
|
+ std::fmt::Debug
|
||||||
|
+ Clone,
|
||||||
|
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
|
||||||
|
<T as nom::InputIter>::Item: AsChar + Copy,
|
||||||
|
{
|
||||||
|
preceded(char('\\'), take_while1(is_xid_continue))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn printf<T>(input: T) -> IResult<T, T>
|
||||||
|
where
|
||||||
|
T: nom::InputTakeAtPosition
|
||||||
|
+ nom::InputTake
|
||||||
|
+ nom::InputIter
|
||||||
|
+ nom::InputLength
|
||||||
|
+ nom::Offset
|
||||||
|
+ nom::Slice<std::ops::RangeTo<usize>>
|
||||||
|
+ nom::Slice<std::ops::RangeFrom<usize>>
|
||||||
|
+ std::fmt::Debug
|
||||||
|
+ Clone,
|
||||||
|
<T as nom::InputTakeAtPosition>::Item: AsChar + Copy,
|
||||||
|
<T as nom::InputIter>::Item: AsChar + Copy,
|
||||||
|
{
|
||||||
|
preceded(char('%'), take_while1(is_xid_continue))(input)
|
||||||
|
}
|
||||||
|
|
||||||
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, I, E>
|
fn take_many0<I, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, I, E>
|
||||||
where
|
where
|
||||||
I: nom::Offset + nom::InputTake + Clone + PartialEq + std::fmt::Debug,
|
I: nom::Offset + nom::InputTake + Clone + PartialEq + std::fmt::Debug,
|
||||||
|
@ -444,6 +482,13 @@ mod parser {
|
||||||
('a'..='z').contains(&c) || ('0'..='9').contains(&c) || "+.-".find(c).is_some()
|
('a'..='z').contains(&c) || ('0'..='9').contains(&c) || "+.-".find(c).is_some()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_ignore_char(i: impl AsChar + Copy) -> bool {
|
||||||
|
let c = i.as_char();
|
||||||
|
// See c_escape and printf
|
||||||
|
!unicode_xid::UnicodeXID::is_xid_continue(c) && c != '\\' && c != '%'
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn is_xid_continue(i: impl AsChar + Copy) -> bool {
|
fn is_xid_continue(i: impl AsChar + Copy) -> bool {
|
||||||
let c = i.as_char();
|
let c = i.as_char();
|
||||||
|
@ -940,6 +985,36 @@ mod test {
|
||||||
assert_eq!(expected, actual);
|
assert_eq!(expected, actual);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A word that directly follows a backslash is skipped as a possible C escape
/// sequence, so only the surrounding words are reported as identifiers.
#[test]
fn tokenize_c_escape() {
    let input = "Hello \\Hello World";
    let parser = TokenizerBuilder::new().build();

    let expected: Vec<Identifier> = vec![
        Identifier::new_unchecked("Hello", Case::None, 0),
        Identifier::new_unchecked("World", Case::None, 13),
    ];

    // Both entry points must produce the same identifiers.
    let from_bytes: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
    assert_eq!(expected, from_bytes);

    let from_str: Vec<_> = parser.parse_str(input).collect();
    assert_eq!(expected, from_str);
}
|
||||||
|
|
||||||
|
/// A word that directly follows a percent sign is skipped as a possible
/// printf pattern, so only the surrounding words are reported as identifiers.
#[test]
fn tokenize_printf() {
    let input = "Hello %Hello World";
    let parser = TokenizerBuilder::new().build();

    let expected: Vec<Identifier> = vec![
        Identifier::new_unchecked("Hello", Case::None, 0),
        Identifier::new_unchecked("World", Case::None, 13),
    ];

    // Both entry points must produce the same identifiers.
    let from_bytes: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
    assert_eq!(expected, from_bytes);

    let from_str: Vec<_> = parser.parse_str(input).collect();
    assert_eq!(expected, from_str);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn split_ident() {
|
fn split_ident() {
|
||||||
let cases = [
|
let cases = [
|
||||||
|
|
Loading…
Reference in a new issue