mirror of
https://github.com/crate-ci/typos.git
synced 2024-12-22 23:52:12 -05:00
fix(token): Don't crash on parsing unicode
This commit is contained in:
parent
4c248c85ec
commit
8879269b0d
1 changed file with 19 additions and 1 deletion
|
@ -159,7 +159,12 @@ mod parser {
|
|||
// `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
|
||||
// or unexpected cases than strip off start characters to a word since we aren't doing a
|
||||
// proper word boundary parse
|
||||
trace("identifier", take_while(1.., is_xid_continue)).parse_next(input)
|
||||
trace(
|
||||
"identifier",
|
||||
take_while(1.., is_xid_continue)
|
||||
.verify(|s: &<T as Stream>::Slice| std::str::from_utf8(s.as_bstr()).is_ok()),
|
||||
)
|
||||
.parse_next(input)
|
||||
}
|
||||
|
||||
fn ignore<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
|
||||
|
@ -1310,6 +1315,18 @@ mod test {
|
|||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
// Regression test: with Unicode support disabled on the tokenizer, an input
// containing a non-ASCII character ("é") is expected to yield no identifiers,
// through both the byte-slice and the string entry points.
#[test]
|
||||
fn tokenize_unicode_without_unicode() {
|
||||
let parser = TokenizerBuilder::new().unicode(false).build();
|
||||
|
||||
let input = "appliqués";
|
||||
// Empty on purpose: no identifier is expected for this input.
let expected: Vec<Identifier> = vec![];
|
||||
// Byte-slice entry point.
let actual: Vec<_> = parser.parse_bytes(input.as_bytes()).collect();
|
||||
assert_eq!(expected, actual);
|
||||
// String entry point; same expectation as for bytes.
let actual: Vec<_> = parser.parse_str(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split_ident() {
|
||||
let cases = [
|
||||
|
@ -1365,6 +1382,7 @@ mod test {
|
|||
"BFG9000",
|
||||
&[("BFG", Case::Upper, 0), ("9000", Case::None, 3)],
|
||||
),
|
||||
("appliqués", &[("appliqués", Case::Lower, 0)]),
|
||||
];
|
||||
for (input, expected) in cases.iter() {
|
||||
let ident = Identifier::new_unchecked(input, Case::None, 0);
|
||||
|
|
Loading…
Reference in a new issue