diff --git a/benches/tokenize.rs b/benches/tokenize.rs
index 1426ac8..9c7399d 100644
--- a/benches/tokenize.rs
+++ b/benches/tokenize.rs
@@ -1,40 +1,93 @@
 mod data;
 
-use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
 
-fn bench_tokenize(c: &mut Criterion) {
-    let mut group = c.benchmark_group("tokenize");
+fn bench_parse_str(c: &mut Criterion) {
+    let mut group = c.benchmark_group("parse_str");
     for (name, sample) in data::DATA {
         let len = sample.len();
-        group.bench_with_input(BenchmarkId::new("ident(bytes)", name), &len, |b, _| {
-            let parser = typos::tokens::Tokenizer::new();
-            b.iter(|| parser.parse_bytes(sample.as_bytes()).last());
-        });
-        group.bench_with_input(BenchmarkId::new("ident(str)", name), &len, |b, _| {
-            let parser = typos::tokens::Tokenizer::new();
+        group.throughput(Throughput::Bytes(len as u64));
+        group.bench_with_input(BenchmarkId::new("unicode", name), &len, |b, _| {
+            let parser = typos::tokens::TokenizerBuilder::new().unicode(true).build();
             b.iter(|| parser.parse_str(sample).last());
         });
+        group.bench_with_input(BenchmarkId::new("ascii", name), &len, |b, _| {
+            let parser = typos::tokens::TokenizerBuilder::new()
+                .unicode(false)
+                .build();
+            b.iter(|| parser.parse_str(sample).last());
+        });
+    }
+    group.finish();
+}
+
+fn bench_parse_bytes(c: &mut Criterion) {
+    let mut group = c.benchmark_group("parse_bytes");
+    for (name, sample) in data::DATA {
+        let len = sample.len();
+        group.throughput(Throughput::Bytes(len as u64));
+        group.bench_with_input(BenchmarkId::new("unicode", name), &len, |b, _| {
+            let parser = typos::tokens::TokenizerBuilder::new().unicode(true).build();
+            b.iter(|| parser.parse_bytes(sample.as_bytes()).last());
+        });
+        group.bench_with_input(BenchmarkId::new("ascii", name), &len, |b, _| {
+            let parser = typos::tokens::TokenizerBuilder::new()
+                .unicode(false)
+                .build();
+            b.iter(|| parser.parse_bytes(sample.as_bytes()).last());
+        });
+    }
+    group.finish();
+}
+
+fn bench_split(c: &mut Criterion) {
+    let mut group = c.benchmark_group("split");
+    for (name, sample) in data::DATA {
+        let len = sample.len();
+        group.throughput(Throughput::Bytes(len as u64));
         group.bench_with_input(BenchmarkId::new("words", name), &len, |b, _| {
             let symbol =
                 typos::tokens::Identifier::new_unchecked(sample, typos::tokens::Case::None, 0);
             b.iter(|| symbol.split().last());
         });
-        group.bench_with_input(
-            BenchmarkId::new("ident(bytes)+words", name),
-            &len,
-            |b, _| {
-                let parser = typos::tokens::Tokenizer::new();
-                b.iter(|| {
-                    parser
-                        .parse_bytes(sample.as_bytes())
-                        .flat_map(|i| i.split())
-                        .last()
-                });
-            },
-        );
     }
     group.finish();
 }
 
-criterion_group!(benches, bench_tokenize);
+fn bench_parse_split(c: &mut Criterion) {
+    let mut group = c.benchmark_group("parse_bytes+split");
+    for (name, sample) in data::DATA {
+        let len = sample.len();
+        group.throughput(Throughput::Bytes(len as u64));
+        group.bench_with_input(BenchmarkId::new("unicode", name), &len, |b, _| {
+            let parser = typos::tokens::TokenizerBuilder::new().unicode(true).build();
+            b.iter(|| {
+                parser
+                    .parse_bytes(sample.as_bytes())
+                    .flat_map(|i| i.split())
+                    .last()
+            });
+        });
+        group.bench_with_input(BenchmarkId::new("ascii", name), &len, |b, _| {
+            let parser = typos::tokens::TokenizerBuilder::new()
+                .unicode(false)
+                .build();
+            b.iter(|| {
+                parser
+                    .parse_bytes(sample.as_bytes())
+                    .flat_map(|i| i.split())
+                    .last()
+            });
+        });
+    }
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_parse_str,
+    bench_parse_bytes,
+    bench_split,
+    bench_parse_split
+);
 criterion_main!(benches);
diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs
index dfb1939..6bcabd1 100644
--- a/crates/typos/src/tokens.rs
+++ b/crates/typos/src/tokens.rs
@@ -1,6 +1,7 @@
 /// Define rules for tokenizaing a buffer.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct TokenizerBuilder {
+    unicode: bool,
     ignore_hex: bool,
     leading_digits: bool,
 }
@@ -10,6 +11,12 @@ impl TokenizerBuilder {
         Default::default()
     }
 
+    /// Specify that unicode Identifiers are allowed.
+    pub fn unicode(&mut self, yes: bool) -> &mut Self {
+        self.unicode = yes;
+        self
+    }
+
     /// Specify that hexadecimal numbers should be ignored.
     pub fn ignore_hex(&mut self, yes: bool) -> &mut Self {
         self.ignore_hex = yes;
@@ -24,10 +31,12 @@ impl TokenizerBuilder {
 
     pub fn build(&self) -> Tokenizer {
         let TokenizerBuilder {
+            unicode,
             leading_digits,
             ignore_hex,
         } = self.clone();
         Tokenizer {
+            unicode,
             leading_digits,
             ignore_hex,
         }
@@ -37,6 +46,7 @@ impl TokenizerBuilder {
 impl Default for TokenizerBuilder {
     fn default() -> Self {
         Self {
+            unicode: true,
             leading_digits: false,
             ignore_hex: true,
         }
@@ -46,6 +56,7 @@ impl Default for TokenizerBuilder {
 /// Extract Identifiers from a buffer.
 #[derive(Debug, Clone)]
 pub struct Tokenizer {
+    unicode: bool,
     leading_digits: bool,
     ignore_hex: bool,
 }
@@ -56,18 +67,27 @@ impl Tokenizer {
     }
 
     pub fn parse_str<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
-        parser::iter_literals(content).filter_map(move |identifier| {
+        let iter = if self.unicode {
+            itertools::Either::Left(unicode_parser::iter_literals(content))
+        } else {
+            itertools::Either::Right(ascii_parser::iter_literals(content.as_bytes()))
+        };
+        iter.filter_map(move |identifier| {
             let offset = offset(content.as_bytes(), identifier.as_bytes());
             self.transform(identifier, offset)
         })
     }
 
     pub fn parse_bytes<'c>(&'c self, content: &'c [u8]) -> impl Iterator<Item = Identifier<'c>> {
-        Utf8Chunks::new(content).flat_map(move |c| {
-            let chunk_offset = offset(content, c.as_bytes());
-            self.parse_str(c).map(move |i| {
-                Identifier::new_unchecked(i.token(), i.case(), i.offset() + chunk_offset)
-            })
+        let iter = if self.unicode {
+            let iter = Utf8Chunks::new(content).flat_map(move |c| unicode_parser::iter_literals(c));
+            itertools::Either::Left(iter)
+        } else {
+            itertools::Either::Right(ascii_parser::iter_literals(content))
+        };
+        iter.filter_map(move |identifier| {
+            let offset = offset(content, identifier.as_bytes());
+            self.transform(identifier, offset)
         })
     }
 
@@ -176,7 +196,7 @@ fn is_hex_digit(chr: u8) -> bool {
     chr.is_ascii_hexdigit()
 }
 
-mod parser {
+mod unicode_parser {
     use nom::bytes::complete::*;
     use nom::sequence::*;
     use nom::IResult;
@@ -209,6 +229,49 @@ mod parser {
     }
 }
 
+mod ascii_parser {
+    use nom::bytes::complete::*;
+    use nom::sequence::*;
+    use nom::IResult;
+
+    pub(crate) fn iter_literals(mut input: &[u8]) -> impl Iterator<Item = &str> {
+        std::iter::from_fn(move || match next_literal(input) {
+            Ok((i, o)) => {
+                input = i;
+                debug_assert_ne!(o, b"");
+                // This is safe because we've checked that the strings are a subset of ASCII
+                // characters.
+                let o = unsafe { std::str::from_utf8_unchecked(o) };
+                Some(o)
+            }
+            _ => None,
+        })
+    }
+
+    fn next_literal(input: &[u8]) -> IResult<&[u8], &[u8]> {
+        preceded(literal_sep, identifier)(input)
+    }
+
+    fn literal_sep(input: &[u8]) -> IResult<&[u8], &[u8]> {
+        take_till(|c: u8| is_continue(c))(input)
+    }
+
+    fn identifier(input: &[u8]) -> IResult<&[u8], &[u8]> {
+        // Generally a language would be `{XID_Start}{XID_Continue}*` but going with only
+        // `{XID_Continue}+` because XID_Continue is a superset of XID_Start and rather catch odd
+        // or unexpected cases than strip off start characters to a word since we aren't doing a
+        // proper word boundary parse
+        take_while1(|c: u8| is_continue(c))(input)
+    }
+
+    fn is_continue(c: u8) -> bool {
+        (b'a'..=b'z').contains(&c)
+            || (b'A'..=b'Z').contains(&c)
+            || (b'0'..=b'9').contains(&c)
+            || c == b'_'
+    }
+}
+
 /// A term composed of Words.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct Identifier<'t> {
diff --git a/docs/reference.md b/docs/reference.md
index ccfa36a..cfede19 100644
--- a/docs/reference.md
+++ b/docs/reference.md
@@ -23,6 +23,7 @@ Configuration is read from the following (in precedence order)
 | default.binary | --binary | bool | Check binary files as text |
 | default.check-filename | \- | bool | Verifying spelling in file names. |
 | default.check-file | \- | bool | Verifying spelling in files. |
+| default.unicode | \- | bool | Allow unicode characters in identifiers (and not just ASCII) |
 | default.ignore-hex | \- | bool | Do not check identifiers that appear to be hexadecimal values. |
 | default.identifier-leading-digits | \- | bool | Allow identifiers to start with digits, in addition to letters. |
 | default.locale | --locale | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
diff --git a/src/config.rs b/src/config.rs
index b8ca31a..4e3ef1e 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -230,6 +230,8 @@ impl EngineConfig {
 #[serde(deny_unknown_fields, default)]
 #[serde(rename_all = "kebab-case")]
 pub struct TokenizerConfig {
+    /// Allow unicode characters in identifiers (and not just ASCII)
+    pub unicode: Option<bool>,
     /// Do not check identifiers that appear to be hexadecimal values.
     pub ignore_hex: Option<bool>,
     /// Allow identifiers to start with digits, in addition to letters.
@@ -240,12 +242,16 @@ impl TokenizerConfig {
     pub fn from_defaults() -> Self {
         let empty = Self::default();
         Self {
+            unicode: Some(empty.unicode()),
             ignore_hex: Some(empty.ignore_hex()),
             identifier_leading_digits: Some(empty.identifier_leading_digits()),
         }
     }
 
     pub fn update(&mut self, source: &TokenizerConfig) {
+        if let Some(source) = source.unicode {
+            self.unicode = Some(source);
+        }
         if let Some(source) = source.ignore_hex {
             self.ignore_hex = Some(source);
         }
@@ -254,6 +260,10 @@ impl TokenizerConfig {
         }
     }
 
+    pub fn unicode(&self) -> bool {
+        self.unicode.unwrap_or(true)
+    }
+
     pub fn ignore_hex(&self) -> bool {
         self.ignore_hex.unwrap_or(true)
     }
diff --git a/src/policy.rs b/src/policy.rs
index 725a8eb..020eeb0 100644
--- a/src/policy.rs
+++ b/src/policy.rs
@@ -217,6 +217,7 @@ impl<'s> ConfigEngine<'s> {
         let dict_config = dict.unwrap_or_else(crate::config::DictConfig::from_defaults);
 
         let tokenizer = typos::tokens::TokenizerBuilder::new()
+            .unicode(tokenizer_config.unicode())
             .ignore_hex(tokenizer_config.ignore_hex())
             .leading_digits(tokenizer_config.identifier_leading_digits())
            .build();
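
The core of the patch is the new `unicode` toggle on `TokenizerBuilder`, which defaults to `true` so existing behavior is unchanged. A minimal usage sketch, assuming only the API visible in this diff (the input literal is made up, not a repository fixture):

    let parser = typos::tokens::TokenizerBuilder::new()
        .unicode(false) // opt out of unicode: takes the new ascii_parser path
        .build();
    // With unicode off, parse_bytes skips the Utf8Chunks walk entirely and
    // non-ASCII bytes simply act as identifier separators.
    for ident in parser.parse_bytes(b"se_quence gr\xc3\xa9y 0xDEADBEEF") {
        println!("{} @ {}", ident.token(), ident.offset());
    }

The benchmarks are reorganized to measure exactly this trade-off: each group now calls `group.throughput(Throughput::Bytes(len as u64))`, so Criterion reports bytes/second alongside time per iteration, and the `unicode` and `ascii` variants can be compared directly as parser bandwidth.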
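
In `parse_str` and `parse_bytes`, the two branches of `if self.unicode` produce iterators of different anonymous types, so neither can be returned as-is from a function with a single `impl Iterator` return type. `itertools::Either` (a re-export of the `either` crate) bridges that: it implements `Iterator` whenever both of its variants do. A self-contained sketch of the same trick, with made-up names:

    use itertools::Either;

    fn letters(s: &str, ascii_only: bool) -> impl Iterator<Item = char> + '_ {
        // Each `filter` closure has its own type; wrapping the branches in
        // `Either::Left`/`Either::Right` gives them one concrete type.
        if ascii_only {
            Either::Left(s.chars().filter(|c| c.is_ascii_alphabetic()))
        } else {
            Either::Right(s.chars().filter(|c| c.is_alphabetic()))
        }
    }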
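
The one `unsafe` block introduced here, in `ascii_parser::iter_literals`, is sound only because `is_continue` admits nothing outside `[A-Za-z0-9_]`: every matched slice is pure ASCII, and ASCII is a strict subset of UTF-8. A defensive variant (an illustration, not what the patch ships) would keep that invariant checked in debug builds:

    fn ascii_to_str(bytes: &[u8]) -> &str {
        debug_assert!(bytes.is_ascii(), "is_continue invariant violated");
        // SAFETY: all-ASCII byte sequences are valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }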
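
Both parse paths now compute positions through the pre-existing `offset` helper rather than re-basing per UTF-8 chunk as the old `parse_bytes` did. That helper is not shown in the diff, but given how it is called it is presumably the usual pointer subtraction over a sub-slice, along these lines (a guess at unshown code, not part of the patch):

    fn offset(base: &[u8], needle: &[u8]) -> usize {
        // `needle` must be a sub-slice of `base`; the distance between the
        // start pointers is then the identifier's byte offset in the buffer.
        let start = base.as_ptr() as usize;
        let pos = needle.as_ptr() as usize;
        debug_assert!(start <= pos && pos + needle.len() <= start + base.len());
        pos - start
    }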
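
On the configuration side, `default.unicode` follows the same `Option<bool>`-plus-accessor pattern as `ignore_hex`: unset means `true`, and an explicit value wins when layered configs are merged through `update`. A sketch of that merge behavior, using only the items from `src/config.rs` above (in the TOML config file this would be `unicode = false` under the `default` table):

    let mut merged = TokenizerConfig::from_defaults(); // unicode: Some(true)
    let user = TokenizerConfig {
        unicode: Some(false),
        ..Default::default()
    };
    merged.update(&user);
    assert!(!merged.unicode()); // the explicit `false` overrides the default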