diff --git a/Cargo.lock b/Cargo.lock index a3d23cd..a69c2a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1574,6 +1574,7 @@ dependencies = [ "typos", "typos-dict", "typos-vars", + "unic-emoji-char", "unicase", "unicode-segmentation", "unicode-width", @@ -1642,6 +1643,47 @@ dependencies = [ "varcon-core", ] +[[package]] +name = "unic-char-property" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" +dependencies = [ + "unic-char-range", +] + +[[package]] +name = "unic-char-range" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" + +[[package]] +name = "unic-common" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" + +[[package]] +name = "unic-emoji-char" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" +dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-version" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" +dependencies = [ + "unic-common", +] + [[package]] name = "unicase" version = "2.6.0" diff --git a/Cargo.toml b/Cargo.toml index 090e72a..e2e48a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -92,6 +92,7 @@ kstring = { version = "2.0.0", features = ["serde"] } typed-arena = "2.0.1" maplit = "1.0" unicode-width = "0.1.9" +unic-emoji-char = "0.9.0" [dev-dependencies] assert_fs = "1.0" diff --git a/src/bin/typos-cli/report.rs b/src/bin/typos-cli/report.rs index 87e7e75..342d66e 100644 --- a/src/bin/typos-cli/report.rs +++ 
b/src/bin/typos-cli/report.rs @@ -181,7 +181,8 @@ fn print_long_correction(msg: &Typo, palette: Palette) -> Result<(), std::io::Er let line = String::from_utf8_lossy(msg.buffer.as_ref()); let line = line.replace('\t', " "); let start = String::from_utf8_lossy(&msg.buffer[0..msg.byte_offset]); - let column = unicode_segmentation::UnicodeSegmentation::graphemes(start.as_ref(), true).count(); + let column_number = + unicode_segmentation::UnicodeSegmentation::graphemes(start.as_ref(), true).count() + 1; match &msg.corrections { typos::Status::Valid => {} typos::Status::Invalid => { @@ -213,15 +214,15 @@ fn print_long_correction(msg: &Typo, palette: Palette) -> Result<(), std::io::Er " --> {}{}{}", palette.info.paint(context_display(&msg.context)), palette.info.paint(divider), - palette.info.paint(column) + palette.info.paint(column_number) )?; if let Some(Context::File(context)) = &msg.context { let line_num = context.line_num.to_string(); let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect(); - let visible_column = UnicodeWidthStr::width(start.as_ref()); - let visible_len = UnicodeWidthStr::width(msg.typo); + let visible_column = calculate_visible_column_width(start.as_ref()); + let visible_len = calculate_visible_column_width(msg.typo); let hl_indent: String = itertools::repeat_n(" ", visible_column).collect(); let hl: String = itertools::repeat_n("^", visible_len).collect(); @@ -246,6 +247,33 @@ fn print_long_correction(msg: &Typo, palette: Palette) -> Result<(), std::io::Er Ok(()) } +/// Estimates how many terminal columns `str` occupies, summing one width per grapheme cluster. +/// +/// A tab counts as 1 column, a non-ASCII cluster containing an emoji code point counts as 2, +/// and any other cluster counts as its `UnicodeWidthStr::width`. +fn calculate_visible_column_width(str: &str) -> usize { + let mut result = 0; + let graphemes = unicode_segmentation::UnicodeSegmentation::graphemes(str, true); + for grapheme in graphemes { + result += if grapheme == "\t" { + // TODO: config tab width + 1 + } else if !grapheme.is_ascii() && grapheme.chars().any(unic_emoji_char::is_emoji) { + // ASCII is excluded first: digits, `#` and `*` carry the Unicode `Emoji` property + // but are single-column characters on their own. + // UnicodeWidthStr::width doesn't account for emoji according to their README.
+ // See: https://github.com/unicode-rs/unicode-width#unicode-width + // Also, the actual rendered column width may differ from this calculation, especially for emoji. + // Here we assume the renderer draws each emoji two columns wide. + 2 + } else { + UnicodeWidthStr::width(grapheme) + } + } + + result +} + fn context_display<'c>(context: &'c Option>) -> &'c dyn std::fmt::Display { context .as_ref() diff --git a/tests/cmd/stdin-failure-multiwidth.stdin b/tests/cmd/stdin-failure-multiwidth.stdin index 1af6419..989b316 100644 --- a/tests/cmd/stdin-failure-multiwidth.stdin +++ b/tests/cmd/stdin-failure-multiwidth.stdin @@ -24,11 +24,11 @@ Face with spiral eyes (U+1F635 U+200D U+1F4AB, Recommended Emoji ZWJ Sequence, v Grapheme clusters: 1, codepoints: 3, UnicodeWidthStr::width() == 4 (Read NOTE: https://github.com/unicode-rs/unicode-width) 😵‍💫 Apropriate world - ^^^^^^^^^^ highlight here + ^^^^^^^^^^ highlight here --- Horizontal tab (\t, U+09) Grapheme clusters: 1, codepoints: 1, UnicodeWidthStr::width() == 0 Apropriate world -^^^^^^^^^^ highlight here \ No newline at end of file + ^^^^^^^^^^ highlight here \ No newline at end of file diff --git a/tests/cmd/stdin-failure-multiwidth.stdout b/tests/cmd/stdin-failure-multiwidth.stdout index e6cf44a..64176a2 100644 --- a/tests/cmd/stdin-failure-multiwidth.stdout +++ b/tests/cmd/stdin-failure-multiwidth.stdout @@ -1,30 +1,30 @@ error: `Apropriate` should be `Appropriate` - --> -:5:2 + --> -:5:3 | 5 | 한 Apropriate world | ^^^^^^^^^^ | error: `Apropriate` should be `Appropriate` - --> -:12:2 + --> -:12:3 | 12 | 한 Apropriate world | ^^^^^^^^^^ | error: `Apropriate` should be `Appropriate` - --> -:19:2 + --> -:19:3 | 19 | 👁️‍🗨️ Apropriate world | ^^^^^^^^^^ | error: `Apropriate` should be `Appropriate` - --> -:26:2 + --> -:26:3 | 26 | 😵‍💫 Apropriate world - | ^^^^^^^^^^ + | ^^^^^^^^^^ | error: `Apropriate` should be `Appropriate` - --> -:33:1 + --> -:33:2 | 33 | Apropriate world - | ^^^^^^^^^^ + | ^^^^^^^^^^ |