mirror of
https://github.com/crate-ci/typos.git
synced 2025-01-23 23:18:57 -05:00
fix(api): Split lib
This commit is contained in:
parent
d8ca9f9d5a
commit
85ee5cfac9
8 changed files with 170 additions and 162 deletions
|
@ -4,19 +4,19 @@ extern crate test;
|
|||
|
||||
#[bench]
|
||||
fn load_corrections(b: &mut test::Bencher) {
|
||||
b.iter(|| scorrect::Corrections::new());
|
||||
b.iter(|| scorrect::Dictionary::new());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn correction(b: &mut test::Bencher) {
|
||||
let corrections = scorrect::Corrections::new();
|
||||
let corrections = scorrect::Dictionary::new();
|
||||
assert_eq!(corrections.correct_str("successs"), Some("successes"));
|
||||
b.iter(|| corrections.correct_str("successs"));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn no_correction(b: &mut test::Bencher) {
|
||||
let corrections = scorrect::Corrections::new();
|
||||
let corrections = scorrect::Dictionary::new();
|
||||
assert_eq!(corrections.correct_str("success"), None);
|
||||
b.iter(|| corrections.correct_str("success"));
|
||||
}
|
||||
|
|
|
@ -12,8 +12,8 @@ fn process_empty(b: &mut test::Bencher) {
|
|||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::EMPTY).unwrap();
|
||||
|
||||
let corrections = scorrect::Corrections::new();
|
||||
b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
|
||||
let corrections = scorrect::Dictionary::new();
|
||||
b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
@ -24,8 +24,8 @@ fn process_no_tokens(b: &mut test::Bencher) {
|
|||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::NO_TOKENS).unwrap();
|
||||
|
||||
let corrections = scorrect::Corrections::new();
|
||||
b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
|
||||
let corrections = scorrect::Dictionary::new();
|
||||
b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
@ -36,8 +36,8 @@ fn process_single_token(b: &mut test::Bencher) {
|
|||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::SINGLE_TOKEN).unwrap();
|
||||
|
||||
let corrections = scorrect::Corrections::new();
|
||||
b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
|
||||
let corrections = scorrect::Dictionary::new();
|
||||
b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
@ -48,8 +48,8 @@ fn process_sherlock(b: &mut test::Bencher) {
|
|||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::SHERLOCK).unwrap();
|
||||
|
||||
let corrections = scorrect::Corrections::new();
|
||||
b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
|
||||
let corrections = scorrect::Dictionary::new();
|
||||
b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
@ -60,8 +60,8 @@ fn process_code(b: &mut test::Bencher) {
|
|||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::CODE).unwrap();
|
||||
|
||||
let corrections = scorrect::Corrections::new();
|
||||
b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
|
||||
let corrections = scorrect::Dictionary::new();
|
||||
b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
@ -72,8 +72,8 @@ fn process_corpus(b: &mut test::Bencher) {
|
|||
let sample_path = temp.child("sample");
|
||||
sample_path.write_str(data::CORPUS).unwrap();
|
||||
|
||||
let corrections = scorrect::Corrections::new();
|
||||
b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::print_silent));
|
||||
let corrections = scorrect::Dictionary::new();
|
||||
b.iter(|| scorrect::process_file(sample_path.path(), &corrections, scorrect::report::print_silent));
|
||||
|
||||
temp.close().unwrap();
|
||||
}
|
||||
|
|
|
@ -6,30 +6,30 @@ mod data;
|
|||
|
||||
#[bench]
|
||||
fn tokenize_empty(b: &mut test::Bencher) {
|
||||
b.iter(|| scorrect::tokenize(data::EMPTY.as_bytes()).collect::<Vec<_>>());
|
||||
b.iter(|| scorrect::identifier::tokenize(data::EMPTY.as_bytes()).collect::<Vec<_>>());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn tokenize_no_tokens(b: &mut test::Bencher) {
|
||||
b.iter(|| scorrect::tokenize(data::NO_TOKENS.as_bytes()).collect::<Vec<_>>());
|
||||
b.iter(|| scorrect::identifier::tokenize(data::NO_TOKENS.as_bytes()).collect::<Vec<_>>());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn tokenize_single_token(b: &mut test::Bencher) {
|
||||
b.iter(|| scorrect::tokenize(data::SINGLE_TOKEN.as_bytes()).collect::<Vec<_>>());
|
||||
b.iter(|| scorrect::identifier::tokenize(data::SINGLE_TOKEN.as_bytes()).collect::<Vec<_>>());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn tokenize_sherlock(b: &mut test::Bencher) {
|
||||
b.iter(|| scorrect::tokenize(data::SHERLOCK.as_bytes()).collect::<Vec<_>>());
|
||||
b.iter(|| scorrect::identifier::tokenize(data::SHERLOCK.as_bytes()).collect::<Vec<_>>());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn tokenize_code(b: &mut test::Bencher) {
|
||||
b.iter(|| scorrect::tokenize(data::CODE.as_bytes()).collect::<Vec<_>>());
|
||||
b.iter(|| scorrect::identifier::tokenize(data::CODE.as_bytes()).collect::<Vec<_>>());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn tokenize_corpus(b: &mut test::Bencher) {
|
||||
b.iter(|| scorrect::tokenize(data::CORPUS.as_bytes()).collect::<Vec<_>>());
|
||||
b.iter(|| scorrect::identifier::tokenize(data::CORPUS.as_bytes()).collect::<Vec<_>>());
|
||||
}
|
||||
|
|
18
src/dict.rs
Normal file
18
src/dict.rs
Normal file
|
@ -0,0 +1,18 @@
|
|||
include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
|
||||
|
||||
/// Handle used to look up corrections for known misspellings.
///
/// The type has no fields; lookups performed through it read the `DICTIONARY`
/// table brought in by the `include!` of the generated `codegen.rs`.
#[derive(Debug, Clone, Default)]
pub struct Dictionary {}
|
||||
|
||||
impl Dictionary {
|
||||
pub fn new() -> Self {
|
||||
Dictionary { }
|
||||
}
|
||||
|
||||
pub fn correct_str<'s>(&'s self, word: &str) -> Option<&'s str> {
|
||||
DICTIONARY.get(word).map(|s| *s)
|
||||
}
|
||||
|
||||
pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s [u8]> {
|
||||
std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| s.as_bytes())
|
||||
}
|
||||
}
|
74
src/identifier.rs
Normal file
74
src/identifier.rs
Normal file
|
@ -0,0 +1,74 @@
|
|||
/// A slice of input bytes together with the position it was found at.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token<'t> {
    /// Bytes that make up the token, borrowed for `'t`.
    pub token: &'t [u8],
    /// Position associated with the token; `tokenize` stores the byte offset
    /// at which the match starts.
    pub offset: usize,
}

impl<'t> Token<'t> {
    /// Builds a token from its bytes and its offset.
    pub fn new(token: &'t [u8], offset: usize) -> Self {
        Token { token, offset }
    }
}
|
||||
|
||||
pub fn tokenize(content: &[u8]) -> impl Iterator<Item=Token> {
|
||||
lazy_static::lazy_static! {
|
||||
static ref SPLIT: regex::bytes::Regex = regex::bytes::Regex::new(r#"\b(\p{Alphabetic}|\d|_)+\b"#).unwrap();
|
||||
}
|
||||
SPLIT.find_iter(content).map(|m| Token::new(m.as_bytes(), m.start()))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// Tokenizes `input` and compares the collected tokens with `expected`.
    fn assert_tokens(input: &[u8], expected: Vec<Token>) {
        let actual: Vec<_> = tokenize(input).collect();
        assert_eq!(expected, actual);
    }

    #[test]
    fn tokenize_empty_is_empty() {
        assert_tokens(b"", vec![]);
    }

    #[test]
    fn tokenize_word_is_word() {
        assert_tokens(b"word", vec![Token::new(b"word", 0)]);
    }

    #[test]
    fn tokenize_space_separated_words() {
        assert_tokens(b"A B", vec![Token::new(b"A", 0), Token::new(b"B", 2)]);
    }

    #[test]
    fn tokenize_dot_separated_words() {
        assert_tokens(b"A.B", vec![Token::new(b"A", 0), Token::new(b"B", 2)]);
    }

    #[test]
    fn tokenize_namespace_separated_words() {
        // The two-byte `::` separator pushes `B` to offset 3.
        assert_tokens(b"A::B", vec![Token::new(b"A", 0), Token::new(b"B", 3)]);
    }

    #[test]
    fn tokenize_underscore_doesnt_separate() {
        assert_tokens(b"A_B", vec![Token::new(b"A_B", 0)]);
    }
}
|
146
src/lib.rs
146
src/lib.rs
|
@ -1,60 +1,33 @@
|
|||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
|
||||
mod dict;
|
||||
|
||||
pub mod identifier;
|
||||
pub mod report;
|
||||
|
||||
pub use crate::dict::*;
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Token<'t> {
|
||||
pub token: &'t [u8],
|
||||
pub offset: usize,
|
||||
}
|
||||
|
||||
impl<'t> Token<'t> {
|
||||
pub fn new(token: &'t [u8], offset: usize) -> Self {
|
||||
Self {
|
||||
token,
|
||||
offset,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tokenize(content: &[u8]) -> impl Iterator<Item=Token> {
|
||||
lazy_static::lazy_static! {
|
||||
static ref SPLIT: regex::bytes::Regex = regex::bytes::Regex::new(r#"\b(\p{Alphabetic}|\d|_)+\b"#).unwrap();
|
||||
}
|
||||
SPLIT.find_iter(content).map(|m| Token::new(m.as_bytes(), m.start()))
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct Message<'m> {
|
||||
path: &'m std::path::Path,
|
||||
#[serde(skip)]
|
||||
line: &'m [u8],
|
||||
line_num: usize,
|
||||
col_num: usize,
|
||||
word: &'m str,
|
||||
correction: &'m str,
|
||||
}
|
||||
|
||||
pub fn process_file(path: &std::path::Path, dictionary: &Corrections, report: Report) -> Result<(), failure::Error> {
|
||||
pub fn process_file(path: &std::path::Path, dictionary: &Dictionary, report: report::Report) -> Result<(), failure::Error> {
|
||||
let mut buffer = Vec::new();
|
||||
File::open(path)?.read_to_end(&mut buffer)?;
|
||||
for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {
|
||||
let line_num = line_idx + 1;
|
||||
for token in tokenize(line) {
|
||||
for token in identifier::tokenize(line) {
|
||||
if let Some(word) = std::str::from_utf8(token.token).ok() {
|
||||
if let Some(correction) = dictionary.correct_str(word) {
|
||||
let col_num = token.offset;
|
||||
let msg = Message {
|
||||
let msg = report::Message {
|
||||
path,
|
||||
line,
|
||||
line_num,
|
||||
col_num,
|
||||
word,
|
||||
correction,
|
||||
non_exhaustive: (),
|
||||
};
|
||||
report(msg);
|
||||
}
|
||||
|
@ -65,100 +38,3 @@ pub fn process_file(path: &std::path::Path, dictionary: &Corrections, report: Re
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub type Report = fn(msg: Message);
|
||||
|
||||
pub fn print_silent(_: Message) {
|
||||
}
|
||||
|
||||
pub fn print_brief(msg: Message) {
|
||||
println!("{}:{}:{}: {} -> {}", msg.path.display(), msg.line_num, msg.col_num, msg.word, msg.correction);
|
||||
}
|
||||
|
||||
pub fn print_long(msg: Message) {
|
||||
let line_num = msg.line_num.to_string();
|
||||
let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect();
|
||||
|
||||
let hl_indent: String = itertools::repeat_n(" ", msg.col_num).collect();
|
||||
let hl: String = itertools::repeat_n("^", msg.word.len()).collect();
|
||||
|
||||
println!("error: `{}` should be `{}`", msg.word, msg.correction);
|
||||
println!(" --> {}:{}:{}", msg.path.display(), msg.line_num, msg.col_num);
|
||||
println!("{} |", line_indent);
|
||||
println!("{} | {}", msg.line_num, String::from_utf8_lossy(msg.line).trim_end());
|
||||
println!("{} | {}{}", line_indent, hl_indent, hl);
|
||||
println!("{} |", line_indent);
|
||||
}
|
||||
|
||||
pub fn print_json(msg: Message) {
|
||||
println!("{}", serde_json::to_string(&msg).unwrap());
|
||||
}
|
||||
|
||||
pub struct Corrections {
|
||||
}
|
||||
|
||||
impl Corrections {
|
||||
pub fn new() -> Self {
|
||||
Corrections { }
|
||||
}
|
||||
|
||||
pub fn correct_str<'s>(&'s self, word: &str) -> Option<&'s str> {
|
||||
DICTIONARY.get(word).map(|s| *s)
|
||||
}
|
||||
|
||||
pub fn correct_bytes<'s>(&'s self, word: &[u8]) -> Option<&'s [u8]> {
|
||||
std::str::from_utf8(word).ok().and_then(|word| DICTIONARY.get(word)).map(|s| s.as_bytes())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn tokenize_empty_is_empty() {
|
||||
let input = b"";
|
||||
let expected: Vec<Token> = vec![];
|
||||
let actual: Vec<_> = tokenize(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_word_is_word() {
|
||||
let input = b"word";
|
||||
let expected: Vec<Token> = vec![Token::new(b"word", 0)];
|
||||
let actual: Vec<_> = tokenize(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_space_separated_words() {
|
||||
let input = b"A B";
|
||||
let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
|
||||
let actual: Vec<_> = tokenize(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_dot_separated_words() {
|
||||
let input = b"A.B";
|
||||
let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 2)];
|
||||
let actual: Vec<_> = tokenize(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_namespace_separated_words() {
|
||||
let input = b"A::B";
|
||||
let expected: Vec<Token> = vec![Token::new(b"A", 0), Token::new(b"B", 3)];
|
||||
let actual: Vec<_> = tokenize(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_underscore_doesnt_separate() {
|
||||
let input = b"A_B";
|
||||
let expected: Vec<Token> = vec![Token::new(b"A_B", 0)];
|
||||
let actual: Vec<_> = tokenize(input).collect();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
}
|
||||
|
|
12
src/main.rs
12
src/main.rs
|
@ -15,12 +15,12 @@ arg_enum!{
|
|||
}
|
||||
|
||||
impl Format {
|
||||
fn report(self) -> scorrect::Report {
|
||||
fn report(self) -> scorrect::report::Report {
|
||||
match self {
|
||||
Format::Silent => scorrect::print_silent,
|
||||
Format::Brief => scorrect::print_brief,
|
||||
Format::Long => scorrect::print_long,
|
||||
Format::Json => scorrect::print_json,
|
||||
Format::Silent => scorrect::report::print_silent,
|
||||
Format::Brief => scorrect::report::print_brief,
|
||||
Format::Long => scorrect::report::print_long,
|
||||
Format::Json => scorrect::report::print_json,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -63,7 +63,7 @@ impl Options {
|
|||
fn run() -> Result<(), failure::Error> {
|
||||
let options = Options::from_args().infer();
|
||||
|
||||
let dictionary = scorrect::Corrections::new();
|
||||
let dictionary = scorrect::Dictionary::new();
|
||||
|
||||
let first_path = &options.path.get(0).expect("arg parsing enforces at least one");
|
||||
let mut walk = ignore::WalkBuilder::new(first_path);
|
||||
|
|
40
src/report.rs
Normal file
40
src/report.rs
Normal file
|
@ -0,0 +1,40 @@
|
|||
#[derive(Debug, Serialize)]
|
||||
pub struct Message<'m> {
|
||||
pub path: &'m std::path::Path,
|
||||
#[serde(skip)]
|
||||
pub line: &'m [u8],
|
||||
pub line_num: usize,
|
||||
pub col_num: usize,
|
||||
pub word: &'m str,
|
||||
pub correction: &'m str,
|
||||
#[serde(skip)]
|
||||
pub(crate) non_exhaustive: (),
|
||||
}
|
||||
|
||||
/// Signature of a reporting callback: it takes ownership of one `Message`
/// and returns nothing.
pub type Report = fn(Message);
|
||||
|
||||
pub fn print_silent(_: Message) {
|
||||
}
|
||||
|
||||
pub fn print_brief(msg: Message) {
|
||||
println!("{}:{}:{}: {} -> {}", msg.path.display(), msg.line_num, msg.col_num, msg.word, msg.correction);
|
||||
}
|
||||
|
||||
pub fn print_long(msg: Message) {
|
||||
let line_num = msg.line_num.to_string();
|
||||
let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect();
|
||||
|
||||
let hl_indent: String = itertools::repeat_n(" ", msg.col_num).collect();
|
||||
let hl: String = itertools::repeat_n("^", msg.word.len()).collect();
|
||||
|
||||
println!("error: `{}` should be `{}`", msg.word, msg.correction);
|
||||
println!(" --> {}:{}:{}", msg.path.display(), msg.line_num, msg.col_num);
|
||||
println!("{} |", line_indent);
|
||||
println!("{} | {}", msg.line_num, String::from_utf8_lossy(msg.line).trim_end());
|
||||
println!("{} | {}{}", line_indent, hl_indent, hl);
|
||||
println!("{} |", line_indent);
|
||||
}
|
||||
|
||||
pub fn print_json(msg: Message) {
|
||||
println!("{}", serde_json::to_string(&msg).unwrap());
|
||||
}
|
Loading…
Add table
Reference in a new issue