Merge pull request #88 from epage/threads

perf: Multi-threaded spell checking
This commit is contained in:
Ed Page 2020-03-21 14:40:44 -05:00 committed by GitHub
commit 3b382aac45
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 504 additions and 436 deletions

785
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -16,7 +16,7 @@ stages:
jobs: jobs:
- template: default.yml@templates - template: default.yml@templates
parameters: parameters:
minrust: 1.36.0 minrust: 1.40.0
codecov_token: $(CODECOV_TOKEN_SECRET) codecov_token: $(CODECOV_TOKEN_SECRET)
- stage: codegen - stage: codegen
displayName: Verify Code-gen displayName: Verify Code-gen

View file

@ -90,12 +90,16 @@ function bench_dir() {
echo "$name: $version" >> $output echo "$name: $version" >> $output
echo "" >> $output echo "" >> $output
rg_command="" rg_command=""
rg_j1_command=""
if [[ ! -z $rg_path ]]; then if [[ ! -z $rg_path ]]; then
rg_command="$rg_path bin $path" rg_command="$rg_path bin $path"
rg_j1_command="$rg_path --threads 1 bin $path"
fi fi
typos_command="" typos_command=""
typos_j1_command=""
if [[ ! -z $typos_path ]]; then if [[ ! -z $typos_path ]]; then
typos_command="$typos_path $path" typos_command="$typos_path $path"
typos_j1_command="$typos_path --threads 1 $path"
fi fi
misspell_rs_command="" misspell_rs_command=""
if [[ ! -z $misspell_rs_path ]]; then if [[ ! -z $misspell_rs_path ]]; then
@ -110,7 +114,7 @@ function bench_dir() {
if [[ ! -z $codespell_path ]]; then if [[ ! -z $codespell_path ]]; then
codespell_command="$codespell_path $path" codespell_command="$codespell_path $path"
fi fi
hyperfine --warmup 1 -i --export-json $report_prefix-rg.json --export-markdown $report_prefix-rg.md "$rg_command" "$typos_command" "$misspell_rs_command" "$misspell_go_command" "$codespell_command" hyperfine --warmup 1 -i --export-json $report_prefix-rg.json --export-markdown $report_prefix-rg.md "$rg_command" "$rg_j1_command" "$typos_command" "$typos_j1_command" "$misspell_rs_command" "$misspell_go_command" "$codespell_command"
cat $report_prefix-rg.md >> $output cat $report_prefix-rg.md >> $output
fi fi
echo "" >> $output echo "" >> $output

View file

@ -3,6 +3,7 @@
extern crate clap; extern crate clap;
use std::io::Write; use std::io::Write;
use std::sync::atomic;
use structopt::StructOpt; use structopt::StructOpt;
@ -79,6 +80,10 @@ struct Args {
)] )]
pub format: Format, pub format: Format,
#[structopt(short = "j", long = "threads", default_value = "0")]
/// The approximate number of threads to use.
threads: usize,
#[structopt(flatten)] #[structopt(flatten)]
config: ConfigArgs, config: ConfigArgs,
@ -86,6 +91,16 @@ struct Args {
verbose: clap_verbosity_flag::Verbosity, verbose: clap_verbosity_flag::Verbosity,
} }
impl Args {
    /// Derives settings from the other arguments after parsing.
    ///
    /// When exactly one path was supplied and `is_file()` reports true for
    /// it, `threads` is forced to `1`; otherwise the arguments are returned
    /// untouched.
    pub fn infer(mut self) -> Self {
        // NOTE(review): presumably a parallel walk buys nothing for a lone
        // file, hence the single-threaded override -- confirm with the author.
        let lone_file = match self.path.first() {
            Some(only) if self.path.len() == 1 => only.is_file(),
            _ => false,
        };
        if lone_file {
            self.threads = 1;
        }
        self
    }
}
#[derive(Debug, StructOpt)] #[derive(Debug, StructOpt)]
#[structopt(rename_all = "kebab-case")] #[structopt(rename_all = "kebab-case")]
pub struct FileArgs { pub struct FileArgs {
@ -261,7 +276,7 @@ impl config::WalkSource for WalkArgs {
} }
} }
trait Checks { trait Checks: Send + Sync {
fn check_filename( fn check_filename(
&self, &self,
path: &std::path::Path, path: &std::path::Path,
@ -378,7 +393,7 @@ fn check_path(
checks: &dyn Checks, checks: &dyn Checks,
parser: &typos::tokens::Parser, parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary, dictionary: &dyn typos::Dictionary,
) -> Result<(bool, bool), anyhow::Error> { ) -> (bool, bool) {
let mut typos_found = false; let mut typos_found = false;
let mut errors_found = false; let mut errors_found = false;
@ -394,7 +409,35 @@ fn check_path(
} }
} }
Ok((typos_found, errors_found)) (typos_found, errors_found)
}
fn check_path_parallel(
walk: ignore::WalkParallel,
format: Format,
checks: &dyn Checks,
parser: &typos::tokens::Parser,
dictionary: &dyn typos::Dictionary,
) -> (bool, bool) {
let typos_found = atomic::AtomicBool::new(false);
let errors_found = atomic::AtomicBool::new(false);
walk.run(|| {
Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
match check_entry(entry, format, checks, parser, dictionary) {
Ok(true) => typos_found.store(true, atomic::Ordering::Relaxed),
Err(err) => {
let msg = typos::report::Error::new(err.to_string());
format.report()(msg.into());
errors_found.store(true, atomic::Ordering::Relaxed);
}
_ => (),
}
ignore::WalkState::Continue
})
});
(typos_found.into_inner(), errors_found.into_inner())
} }
fn check_entry( fn check_entry(
@ -421,16 +464,15 @@ fn check_entry(
} }
fn run() -> Result<i32, anyhow::Error> { fn run() -> Result<i32, anyhow::Error> {
let args = Args::from_args(); let args = Args::from_args().infer();
init_logging(args.verbose.log_level()); init_logging(args.verbose.log_level());
let mut config = config::Config::default(); let config = if let Some(path) = args.custom_config.as_ref() {
if let Some(path) = args.custom_config.as_ref() { config::Config::from_file(path)?
let custom = config::Config::from_file(path)?; } else {
config.update(&custom); config::Config::default()
} };
let config = config;
let mut typos_found = false; let mut typos_found = false;
let mut errors_found = false; let mut errors_found = false;
@ -468,13 +510,16 @@ fn run() -> Result<i32, anyhow::Error> {
.binary(config.files.binary()); .binary(config.files.binary());
let mut walk = ignore::WalkBuilder::new(path); let mut walk = ignore::WalkBuilder::new(path);
walk.hidden(config.files.ignore_hidden()) walk.threads(args.threads)
.hidden(config.files.ignore_hidden())
.ignore(config.files.ignore_dot()) .ignore(config.files.ignore_dot())
.git_global(config.files.ignore_global()) .git_global(config.files.ignore_global())
.git_ignore(config.files.ignore_vcs()) .git_ignore(config.files.ignore_vcs())
.git_exclude(config.files.ignore_vcs()) .git_exclude(config.files.ignore_vcs())
.parents(config.files.ignore_parent()); .parents(config.files.ignore_parent());
let single_threaded = args.threads == 1;
if args.files { if args.files {
if single_threaded {
for entry in walk.build() { for entry in walk.build() {
match entry { match entry {
Ok(entry) => { Ok(entry) => {
@ -488,10 +533,40 @@ fn run() -> Result<i32, anyhow::Error> {
} }
} }
} }
} else {
let format = args.format;
let atomic_errors = atomic::AtomicBool::new(errors_found);
walk.build_parallel().run(|| {
Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
match entry {
Ok(entry) => {
let msg = typos::report::File::new(entry.path());
format.report()(msg.into());
}
Err(err) => {
let msg = typos::report::Error::new(err.to_string());
format.report()(msg.into());
atomic_errors.store(true, atomic::Ordering::Relaxed);
}
}
ignore::WalkState::Continue
})
});
errors_found = atomic_errors.into_inner();
}
} else if args.identifiers { } else if args.identifiers {
let checks = settings.build_identifier_parser(); let checks = settings.build_identifier_parser();
let (cur_typos, cur_errors) = let (cur_typos, cur_errors) = if single_threaded {
check_path(walk.build(), args.format, &checks, &parser, &dictionary)?; check_path(walk.build(), args.format, &checks, &parser, &dictionary)
} else {
check_path_parallel(
walk.build_parallel(),
args.format,
&checks,
&parser,
&dictionary,
)
};
if cur_typos { if cur_typos {
typos_found = true; typos_found = true;
} }
@ -500,8 +575,17 @@ fn run() -> Result<i32, anyhow::Error> {
} }
} else if args.words { } else if args.words {
let checks = settings.build_word_parser(); let checks = settings.build_word_parser();
let (cur_typos, cur_errors) = let (cur_typos, cur_errors) = if single_threaded {
check_path(walk.build(), args.format, &checks, &parser, &dictionary)?; check_path(walk.build(), args.format, &checks, &parser, &dictionary)
} else {
check_path_parallel(
walk.build_parallel(),
args.format,
&checks,
&parser,
&dictionary,
)
};
if cur_typos { if cur_typos {
typos_found = true; typos_found = true;
} }
@ -510,8 +594,17 @@ fn run() -> Result<i32, anyhow::Error> {
} }
} else { } else {
let checks = settings.build_checks(); let checks = settings.build_checks();
let (cur_typos, cur_errors) = let (cur_typos, cur_errors) = if single_threaded {
check_path(walk.build(), args.format, &checks, &parser, &dictionary)?; check_path(walk.build(), args.format, &checks, &parser, &dictionary)
} else {
check_path_parallel(
walk.build_parallel(),
args.format,
&checks,
&parser,
&dictionary,
)
};
if cur_typos { if cur_typos {
typos_found = true; typos_found = true;
} }