Merge pull request #51 from epage/bench

perf: Benchmark 0.1
This commit is contained in:
Ed Page 2019-10-25 06:14:00 -06:00 committed by GitHub
commit 8ef836a51f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 1353 additions and 0 deletions

View file

@ -13,6 +13,11 @@ keywords = ["development", "spelling"]
license = "MIT" license = "MIT"
edition = "2018" edition = "2018"
[[bin]]
name = "typos"
path = "src/main.rs"
doc = false
[badges] [badges]
travis-ci = { repository = "epage/typos" } travis-ci = { repository = "epage/typos" }
appveyor = { repository = "epage/typos" } appveyor = { repository = "epage/typos" }

304
benchsuite/benchsuite.sh Executable file
View file

@ -0,0 +1,304 @@
#!/usr/bin/env bash
# Spell checker benchmark driver.
#
# Usage: benchsuite.sh [BASE_DIR [MACHINE]]
#   BASE_DIR  directory handed to the fixture/uut helper scripts
#             (default: /tmp/benchsuite)
#   MACHINE   label used in the report file name (default: $HOSTNAME)
#
# Writes a markdown report to runs/<date>-<machine>.md next to this script.
# Requires hyperfine on PATH. The helper scripts are only queried here
# ("path" and "version"); anything whose helper prints no path is reported
# as N/A instead of being benchmarked.
set -e

current_dir=$(dirname "$(readlink -f "$0")")

base_dir="/tmp/benchsuite"
if [[ $# -ge 1 ]]; then
  base_dir=$1
fi
mkdir -p "$base_dir"
pushd "$base_dir"
# From here on the working directory *is* the base directory, so the helper
# scripts are given "." and answer with paths relative to it.
base_dir=.

machine="$HOSTNAME"
if [[ $# -ge 2 ]]; then
  machine=$2
fi

current_day=$(date +%Y-%m-%d)
report_prefix=$current_dir/runs/$current_day-$machine
report_path=$report_prefix.md
mkdir -p "$(dirname "$report_path")"

# Start the report from scratch with its header.
{
  echo ""
  echo "# Spell Check Shootout"
  echo ""
  echo "These are the results as of $current_day"
  echo ""
  echo "Command:"
  echo "\`\`\`bash"
  echo "$ $0 $base_dir $machine"
  echo "\`\`\`"
  echo ""
} > "$report_path"

# fixture_query NAME COMMAND
# Run fixtures/NAME.sh COMMAND against the base directory.
fixture_query() {
  "$current_dir/fixtures/$1.sh" "$2" "$base_dir"
}

# uut_query NAME COMMAND
# Run uut/NAME.sh COMMAND against the base directory.
uut_query() {
  "$current_dir/uut/$1.sh" "$2" "$base_dir"
}

# register_uut NAME
# Look up one tool, list it under "Spell checkers:" in the report (or as N/A
# with a warning on stderr), and store its path in the global NAME_path.
register_uut() {
  local name=$1
  local tool_path tool_version
  tool_path=$(uut_query "$name" path)
  tool_version=$(uut_query "$name" version)
  if [[ -z "$tool_path" ]]; then
    >&2 echo "Warning: $name uut is unavailable"
    echo "- $name: N/A" >> "$report_path"
  else
    echo "- $tool_version" >> "$report_path"
  fi
  printf -v "${name}_path" '%s' "$tool_path"
}

# bench_fixture NAME FIXTURE_PATH FIXTURE_VERSION COMMAND_PREFIX...
# Append a "## NAME fixture" section to the report. Each non-empty
# COMMAND_PREFIX gets FIXTURE_PATH appended and is timed with hyperfine.
# Empty prefixes (tools that are not installed) are skipped, so hyperfine is
# never handed an empty command string.
bench_fixture() {
  local name=$1
  local fixture_path=$2
  local fixture_version=$3
  shift 3

  {
    echo ""
    echo "## $name fixture"
    echo ""
  } >> "$report_path"

  if [[ -z "$fixture_path" ]]; then
    >&2 echo "Warning: $name fixture is unavailable"
    echo "N/A" >> "$report_path"
    return 0
  fi

  echo "$name: $fixture_version" >> "$report_path"
  echo "" >> "$report_path"

  local -a commands=()
  local prefix
  for prefix in "$@"; do
    if [[ -n "$prefix" ]]; then
      commands+=("$prefix $fixture_path")
    fi
  done
  if [[ ${#commands[@]} -eq 0 ]]; then
    >&2 echo "Warning: no spell checkers are available for $name"
    echo "N/A" >> "$report_path"
    return 0
  fi

  # Exports are named after the fixture so one fixture's JSON does not
  # overwrite another's.
  hyperfine --warmup 1 -i \
    --export-json "$report_prefix-$name.json" \
    --export-markdown "$report_prefix-$name.md" \
    "${commands[@]}"
  cat "$report_prefix-$name.md" >> "$report_path"
}

linux_clean_path=$(fixture_query linux_clean path)
linux_clean_version=$(fixture_query linux_clean version)
linux_built_path=$(fixture_query linux_built path)
linux_built_version=$(fixture_query linux_built version)
ripgrep_clean_path=$(fixture_query ripgrep_clean path)
ripgrep_clean_version=$(fixture_query ripgrep_clean version)
ripgrep_built_path=$(fixture_query ripgrep_built path)
ripgrep_built_version=$(fixture_query ripgrep_built version)
subtitles_en_path=$(fixture_query subtitles_en path)
subtitles_en_version=$(fixture_query subtitles_en version)
subtitles_en_small_path=$(fixture_query subtitles_en_small path)
subtitles_en_small_version=$(fixture_query subtitles_en_small version)
subtitles_ru_path=$(fixture_query subtitles_ru path)
subtitles_ru_version=$(fixture_query subtitles_ru version)
subtitles_ru_small_path=$(fixture_query subtitles_ru_small path)
subtitles_ru_small_version=$(fixture_query subtitles_ru_small version)

echo "" >> "$report_path"
echo "Spell checkers:" >> "$report_path"
register_uut rg
register_uut typos
register_uut misspell_rs
register_uut misspell_go
register_uut codespell
register_uut scspell

# rg is not a spell checker; it searches for the fixed pattern "bin".
# ${rg_path:+...} keeps the prefix empty when rg is not installed.
rg_prefix="${rg_path:+$rg_path bin}"

# Skipping scspell for the directory fixtures, doesn't work on directories
bench_fixture linux_clean "$linux_clean_path" "$linux_clean_version" \
  "$rg_prefix" "$typos_path" "$misspell_rs_path" "$misspell_go_path" \
  "$codespell_path"

bench_fixture linux_built "$linux_built_path" "$linux_built_version" \
  "$rg_prefix" "$typos_path" "$misspell_rs_path" "$misspell_go_path" \
  "$codespell_path"

# The English subtitle fixtures are only checked for presence; no section
# is written for them.
if [[ -z "$subtitles_en_path" ]]; then
  >&2 echo "Warning: subtitles_en fixture is unavailable"
fi
if [[ -z "$subtitles_en_small_path" ]]; then
  >&2 echo "Warning: subtitles_en_small fixture is unavailable"
fi

bench_fixture subtitles_ru_small "$subtitles_ru_small_path" \
  "$subtitles_ru_small_version" \
  "$rg_prefix" "$typos_path" "$misspell_rs_path" "$misspell_go_path" \
  "$scspell_path" "$codespell_path"

bench_fixture ripgrep_clean "$ripgrep_clean_path" "$ripgrep_clean_version" \
  "$rg_prefix" "$typos_path" "$misspell_rs_path" "$misspell_go_path" \
  "$codespell_path"

bench_fixture ripgrep_built "$ripgrep_built_path" "$ripgrep_built_version" \
  "$rg_prefix" "$typos_path" "$misspell_rs_path" "$misspell_go_path" \
  "$codespell_path"

echo "" >> "$report_path"

View file

@ -0,0 +1,80 @@
#!/usr/bin/env bash
# Pre-reqs:
# - git
# - able to build a Linux kernel
# - apt install libelf-dev, bc
set -e
FIXTURE_DIR="linux_built"
# Print the number of "processor" entries listed in /proc/cpuinfo.
function cpucount() {
  printf '%s\n' "$(grep -c ^processor /proc/cpuinfo)"
}
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
current_dir=`dirname $(readlink -f $0)`
root_dir="${base_dir}/$FIXTURE_DIR"
out_file="$root_dir"
log_path="${base_dir}/$FIXTURE_DIR.log"
# Print the location of the built kernel tree, or nothing when it is absent.
# The expansion is quoted so a path containing whitespace or glob characters
# is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the fixture directory and its log file. Quoted so that paths with
# whitespace are removed as given instead of being split into several names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Create the built-kernel fixture unless it already exists: fetch the clean
# checkout through linux_clean.sh, copy it to $out_file and build it there.
# Progress and build output are appended to $log_path.
function download() {
  if [[ ! -e "$out_file" ]]; then
    # The log file lives directly under base_dir, which may not exist yet.
    mkdir -p "${base_dir}"
    echo "Downloading $FIXTURE_DIR" >> "${log_path}"
    local in_file
    in_file=$("$current_dir/linux_clean.sh" download "$base_dir")
    cp -R "$in_file" "$out_file"
    # We want to build the kernel because the process of building it produces
    # a lot of junk in the repository that a search tool probably shouldn't
    # touch.
    pushd "$root_dir" >> "${log_path}"
    make defconfig >> "${log_path}"
    # cpucount prints a single integer; left unquoted as in the original so an
    # empty result still yields a bare "-j".
    make -j $(cpucount) >> "${log_path}"
    popd >> "${log_path}"
  fi
}
# Print "linux <commit hash>" for the fixture's checkout, or nothing when the
# fixture is absent. The directory-stack output of pushd/popd goes to the log.
function version() {
  if [[ -e "$out_file" ]]; then
    pushd "$root_dir" >> "${log_path}"
    echo "linux $(git rev-parse HEAD)"
    popd >> "${log_path}"
  fi
}
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac

View file

@ -0,0 +1,70 @@
#!/usr/bin/env bash
# Pre-reqs:
# - git
set -e
FIXTURE_DIR="linux_clean"
# Clone from burntsushi's fork so that we always get the same corpus *and* still
# do a shallow clone. Shallow clones are much much cheaper than full
# clones.
# Uses https because GitHub no longer serves the unauthenticated git://
# protocol.
REPO_URL="https://github.com/BurntSushi/linux"
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
root_dir="${base_dir}/$FIXTURE_DIR"
out_file="$root_dir"
log_path="${base_dir}/$FIXTURE_DIR.log"
# Print the location of the kernel checkout, or nothing when it is absent.
# The expansion is quoted so a path containing whitespace or glob characters
# is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the fixture directory and its log file. Quoted so that paths with
# whitespace are removed as given instead of being split into several names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Shallow-clone $REPO_URL into $root_dir unless the fixture already exists.
# git's stdout is appended to $log_path.
function download() {
  if [[ ! -e "$out_file" ]]; then
    # Only the parent directory is created here. Pre-creating root_dir itself
    # would make a failed clone look like a finished download on the next run.
    mkdir -p "$(dirname "${root_dir}")"
    echo "Downloading $FIXTURE_DIR" >> "${log_path}"
    git clone --depth 1 "$REPO_URL" "$root_dir" >> "${log_path}"
  fi
}
# Print "linux <commit hash>" for the fixture's checkout, or nothing when the
# fixture is absent. The directory-stack output of pushd/popd goes to the log.
function version() {
  if [[ -e "$out_file" ]]; then
    pushd "$root_dir" >> "${log_path}"
    echo "linux $(git rev-parse HEAD)"
    popd >> "${log_path}"
  fi
}
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac

View file

@ -0,0 +1,74 @@
#!/usr/bin/env bash
# Pre-reqs:
# - git
# - rust
set -e
FIXTURE_DIR="ripgrep_built"
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
current_dir=`dirname $(readlink -f $0)`
root_dir="${base_dir}/$FIXTURE_DIR"
out_file="$root_dir"
log_path="${base_dir}/$FIXTURE_DIR.log"
# Print the location of the compiled ripgrep tree, or nothing when it is
# absent. The expansion is quoted so a path containing whitespace or glob
# characters is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the fixture directory and its log file. Quoted so that paths with
# whitespace are removed as given instead of being split into several names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Create the compiled-ripgrep fixture unless it already exists: fetch the
# clean checkout through ripgrep_clean.sh, copy it to $out_file and run
# `cargo check` inside the copy. Output is appended to $log_path.
function download() {
  if [[ ! -e "$out_file" ]]; then
    # The log file lives directly under base_dir, which may not exist yet.
    mkdir -p "${base_dir}"
    echo "Downloading $FIXTURE_DIR" >> "${log_path}"
    local in_file
    in_file=$("$current_dir/ripgrep_clean.sh" download "$base_dir")
    cp -R "$in_file" "$out_file"
    # We want to compile ripgrep because the process of compiling it produces
    # a lot of junk in the repository that a search tool probably shouldn't
    # touch.
    pushd "$root_dir" >> "${log_path}"
    cargo check >> "${log_path}"
    popd >> "${log_path}"
  fi
}
# Print "rg <commit hash>" for the fixture's checkout, or nothing when the
# fixture is absent. The directory-stack output of pushd/popd goes to the log.
function version() {
  if [[ -e "$out_file" ]]; then
    pushd "$root_dir" >> "${log_path}"
    echo "rg $(git rev-parse HEAD)"
    popd >> "${log_path}"
  fi
}
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac

View file

@ -0,0 +1,72 @@
#!/usr/bin/env bash
# Pre-reqs:
# - git
set -e
FIXTURE_DIR="ripgrep_clean"
# Tag that is checked out, so the corpus is the same on every run.
REPO_TAG="11.0.1"
# Uses https because GitHub no longer serves the unauthenticated git://
# protocol.
REPO_URL="https://github.com/BurntSushi/ripgrep"
# Print the number of "processor" entries listed in /proc/cpuinfo.
function cpucount() {
  printf '%s\n' "$(grep -c ^processor /proc/cpuinfo)"
}
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
root_dir="${base_dir}/$FIXTURE_DIR"
out_file="$root_dir"
log_path="${base_dir}/$FIXTURE_DIR.log"
# Print the location of the ripgrep checkout, or nothing when it is absent.
# The expansion is quoted so a path containing whitespace or glob characters
# is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the fixture directory and its log file. Quoted so that paths with
# whitespace are removed as given instead of being split into several names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Shallow-clone tag $REPO_TAG of $REPO_URL into $root_dir unless the fixture
# already exists. git's stdout is appended to $log_path.
function download() {
  if [[ ! -e "$out_file" ]]; then
    # Only the parent directory is created here. Pre-creating root_dir itself
    # would make a failed clone look like a finished download on the next run.
    mkdir -p "$(dirname "${root_dir}")"
    echo "Downloading $FIXTURE_DIR" >> "${log_path}"
    git clone --depth 1 "$REPO_URL" --branch "$REPO_TAG" "$root_dir" >> "${log_path}"
  fi
}
# Print "rg <commit hash>" for the fixture's checkout, or nothing when the
# fixture is absent. The directory-stack output of pushd/popd goes to the log.
function version() {
  if [[ -e "$out_file" ]]; then
    pushd "$root_dir" >> "${log_path}"
    echo "rg $(git rev-parse HEAD)"
    popd >> "${log_path}"
  fi
}
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac

View file

@ -0,0 +1,76 @@
#!/usr/bin/env bash
# Pre-reqs:
# - gunzip
# - curl
set -e
FIXTURE_DIR="subtitles_en"
SUBTITLES_NAME="OpenSubtitles2016.raw.en"
SUBTITLES_NAME_SAMPLE="$SUBTITLES_NAME.sample"
SUBTITLES_NAME_GZ="${SUBTITLES_NAME}.gz"
SUBTITLES_URL="https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2016/mono/en.txt.gz"
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
root_dir="${base_dir}/$FIXTURE_DIR"
out_file="$root_dir/$SUBTITLES_NAME_SAMPLE"
log_path="${base_dir}/$FIXTURE_DIR.log"
# Print the location of the English subtitle sample, or nothing when it is
# absent. The expansion is quoted so a path containing whitespace or glob
# characters is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the fixture directory and its log file. Quoted so that paths with
# whitespace are removed as given instead of being split into several names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Download and unpack the English subtitle corpus into $root_dir unless the
# sample already exists, then cut the sample file and record its checksum in
# a sibling ".sha" file (read back by version).
function download() {
  if [[ ! -e "$out_file" ]]; then
    mkdir -p "${root_dir}"
    echo "Downloading $FIXTURE_DIR" >> "${log_path}"
    pushd "$root_dir" >> "${log_path}"
    curl -L "$SUBTITLES_URL" -o "$SUBTITLES_NAME_GZ"
    gunzip "$SUBTITLES_NAME_GZ"
    # Get a sample roughly the same size as the Russian corpus so that
    # benchmarks finish in a reasonable time.
    head -n 32722372 "$SUBTITLES_NAME" > "$SUBTITLES_NAME_SAMPLE"
    shasum "$SUBTITLES_NAME_SAMPLE" > "$SUBTITLES_NAME_SAMPLE.sha"
    popd >> "${log_path}"
  fi
}
# Print "<file name> <checksum>" for the fixture, taking the checksum from
# the first field of the ".sha" file next to it. Prints nothing when the
# fixture is absent.
function version() {
  if [[ -e "$out_file" ]]; then
    echo "$(basename "$out_file") $(cut -d " " -f 1 "$out_file.sha")"
  fi
}
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac

View file

@ -0,0 +1,66 @@
#!/usr/bin/env bash
set -e
FIXTURE_DIR="subtitles_en"
SUBTITLES_NAME="OpenSubtitles2016.raw.en"
SUBTITLES_NAME_SMALL="OpenSubtitles2016.raw.en.small"
LINES=10000
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
current_dir=`dirname $(readlink -f $0)`
root_dir="${base_dir}/$FIXTURE_DIR"
out_file="$root_dir/$SUBTITLES_NAME_SMALL"
log_path="${base_dir}/$FIXTURE_DIR.log"
# Print the location of the small English sample, or nothing when it is
# absent. The expansion is quoted so a path containing whitespace or glob
# characters is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the fixture directory and its log file. Quoted so that paths with
# whitespace are removed as given instead of being split into several names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Create the small sample unless it already exists: obtain the full English
# fixture through subtitles_en.sh, keep its first $LINES lines and record the
# checksum in a sibling ".sha" file.
# NOTE(review): LINES is also a variable bash itself may maintain (terminal
# height, see checkwinsize) - consider renaming the setting.
function download() {
  if [[ ! -e "$out_file" ]]; then
    local in_file
    in_file=$("$current_dir/subtitles_en.sh" download "$base_dir")
    head -n "$LINES" "$in_file" > "$out_file"
    shasum "$out_file" > "$out_file.sha"
  fi
}
# Print "<file name> <checksum>" for the fixture, taking the checksum from
# the first field of the ".sha" file next to it. Prints nothing when the
# fixture is absent.
function version() {
  if [[ -e "$out_file" ]]; then
    echo "$(basename "$out_file") $(cut -d " " -f 1 "$out_file.sha")"
  fi
}
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac

View file

@ -0,0 +1,69 @@
#!/usr/bin/env bash
set -e
FIXTURE_DIR="subtitles_ru"
SUBTITLES_NAME="OpenSubtitles2016.raw.ru"
SUBTITLES_NAME_GZ="${SUBTITLES_NAME}.gz"
SUBTITLES_URL="https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2016/mono/ru.txt.gz"
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
root_dir="${base_dir}/$FIXTURE_DIR"
out_file="$root_dir/$SUBTITLES_NAME"
log_path="${base_dir}/$FIXTURE_DIR.log"
# Print the location of the Russian subtitle corpus, or nothing when it is
# absent. The expansion is quoted so a path containing whitespace or glob
# characters is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the fixture directory and its log file. Quoted so that paths with
# whitespace are removed as given instead of being split into several names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Download and unpack the Russian subtitle corpus into $root_dir unless it
# already exists, and record its checksum in a sibling ".sha" file (read back
# by version).
function download() {
  if [[ ! -e "$out_file" ]]; then
    mkdir -p "${root_dir}"
    echo "Downloading $FIXTURE_DIR" >> "${log_path}"
    pushd "$root_dir" >> "${log_path}"
    curl -L "$SUBTITLES_URL" -o "$SUBTITLES_NAME_GZ"
    gunzip "$SUBTITLES_NAME_GZ"
    shasum "$SUBTITLES_NAME" > "$SUBTITLES_NAME.sha"
    popd >> "${log_path}"
  fi
}
# Print "<file name> <checksum>" for the fixture, taking the checksum from
# the first field of the ".sha" file next to it. Prints nothing when the
# fixture is absent.
function version() {
  if [[ -e "$out_file" ]]; then
    echo "$(basename "$out_file") $(cut -d " " -f 1 "$out_file.sha")"
  fi
}
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac

View file

@ -0,0 +1,66 @@
#!/usr/bin/env bash
set -e
FIXTURE_DIR="subtitles_ru"
SUBTITLES_NAME="OpenSubtitles2016.raw.ru"
SUBTITLES_NAME_SMALL="$SUBTITLES_NAME.small"
LINES=10000
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
current_dir=`dirname $(readlink -f $0)`
root_dir="${base_dir}/$FIXTURE_DIR"
out_file="$root_dir/$SUBTITLES_NAME_SMALL"
log_path="${base_dir}/$FIXTURE_DIR.log"
# Print the location of the small Russian sample, or nothing when it is
# absent. The expansion is quoted so a path containing whitespace or glob
# characters is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the fixture directory and its log file. Quoted so that paths with
# whitespace are removed as given instead of being split into several names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Create the small sample unless it already exists: obtain the full Russian
# fixture through subtitles_ru.sh, keep its first $LINES lines and record the
# checksum in a sibling ".sha" file.
# NOTE(review): LINES is also a variable bash itself may maintain (terminal
# height, see checkwinsize) - consider renaming the setting.
function download() {
  if [[ ! -e "$out_file" ]]; then
    local in_file
    in_file=$("$current_dir/subtitles_ru.sh" download "$base_dir")
    head -n "$LINES" "$in_file" > "$out_file"
    shasum "$out_file" > "$out_file.sha"
  fi
}
# Print "<file name> <checksum>" for the fixture, taking the checksum from
# the first field of the ".sha" file next to it. Prints nothing when the
# fixture is absent.
function version() {
  if [[ -e "$out_file" ]]; then
    echo "$(basename "$out_file") $(cut -d " " -f 1 "$out_file.sha")"
  fi
}
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac

View file

@ -0,0 +1,64 @@
# Spell Check Shootout
These are the results as of 2019-10-24
Command:
```bash
$ ./benchsuite.sh . Nightblood
```
Spell checkers:
- ripgrep 11.0.1 (rev 973de50c9e) -SIMD -AVX (compiled) +SIMD +AVX (runtime)
- typos-cli 0.1.0 w/ cargo 1.38.0 (23ef9a4ef 2019-08-20)
- misspell 0.2.0 w/ cargo 1.38.0 (23ef9a4ef 2019-08-20)
- misspell_go 0.3.4
- codespell 1.15.0 w/ Python 3.5.2
- scspell 2.2 w/ Python 3.5.2
## linux_clean fixture
N/A
## linux_built fixture
N/A
## subtitles_ru_small fixture
subtitles_ru_small: OpenSubtitles2016.raw.ru.small c4549d470463cae24b3dbb1efd138192242c0853
| Command | Mean [ms] | Min…Max [ms] |
|:---|---:|---:|
| `./rg/rg bin ./subtitles_ru/OpenSubtitles2016.raw.ru.small` | 7.6 ± 1.2 | 5.7…12.0 |
| `./typos/bin/typos ./subtitles_ru/OpenSubtitles2016.raw.ru.small` | 61.2 ± 4.1 | 52.4…70.1 |
| `./misspell_rs/bin/misspell ./subtitles_ru/OpenSubtitles2016.raw.ru.small` | 33.5 ± 2.3 | 30.2…40.0 |
| `./misspell_go/bin/misspell ./subtitles_ru/OpenSubtitles2016.raw.ru.small` | 40.2 ± 2.6 | 34.1…46.1 |
| `./scspell/bin/scspell ./subtitles_ru/OpenSubtitles2016.raw.ru.small` | 281.5 ± 3.8 | 276.8…289.3 |
| `./codespell/bin/codespell ./subtitles_ru/OpenSubtitles2016.raw.ru.small` | 311.8 ± 5.8 | 299.8…321.8 |
## ripgrep_clean fixture
ripgrep_clean: rg 973de50c9ef451da2cfcdfa86f2b2711d8d6ff48
| Command | Mean [ms] | Min…Max [ms] |
|:---|---:|---:|
| `./rg/rg bin ./ripgrep_clean` | 27.6 ± 5.1 | 20.6…38.1 |
| `./typos/bin/typos ./ripgrep_clean` | 168.0 ± 11.4 | 145.4…182.4 |
| `./misspell_rs/bin/misspell ./ripgrep_clean` | 145.4 ± 4.1 | 136.1…153.0 |
| `./misspell_go/bin/misspell ./ripgrep_clean` | 214.8 ± 7.8 | 193.4…226.5 |
| `./codespell/bin/codespell ./ripgrep_clean` | 651.0 ± 15.1 | 628.9…682.0 |
## ripgrep_built fixture
ripgrep_built: rg 973de50c9ef451da2cfcdfa86f2b2711d8d6ff48
| Command | Mean [ms] | Min…Max [ms] |
|:---|---:|---:|
| `./rg/rg bin ./ripgrep_built` | 32.5 ± 4.9 | 26.1…41.5 |
| `./typos/bin/typos ./ripgrep_built` | 174.1 ± 5.9 | 163.8…187.5 |
| `./misspell_rs/bin/misspell ./ripgrep_built` | 143.8 ± 5.0 | 137.2…161.0 |
| `./misspell_go/bin/misspell ./ripgrep_built` | 278.6 ± 8.1 | 266.7…291.6 |
| `./codespell/bin/codespell ./ripgrep_built` | 840.5 ± 11.2 | 819.4…853.0 |

71
benchsuite/uut/codespell.sh Executable file
View file

@ -0,0 +1,71 @@
#!/usr/bin/env bash
# Pre-reqs:
# - python3-venv
set -e
UUT_DIR="codespell"
CODESPELL_VERSION="1.15.0"
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
root_dir="${base_dir}/$UUT_DIR"
bin_dir=$root_dir/bin
out_file="$bin_dir/codespell"
log_path="${base_dir}/$UUT_DIR.log"
# Print the location of the codespell executable, or nothing when it is not
# installed. The expansion is quoted so a path containing whitespace or glob
# characters is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the tool's install directory and its log file. Quoted so that paths
# with whitespace are removed as given instead of being split into several
# names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Install codespell $CODESPELL_VERSION into a Python virtual environment at
# $root_dir unless it is already there. Installer output is appended to
# $log_path.
function download() {
  if [[ ! -e "$out_file" ]]; then
    mkdir -p "${base_dir}"
    echo "Downloading $UUT_DIR" >> "${log_path}"
    python3 -m venv "$root_dir" >> "$log_path"
    # My version of Ubuntu is using 8.1.1 and unsure if I want to touch it.
    "$bin_dir/pip" install -U pip==9.0.3 >> "$log_path"
    "$bin_dir/pip" install -U "codespell==$CODESPELL_VERSION" >> "$log_path"
  fi
}
# Print "<UUT_DIR> <codespell --version> w/ <python3 --version>" using the
# executables inside the virtual environment; nothing when not installed.
function version() {
  if [[ -e "$out_file" ]]; then
    echo "$UUT_DIR $("$out_file" --version) w/ $("$bin_dir/python3" --version)"
  fi
}
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac

67
benchsuite/uut/misspell_go.sh Executable file
View file

@ -0,0 +1,67 @@
#!/usr/bin/env bash
set -e
UUT_DIR="misspell_go"
MISSPELL_GO_VERSION="2.2"
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
root_dir="${base_dir}/$UUT_DIR"
out_file="$root_dir/bin/misspell"
log_path="${base_dir}/$UUT_DIR.log"
# Print the location of the misspell executable, or nothing when it is not
# installed. The expansion is quoted so a path containing whitespace or glob
# characters is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the tool's install directory and its log file. Quoted so that paths
# with whitespace are removed as given instead of being split into several
# names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Fetch the misspell install script into $root_dir and run it there, unless
# the executable already exists. Installer output is appended to $log_path.
# NOTE(review): MISSPELL_GO_VERSION is defined by this script but never
# passed to the installer here, so the installed version is whatever the
# installer picks - confirm whether pinning was intended.
function download() {
  if [[ ! -e "$out_file" ]]; then
    mkdir -p "${root_dir}"
    echo "Downloading $UUT_DIR" >> "${log_path}"
    pushd "$root_dir" >> "$log_path"
    curl -L -o ./install-misspell.sh https://git.io/misspell
    sh ./install-misspell.sh >> "$log_path"
    popd >> "$log_path"
  fi
}
# Print "<UUT_DIR> <output of misspell -v>", or nothing when not installed.
function version() {
  if [[ -e "$out_file" ]]; then
    echo "$UUT_DIR $("$out_file" -v)"
  fi
}
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac

66
benchsuite/uut/misspell_rs.sh Executable file
View file

@ -0,0 +1,66 @@
#!/usr/bin/env bash
set -e
UUT_DIR="misspell_rs"
MISSPELL_RS_VERSION="0.2.0"
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
root_dir="${base_dir}/$UUT_DIR"
out_file="$root_dir/bin/misspell"
log_path="${base_dir}/$UUT_DIR.log"
# Print the location of the misspell executable, or nothing when it is not
# installed. The expansion is quoted so a path containing whitespace or glob
# characters is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the tool's install directory and its log file. Quoted so that paths
# with whitespace are removed as given instead of being split into several
# names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Install misspell $MISSPELL_RS_VERSION with cargo into $root_dir unless it
# is already there. The cargo version used is saved to cargo.txt (read back
# by version); build artifacts go to $base_dir/_cache.
function download() {
  if [[ ! -e "$out_file" ]]; then
    mkdir -p "${root_dir}"
    mkdir -p "${base_dir}/_cache"
    echo "Downloading $UUT_DIR" >> "${log_path}"
    cargo --version > "$root_dir/cargo.txt"
    CARGO_TARGET_DIR="$base_dir/_cache" cargo install misspell \
      --version "$MISSPELL_RS_VERSION" --root "$root_dir"
  fi
}
# Print "<misspell --version> w/ <saved cargo version>", or nothing when the
# tool is not installed.
function version() {
  if [[ -e "$out_file" ]]; then
    echo "$("$out_file" --version) w/ $(cat "$root_dir/cargo.txt")"
  fi
}
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac

69
benchsuite/uut/rg.sh Executable file
View file

@ -0,0 +1,69 @@
#!/usr/bin/env bash
set -e
UUT_DIR="rg"
RG_VERSION="11.0.1"
RG_TARGET="x86_64-unknown-linux-musl"
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
root_dir="${base_dir}/$UUT_DIR"
out_file="$root_dir/rg"
log_path="${base_dir}/$UUT_DIR.log"
# Print the location of the rg executable, or nothing when it is not
# installed. The expansion is quoted so a path containing whitespace or glob
# characters is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the tool's install directory and its log file. Quoted so that paths
# with whitespace are removed as given instead of being split into several
# names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Download the ripgrep release archive for $RG_VERSION / $RG_TARGET into
# $root_dir, unpack it and copy the rg binary to the top of $root_dir, unless
# it is already there. tar's file listing is appended to $log_path.
function download() {
  if [[ ! -e "$out_file" ]]; then
    mkdir -p "${root_dir}"
    echo "Downloading $UUT_DIR" >> "${log_path}"
    pushd "$root_dir" >> "$log_path"
    # The release tag in the URL follows RG_VERSION too, so changing the
    # version setting cannot request a file from the wrong release.
    curl -L -o rg.tgz "https://github.com/BurntSushi/ripgrep/releases/download/$RG_VERSION/ripgrep-$RG_VERSION-$RG_TARGET.tar.gz"
    tar -zxvf rg.tgz >> "$log_path"
    # The archive unpacks into a single versioned directory; the glob picks
    # the binary out of it.
    cp -- */rg .
    popd >> "$log_path"
  fi
}
# Print the output of "rg --version", or nothing when rg is not installed.
function version() {
  if [[ -e "$out_file" ]]; then
    echo "$("$out_file" --version)"
  fi
}
# Dispatch on the sub-command given as the first argument. Every branch
# echoes the function's output; "download" installs first and then prints the
# resulting path. Unknown commands are reported on stderr with exit status 1.
# NOTE(review): the command substitutions are unquoted, so word splitting
# joins multi-line output into a single line. The published report shows the
# rg version on one line, which looks like it depends on this - confirm
# before quoting $(version).
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac

69
benchsuite/uut/scspell.sh Executable file
View file

@ -0,0 +1,69 @@
#!/usr/bin/env bash
set -e
UUT_DIR="scspell"
SCSPELL_VERSION="2.2"
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
root_dir="${base_dir}/$UUT_DIR"
bin_dir=$root_dir/bin
out_file="$bin_dir/scspell"
log_path="${base_dir}/$UUT_DIR.log"
# Print the location of the scspell executable, or nothing when it is not
# installed. The expansion is quoted so a path containing whitespace or glob
# characters is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the tool's install directory and its log file. Quoted so that paths
# with whitespace are removed as given instead of being split into several
# names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Install scspell3k $SCSPELL_VERSION into a Python virtual environment at
# $root_dir unless it is already there. Installer output is appended to
# $log_path.
function download() {
  if [[ ! -e "$out_file" ]]; then
    mkdir -p "${base_dir}"
    echo "Downloading $UUT_DIR" >> "${log_path}"
    python3 -m venv "$root_dir" >> "$log_path"
    # My version of Ubuntu is using 8.1.1 and unsure if I want to touch it.
    "$bin_dir/pip" install -U pip==9.0.3 >> "$log_path"
    "$bin_dir/pip" install -U "scspell3k==$SCSPELL_VERSION" >> "$log_path"
  fi
}
# Print "<scspell --version> w/ <python3 --version>" using the executables
# inside the virtual environment; nothing when not installed.
function version() {
  if [[ -e "$out_file" ]]; then
    echo "$("$out_file" --version) w/ $("$bin_dir/python3" --version)"
  fi
}
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac

65
benchsuite/uut/typos.sh Executable file
View file

@ -0,0 +1,65 @@
#!/usr/bin/env bash
set -e
UUT_DIR="typos"
if [[ $# -eq 0 ]]; then
exit 1
fi
command=$1
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
base_dir=$2
fi
current_dir=`dirname $(readlink -f $0)`
root_dir="${base_dir}/$UUT_DIR"
out_file="$root_dir/bin/typos"
log_path="${base_dir}/$UUT_DIR.log"
# Print the location of the typos executable, or nothing when it is not
# installed. The expansion is quoted so a path containing whitespace or glob
# characters is printed verbatim.
function path() {
  if [[ -e "$out_file" ]]; then
    printf '%s\n' "$out_file"
  fi
}
# Delete the tool's install directory and its log file. Quoted so that paths
# with whitespace are removed as given instead of being split into several
# names.
function clear() {
  rm -Rf -- "${root_dir}" "${log_path}"
}
# Build and install typos with cargo from the directory two levels above
# this script into $root_dir, unless it is already there. The cargo version
# used is saved to cargo.txt (read back by version).
function download() {
  if [[ ! -e "$out_file" ]]; then
    mkdir -p "${root_dir}"
    echo "Downloading $UUT_DIR" >> "${log_path}"
    cargo --version > "$root_dir/cargo.txt"
    cargo install --path "$(realpath "$current_dir/../..")" --root "$root_dir"
  fi
}
# Print "<typos --version> w/ <saved cargo version>", or nothing when the
# tool is not installed.
function version() {
  if [[ -e "$out_file" ]]; then
    echo "$("$out_file" --version) w/ $(cat "$root_dir/cargo.txt")"
  fi
}
case $command in
path)
echo $(path)
;;
clear)
echo $(clear)
;;
version)
echo $(version)
;;
download)
download
echo $(path)
;;
*)
>&2 echo "Invalid command: $command"
exit 1
;;
esac