From 1d13f5d3e6bff3ef45cb02473a6f30611180fa17 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 24 Oct 2019 20:11:13 -0600 Subject: [PATCH 1/3] fix(cli): Name of bin back to typos --- Cargo.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 21aed57..b380145 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,11 @@ keywords = ["development", "spelling"] license = "MIT" edition = "2018" +[[bin]] +name = "typos" +path = "src/main.rs" +doc = false + [badges] travis-ci = { repository = "epage/typos" } appveyor = { repository = "epage/typos" } From a3fabbd8558b77121d1cd4e212238eb96e7f585a Mon Sep 17 00:00:00 2001 From: Ed Page Date: Tue, 2 Jul 2019 06:58:50 -0600 Subject: [PATCH 2/3] perf: Create end-to-end benchmark suite Fixtures were taken from ripgrep. The framework was rewritten to be more composable (rather than a single Python script that had both generic fixtures and selection of units-under-test). One of the goals was to completely generate a report that would include all relevant information for reproducing the results or adding nuance for when results change. Having problems with subtitles_en, so it's not fully included at the moment. 
--- benchsuite/benchsuite.sh | 304 ++++++++++++++++++++++ benchsuite/fixtures/linux_built.sh | 80 ++++++ benchsuite/fixtures/linux_clean.sh | 70 +++++ benchsuite/fixtures/ripgrep_built.sh | 74 ++++++ benchsuite/fixtures/ripgrep_clean.sh | 72 +++++ benchsuite/fixtures/subtitles_en.sh | 76 ++++++ benchsuite/fixtures/subtitles_en_small.sh | 66 +++++ benchsuite/fixtures/subtitles_ru.sh | 69 +++++ benchsuite/fixtures/subtitles_ru_small.sh | 66 +++++ benchsuite/uut/codespell.sh | 71 +++++ benchsuite/uut/misspell_go.sh | 67 +++++ benchsuite/uut/misspell_rs.sh | 66 +++++ benchsuite/uut/rg.sh | 69 +++++ benchsuite/uut/scspell.sh | 69 +++++ benchsuite/uut/typos.sh | 65 +++++ 15 files changed, 1284 insertions(+) create mode 100755 benchsuite/benchsuite.sh create mode 100755 benchsuite/fixtures/linux_built.sh create mode 100755 benchsuite/fixtures/linux_clean.sh create mode 100755 benchsuite/fixtures/ripgrep_built.sh create mode 100755 benchsuite/fixtures/ripgrep_clean.sh create mode 100755 benchsuite/fixtures/subtitles_en.sh create mode 100755 benchsuite/fixtures/subtitles_en_small.sh create mode 100755 benchsuite/fixtures/subtitles_ru.sh create mode 100755 benchsuite/fixtures/subtitles_ru_small.sh create mode 100755 benchsuite/uut/codespell.sh create mode 100755 benchsuite/uut/misspell_go.sh create mode 100755 benchsuite/uut/misspell_rs.sh create mode 100755 benchsuite/uut/rg.sh create mode 100755 benchsuite/uut/scspell.sh create mode 100755 benchsuite/uut/typos.sh diff --git a/benchsuite/benchsuite.sh b/benchsuite/benchsuite.sh new file mode 100755 index 0000000..ddf54c8 --- /dev/null +++ b/benchsuite/benchsuite.sh @@ -0,0 +1,304 @@ +#!/usr/bin/env bash +set -e + +current_dir=`dirname $(readlink -f $0)` + +base_dir="/tmp/benchsuite" +if [[ $# -ge 1 ]]; then + base_dir=$1 +fi +mkdir -p $base_dir +pushd $base_dir +base_dir=. 
+ + +machine="$HOSTNAME" +if [[ $# -ge 2 ]]; then + machine=$2 +fi + +current_day=`date +%Y-%m-%d` +report_prefix=$current_dir/runs/$current_day-$machine +report_path=$report_prefix.md +mkdir -p `dirname $report_path` + +echo "" > $report_path +echo "# Spell Check Shootout" >> $report_path +echo "" >> $report_path +echo "These are the results as of $current_day" >> $report_path +echo "" >> $report_path +echo "Command:" >> $report_path +echo "\`\`\`bash" >> $report_path +echo "$ $0 $base_dir $machine" >> $report_path +echo "\`\`\`" >> $report_path +echo "" >> $report_path + +linux_clean_path=`$current_dir/fixtures/linux_clean.sh path $base_dir` +linux_clean_version=`$current_dir/fixtures/linux_clean.sh version $base_dir` + +linux_built_path=`$current_dir/fixtures/linux_built.sh path $base_dir` +linux_built_version=`$current_dir/fixtures/linux_built.sh version $base_dir` + +ripgrep_clean_path=`$current_dir/fixtures/ripgrep_clean.sh path $base_dir` +ripgrep_clean_version=`$current_dir/fixtures/ripgrep_clean.sh version $base_dir` + +ripgrep_built_path=`$current_dir/fixtures/ripgrep_built.sh path $base_dir` +ripgrep_built_version=`$current_dir/fixtures/ripgrep_built.sh version $base_dir` + +subtitles_en_path=`$current_dir/fixtures/subtitles_en.sh path $base_dir` +subtitles_en_version=`$current_dir/fixtures/subtitles_en.sh version $base_dir` + +subtitles_en_small_path=`$current_dir/fixtures/subtitles_en_small.sh path $base_dir` +subtitles_en_small_version=`$current_dir/fixtures/subtitles_en_small.sh version $base_dir` + +subtitles_ru_path=`$current_dir/fixtures/subtitles_ru.sh path $base_dir` +subtitles_ru_version=`$current_dir/fixtures/subtitles_ru.sh version $base_dir` + +subtitles_ru_small_path=`$current_dir/fixtures/subtitles_ru_small.sh path $base_dir` +subtitles_ru_small_version=`$current_dir/fixtures/subtitles_ru_small.sh version $base_dir` +echo "" >> $report_path + + +echo "Spell checkers:" >> $report_path +rg_path=`$current_dir/uut/rg.sh path $base_dir` 
+rg_version=`$current_dir/uut/rg.sh version $base_dir` +if [[ -z $rg_path ]]; then + >&2 echo "Warning: rg uut is unavailable" + echo "- rg: N/A" >> $report_path +else + echo "- $rg_version" >> $report_path +fi + +typos_path=`$current_dir/uut/typos.sh path $base_dir` +typos_version=`$current_dir/uut/typos.sh version $base_dir` +if [[ -z $typos_path ]]; then + >&2 echo "Warning: typos uut is unavailable" + echo "- typos: N/A" >> $report_path +else + echo "- $typos_version" >> $report_path +fi + +misspell_rs_path=`$current_dir/uut/misspell_rs.sh path $base_dir` +misspell_rs_version=`$current_dir/uut/misspell_rs.sh version $base_dir` +if [[ -z $misspell_rs_path ]]; then + >&2 echo "Warning: misspell_rs uut is unavailable" + echo "- misspell_rs: N/A" >> $report_path +else + echo "- $misspell_rs_version" >> $report_path +fi + +misspell_go_path=`$current_dir/uut/misspell_go.sh path $base_dir` +misspell_go_version=`$current_dir/uut/misspell_go.sh version $base_dir` +if [[ -z $misspell_go_path ]]; then + >&2 echo "Warning: misspell_go uut is unavailable" + echo "- misspell_go: N/A" >> $report_path +else + echo "- $misspell_go_version" >> $report_path +fi + +codespell_path=`$current_dir/uut/codespell.sh path $base_dir` +codespell_version=`$current_dir/uut/codespell.sh version $base_dir` +if [[ -z $codespell_path ]]; then + >&2 echo "Warning: codespell uut is unavailable" + echo "- codespell: N/A" >> $report_path +else + echo "- $codespell_version" >> $report_path +fi + +scspell_path=`$current_dir/uut/scspell.sh path $base_dir` +scspell_version=`$current_dir/uut/scspell.sh version $base_dir` +if [[ -z $scspell_path ]]; then + >&2 echo "Warning: scspell uut is unavailable" + echo "- scspell: N/A" >> $report_path +else + echo "- $scspell_version" >> $report_path +fi +echo "" >> $report_path + + +echo "## linux_clean fixture" >> $report_path +echo "" >> $report_path +if [[ -z $linux_clean_path ]]; then + >&2 echo "Warning: linux_clean fixture is unavailable" + echo "N/A" >> 
$report_path +else + echo "linux_clean: $linux_clean_version" >> $report_path + echo "" >> $report_path + rg_command="" + if [[ ! -z $rg_path ]]; then + rg_command="$rg_path bin $linux_clean_path" + fi + typos_command="" + if [[ ! -z $typos_path ]]; then + typos_command="$typos_path $linux_clean_path" + fi + misspell_rs_command="" + if [[ ! -z $misspell_rs_path ]]; then + misspell_rs_command="$misspell_rs_path $linux_clean_path" + fi + misspell_go_command="" + if [[ ! -z $misspell_go_path ]]; then + misspell_go_command="$misspell_go_path $linux_clean_path" + fi + # Skipping scspell, doesn't work on directories + codespell_command="" + if [[ ! -z $codespell_path ]]; then + codespell_command="$codespell_path $linux_clean_path" + fi + hyperfine --warmup 1 -i --export-json $report_prefix-linux_clean.json --export-markdown $report_prefix-linux_clean.md "$rg_command" "$typos_command" "$misspell_rs_command" "$misspell_go_command" "$codespell_command" # export per fixture so earlier results are not overwritten + cat $report_prefix-linux_clean.md >> $report_path +fi +echo "" >> $report_path + + +echo "## linux_built fixture" >> $report_path +echo "" >> $report_path +if [[ -z $linux_built_path ]]; then + >&2 echo "Warning: linux_built fixture is unavailable" + echo "N/A" >> $report_path +else + echo "linux_built: $linux_built_version" >> $report_path + echo "" >> $report_path + rg_command="" + if [[ ! -z $rg_path ]]; then + rg_command="$rg_path bin $linux_built_path" + fi + typos_command="" + if [[ ! -z $typos_path ]]; then + typos_command="$typos_path $linux_built_path" + fi + misspell_rs_command="" + if [[ ! -z $misspell_rs_path ]]; then + misspell_rs_command="$misspell_rs_path $linux_built_path" + fi + misspell_go_command="" + if [[ ! -z $misspell_go_path ]]; then + misspell_go_command="$misspell_go_path $linux_built_path" + fi + # Skipping scspell, doesn't work on directories + codespell_command="" + if [[ ! 
-z $codespell_path ]]; then + codespell_command="$codespell_path $linux_built_path" + fi + hyperfine --warmup 1 -i --export-json $report_prefix-linux_built.json --export-markdown $report_prefix-linux_built.md "$rg_command" "$typos_command" "$misspell_rs_command" "$misspell_go_command" "$codespell_command" # export per fixture so earlier results are not overwritten + cat $report_prefix-linux_built.md >> $report_path +fi +echo "" >> $report_path + + +if [[ -z $subtitles_en_path ]]; then + >&2 echo "Warning: subtitles_en fixture is unavailable" +fi + + +if [[ -z $subtitles_en_small_path ]]; then + >&2 echo "Warning: subtitles_en_small fixture is unavailable" +fi + + +echo "## subtitles_ru_small fixture" >> $report_path +echo "" >> $report_path +if [[ -z $subtitles_ru_small_path ]]; then + >&2 echo "Warning: subtitles_ru_small fixture is unavailable" + echo "N/A" >> $report_path +else + echo "subtitles_ru_small: $subtitles_ru_small_version" >> $report_path + echo "" >> $report_path + rg_command="" + if [[ ! -z $rg_path ]]; then + rg_command="$rg_path bin $subtitles_ru_small_path" + fi + typos_command="" + if [[ ! -z $typos_path ]]; then + typos_command="$typos_path $subtitles_ru_small_path" + fi + misspell_rs_command="" + if [[ ! -z $misspell_rs_path ]]; then + misspell_rs_command="$misspell_rs_path $subtitles_ru_small_path" + fi + misspell_go_command="" + if [[ ! -z $misspell_go_path ]]; then + misspell_go_command="$misspell_go_path $subtitles_ru_small_path" + fi + scspell_command="" + if [[ ! -z $scspell_path ]]; then + scspell_command="$scspell_path $subtitles_ru_small_path" + fi + codespell_command="" + if [[ ! 
-z $codespell_path ]]; then + codespell_command="$codespell_path $subtitles_ru_small_path" + fi + hyperfine --warmup 1 -i --export-json $report_prefix-subtitles_ru_small.json --export-markdown $report_prefix-subtitles_ru_small.md "$rg_command" "$typos_command" "$misspell_rs_command" "$misspell_go_command" "$scspell_command" "$codespell_command" # export per fixture so earlier results are not overwritten + cat $report_prefix-subtitles_ru_small.md >> $report_path +fi +echo "" >> $report_path + + +echo "## ripgrep_clean fixture" >> $report_path +echo "" >> $report_path +if [[ -z $ripgrep_clean_path ]]; then + >&2 echo "Warning: ripgrep_clean fixture is unavailable" + echo "N/A" >> $report_path +else + echo "ripgrep_clean: $ripgrep_clean_version" >> $report_path + echo "" >> $report_path + rg_command="" + if [[ ! -z $rg_path ]]; then + rg_command="$rg_path bin $ripgrep_clean_path" + fi + typos_command="" + if [[ ! -z $typos_path ]]; then + typos_command="$typos_path $ripgrep_clean_path" + fi + misspell_rs_command="" + if [[ ! -z $misspell_rs_path ]]; then + misspell_rs_command="$misspell_rs_path $ripgrep_clean_path" + fi + misspell_go_command="" + if [[ ! -z $misspell_go_path ]]; then + misspell_go_command="$misspell_go_path $ripgrep_clean_path" + fi + # Skipping scspell, doesn't work on directories + codespell_command="" + if [[ ! -z $codespell_path ]]; then + codespell_command="$codespell_path $ripgrep_clean_path" + fi + hyperfine --warmup 1 -i --export-json $report_prefix-ripgrep_clean.json --export-markdown $report_prefix-ripgrep_clean.md "$rg_command" "$typos_command" "$misspell_rs_command" "$misspell_go_command" "$codespell_command" # export per fixture so earlier results are not overwritten + cat $report_prefix-ripgrep_clean.md >> $report_path +fi +echo "" >> $report_path + + +echo "## ripgrep_built fixture" >> $report_path +echo "" >> $report_path +if [[ -z $ripgrep_built_path ]]; then + >&2 echo "Warning: ripgrep_built fixture is unavailable" + echo "N/A" >> $report_path +else + echo "ripgrep_built: $ripgrep_built_version" >> $report_path + echo "" >> $report_path + rg_command="" + if [[ ! 
-z $rg_path ]]; then + rg_command="$rg_path bin $ripgrep_built_path" + fi + typos_command="" + if [[ ! -z $typos_path ]]; then + typos_command="$typos_path $ripgrep_built_path" + fi + misspell_rs_command="" + if [[ ! -z $misspell_rs_path ]]; then + misspell_rs_command="$misspell_rs_path $ripgrep_built_path" + fi + misspell_go_command="" + if [[ ! -z $misspell_go_path ]]; then + misspell_go_command="$misspell_go_path $ripgrep_built_path" + fi + # Skipping scspell, doesn't work on directories + codespell_command="" + if [[ ! -z $codespell_path ]]; then + codespell_command="$codespell_path $ripgrep_built_path" + fi + hyperfine --warmup 1 -i --export-json $report_prefix-ripgrep_built.json --export-markdown $report_prefix-ripgrep_built.md "$rg_command" "$typos_command" "$misspell_rs_command" "$misspell_go_command" "$codespell_command" # export per fixture so earlier results are not overwritten + cat $report_prefix-ripgrep_built.md >> $report_path +fi +echo "" >> $report_path diff --git a/benchsuite/fixtures/linux_built.sh b/benchsuite/fixtures/linux_built.sh new file mode 100755 index 0000000..f90cbcd --- /dev/null +++ b/benchsuite/fixtures/linux_built.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +# Pre-reqs: +# - git +# - able to build a Linux kernel +# - apt install libelf-dev, bc +set -e + +FIXTURE_DIR="linux_built" + +function cpucount() { + echo `grep -c ^processor /proc/cpuinfo` +} + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +current_dir=`dirname $(readlink -f $0)` +root_dir="${base_dir}/$FIXTURE_DIR" +out_file="$root_dir" +log_path="${base_dir}/$FIXTURE_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! 
-e $out_file ]]; then + echo "Downloading $FIXTURE_DIR" >> ${log_path} + in_file=`$current_dir/linux_clean.sh download $base_dir` + cp -R $in_file $out_file + # We want to build the kernel because the process of building it produces + # a lot of junk in the repository that a search tool probably shouldn't + # touch. + pushd $root_dir >> ${log_path} + make defconfig >> ${log_path} + make -j $(cpucount) >> ${log_path} + popd >> ${log_path} + fi +} + +function version() { + if [[ -e $out_file ]]; then + pushd $root_dir >> ${log_path} + echo "linux `git rev-parse HEAD`" + popd >> ${log_path} + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac diff --git a/benchsuite/fixtures/linux_clean.sh b/benchsuite/fixtures/linux_clean.sh new file mode 100755 index 0000000..54413e6 --- /dev/null +++ b/benchsuite/fixtures/linux_clean.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# Pre-reqs: +# - git +set -e + +FIXTURE_DIR="linux_clean" +# Clone from burntsushi's fork so that we always get the same corpus *and* still +# do a shallow clone. Shallow clones are much much cheaper than full +# clones. +REPO_URL="https://github.com/BurntSushi/linux" # https: GitHub no longer serves the unauthenticated git:// protocol + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +root_dir="${base_dir}/$FIXTURE_DIR" +out_file="$root_dir" +log_path="${base_dir}/$FIXTURE_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! 
-e $out_file ]]; then + mkdir -p ${root_dir} + echo "Downloading $FIXTURE_DIR" >> ${log_path} + git clone --depth 1 $REPO_URL $root_dir >> ${log_path} + fi +} + +function version() { + if [[ -e $out_file ]]; then + pushd $root_dir >> ${log_path} + echo "linux `git rev-parse HEAD`" + popd >> ${log_path} + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac diff --git a/benchsuite/fixtures/ripgrep_built.sh b/benchsuite/fixtures/ripgrep_built.sh new file mode 100755 index 0000000..71d4a83 --- /dev/null +++ b/benchsuite/fixtures/ripgrep_built.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# Pre-reqs: +# - git +# - rust +set -e + +FIXTURE_DIR="ripgrep_built" + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +current_dir=`dirname $(readlink -f $0)` +root_dir="${base_dir}/$FIXTURE_DIR" +out_file="$root_dir" +log_path="${base_dir}/$FIXTURE_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! -e $out_file ]]; then + echo "Downloading $FIXTURE_DIR" >> ${log_path} + in_file=`$current_dir/ripgrep_clean.sh download $base_dir` + cp -R $in_file $out_file + # We want to build ripgrep (via `cargo check`) because the process of building it produces + # a lot of junk in the repository that a search tool probably shouldn't + # touch. 
+ pushd $root_dir >> ${log_path} + cargo check >> ${log_path} + popd >> ${log_path} + fi +} + +function version() { + if [[ -e $out_file ]]; then + pushd $root_dir >> ${log_path} + echo "rg `git rev-parse HEAD`" + popd >> ${log_path} + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac diff --git a/benchsuite/fixtures/ripgrep_clean.sh b/benchsuite/fixtures/ripgrep_clean.sh new file mode 100755 index 0000000..127d175 --- /dev/null +++ b/benchsuite/fixtures/ripgrep_clean.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# Pre-reqs: +# - git +set -e + +FIXTURE_DIR="ripgrep_clean" +REPO_TAG="11.0.1" +REPO_URL="https://github.com/BurntSushi/ripgrep" # https: GitHub no longer serves the unauthenticated git:// protocol + +function cpucount() { + echo `grep -c ^processor /proc/cpuinfo` +} + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +root_dir="${base_dir}/$FIXTURE_DIR" +out_file="$root_dir" +log_path="${base_dir}/$FIXTURE_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! 
-e $out_file ]]; then + mkdir -p ${root_dir} + echo "Downloading $FIXTURE_DIR" >> ${log_path} + git clone --depth 1 $REPO_URL --branch $REPO_TAG $root_dir >> ${log_path} + fi +} + +function version() { + if [[ -e $out_file ]]; then + pushd $root_dir >> ${log_path} + echo "rg `git rev-parse HEAD`" + popd >> ${log_path} + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac diff --git a/benchsuite/fixtures/subtitles_en.sh b/benchsuite/fixtures/subtitles_en.sh new file mode 100755 index 0000000..e014c68 --- /dev/null +++ b/benchsuite/fixtures/subtitles_en.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# Pre-reqs: +# - gunzip +# - curl +set -e + +FIXTURE_DIR="subtitles_en" +SUBTITLES_NAME="OpenSubtitles2016.raw.en" +SUBTITLES_NAME_SAMPLE="$SUBTITLES_NAME.sample" +SUBTITLES_NAME_GZ="${SUBTITLES_NAME}.gz" +SUBTITLES_URL="https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2016/mono/en.txt.gz" + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +root_dir="${base_dir}/$FIXTURE_DIR" +out_file="$root_dir/$SUBTITLES_NAME_SAMPLE" +log_path="${base_dir}/$FIXTURE_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! -e $out_file ]]; then + mkdir -p ${root_dir} + echo "Downloading $FIXTURE_DIR" >> ${log_path} + pushd $root_dir >> ${log_path} + curl -L $SUBTITLES_URL -o $SUBTITLES_NAME_GZ + gunzip $SUBTITLES_NAME_GZ + # Get a sample roughly the same size as the Russian corpus so that + # benchmarks finish in a reasonable time. 
+ head -n 32722372 $SUBTITLES_NAME > $SUBTITLES_NAME_SAMPLE + shasum $SUBTITLES_NAME_SAMPLE > $SUBTITLES_NAME_SAMPLE.sha + popd >> ${log_path} + fi +} + +function version() { + if [[ -e $out_file ]]; then + echo "`basename $out_file` `cat $out_file.sha | cut -d " " -f 1`" + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac diff --git a/benchsuite/fixtures/subtitles_en_small.sh b/benchsuite/fixtures/subtitles_en_small.sh new file mode 100755 index 0000000..d2288a5 --- /dev/null +++ b/benchsuite/fixtures/subtitles_en_small.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +set -e + +FIXTURE_DIR="subtitles_en" +SUBTITLES_NAME="OpenSubtitles2016.raw.en" +SUBTITLES_NAME_SMALL="OpenSubtitles2016.raw.en.small" +LINES=10000 + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +current_dir=`dirname $(readlink -f $0)` +root_dir="${base_dir}/$FIXTURE_DIR" +out_file="$root_dir/$SUBTITLES_NAME_SMALL" +log_path="${base_dir}/$FIXTURE_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! 
-e $out_file ]]; then + in_file=`$current_dir/subtitles_en.sh download $base_dir` + head -n $LINES $in_file > $out_file + shasum $out_file > $out_file.sha + fi +} + +function version() { + if [[ -e $out_file ]]; then + echo "`basename $out_file` `cat $out_file.sha | cut -d " " -f 1`" + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac diff --git a/benchsuite/fixtures/subtitles_ru.sh b/benchsuite/fixtures/subtitles_ru.sh new file mode 100755 index 0000000..c9a3895 --- /dev/null +++ b/benchsuite/fixtures/subtitles_ru.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +set -e + +FIXTURE_DIR="subtitles_ru" +SUBTITLES_NAME="OpenSubtitles2016.raw.ru" +SUBTITLES_NAME_GZ="${SUBTITLES_NAME}.gz" +SUBTITLES_URL="https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2016/mono/ru.txt.gz" + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +root_dir="${base_dir}/$FIXTURE_DIR" +out_file="$root_dir/$SUBTITLES_NAME" +log_path="${base_dir}/$FIXTURE_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! 
-e $out_file ]]; then + mkdir -p ${root_dir} + echo "Downloading $FIXTURE_DIR" >> ${log_path} + pushd $root_dir >> ${log_path} + curl -L $SUBTITLES_URL -o $SUBTITLES_NAME_GZ + gunzip $SUBTITLES_NAME_GZ + shasum $SUBTITLES_NAME > $SUBTITLES_NAME.sha + popd >> ${log_path} + fi +} + +function version() { + if [[ -e $out_file ]]; then + echo "`basename $out_file` `cat $out_file.sha | cut -d " " -f 1`" + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac diff --git a/benchsuite/fixtures/subtitles_ru_small.sh b/benchsuite/fixtures/subtitles_ru_small.sh new file mode 100755 index 0000000..02f89de --- /dev/null +++ b/benchsuite/fixtures/subtitles_ru_small.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +set -e + +FIXTURE_DIR="subtitles_ru" +SUBTITLES_NAME="OpenSubtitles2016.raw.ru" +SUBTITLES_NAME_SMALL="$SUBTITLES_NAME.small" +LINES=10000 + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +current_dir=`dirname $(readlink -f $0)` +root_dir="${base_dir}/$FIXTURE_DIR" +out_file="$root_dir/$SUBTITLES_NAME_SMALL" +log_path="${base_dir}/$FIXTURE_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! 
-e $out_file ]]; then + in_file=`$current_dir/subtitles_ru.sh download $base_dir` + head -n $LINES $in_file > $out_file + shasum $out_file > $out_file.sha + fi +} + +function version() { + if [[ -e $out_file ]]; then + echo "`basename $out_file` `cat $out_file.sha | cut -d " " -f 1`" + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac diff --git a/benchsuite/uut/codespell.sh b/benchsuite/uut/codespell.sh new file mode 100755 index 0000000..077e002 --- /dev/null +++ b/benchsuite/uut/codespell.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# Pre-reqs: +# - python3-venv +set -e + +UUT_DIR="codespell" +CODESPELL_VERSION="1.15.0" + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +root_dir="${base_dir}/$UUT_DIR" +bin_dir=$root_dir/bin +out_file="$bin_dir/codespell" +log_path="${base_dir}/$UUT_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! -e $out_file ]]; then + mkdir -p ${base_dir} + echo "Downloading $UUT_DIR" >> ${log_path} + + python3 -m venv $root_dir >> $log_path + + # My version of Ubuntu is using 8.1.1 and unsure if I want to touch it. 
+ $bin_dir/pip install -U pip==9.0.3 >> $log_path + $bin_dir/pip install -U codespell==$CODESPELL_VERSION >> $log_path + fi +} + +function version() { + if [[ -e $out_file ]]; then + echo "$UUT_DIR `$out_file --version` w/ `$bin_dir/python3 --version`" + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac diff --git a/benchsuite/uut/misspell_go.sh b/benchsuite/uut/misspell_go.sh new file mode 100755 index 0000000..6d69780 --- /dev/null +++ b/benchsuite/uut/misspell_go.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +set -e + +UUT_DIR="misspell_go" +MISSPELL_GO_VERSION="2.2" + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +root_dir="${base_dir}/$UUT_DIR" +out_file="$root_dir/bin/misspell" +log_path="${base_dir}/$UUT_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! 
-e $out_file ]]; then + mkdir -p ${root_dir} + echo "Downloading $UUT_DIR" >> ${log_path} + + pushd $root_dir >> $log_path + curl -L -o ./install-misspell.sh https://git.io/misspell + sh ./install-misspell.sh >> $log_path + popd >> $log_path + fi +} + +function version() { + if [[ -e $out_file ]]; then + echo "$UUT_DIR `$out_file -v`" + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac diff --git a/benchsuite/uut/misspell_rs.sh b/benchsuite/uut/misspell_rs.sh new file mode 100755 index 0000000..a23dfad --- /dev/null +++ b/benchsuite/uut/misspell_rs.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +set -e + +UUT_DIR="misspell_rs" +MISSPELL_RS_VERSION="0.2.0" + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +root_dir="${base_dir}/$UUT_DIR" +out_file="$root_dir/bin/misspell" +log_path="${base_dir}/$UUT_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! 
-e $out_file ]]; then + mkdir -p ${root_dir} + mkdir -p ${base_dir}/_cache + echo "Downloading $UUT_DIR" >> ${log_path} + + cargo --version > $root_dir/cargo.txt + CARGO_TARGET_DIR=$base_dir/_cache cargo install misspell --version $MISSPELL_RS_VERSION --root $root_dir + fi +} + +function version() { + if [[ -e $out_file ]]; then + echo "`$out_file --version` w/ `cat $root_dir/cargo.txt`" + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac diff --git a/benchsuite/uut/rg.sh b/benchsuite/uut/rg.sh new file mode 100755 index 0000000..8cd9921 --- /dev/null +++ b/benchsuite/uut/rg.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +set -e + +UUT_DIR="rg" +RG_VERSION="11.0.1" +RG_TARGET="x86_64-unknown-linux-musl" + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +root_dir="${base_dir}/$UUT_DIR" +out_file="$root_dir/rg" +log_path="${base_dir}/$UUT_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! -e $out_file ]]; then + mkdir -p ${root_dir} + echo "Downloading $UUT_DIR" >> ${log_path} + + pushd $root_dir >> $log_path + curl -L -o rg.tgz https://github.com/BurntSushi/ripgrep/releases/download/$RG_VERSION/ripgrep-$RG_VERSION-$RG_TARGET.tar.gz # release tag and file name both follow RG_VERSION + tar -zxvf rg.tgz >> $log_path + cp */rg . 
+ popd >> $log_path + fi +} + +function version() { + if [[ -e $out_file ]]; then + echo "`$out_file --version`" + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac diff --git a/benchsuite/uut/scspell.sh b/benchsuite/uut/scspell.sh new file mode 100755 index 0000000..2ee6bca --- /dev/null +++ b/benchsuite/uut/scspell.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +set -e + +UUT_DIR="scspell" +SCSPELL_VERSION="2.2" + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +root_dir="${base_dir}/$UUT_DIR" +bin_dir=$root_dir/bin +out_file="$bin_dir/scspell" +log_path="${base_dir}/$UUT_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! -e $out_file ]]; then + mkdir -p ${base_dir} + echo "Downloading $UUT_DIR" >> ${log_path} + + python3 -m venv $root_dir >> $log_path + + # My version of Ubuntu is using 8.1.1 and unsure if I want to touch it. 
+ $bin_dir/pip install -U pip==9.0.3 >> $log_path + $bin_dir/pip install -U scspell3k==$SCSPELL_VERSION >> $log_path + fi +} + +function version() { + if [[ -e $out_file ]]; then + echo "`$out_file --version` w/ `$bin_dir/python3 --version`" + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac diff --git a/benchsuite/uut/typos.sh b/benchsuite/uut/typos.sh new file mode 100755 index 0000000..ed593a5 --- /dev/null +++ b/benchsuite/uut/typos.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +set -e + +UUT_DIR="typos" + +if [[ $# -eq 0 ]]; then + exit 1 +fi +command=$1 + +base_dir="/tmp/benchsuite" +if [[ $# -ge 2 ]]; then + base_dir=$2 +fi + +current_dir=`dirname $(readlink -f $0)` +root_dir="${base_dir}/$UUT_DIR" +out_file="$root_dir/bin/typos" +log_path="${base_dir}/$UUT_DIR.log" + +function path() { + if [[ -e $out_file ]]; then + echo $out_file + fi +} + +function clear() { + rm -Rf ${root_dir} ${log_path} +} + +function download() { + if [[ ! 
-e $out_file ]]; then + mkdir -p ${root_dir} + echo "Downloading $UUT_DIR" >> ${log_path} + + cargo --version > $root_dir/cargo.txt + cargo install --path `realpath $current_dir/../..` --root $root_dir + fi +} + +function version() { + if [[ -e $out_file ]]; then + echo "`$out_file --version` w/ `cat $root_dir/cargo.txt`" + fi +} + +case $command in + path) + echo $(path) + ;; + clear) + echo $(clear) + ;; + version) + echo $(version) + ;; + download) + download + echo $(path) + ;; + *) + >&2 echo "Invalid command: $command" + exit 1 + ;; +esac From ca78fed347fe410fc1a1f52a9ddb13e8414df944 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 24 Oct 2019 21:04:12 -0600 Subject: [PATCH 3/3] perf: Benchmark 0.1 --- benchsuite/runs/2019-10-24-Nightblood.md | 64 ++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 benchsuite/runs/2019-10-24-Nightblood.md diff --git a/benchsuite/runs/2019-10-24-Nightblood.md b/benchsuite/runs/2019-10-24-Nightblood.md new file mode 100644 index 0000000..2ef682c --- /dev/null +++ b/benchsuite/runs/2019-10-24-Nightblood.md @@ -0,0 +1,64 @@ + +# Spell Check Shootout + +These are the results as of 2019-10-24 + +Command: +```bash +$ ./benchsuite.sh . 
Nightblood +``` + + +Spell checkers: +- ripgrep 11.0.1 (rev 973de50c9e) -SIMD -AVX (compiled) +SIMD +AVX (runtime) +- typos-cli 0.1.0 w/ cargo 1.38.0 (23ef9a4ef 2019-08-20) +- misspell 0.2.0 w/ cargo 1.38.0 (23ef9a4ef 2019-08-20) +- misspell_go 0.3.4 +- codespell 1.15.0 w/ Python 3.5.2 +- scspell 2.2 w/ Python 3.5.2 + +## linux_clean fixture + +N/A + +## linux_built fixture + +N/A + +## subtitles_ru_small fixture + +subtitles_ru_small: OpenSubtitles2016.raw.ru.small c4549d470463cae24b3dbb1efd138192242c0853 + +| Command | Mean [ms] | Min…Max [ms] | +|:---|---:|---:| +| `./rg/rg bin ./subtitles_ru/OpenSubtitles2016.raw.ru.small` | 7.6 ± 1.2 | 5.7…12.0 | +| `./typos/bin/typos ./subtitles_ru/OpenSubtitles2016.raw.ru.small` | 61.2 ± 4.1 | 52.4…70.1 | +| `./misspell_rs/bin/misspell ./subtitles_ru/OpenSubtitles2016.raw.ru.small` | 33.5 ± 2.3 | 30.2…40.0 | +| `./misspell_go/bin/misspell ./subtitles_ru/OpenSubtitles2016.raw.ru.small` | 40.2 ± 2.6 | 34.1…46.1 | +| `./scspell/bin/scspell ./subtitles_ru/OpenSubtitles2016.raw.ru.small` | 281.5 ± 3.8 | 276.8…289.3 | +| `./codespell/bin/codespell ./subtitles_ru/OpenSubtitles2016.raw.ru.small` | 311.8 ± 5.8 | 299.8…321.8 | + +## ripgrep_clean fixture + +ripgrep_clean: rg 973de50c9ef451da2cfcdfa86f2b2711d8d6ff48 + +| Command | Mean [ms] | Min…Max [ms] | +|:---|---:|---:| +| `./rg/rg bin ./ripgrep_clean` | 27.6 ± 5.1 | 20.6…38.1 | +| `./typos/bin/typos ./ripgrep_clean` | 168.0 ± 11.4 | 145.4…182.4 | +| `./misspell_rs/bin/misspell ./ripgrep_clean` | 145.4 ± 4.1 | 136.1…153.0 | +| `./misspell_go/bin/misspell ./ripgrep_clean` | 214.8 ± 7.8 | 193.4…226.5 | +| `./codespell/bin/codespell ./ripgrep_clean` | 651.0 ± 15.1 | 628.9…682.0 | + +## ripgrep_built fixture + +ripgrep_built: rg 973de50c9ef451da2cfcdfa86f2b2711d8d6ff48 + +| Command | Mean [ms] | Min…Max [ms] | +|:---|---:|---:| +| `./rg/rg bin ./ripgrep_built` | 32.5 ± 4.9 | 26.1…41.5 | +| `./typos/bin/typos ./ripgrep_built` | 174.1 ± 5.9 | 163.8…187.5 | +| `./misspell_rs/bin/misspell 
./ripgrep_built` | 143.8 ± 5.0 | 137.2…161.0 | +| `./misspell_go/bin/misspell ./ripgrep_built` | 278.6 ± 8.1 | 266.7…291.6 | +| `./codespell/bin/codespell ./ripgrep_built` | 840.5 ± 11.2 | 819.4…853.0 | +