mirror of
https://github.com/crate-ci/typos.git
synced 2024-11-27 11:31:00 -05:00
a3fabbd855
Fixtures were taken from ripgrep. The framework was rewritten to be more composable (rather than a single Python script that had both generic fixtures and selection of units-under-test). One of the goals was to completely generate a report that would include all relevant information for reproducing the results or adding nuance for when results change. Having problems with subtitles_en, so it's not fully included at the moment.
76 lines
1.5 KiB
Bash
Executable file
76 lines
1.5 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Fixture script for the "subtitles_en" benchmark corpus.
#
# Usage: <script> <path|clear|version|download> [base_dir]
#
# Pre-reqs:
# - gunzip
# - curl
# - shasum (used by `download` to record the sample's checksum)
set -e

# Directory (under the base dir) holding this fixture; also used as the
# stem of the log file name.
FIXTURE_DIR="subtitles_en"
# Name of the decompressed corpus file.
SUBTITLES_NAME="OpenSubtitles2016.raw.en"
# Name of the truncated sample that the other commands report on.
SUBTITLES_NAME_SAMPLE="$SUBTITLES_NAME.sample"
# Name of the compressed archive as stored locally.
SUBTITLES_NAME_GZ="${SUBTITLES_NAME}.gz"
# Remote location of the compressed corpus.
SUBTITLES_URL="https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2016/mono/en.txt.gz"

# A command is mandatory; explain the expected invocation on stderr instead
# of exiting silently, while keeping the exit status of 1.
if [[ $# -eq 0 ]]; then
  >&2 echo "Usage: ${0##*/} <path|clear|version|download> [base_dir]"
  exit 1
fi
# First argument: the sub-command dispatched at the bottom of the script.
command=$1

# Optional second argument overrides the default base directory.
base_dir="/tmp/benchsuite"
if [[ $# -ge 2 ]]; then
  base_dir=$2
fi

# Directory holding this fixture's files.
root_dir="${base_dir}/$FIXTURE_DIR"
# The sample file that `path`, `version` and `download` operate on.
out_file="$root_dir/$SUBTITLES_NAME_SAMPLE"
# Log file that `download` appends to.
log_path="${base_dir}/$FIXTURE_DIR.log"

# Print the path of the sample file, but only if it already exists.
# Globals: out_file (read)
# Outputs: the path on stdout, or nothing when the file is absent.
function path() {
  if [[ -e "$out_file" ]]; then
    # Quoted printf keeps the path byte-for-byte: no word splitting or
    # glob expansion, even when the base directory contains such characters.
    printf '%s\n' "$out_file"
  fi
}

# Remove the fixture directory and its log file.
# Globals: root_dir (read), log_path (read)
function clear() {
  # Quote both paths so whitespace or glob characters cannot redirect the
  # recursive delete; `--` stops option parsing and `:?` aborts rather than
  # running rm with an empty operand.
  rm -Rf -- "${root_dir:?}" "${log_path:?}"
}

# Download the corpus and build the sample file, unless the sample exists.
# Globals: out_file, root_dir, log_path, FIXTURE_DIR, SUBTITLES_URL,
#          SUBTITLES_NAME, SUBTITLES_NAME_GZ, SUBTITLES_NAME_SAMPLE (all read)
# Outputs: progress note and pushd/popd output are appended to the log file;
#          the sample and its ".sha" checksum file are written in root_dir.
function download() {
  if [[ ! -e "$out_file" ]]; then
    mkdir -p -- "$root_dir"
    echo "Downloading $FIXTURE_DIR" >> "$log_path"
    pushd "$root_dir" >> "$log_path"
    # --fail makes curl exit non-zero on an HTTP error instead of saving the
    # server's error page as if it were the archive.
    curl --fail -L "$SUBTITLES_URL" -o "$SUBTITLES_NAME_GZ"
    # -f lets a retry overwrite a decompressed file left behind by an
    # earlier interrupted run instead of failing on it.
    gunzip -f "$SUBTITLES_NAME_GZ"
    # Get a sample roughly the same size as the Russian corpus so that
    # benchmarks finish in a reasonable time.
    head -n 32722372 "$SUBTITLES_NAME" > "$SUBTITLES_NAME_SAMPLE"
    shasum "$SUBTITLES_NAME_SAMPLE" > "$SUBTITLES_NAME_SAMPLE.sha"
    popd >> "$log_path"
  fi
}

# Print "<sample file name> <checksum>" if the sample file exists.
# The checksum is the first space-separated field of "<sample>.sha".
# Globals: out_file (read)
# Outputs: one line on stdout, or nothing when the sample is absent.
function version() {
  if [[ -e "$out_file" ]]; then
    # Quoted $(...) replaces the unquoted backticks so a path containing
    # whitespace is handled as a single operand; cut reads the checksum
    # file directly rather than through cat.
    printf '%s %s\n' \
      "$(basename -- "$out_file")" \
      "$(cut -d " " -f 1 < "$out_file.sha")"
  fi
}

# Dispatch on the requested sub-command. Each function's output is captured
# first and then printed as a single line (an empty line when there is no
# output). Capturing via a plain assignment lets `set -e` stop the script if
# the function fails, and the quoted printf emits the text unmodified.
case "$command" in
  path)
    result=$(path)
    printf '%s\n' "$result"
    ;;
  clear)
    result=$(clear)
    printf '%s\n' "$result"
    ;;
  version)
    result=$(version)
    printf '%s\n' "$result"
    ;;
  download)
    # Runs in the current shell; afterwards report where the sample is.
    download
    result=$(path)
    printf '%s\n' "$result"
    ;;
  *)
    >&2 echo "Invalid command: $command"
    exit 1
    ;;
esac