# influxdb/server_benchmarks/benches/scrape_benches.sh
#!/bin/bash
#
# Scrapes the most recent run of all criterion benchmark results into lineprotocol for analysis
#
# Generate data:
# (cd ~/Codes/influxdb_iox && cargo bench)
#
# Scrape data:
# ./scrape_benches.sh /Users/alamb/Software/influxdb_iox2
#
# To load the output, redirect stdout to a file and ingest it with your
# line-protocol write tool of choice (e.g. `influx write`).
# Scrape the most recent criterion run under <influxdb_iox_dir>/target/criterion
# and emit one line of line protocol per benchmark on stdout.
#
# Globals written: GITSHA, TIMESTAMP (consumed by process_file)
# Arguments:       $1 - path to an influxdb_iox checkout with bench results
# Returns:         exits 1 if no source directory is given
function main {
  SOURCE_DIR=$1
  if [ -z "$SOURCE_DIR" ] ; then
    echo "Error: influxdb_iox source directory not specified" >&2
    echo "Usage: $0 <influxdb_iox_dir>" >&2
    exit 1
  fi

  GITSHA=$(cd "$SOURCE_DIR" && git rev-parse HEAD)

  # Timestamp of the commit in nanoseconds since the epoch (line protocol
  # default precision). %ct is the committer time in seconds since the epoch.
  # NOTE: git must run inside $SOURCE_DIR — the original invoked `git show`
  # from the caller's cwd, which resolves the SHA against the wrong repo (or
  # fails) unless the script happened to be launched from inside the checkout.
  TIMESTAMP=$(( $(cd "$SOURCE_DIR" && git show -s --format="%ct" "$GITSHA") * 1000000000 ))
  # Alternative: use the time this script ran instead:
  # TIMESTAMP=$(( $(date +%s) * 1000000000 ))

  # Criterion produces files named like this:
  #
  #   target/criterion/float_encode_sequential/10000/new/estimates.json
  #   target/criterion/float_encode_sequential/10000/base/estimates.json
  #   target/criterion/float_encode_sequential/100000/change/estimates.json
  #
  # Only the new/estimates.json files hold the most recent run, so match the
  # path directly in find rather than piping through grep.
  find "$SOURCE_DIR/target/criterion" -path '*/new/estimates.json' |
  while read -r estimates_file ; do
    process_file "$estimates_file"
  done
}
# Processes a criterion results file and produces line protocol out
#
# Input:
# {
# "mean":{
# "confidence_interval":{"confidence_level":0.95,"lower_bound":92384.98456288037,"upper_bound":94127.8605349043},
# "point_estimate":93193.31282952648,
# "standard_error":444.9439871182596
# },
# "median":{
# "confidence_interval":{"confidence_level":0.95,"lower_bound":91137.96363636364,"upper_bound":92769.5854020979},
# "point_estimate":91426.08165568294,
# "standard_error":505.4331525578268
# },
# "median_abs_dev": .. (same structure )
# "slope": .. (same structure )
# "std_dev": .. (same structure )
# }
#
# Output: (line protocol)
#
# bench,gitsha=<gitsha>,hostname=trogdor,group_name=float_encode_sequential,bench_name=10000 mean=93193.31282952648,mean_standard_error=444.9439871182596,median=91426.08165568294,median_standard_error=505.4331525578268
# Convert one criterion estimates.json into a single line of line protocol
# on stdout, e.g.:
#
#   bench,gitsha=<sha>,hostname=<host>,group_name=<grp>,bench_name=<n> mean=...,... <ts>
#
# Globals read: GITSHA, TIMESTAMP (set by main)
# Arguments:    $1 - path like .../target/criterion/<group>/<bench>/new/estimates.json
# Outputs:      one line-protocol record on stdout
function process_file {
  local estimates_file=$1
  local dirname fields group_name bench_name host

  # Extract the benchmark path component between target/criterion/ and /new/,
  # e.g. "float_encode_sequential/10000" (dots escaped so they match literally).
  [[ $estimates_file =~ ^.*target/criterion/(.*)/new/estimates\.json$ ]] && dirname=${BASH_REMATCH[1]}

  # Split on `/`: fields[0] is the group name, fields[1] the bench name.
  # https://stackoverflow.com/questions/918886/how-do-i-split-a-string-on-a-delimiter-in-bash
  IFS=/ read -r -a fields <<<"$dirname"

  # Some benchmark names contain spaces, which are illegal in unescaped
  # line-protocol tag values, so replace ALL of them with `_`.
  # (The original `${x/ /_}` replaced only the first space.)
  group_name=${fields[0]// /_}
  bench_name=${fields[1]// /_}

  host=$(hostname)

  # Tag set, then the field set extracted with jq (-j suppresses newlines so
  # everything lands on one line), then the shared commit timestamp.
  echo -n "bench,gitsha=$GITSHA,hostname=${host},group_name=$group_name,bench_name=$bench_name "
  jq -j '"mean=" + (.mean.point_estimate | tostring), ",mean_standard_error=" + (.mean.standard_error | tostring), ",median=" + (.median.point_estimate | tostring), ",median_standard_error=" + (.median.standard_error | tostring)' "$estimates_file"
  echo -n " $TIMESTAMP"
  echo
}
main $*