chore: enable ingest performance tests for master (#21627)

* chore: enable ingest performance tests (#21624)

* chore: add run script for performance tests

* chore: update circle config to run performance tests

* chore: adjustments to perf test config

* remove 1.8 branch here, as that branch is already configured for perf
  tests
* remove extraneous comments in config and perftest script
* move perftest setup logic out of circle config and into a script
* use a specific ubuntu machine image for circle driver
* omit gzip/gunzip from perf test dataset manipulations

* chore: use r5.2xlarge instance type and put perf test dataset in ramdisk

* chore: fix query logic in nightly aws teardown (#21678)
Cody Shepherd 2021-06-15 10:00:40 -07:00 committed by GitHub
parent 4e7d1b3909
commit 45f8c66345
3 changed files with 286 additions and 0 deletions

.circleci/config.yml

@@ -1,8 +1,24 @@
version: "2.1"
parameters:
aws_teardown:
default: false
type: boolean
aws_teardown_branch:
default: "n/a"
type: string
aws_teardown_sha:
default: "n/a"
type: string
aws_teardown_datestring:
default: "n/a"
type: string
workflows:
version: 2
build:
when:
not: << pipeline.parameters.aws_teardown >>
jobs:
- godeps
- gotest:
@@ -29,6 +45,13 @@ workflows:
ignore: /pull\/[0-9]+/
requires:
- build
- perf_test:
requires:
- cross_build
filters:
branches:
only:
- "master"
- grace_daily:
requires:
- build
@@ -49,6 +72,22 @@
requires:
- e2e-monitor-ci
aws_destroy_daily:
triggers:
- schedule:
# run every day at 10pm -- note: use spaces, not tabs
cron: "0 22 * * *"
filters:
branches:
only:
- "master"
jobs:
- aws_destroy_by_date
aws_destroy_callback:
when: << pipeline.parameters.aws_teardown >>
jobs:
- aws_destroy_by_name
nightly:
triggers:
- schedule:
@@ -487,6 +526,76 @@ jobs:
mv dist/influx* artifacts/
- store_artifacts:
path: artifacts
- persist_to_workspace:
root: .
paths:
- artifacts/*amd64.deb
perf_test:
machine:
image: ubuntu-2004:202010-01
steps:
- attach_workspace:
at: /tmp/workspace
- checkout
- add_ssh_keys:
fingerprints:
- "91:0a:5b:a7:f9:46:77:f3:5d:4a:cf:d2:44:c8:2c:5a"
- run:
name: Run test in AWS instance
no_output_timeout: 20m
command: AWS_ACCESS_KEY_ID=${TEST_AWS_ACCESS_KEY_ID} AWS_SECRET_ACCESS_KEY=${TEST_AWS_SECRET_ACCESS_KEY} scripts/ci/perf_test.sh
aws_destroy_by_date:
machine:
enabled: true
docker_layer_caching: true
steps:
- attach_workspace:
at: /tmp/workspace
- checkout
- add_ssh_keys:
fingerprints:
- "91:0a:5b:a7:f9:46:77:f3:5d:4a:cf:d2:44:c8:2c:5a"
- run:
name: Destroy AWS instances with datestring more than a day old
no_output_timeout: 20m
command: |
set -x
yesterday_date=$(date --date "yesterday" +%Y%m%d)
instance_info=$(AWS_ACCESS_KEY_ID=${TEST_AWS_ACCESS_KEY_ID} \
AWS_SECRET_ACCESS_KEY=${TEST_AWS_SECRET_ACCESS_KEY} \
aws --region us-west-2 ec2 describe-instances \
--filters "Name=tag:Name,Values=oss-perftest-*" \
--query "Reservations[].Instances[].[InstanceId, Tags[?Key=='Name']|[0].Value]" \
--output text)
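# describe-instances returns flat (instance-id, name) pairs; consume two fields per pass and terminate any instance whose date suffix is yesterday or older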
while [ -n "$instance_info" ]; do
instance_id=$(echo $instance_info | tr -s ' ' | cut -d ' ' -f1)
name=$(echo $instance_info | tr -s ' ' | cut -d ' ' -f2)
instance_info=$(echo $instance_info | tr -s ' ' | cut -d ' ' -f3-)
date=$(echo $name | cut -d '-' -f3)
if [ $date -le $yesterday_date ]; then
AWS_ACCESS_KEY_ID=${TEST_AWS_ACCESS_KEY_ID} AWS_SECRET_ACCESS_KEY=${TEST_AWS_SECRET_ACCESS_KEY} aws --region us-west-2 ec2 terminate-instances --instance-ids $instance_id
fi
done
aws_destroy_by_name:
machine:
enabled: true
docker_layer_caching: true
steps:
- attach_workspace:
at: /tmp/workspace
- checkout
- add_ssh_keys:
fingerprints:
- "91:0a:5b:a7:f9:46:77:f3:5d:4a:cf:d2:44:c8:2c:5a"
- run:
name: Destroy AWS instances by constructing name from arguments
no_output_timeout: 20m
command: |
set -x
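# rebuild the instance Name tag from the pipeline parameters passed in by the teardown callback in run_perftest.sh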
name=oss-perftest-<< pipeline.parameters.aws_teardown_datestring >>-<< pipeline.parameters.aws_teardown_branch >>-<< pipeline.parameters.aws_teardown_sha >>
instance_id=$(AWS_ACCESS_KEY_ID=${TEST_AWS_ACCESS_KEY_ID} AWS_SECRET_ACCESS_KEY=${TEST_AWS_SECRET_ACCESS_KEY} aws --region us-west-2 ec2 describe-instances --filters "Name=tag:Name,Values=$name" --query 'Reservations[].Instances[].InstanceId' --output text)
AWS_ACCESS_KEY_ID=${TEST_AWS_ACCESS_KEY_ID} AWS_SECRET_ACCESS_KEY=${TEST_AWS_SECRET_ACCESS_KEY} aws --region us-west-2 ec2 terminate-instances --instance-ids $instance_id
deploy_nightly:
machine:

scripts/ci/perf_test.sh (new executable file, 76 additions)

@@ -0,0 +1,76 @@
set -ex -o pipefail
# get latest ubuntu 20.04 ami for us-west-2
ami_id=$(aws --region us-west-2 ssm get-parameters --names /aws/service/canonical/ubuntu/server/20.04/stable/current/amd64/hvm/ebs-gp2/ami-id --query 'Parameters[0].[Value]' --output text)
# launch ec2 instance
instance_type="r5.2xlarge"
datestring=$(date +%Y%m%d)
instance_info=$(aws --region us-west-2 ec2 run-instances \
--image-id $ami_id \
--instance-type $instance_type \
--block-device-mappings DeviceName=/dev/sda1,Ebs={VolumeSize=200} \
--key-name circleci-oss-test \
--security-group-ids sg-03004366a38eccc97 \
--subnet-id subnet-0c079d746f27ede5e \
--tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=oss-perftest-$datestring-${CIRCLE_BRANCH}-${CIRCLE_SHA1}}]")
# get instance info
ec2_instance_id=$(echo $instance_info | jq -r .Instances[].InstanceId)
sleep 60
ec2_ip=$(aws \
--region us-west-2 \
ec2 describe-instances \
--instance-ids $ec2_instance_id \
--query "Reservations[].Instances[].PublicIpAddress" \
--output text)
while [ -z $ec2_ip ]; do
sleep 5
ec2_ip=$(aws \
--region us-west-2 \
ec2 describe-instances \
--instance-ids $ec2_instance_id \
--query "Reservations[].Instances[].PublicIpAddress" \
--output text)
done
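# best-effort cleanup: terminate the instance if this script is killed before it finishes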
trap "aws --region us-west-2 ec2 terminate-instances --instance-ids $ec2_instance_id" KILL
# push binary and script to instance
debname=$(find /tmp/workspace/artifacts/influxdb*amd64.deb)
base_debname=$(basename $debname)
source_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
scp $debname ubuntu@$ec2_ip:/home/ubuntu/$base_debname
scp ${source_dir}/run_perftest.sh ubuntu@$ec2_ip:/home/ubuntu/run_perftest.sh
# install deb in remote vm and create ramdisk for dataset files
RAMDISK_DIR=/mnt/ramdisk
ssh ubuntu@$ec2_ip << EOF
sudo DEBIAN_FRONTEND=noninteractive apt-get install --assume-yes /home/ubuntu/$base_debname
sudo systemctl unmask influxdb.service
sudo systemctl start influxdb
sudo mkdir -p ${RAMDISK_DIR}
sudo mount -t tmpfs -o size=32G tmpfs ${RAMDISK_DIR}
EOF
# poll for influx service ready
set +e
result=$(ssh ubuntu@$ec2_ip "influx ping")
while [ "$result" != "OK" ]; do
sleep 2
result=$(ssh ubuntu@$ec2_ip "influx ping")
done
set -e
# setup influxdb2
export INFLUXDB2=true
export TEST_ORG=example_org
export TEST_TOKEN=token
ssh ubuntu@$ec2_ip "influx setup -c default -b benchmark_db -n default -o $TEST_ORG -p thisisnotused -r 0 -t $TEST_TOKEN -u ubuntu --skip-verify --force"
# run tests
ssh ubuntu@$ec2_ip "nohup sudo CIRCLE_TEARDOWN=true CIRCLE_TOKEN=${CIRCLE_API_CALLBACK_TOKEN} CLOUD2_BUCKET=${CLOUD2_PERFTEST_BUCKET} CLOUD2_ORG=${CLOUD2_PERFTEST_ORG} DATASET_DIR=${RAMDISK_DIR} DATA_I_TYPE=${instance_type} DB_TOKEN=${CLOUD2_PERFTEST_TOKEN} INFLUXDB2=${INFLUXDB2} INFLUXDB_VERSION=${CIRCLE_BRANCH} NGINX_HOST=localhost TEST_COMMIT=${CIRCLE_SHA1} TEST_ORG=${TEST_ORG} TEST_TOKEN=${TEST_TOKEN} CIRCLE_TEARDOWN_DATESTRING=$datestring ./run_perftest.sh > /home/ubuntu/perftest_log.txt 2>&1 &"

scripts/ci/run_perftest.sh (new executable file, 101 additions)

@@ -0,0 +1,101 @@
#!/usr/bin/sh -ex
# Install Telegraf
wget -qO- https://repos.influxdata.com/influxdb.key | apt-key add -
echo "deb https://repos.influxdata.com/ubuntu focal stable" | tee /etc/apt/sources.list.d/influxdb.list
DEBIAN_FRONTEND=noninteractive apt-get update
DEBIAN_FRONTEND=noninteractive apt-get install -y git jq telegraf
working_dir=$(mktemp -d)
mkdir -p /etc/telegraf
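# telegraf picks up the benchmark result JSON files written below and ships them to the Cloud 2 results bucket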
cat << EOF > /etc/telegraf/telegraf.conf
[[outputs.influxdb_v2]]
urls = ["https://us-west-2-1.aws.cloud2.influxdata.com"]
token = "${DB_TOKEN}"
organization = "${CLOUD2_ORG}"
bucket = "${CLOUD2_BUCKET}"
[[inputs.file]]
files = ["$working_dir/*.json"]
file_tag = "test_name"
data_format = "json"
json_strict = true
json_string_fields = [
"branch",
"commit",
"i_type",
"time"
]
json_time_key = "time"
json_time_format = "unix"
tag_keys = [
"i_type",
"branch"
]
EOF
systemctl restart telegraf
cd $working_dir
# install golang latest version
go_version=$(curl https://golang.org/VERSION?m=text)
go_endpoint="$go_version.linux-amd64.tar.gz"
wget "https://dl.google.com/go/$go_endpoint" -O "$working_dir/$go_endpoint"
rm -rf /usr/local/go
tar -C /usr/local -xzf "$working_dir/$go_endpoint"
# set env variables necessary for go to work during cloud-init
if [ `whoami` = root ]; then
mkdir -p /root/go/bin
export HOME=/root
export GOPATH=/root/go/bin
export PATH=$PATH:/usr/local/go/bin:$GOPATH
fi
go version
# clone influxdb comparisons
git clone https://github.com/influxdata/influxdb-comparisons.git $working_dir/influxdb-comparisons
cd $working_dir/influxdb-comparisons
# install cmds
go get \
github.com/influxdata/influxdb-comparisons/cmd/bulk_data_gen \
github.com/influxdata/influxdb-comparisons/cmd/bulk_load_influx \
github.com/influxdata/influxdb-comparisons/cmd/bulk_query_gen \
github.com/influxdata/influxdb-comparisons/cmd/query_benchmarker_influxdb
# Run and record tests
datestring=$(date +%s)
seed=$datestring
for scale in 50 100 500; do
# generate bulk data
scale_string="scalevar-$scale"
scale_seed_string="$scale_string-seed-$seed"
data_fname="influx-bulk-records-usecase-devops-$scale_seed_string.txt"
$GOPATH/bin/bulk_data_gen --seed=$seed --use-case=devops --scale-var=$scale --format=influx-bulk > ${DATASET_DIR}/$data_fname
# run ingest tests
test_type=ingest
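# each pair is batch-size:workers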
for parseme in "5000:2" "5000:20" "15000:2" "15000:20"; do
batch=$(echo $parseme | cut -d: -f1)
workers=$(echo $parseme | cut -d: -f2)
load_opts="-batch-size=$batch -workers=$workers -urls=http://${NGINX_HOST}:8086 -do-abort-on-exist=false -do-db-create=true -backoff=1s -backoff-timeout=300m0s"
if [ -z $INFLUXDB2 ] || [ $INFLUXDB2 = true ]; then
load_opts="$load_opts -organization=$TEST_ORG -token=$TEST_TOKEN"
fi
cat ${DATASET_DIR}/$data_fname | $GOPATH/bin/bulk_load_influx $load_opts | jq ". += {branch: \"${INFLUXDB_VERSION}\", commit: \"${TEST_COMMIT}\", time: \"$datestring\", i_type: \"${DATA_I_TYPE}\"}" > $working_dir/test-$test_type-$scale_string-batchsize-$batch-workers-$workers.json
done
done
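# flush the collected result JSON files to the results bucket via the telegraf config above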
telegraf --once
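# kick off the aws_teardown pipeline in CircleCI so the test instance gets destroyed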
if [ "${CIRCLE_TEARDOWN}" = "true" ]; then
curl --request POST \
--url https://circleci.com/api/v2/project/github/influxdata/influxdb/pipeline \
--header "Circle-Token: ${CIRCLE_TOKEN}" \
--header 'content-type: application/json' \
--data "{\"branch\":\"${INFLUXDB_VERSION}\", \"parameters\":{\"aws_teardown\": true, \"aws_teardown_branch\":\"${INFLUXDB_VERSION}\", \"aws_teardown_sha\":\"${TEST_COMMIT}\", \"aws_teardown_datestring\":\"${CIRCLE_TEARDOWN_DATESTRING}\"}}"
fi