Merge pull request #11816 from andriyDev/FlakeRateSync

Report flakes only after synchronizing all integration tests
pull/11842/head
Medya Ghazizadeh 2021-06-30 16:38:08 -04:00 committed by GitHub
commit 323733c532
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 139 additions and 25 deletions

View File

@@ -142,6 +142,17 @@ fi
# Add the out/ directory to the PATH, for using new drivers.
export PATH="$(pwd)/out/":$PATH
STARTED_ENVIRONMENTS="gs://minikube-builds/logs/${MINIKUBE_LOCATION}/${COMMIT:0:7}/started_environments_${ROOT_JOB_ID}.txt"
# Ensure STARTED_ENVIRONMENTS exists so we can append (but don't erase any existing entries in STARTED_ENVIRONMENTS)
< /dev/null gsutil cp -n - "${STARTED_ENVIRONMENTS}"
# Copy the job name to APPEND_TMP
APPEND_TMP="gs://minikube-builds/logs/${MINIKUBE_LOCATION}/${COMMIT:0:7}/$(basename $(mktemp))"
echo "${JOB_NAME}"\
| gsutil cp - "${APPEND_TMP}"
# Append
gsutil compose "${STARTED_ENVIRONMENTS}" "${APPEND_TMP}" "${STARTED_ENVIRONMENTS}"
gsutil rm "${APPEND_TMP}"
echo
echo ">> Downloading test inputs from ${MINIKUBE_LOCATION} ..."
gsutil -qm cp \
@@ -441,11 +452,16 @@ if [ -z "${EXTERNAL}" ]; then
gsutil -qm cp "${HTML_OUT}" "gs://${JOB_GCS_BUCKET}.html" || true
echo ">> uploading ${SUMMARY_OUT}"
gsutil -qm cp "${SUMMARY_OUT}" "gs://${JOB_GCS_BUCKET}_summary.json" || true
if [[ "${MINIKUBE_LOCATION}" == "master" ]]; then
./test-flake-chart/upload_tests.sh "${SUMMARY_OUT}"
elif [[ "${JOB_NAME}" == "Docker_Linux" || "${JOB_NAME}" == "Docker_Linux_containerd" || "${JOB_NAME}" == "KVM_Linux" || "${JOB_NAME}" == "KVM_Linux_containerd" ]]; then
./test-flake-chart/report_flakes.sh "${MINIKUBE_LOCATION}" "${SUMMARY_OUT}" "${JOB_NAME}"
fi
FINISHED_ENVIRONMENTS="gs://minikube-builds/logs/${MINIKUBE_LOCATION}/${COMMIT:0:7}/finished_environments_${ROOT_JOB_ID}.txt"
# Ensure FINISHED_ENVIRONMENTS exists so we can append (but don't erase any existing entries in FINISHED_ENVIRONMENTS)
< /dev/null gsutil cp -n - "${FINISHED_ENVIRONMENTS}"
# Copy the job name to APPEND_TMP
APPEND_TMP="gs://minikube-builds/logs/${MINIKUBE_LOCATION}/${COMMIT:0:7}/$(basename $(mktemp))"
echo "${JOB_NAME}"\
| gsutil cp - "${APPEND_TMP}"
gsutil compose "${FINISHED_ENVIRONMENTS}" "${APPEND_TMP}" "${FINISHED_ENVIRONMENTS}"
gsutil rm "${APPEND_TMP}"
else
# Otherwise, put the results in a predictable spot so the upload job can find them
REPORTS_PATH=test_reports

View File

@@ -21,13 +21,13 @@
set -eu -o pipefail
if [ "$#" -ne 3 ]; then
echo "Wrong number of arguments. Usage: report_flakes.sh <PR number> <gopogh_summary.json> <environment>" 1>&2
echo "Wrong number of arguments. Usage: report_flakes.sh <PR number> <short commit> <environment list file>" 1>&2
exit 1
fi
PR_NUMBER=$1
SUMMARY_DATA=$2
ENVIRONMENT=$3
SHORT_COMMIT=$2
ENVIRONMENT_LIST=$3
# To prevent having a super-long comment, add a maximum number of tests to report.
MAX_REPORTED_TESTS=30
@@ -35,13 +35,14 @@ MAX_REPORTED_TESTS=30
DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
TMP_DATA=$(mktemp)
# 1) Process the data in the gopogh summary.
# 2) Filter tests to only include failed tests on the environment (and only get their names).
# 3) Sort the names of the tests.
# 1) Process the data in each gopogh summary.
# 2) Filter tests to only include failed tests (and only get their names and environment).
# 3) Sort by environment, then test name.
# 4) Store in file $TMP_DATA.
< "$SUMMARY_DATA" $DIR/process_data.sh \
| sed -n -r -e "s/[0-9a-f]*,[0-9-]*,$ENVIRONMENT,([a-zA-Z\/_-]*),Failed,[.0-9]*/\1/p" \
| sort \
gsutil cat $(< "${ENVIRONMENT_LIST}" sed -r "s/^/gs:\\/\\/minikube-builds\\/logs\\/${PR_NUMBER}\\/${SHORT_COMMIT}\\/; s/$/_summary.json/") \
| $DIR/process_data.sh \
| sed -n -r -e "s/[0-9a-f]*,[0-9-]*,([a-zA-Z\/_0-9-]*),([a-zA-Z\/_0-9-]*),Failed,[.0-9]*/\1:\2/p" \
| sort \
> "$TMP_DATA"
# Download the precomputed flake rates from the GCS bucket into file $TMP_FLAKE_RATES.
@@ -49,12 +50,12 @@ TMP_FLAKE_RATES=$(mktemp)
gsutil cp gs://minikube-flake-rate/flake_rates.csv "$TMP_FLAKE_RATES"
TMP_FAILED_RATES="$TMP_FLAKE_RATES\_filtered"
# 1) Parse/filter the flake rates to only include the test name and flake rates for environment.
# 2) Sort the flake rates based on test name.
# 1) Parse the flake rates to only include the environment, test name, and flake rates.
# 2) Sort the flake rates based on environment+test name.
# 3) Join the flake rates with the failing tests to only get flake rates of failing tests.
# 4) Sort failed test flake rates based on the flakiness of that test - stable tests should be first on the list.
# 5) Store in file $TMP_FAILED_RATES.
< "$TMP_FLAKE_RATES" sed -n -r -e "s/$ENVIRONMENT,([a-zA-Z\/_-]*),([.0-9]*),[.0-9]*/\1,\2/p" \
< "$TMP_FLAKE_RATES" sed -n -r -e "s/([a-zA-Z0-9_-]*),([a-zA-Z\/0-9_-]*),([.0-9]*),[.0-9]*/\1:\2,\3/p" \
| sort -t, -k1,1 \
| join -t , -j 1 "$TMP_DATA" - \
| sort -g -t, -k2,2 \
@@ -68,12 +69,12 @@ fi
# Create the comment template.
TMP_COMMENT=$(mktemp)
printf "These are the flake rates of all failed tests on %s.\n|Failed Tests|Flake Rate (%%)|\n|---|---|\n" "$ENVIRONMENT" > "$TMP_COMMENT"
printf "These are the flake rates of all failed tests per environment.\n|Environment|Failed Tests|Flake Rate (%%)|\n|---|---|---|\n" > "$TMP_COMMENT"
# 1) Get the first $MAX_REPORTED_TESTS lines.
# 2) Print a row in the table with the test name, flake rate, and a link to the flake chart for that test.
# 2) Print a row in the table with the environment, test name, flake rate, and a link to the flake chart for that test.
# 3) Append these rows to file $TMP_COMMENT.
< "$TMP_FAILED_RATES" head -n $MAX_REPORTED_TESTS \
| sed -n -r -e "s/([a-zA-Z\/_-]*),([.0-9]*)/|\1|\2 ([chart](https:\/\/storage.googleapis.com\/minikube-flake-rate\/flake_chart.html?env=$ENVIRONMENT\&test=\1))|/p" \
| sed -n -r -e "s/([a-zA-Z\/0-9_-]*):([a-zA-Z\/0-9_-]*),([.0-9]*)/|\1|\2|\3 ([chart](https:\/\/storage.googleapis.com\/minikube-flake-rate\/flake_chart.html?env=\1\&test=\2))|/p" \
>> "$TMP_COMMENT"
# If there are too many failing tests, add an extra row explaining this, and a message after the table.

View File

@@ -0,0 +1,73 @@
#!/bin/bash
# Copyright 2021 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script is called once per integration test. If all integration tests that
# have registered themselves in the started environment list have also
# registered themselves in the finished environment list, this script either
# uploads the test results to the flake rate data (on master) or reports flakes
# on the PR.
#
# This script expects the following env variables:
# MINIKUBE_LOCATION: The Github location being run on (e.g. master, 11000).
# COMMIT: Commit hash the tests ran on.
# ROOT_JOB_ID: Job ID to use for synchronization.
set -o pipefail
BUCKET_PATH="gs://minikube-builds/logs/${MINIKUBE_LOCATION}/${COMMIT:0:7}"
STARTED_LIST=$(gsutil cat "${BUCKET_PATH}/started_environments_${ROOT_JOB_ID}.txt" | sort | uniq)
if [ $? -ne 0 ]; then
echo "Unable to read environment list. Likely being run before all tests are ready or after tests have already been uploaded." 1>&2
exit 0
fi
set -eu -o pipefail
FINISHED_LIST=$(mktemp)
gsutil cat "${BUCKET_PATH}/finished_environments_${ROOT_JOB_ID}.txt"\
| sort\
| uniq > "${FINISHED_LIST}"
STARTED_COUNT=$(echo "${STARTED_LIST}" | wc -l)
FINISHED_COUNT=$(\
echo "${STARTED_LIST}"\
| join - "${FINISHED_LIST}"\
| wc -l)
if [ ${STARTED_COUNT} -ne ${FINISHED_COUNT} ]; then
echo "Started environments are not all finished! Started: ${STARTED_LIST}, Finished: $(cat ${FINISHED_LIST})"
exit 0
fi
# Prevent other invocations of this script from uploading the same thing multiple times.
gsutil rm "${BUCKET_PATH}/started_environments_${ROOT_JOB_ID}.txt"
# At this point, we know all integration tests are done and we can process all summaries safely.
# Get directory of this script.
DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
if [[ "${MINIKUBE_LOCATION}" == "master" ]]; then
for ENVIRONMENT in ${STARTED_LIST}; do
SUMMARY="${BUCKET_PATH}/${ENVIRONMENT}_summary.json"
"${DIR}/upload_tests.sh" "${SUMMARY}"
done
else
"${DIR}/report_flakes.sh" "${MINIKUBE_LOCATION}" "${COMMIT:0:7}" "${FINISHED_LIST}"
fi
gsutil rm "${BUCKET_PATH}/finished_environments_${ROOT_JOB_ID}.txt"
rm "${FINISHED_LIST}"

View File

@@ -16,12 +16,12 @@
# Takes a gopogh summary, extracts test data as a CSV and appends to the
# existing CSV data in the GCS bucket.
# Example usage: ./jenkins_upload_tests.sh gopogh_summary.json
# Example usage: ./upload_tests.sh gopogh_summary.json
set -eu -o pipefail
if [ "$#" -ne 1 ]; then
echo "Wrong number of arguments. Usage: jenkins_upload_tests.sh <gopogh_summary.json>" 1>&2
echo "Wrong number of arguments. Usage: upload_tests.sh <gopogh_summary.json>" 1>&2
exit 1
fi

View File

@@ -48,6 +48,13 @@ SUMMARY_OUT="$ARTIFACTS/summary.txt"
echo ">> uploading ${SUMMARY_OUT}"
gsutil -qm cp "${SUMMARY_OUT}" "gs://${JOB_GCS_BUCKET}_summary.json" || true
if [[ "${MINIKUBE_LOCATION}" == "master" ]]; then
./test-flake-chart/upload_tests.sh "${SUMMARY_OUT}"
fi
FINISHED_ENVIRONMENTS="gs://minikube-builds/logs/${MINIKUBE_LOCATION}/${COMMIT:0:7}/finished_environments_${ROOT_JOB_ID}.txt"
# Ensure FINISHED_ENVIRONMENTS exists so we can append (but don't erase any existing entries in FINISHED_ENVIRONMENTS)
< /dev/null gsutil cp -n - "${FINISHED_ENVIRONMENTS}"
# Copy the job name to APPEND_TMP
APPEND_TMP="gs://minikube-builds/logs/${MINIKUBE_LOCATION}/${COMMIT:0:7}/$(basename $(mktemp))"
echo "${JOB_NAME}"\
| gsutil cp - "${APPEND_TMP}"
# Append
gsutil compose "${FINISHED_ENVIRONMENTS}" "${APPEND_TMP}" "${FINISHED_ENVIRONMENTS}"
gsutil rm "${APPEND_TMP}"

View File

@@ -39,6 +39,15 @@ If ($lastexitcode -gt 0) {
Exit $lastexitcode
}
$started_environments="gs://$gcs_bucket/started_environments_$env:ROOT_JOB_ID.txt"
$append_tmp="gs://$gcs_bucket/tmp$(-join ((65..90) + (97..122) | Get-Random -Count 10 | % {[char]$_}))"
# Ensure started_environments exists so we can append (but don't erase any existing entries in started_environments)
$null | gsutil cp -n - "$started_environments"
# Copy the Docker_Windows job name to append_tmp
echo "Docker_Windows" | gsutil cp - "$append_tmp"
gsutil compose "$started_environments" "$append_tmp" "$started_environments"
gsutil rm "$append_tmp"
# Remove unused images and containers
docker system prune --all --force
@@ -89,6 +98,14 @@ gsutil -qm cp testout.json gs://$gcs_bucket/Docker_Windows.json
gsutil -qm cp testout.html gs://$gcs_bucket/Docker_Windows.html
gsutil -qm cp testout_summary.json gs://$gcs_bucket/Docker_Windows_summary.json
$finished_environments="gs://$gcs_bucket/finished_environments_$env:ROOT_JOB_ID.txt"
$append_tmp="gs://$gcs_bucket/tmp$(-join ((65..90) + (97..122) | Get-Random -Count 10 | % {[char]$_}))"
# Ensure finished_environments exists so we can append (but don't erase any existing entries in finished_environments)
$null | gsutil cp -n - "$finished_environments"
# Copy the Docker_Windows job name to append_tmp
echo "Docker_Windows" | gsutil cp - "$append_tmp"
gsutil compose "$finished_environments" "$append_tmp" "$finished_environments"
gsutil rm "$append_tmp"
# Update the PR with the new info
$json = "{`"state`": `"$env:status`", `"description`": `"Jenkins: $description`", `"target_url`": `"$env:target_url`", `"context`": `"Docker_Windows`"}"