326 lines
14 KiB
Bash
Executable File
326 lines
14 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
set -eu -o pipefail
|
|
|
|
########################
|
|
# --- Script Summary ---
|
|
# This script is the junction between the CIs of the public UI and influxdb OSS repos and the monitor-ci CI (private).
|
|
# When the public CI is started, this script kicks off the private CI and waits for it to complete.
|
|
# This script uses the CircleCI APIs to make this magic happen.
|
|
#
|
|
# If the private CI fails, this script will collect the names and artifacts of the failed jobs and report them.
|
|
# This script should support multiple workflows if more are added, although it has not been tested.
|
|
# This script polls the private CI once a minute for roughly 30 minutes (see max_attempts below); if the private CI has not completed by then, this script fails.
|
|
#
|
|
# **For Running from the UI Repository:**
|
|
# If you want to retry failed jobs in the private CI, simply retry this job from the public CI.
|
|
# - This script uses your commit SHA to search for a failed pipeline to retry before starting a new one.
|
|
#
|
|
# If you retry a failing job in the private CI and it passes, you can safely rerun this job in the public CI.
|
|
# - This script uses your commit SHA to search for a passing pipeline before starting a new one.
|
|
# - If you rerun the private CI and it passes, this script will find that pipeline and will not start a new one.
|
|
# - In this situation the script will exit quickly with success.
|
|
#
|
|
# Pipeline Workflow options:
|
|
# - RUN_WORKFLOW Env Var required to determine which workflow to run.
|
|
# - enum options: 'build_oss', 'build_oss_embedded'
|
|
# - e.g. RUN_WORKFLOW='build_oss_embedded'
|
|
#
|
|
# Required Env Vars for all workflows:
|
|
# - RUN_WORKFLOW: enum for which pipeline workflow
|
|
# - API_KEY: the CircleCI API access key
|
|
# - MONITOR_CI_BRANCH: the branch of the monitor-ci repo to start a pipeline with (usually 'master')
|
|
#
|
|
# **For OSS-specific testing:**
|
|
# Since the OSS private CI is very simple, retrying a failing job in the private CI is not supported.
|
|
# OSS-specific testing can include evaluating changes on the OSS master branch against the latest UI acceptance image
|
|
# to make sure OSS API changes don't break the UI, and evaluating changes to an OSS binary with embedded UI assets with
|
|
# a specified UI commit that the UI is from (like a tagged release commit). This allows for the master branches of
|
|
# both the UI and influxdb OSS repositories to always stay compatible, and for OSS release builds to be e2e tested
|
|
# without needing to duplicate the entire private test infrastructure provided in monitor-ci.
|
|
#
|
|
# Required Env Vars for Testing Changes to OSS Master with the Latest Image Published from UI Master:
|
|
# - RUN_WORKFLOW='build_oss'
|
|
# - OSS_SHA: the influxdb repo commit SHA we're running against
|
|
#
|
|
# Required Env Vars for Testing Changes to an OSS Image with Embedded UI with e2e tests from a Specific UI Commit:
|
|
# - RUN_WORKFLOW='build_oss_embedded'
|
|
# - UI_SHA: the UI repo commit SHA we want to build and run e2e tests from
|
|
# - UI_BRANCH: the UI branch where the commit exists
|
|
# - OSS_SHA: the influxdb repo commit SHA we're running against
|
|
|
|
########################
|
|
|
|
# starts a new monitor-ci pipeline with provided parameters
|
|
#######################################
# Triggers a new monitor-ci pipeline through the CircleCI v2 API and records
# which pipeline the rest of the script should watch.
# Globals:
#   API_KEY          (read)    token sent in the Circle-Token header
#   pipeline_id      (written) id of the pipeline that was started
#   pipeline_number  (written) number of the pipeline that was started
# Arguments:
#   $1 - message printed before the pipeline is triggered
#   $2 - JSON request body posted to the pipeline endpoint
# Outputs:
#   progress messages on stdout
# Exits:
#   1 when the request fails or the response carries no pipeline id
#######################################
startNewPipeline() {
  local start_msg=$1
  local req_body=$2
  local response

  printf '\n%s\n' "${start_msg}"

  # curl is tested directly by the `if`: with `set -e` active, a bare failing
  # assignment would abort the script before any error message is printed.
  if ! response=$(curl -s --fail --request POST \
    --url https://circleci.com/api/v2/project/gh/influxdata/monitor-ci/pipeline \
    --header "Circle-Token: ${API_KEY}" \
    --header 'content-type: application/json' \
    --header 'Accept: application/json' \
    --data "${req_body}"); then
    echo "failed to start the monitor-ci pipeline, quitting"
    exit 1
  fi

  # set variables to identify pipeline to watch
  pipeline_id=$(jq -r '.id' <<<"${response}")
  pipeline_number=$(jq -r '.number' <<<"${response}")

  # jq prints the string "null" for a missing key; without an id there is
  # nothing to poll, so stop here instead of polling a bogus URL.
  if [[ -z "${pipeline_id}" || "${pipeline_id}" == "null" ]]; then
    echo "monitor-ci pipeline response did not include a pipeline id, quitting"
    exit 1
  fi

  printf '\nwaiting for monitor-ci pipeline to begin...\n'
  sleep 1m
  printf '\nmonitor-ci pipeline has begun. Running pipeline number %s with id %s\n' \
    "${pipeline_number}" "${pipeline_id}"
}
|
|
|
|
# retries all failed jobs from a previously failed monitor-ci pipeline
|
|
#######################################
# Asks CircleCI to re-run the failed jobs of an existing monitor-ci workflow
# and records that pipeline as the one the rest of the script should watch.
# Globals:
#   API_KEY          (read)    token sent in the Circle-Token header
#   pipeline_id      (written) set to $2
#   pipeline_number  (written) set to $3
# Arguments:
#   $1 - id of the workflow whose failed jobs are re-run
#   $2 - id of the pipeline that owns that workflow
#   $3 - number of the pipeline that owns that workflow
# Outputs:
#   progress messages on stdout
# Exits:
#   1 when the re-run request fails
#######################################
retryFailedPipeline() {
  local failed_pipeline_workflow_id=$1
  local failed_pipeline_id=$2
  local failed_pipeline_number=$3

  # curl is tested directly by the `if`: with `set -e` active, a bare failing
  # command would abort the script before any error message is printed.
  # The response body is not needed, so it is discarded.
  if ! curl -s --fail --request POST \
    --url "https://circleci.com/api/v2/workflow/${failed_pipeline_workflow_id}/rerun" \
    --header "Circle-Token: ${API_KEY}" \
    --header 'content-type: application/json' \
    --header 'Accept: application/json' \
    --data '{ "from_failed": true }' >/dev/null; then
    echo "failed to re-run the monitor-ci pipeline, quitting"
    exit 1
  fi

  # set variables to identify pipeline to watch
  pipeline_id=${failed_pipeline_id}
  pipeline_number=${failed_pipeline_number}

  printf '\nwaiting for monitor-ci pipeline to begin the re-run...\n'
  sleep 1m
  printf '\nmonitor-ci pipeline re-run has begun. Running pipeline number %s with id %s\n' \
    "${pipeline_number}" "${pipeline_id}"
}
|
|
|
|
# cancel if already have a passing pipeline for a given SHA
|
|
#######################################
# Searches the monitor-ci pipeline list for one that was run against the given
# SHA. Exits the script successfully when that pipeline's "build" workflow
# already passed; otherwise records a failed "build" workflow so the caller
# can retry it instead of starting a new pipeline.
# NOTE(review): nothing in the visible part of this script calls this
# function, and the workflow name "build" is hard-coded here.
# Globals:
#   API_KEY                      (read)    token sent in the Circle-Token header
#   found_passing_pipeline       (written) set to 1 when a passing run is found
#   found_failed_pipeline        (written) set to 1 when a failed run is found
#   failed_pipeline_workflow_id  (written) id of the failed "build" workflow
#   failed_pipeline_id           (written) id of the pipeline that owns it
#   failed_pipeline_number       (written) number of that pipeline
# Arguments:
#   $1 - SHA to look for
#   $2 - grep pattern matching the "NAME=<sha>" text in the compiled config
#   $3 - grep pattern for matches that must be ignored
# Outputs:
#   a status message on stdout
# Exits:
#   0 (whole script) when a passing pipeline for the SHA is found
#######################################
earlyTermination() {
  local current_sha=$1
  local regex_line=$2
  local regex_exclusion=$3

  local all_pipelines candidate_id config sha_match pipeline_sha workflows
  local number_build_success_workflows number_build_failed_workflows
  local -a all_pipelines_ids=()

  all_pipelines=$(curl -s --request GET \
    --url "https://circleci.com/api/v2/project/gh/influxdata/monitor-ci/pipeline" \
    --header "Circle-Token: ${API_KEY}" \
    --header 'content-type: application/json' \
    --header 'Accept: application/json')

  # check the status of the workflows for each of these pipelines
  mapfile -t all_pipelines_ids < <(jq -r '.items | .[].id' <<<"${all_pipelines}")

  # A dedicated loop variable is used so the global pipeline_id is untouched.
  for candidate_id in "${all_pipelines_ids[@]}"; do
    config=$(curl -s --request GET \
      --url "https://circleci.com/api/v2/pipeline/${candidate_id}/config" \
      --header "Circle-Token: ${API_KEY}" \
      --header 'content-type: application/json' \
      --header 'Accept: application/json')

    # finds the SHA parameter used in this pipeline by hunting for a specific
    # line; `|| true` keeps "no match" from tripping set -e / pipefail
    sha_match=$(jq '.compiled' <<<"${config}" \
      | grep -o -- "${regex_line}" \
      | grep -v -- "${regex_exclusion}" \
      | head -1 || true)
    # keep only what follows the last '='
    pipeline_sha=${sha_match##*=}

    if [[ "${current_sha}" != "${pipeline_sha}" ]]; then
      continue
    fi

    # check if this pipeline's 'build' workflow is passing
    workflows=$(curl -s --request GET \
      --url "https://circleci.com/api/v2/pipeline/${candidate_id}/workflow" \
      --header "Circle-Token: ${API_KEY}" \
      --header 'content-type: application/json' \
      --header 'Accept: application/json')

    number_build_success_workflows=$(jq \
      '.items | map(select(.name == "build" and .status == "success")) | length' \
      <<<"${workflows}")
    if [[ "${number_build_success_workflows:-0}" -gt 0 ]]; then
      # we've found a successful run
      found_passing_pipeline=1
      break
    fi

    number_build_failed_workflows=$(jq \
      '.items | map(select(.name == "build" and .status == "failed")) | length' \
      <<<"${workflows}")
    if [[ "${number_build_failed_workflows:-0}" -gt 0 ]]; then
      # there's a failed run, let's retry it
      found_failed_pipeline=1
      # Pick the failed "build" workflow itself; the first list entry may be
      # a different workflow or one that did not fail.
      failed_pipeline_workflow_id=$(jq -r \
        '.items | map(select(.name == "build" and .status == "failed")) | .[0].id' \
        <<<"${workflows}")
      failed_pipeline_id=${candidate_id}
      failed_pipeline_number=$(jq -r --arg pipeline_id "${candidate_id}" \
        '.items | map(select(.id == $pipeline_id)) | .[0] | .number' \
        <<<"${all_pipelines}")
      break
    fi
  done

  # terminate early if we found a passing pipeline for this SHA
  if [[ "${found_passing_pipeline:-0}" -eq 1 ]]; then
    printf '\nSUCCESS: Found a passing monitor-ci pipeline for this SHA, will not re-run these tests\n'
    exit 0
  elif [[ "${found_failed_pipeline:-0}" -eq 1 ]]; then
    printf '\nfound a failed monitor-ci pipeline for this SHA, will retry the failed jobs\n'
  else
    printf '\nno passing monitor-ci pipelines found for this SHA, starting a new one\n'
  fi
}
|
|
|
|
# make dir for artifacts
|
|
mkdir -p monitor-ci/test-artifacts/results/{build-oss-image,oss-e2e,build-image,cloud-e2e,cloud-e2e-firefox,cloud-e2e-k8s-idpe,cloud-lighthouse,smoke,build-prod-image,deploy}/{shared,oss,cloud}

# Flags describing monitor-ci pipelines already run on this SHA. They are
# initialised here and only ever changed by earlyTermination, which the code
# below does not call, so a new pipeline is always started.
found_passing_pipeline=0
found_failed_pipeline=0

# --- validate the environment -------------------------------------------
# Every variable is read with a ":-" default so that `set -u` cannot abort
# the script before the matching error message is printed.
if [[ -z "${API_KEY:-}" || -z "${MONITOR_CI_BRANCH:-}" ]]; then
  printf '\nERROR: monitor-ci pipeline missing required env vars. Must set API_KEY and MONITOR_CI_BRANCH.\n'
  exit 1
fi

case "${RUN_WORKFLOW:-}" in
  build_oss)
    required_workflows=("build_oss")
    if [[ -z "${OSS_SHA:-}" ]]; then
      printf '\nERROR: monitor-ci pipeline missing required env vars. Must set OSS_SHA.\n'
      exit 1
    fi
    ;;
  build_oss_embedded)
    required_workflows=("build_oss_embedded")
    if [[ -z "${UI_SHA:-}" || -z "${UI_BRANCH:-}" || -z "${OSS_SHA:-}" ]]; then
      printf '\nERROR: monitor-ci pipeline missing required env vars. Must set UI_SHA, UI_BRANCH, and OSS_SHA.\n'
      exit 1
    fi
    ;;
  *)
    # reached for an unset, empty or unknown RUN_WORKFLOW
    printf "\nERROR: monitor-ci pipeline missing env var RUN_WORKFLOW.\nMust choose one of: 'build_oss', 'build_oss_embedded'\n"
    exit 1
    ;;
esac

# --- build the request ----------------------------------------------------
pipelineStartMsg="starting monitor-ci pipeline targeting monitor-ci branch ${MONITOR_CI_BRANCH}, UI branch ${UI_BRANCH:-master} and using UI SHA ${UI_SHA:-latest}, using OSS SHA ${OSS_SHA:-latest}."

# Parameters posted to the monitor-ci pipeline; unset UI values fall back to
# the placeholders shown here.
reqData="{\"branch\":\"${MONITOR_CI_BRANCH}\", \"parameters\":{ \"run-workflow\":\"${RUN_WORKFLOW}\", \"ui-sha\":\"${UI_SHA:-not-a-real-sha}\", \"ui-branch\":\"${UI_BRANCH:-master}\", \"oss-sha\":\"${OSS_SHA:-}\"}}"

# start a new pipeline if we didn't find an existing one to retry
if [[ "${found_failed_pipeline}" -eq 0 ]]; then
  startNewPipeline "${pipelineStartMsg}" "${reqData}"
else
  retryFailedPipeline "${failed_pipeline_workflow_id}" "${failed_pipeline_id}" "${failed_pipeline_number}"
fi
|
|
|
|
# poll the status of the monitor-ci pipeline
|
|
# Poll the monitor-ci pipeline once a minute. As soon as no workflow is
# "running" or "failing" any more, report on every required workflow,
# download the artifacts of failed jobs and exit with 0 (all required
# workflows succeeded) or 1. If the pipeline is still busy after
# max_attempts minutes, give up with exit code 1.
is_failure=0
attempts=0
max_attempts=30 # minutes

# Performs an authenticated GET against the CircleCI API.
#   $1 - full URL; the response body is written to stdout
circleciGet() {
  curl -s --request GET \
    --url "$1" \
    --header "Circle-Token: ${API_KEY}" \
    --header 'content-type: application/json' \
    --header 'Accept: application/json'
}

# Downloads every artifact of one failed job into
# monitor-ci/test-artifacts/results/<job name>/<shared|cloud|oss>/.
#   $1 - job name, $2 - job number
downloadFailedJobArtifacts() {
  local name=$1
  local job_number=$2
  local artifacts artifacts_length url path filename subdirectory safeName output
  local downloaded=0
  local -a artifacts_urls=()

  artifacts=$(circleciGet "https://circleci.com/api/v1.1/project/github/influxdata/monitor-ci/${job_number}/artifacts")
  artifacts_length=$(jq -r 'length' <<<"${artifacts}")
  if [[ "${artifacts_length}" -eq 0 ]]; then
    printf '\n No artifacts for this failed job.\n'
    return 0
  fi

  mapfile -t artifacts_urls < <(jq -r '.[].url' <<<"${artifacts}")

  # download each artifact
  for url in "${artifacts_urls[@]}"; do
    # -r yields the raw path, so no JSON quotes have to be stripped afterwards
    path=$(jq -r --arg url "${url}" 'map(select(.url == $url)) | .[0].pretty_path' <<<"${artifacts}")
    filename=$(basename -- "${path}")

    # put shared artifacts in the shared folder, cloud ones in cloud,
    # everything else in oss
    if [[ "${path}" == *"shared"* ]]; then
      subdirectory="shared"
    elif [[ "${path}" == *"cloud"* ]]; then
      subdirectory="cloud"
    else
      subdirectory="oss"
    fi

    # job names may contain '/', which must not create nested directories
    safeName="${name//\//-}"
    if [[ "${safeName}" == *"remocal"* ]]; then
      # put all remocal artifacts in the same parent directory
      safeName="remocal/${safeName}"
    fi

    mkdir -p "monitor-ci/test-artifacts/results/${safeName}/${subdirectory}"
    output="monitor-ci/test-artifacts/results/${safeName}/${subdirectory}/${filename}"

    # Best effort: one failed download must not abort the failure report.
    if curl -L -s --request GET \
      --output "${output}" \
      --url "${url}" \
      --header "Circle-Token: ${API_KEY}"; then
      downloaded=$((downloaded + 1))
    else
      printf '\n WARNING: could not download artifact %s\n' "${url}"
    fi
  done
  printf '\n %s artifacts successfully downloaded for this failed job.\n' "${downloaded}"
}

while true; do

  # A failed request or unparsable response leaves the count empty, which is
  # treated as "not finished yet" and simply polled again a minute later.
  workflows=$(circleciGet "https://circleci.com/api/v2/pipeline/${pipeline_id}/workflow") || workflows=""
  number_running_workflows=$(jq -r \
    '.items | map(select(.status == "running" or .status == "failing")) | length' \
    <<<"${workflows}" 2>/dev/null) || number_running_workflows=""

  # when the pipeline has finished
  if [[ "${number_running_workflows}" == "0" ]]; then
    # report failed jobs per required workflow
    for required_workflow_name in "${required_workflows[@]}"; do
      workflow_id=$(jq -r --arg name "${required_workflow_name}" \
        '.items | map(select(.name == $name and .status == "success")) | .[].id' \
        <<<"${workflows}")

      if [[ -n "${workflow_id}" ]]; then
        printf '\nSUCCESS: monitor-ci workflow with id %s passed: https://app.circleci.com/pipelines/github/influxdata/monitor-ci/%s/workflows/%s \n' \
          "${workflow_id}" "${pipeline_number}" "${workflow_id}"
        continue
      fi

      # set job failure
      is_failure=1

      # get the workflow_id of this failed required workflow (if there are
      # multiple, get the most recent one); empty when there is none
      workflow_id=$(jq -r --arg name "${required_workflow_name}" \
        '.items | map(select(.name == $name and .status == "failed")) | sort_by(.created_at) | .[-1].id // empty' \
        <<<"${workflows}")

      if [[ -z "${workflow_id}" ]]; then
        # The workflow neither succeeded nor failed, so there are no failed
        # jobs to inspect for it.
        printf '\n\nFAILURE: required monitor-ci workflow %s did not succeed and no failed run of it was found.\n' \
          "${required_workflow_name}"
        printf 'monitor-ci pipeline link: \nhttps://app.circleci.com/pipelines/github/influxdata/monitor-ci/%s\n' \
          "${pipeline_number}"
        continue
      fi

      # get the jobs that failed for this workflow
      jobs=$(circleciGet "https://circleci.com/api/v2/workflow/${workflow_id}/job")
      failed_jobs=$(jq '.items | map(select(.status == "failed"))' <<<"${jobs}")

      # Names and numbers are read as parallel arrays (same order) so that
      # names with spaces or duplicate names are handled correctly.
      failed_jobs_names=()
      failed_jobs_numbers=()
      mapfile -t failed_jobs_names < <(jq -r '.[].name' <<<"${failed_jobs}")
      mapfile -t failed_jobs_numbers < <(jq -r '.[].job_number' <<<"${failed_jobs}")

      # print the names of the failed jobs
      printf '\nFailed jobs:\n'
      for ((i = 0; i < ${#failed_jobs_names[@]}; i++)); do
        printf ' - %s\n' "${failed_jobs_names[i]}"
      done

      # get the artifacts for each failed job
      printf '\nArtifacts from failed jobs:\n'
      for ((i = 0; i < ${#failed_jobs_names[@]}; i++)); do
        printf '\n===== %s =====\n' "${failed_jobs_names[i]}"
        downloadFailedJobArtifacts "${failed_jobs_names[i]}" "${failed_jobs_numbers[i]}"
      done

      printf '\n\nFAILURE: monitor-ci workflow with id %s failed.\n' "${workflow_id}"
      printf '\n********************************************************\n'
      printf 'monitor-ci pipeline link: \nhttps://app.circleci.com/pipelines/github/influxdata/monitor-ci/%s/workflows/%s\n' \
        "${pipeline_number}" "${workflow_id}"
      printf '\n********************************************************\n'
    done

    exit "${is_failure}"
  fi

  # out of time: stop right after the final poll instead of sleeping again
  if ((attempts >= max_attempts)); then
    break
  fi

  # sleep 1 minute and poll the status again
  remaining_attempts=$((max_attempts - attempts))
  attempts=$((attempts + 1))
  printf "\nmonitor-ci pipeline %s isn't finished yet, waiting another minute... (%s minutes left)\n" \
    "${pipeline_number}" "${remaining_attempts}"
  sleep 1m

done

printf '\nmonitor-ci pipeline did not finish in time, quitting\n'
exit 1
|