Refactor collect_data into collect_data + process_data.

This allows processing data coming from other sources.
pull/11602/head
Andriy Dzikh 2021-06-01 14:01:41 -07:00
parent 419f2506e6
commit 78e9838283
2 changed files with 29 additions and 23 deletions

View File

@@ -1,28 +1,8 @@
#!/bin/bash
# Collect raw test summary data and stream it into process_data.sh, which
# converts it to CSV. Splitting collection from processing allows processing
# data coming from other sources.
DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
# 1) "cat" together all summary files.
# 2) Process all summary files.
gsutil cat gs://minikube-builds/logs/master/*/*_summary.json \
  | "$DIR/process_data.sh"

View File

@@ -0,0 +1,26 @@
#!/bin/bash
# Read test summary JSON (the concatenated *_summary.json files) on stdin and
# emit CSV rows: Commit Hash,Commit Date,Environment,Test,Status,Duration.
# Create temp path for partial data (storing everything but the commit date).
PARTIAL_DATA_PATH=$(mktemp)
# Print the partial path for debugging/convenience.
echo "Partial path: $PARTIAL_DATA_PATH" 1>&2
# Print header.
printf "Commit Hash,Commit Date,Environment,Test,Status,Duration\n"
# 1) Turn each test in each summary file into a CSV line containing its commit
#    hash, environment, test, status, and duration.
# 2) Copy partial data to $PARTIAL_DATA_PATH to join with date later.
# 3) Extract only the commit hash for each row.
# 4) Make the commit hashes unique (we assume the input groups rows from the
#    same hash next to each other). Also force buffering to occur per line so
#    the remainder of the pipe can continue to process.
# 5) Execute git log for each commit to get the date of each.
# 6) Join dates with test data (both sides arrive in the same commit order).
jq -r '((.PassedTests[]? as $name | {commit: .Detail.Details, environment: .Detail.Name, test: $name, duration: .Durations[$name], status: "Passed"}),
(.FailedTests[]? as $name | {commit: .Detail.Details, environment: .Detail.Name, test: $name, duration: .Durations[$name], status: "Failed"}),
(.SkippedTests[]? as $name | {commit: .Detail.Details, environment: .Detail.Name, test: $name, duration: 0, status: "Skipped"}))
| .commit + "," + .environment + "," + .test + "," + .status + "," + (.duration | tostring)' \
  | tee "$PARTIAL_DATA_PATH" \
  | sed -r -n 's/^([^,]+),.*/\1/p' \
  | stdbuf -oL -eL uniq \
  | xargs -I {} git log -1 --pretty=format:"{},%as%n" {} \
  | join -t "," - "$PARTIAL_DATA_PATH"