Merge pull request #5782 from tstromberg/test-timeout

Improve parallel start scheduling and autoset parallelism
pull/5802/head
Thomas Strömberg 2019-10-30 10:50:33 -07:00 committed by GitHub
commit 14adf4892c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 43 additions and 36 deletions

View File

@ -23,8 +23,6 @@
# EXTRA_START_ARGS: additional flags to pass into minikube start
# EXTRA_ARGS: additional flags to pass into minikube
# JOB_NAME: the name of the logfile and check name to update on github
# PARALLEL_COUNT: number of tests to run in parallel
readonly TEST_ROOT="${HOME}/minikube-integration"
readonly TEST_HOME="${TEST_ROOT}/${OS_ARCH}-${VM_DRIVER}-${MINIKUBE_LOCATION}-$$-${COMMIT}"
@ -243,7 +241,7 @@ if [ "$(uname)" != "Darwin" ]; then
docker build -t gcr.io/k8s-minikube/gvisor-addon:2 -f testdata/gvisor-addon-Dockerfile ./testdata
fi
readonly LOAD=$(uptime | egrep -o "load average.*: [0-9]" | cut -d" " -f3)
readonly LOAD=$(uptime | egrep -o "load average.*: [0-9]+" | cut -d" " -f3)
if [[ "${LOAD}" -gt 2 ]]; then
echo ""
echo "********************** LOAD WARNING ********************************"
@ -255,21 +253,18 @@ if [[ "${LOAD}" -gt 2 ]]; then
top -b -n1 | head -n 15
fi
echo "********************** LOAD WARNING ********************************"
echo ""
echo "Sleeping 30s to see if load goes down ...."
sleep 30
uptime
fi
echo ""
echo ">> Starting ${E2E_BIN} at $(date)"
set -x
${SUDO_PREFIX}${E2E_BIN} \
-minikube-start-args="--vm-driver=${VM_DRIVER} ${EXTRA_START_ARGS}" \
-expected-default-driver="${EXPECTED_DEFAULT_DRIVER}" \
-test.timeout=60m \
-test.parallel=${PARALLEL_COUNT} \
-test.timeout=70m \
${EXTRA_TEST_ARGS} \
-binary="${MINIKUBE_BIN}" && result=$? || result=$?
set +x

View File

@ -28,7 +28,6 @@ set -e
OS_ARCH="linux-amd64"
VM_DRIVER="kvm2"
JOB_NAME="KVM_Linux"
PARALLEL_COUNT=4
EXPECTED_DEFAULT_DRIVER="kvm2"
# We pick kvm as our gvisor testbed because it is fast & reliable

View File

@ -30,7 +30,6 @@ OS_ARCH="linux-amd64"
VM_DRIVER="none"
JOB_NAME="none_Linux"
EXTRA_ARGS="--bootstrapper=kubeadm"
PARALLEL_COUNT=1
EXPECTED_DEFAULT_DRIVER="kvm2"
SUDO_PREFIX="sudo -E "

View File

@ -28,7 +28,6 @@ set -e
OS_ARCH="linux-amd64"
VM_DRIVER="virtualbox"
JOB_NAME="VirtualBox_Linux"
PARALLEL_COUNT=4
EXPECTED_DEFAULT_DRIVER="kvm2"
# Download files and set permissions

View File

@ -31,7 +31,6 @@ VM_DRIVER="hyperkit"
JOB_NAME="HyperKit_macOS"
EXTRA_ARGS="--bootstrapper=kubeadm"
EXTRA_START_ARGS=""
PARALLEL_COUNT=3
EXPECTED_DEFAULT_DRIVER="hyperkit"

View File

@ -29,7 +29,6 @@ OS_ARCH="darwin-amd64"
VM_DRIVER="virtualbox"
JOB_NAME="VirtualBox_macOS"
EXTRA_ARGS="--bootstrapper=kubeadm"
PARALLEL_COUNT=3
# hyperkit behaves better, so it has higher precedence.
# Assumes that hyperkit is also installed on the VirtualBox CI host.
EXPECTED_DEFAULT_DRIVER="hyperkit"

View File

@ -36,8 +36,8 @@ import (
// TestAddons tests addons that require no special environment -- in parallel
func TestAddons(t *testing.T) {
MaybeSlowParallel(t)
MaybeParallel(t)
WaitForStartSlot(t)
profile := UniqueProfileName("addons")
ctx, cancel := context.WithTimeout(context.Background(), 40*time.Minute)
defer CleanupWithLogs(t, profile, cancel)

View File

@ -30,7 +30,8 @@ func TestDockerFlags(t *testing.T) {
if NoneDriver() {
t.Skip("skipping: none driver does not support ssh or bundle docker")
}
MaybeSlowParallel(t)
MaybeParallel(t)
WaitForStartSlot(t)
profile := UniqueProfileName("docker-flags")
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Minute)

View File

@ -27,7 +27,9 @@ import (
)
func TestGuestEnvironment(t *testing.T) {
MaybeSlowParallel(t)
MaybeParallel(t)
WaitForStartSlot(t)
profile := UniqueProfileName("guest")
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute)
defer CleanupWithLogs(t, profile, cancel)

View File

@ -34,8 +34,8 @@ func TestGvisorAddon(t *testing.T) {
t.Skip("skipping test because --gvisor=false")
}
MaybeSlowParallel(t)
MaybeParallel(t)
WaitForStartSlot(t)
profile := UniqueProfileName("gvisor")
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute)
defer func() {

View File

@ -42,8 +42,10 @@ import (
)
var (
antiRaceCounter = 0
antiRaceMutex = &sync.Mutex{}
// startTimes is a list of startup times, to guarantee --start-offset
startTimes = []time.Time{}
// startTimesMutex is a lock to update startTimes without a race condition
startTimesMutex = &sync.Mutex{}
)
// RunResult stores the result of a cmd.Run call
@ -330,25 +332,32 @@ func MaybeParallel(t *testing.T) {
t.Parallel()
}
// MaybeSlowParallel is a terrible workaround for tests which start clusters in a race-filled world
// TODO: Try removing this hack once certificates are deployed per-profile
func MaybeSlowParallel(t *testing.T) {
// NoneDriver shouldn't parallelize "minikube start"
// WaitForStartSlot enforces --start-offset to avoid startup race conditions
func WaitForStartSlot(t *testing.T) {
// Not parallel
if NoneDriver() {
return
}
antiRaceMutex.Lock()
antiRaceCounter++
antiRaceMutex.Unlock()
if antiRaceCounter > 0 {
// Slow enough to offset start, but not so slow as to be a major source of delay
penalty := time.Duration(5*antiRaceCounter) * time.Second
t.Logf("MaybeSlowParallel: Sleeping %s to avoid start race ...", penalty)
time.Sleep(penalty)
wakeup := time.Now()
startTimesMutex.Lock()
if len(startTimes) > 0 {
nextStart := startTimes[len(startTimes)-1].Add(*startOffset)
// Ignore nextStart if it is in the past - to guarantee offset for next caller
if time.Now().Before(nextStart) {
wakeup = nextStart
}
}
startTimes = append(startTimes, wakeup)
startTimesMutex.Unlock()
if time.Now().Before(wakeup) {
d := time.Until(wakeup)
t.Logf("Waiting for start slot at %s (sleeping %s) ...", wakeup, d)
time.Sleep(d)
} else {
t.Logf("No need to wait for start slot, it is already %s", time.Now())
}
t.Parallel()
}
// killProcessFamily kills a pid and all of its children

View File

@ -33,6 +33,7 @@ var defaultDriver = flag.String("expected-default-driver", "", "Expected default
var forceProfile = flag.String("profile", "", "force tests to run against a particular profile")
var cleanup = flag.Bool("cleanup", true, "cleanup failed test run")
var enableGvisor = flag.Bool("gvisor", false, "run gvisor integration test (slow)")
var startOffset = flag.Duration("start-offset", 30*time.Second, "how much time to offset between cluster starts")
var postMortemLogs = flag.Bool("postmortem-logs", true, "show logs after a failed test run")
// Paths to files - normally set for CI

View File

@ -75,7 +75,8 @@ func TestStartStop(t *testing.T) {
for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
MaybeSlowParallel(t)
MaybeParallel(t)
WaitForStartSlot(t)
if !strings.Contains(tc.name, "docker") && NoneDriver() {
t.Skipf("skipping %s - incompatible with none driver", t.Name())
@ -136,6 +137,7 @@ func TestStartStop(t *testing.T) {
t.Errorf("status = %q; want = %q", got, state.Stopped)
}
WaitForStartSlot(t)
rr, err = Run(t, exec.CommandContext(ctx, Target(), startArgs...))
if err != nil {
// Explicit fatal so that failures don't move directly to deletion

View File

@ -39,9 +39,10 @@ import (
// the oldest supported k8s version and then runs the current head minikube
// and it tries to upgrade from the older supported k8s to the newest supported k8s
func TestVersionUpgrade(t *testing.T) {
MaybeParallel(t)
WaitForStartSlot(t)
profile := UniqueProfileName("vupgrade")
ctx, cancel := context.WithTimeout(context.Background(), 55*time.Minute)
MaybeSlowParallel(t)
defer CleanupWithLogs(t, profile, cancel)
@ -89,6 +90,7 @@ func TestVersionUpgrade(t *testing.T) {
t.Errorf("status = %q; want = %q", got, state.Stopped.String())
}
WaitForStartSlot(t)
args = append([]string{"start", "-p", profile, fmt.Sprintf("--kubernetes-version=%s", constants.NewestKubernetesVersion), "--alsologtostderr", "-v=1"}, StartArgs()...)
rr, err = Run(t, exec.CommandContext(ctx, Target(), args...))
if err != nil {