From df2dff532a448070e07fc78525aa731d3c2ecfeb Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 29 Oct 2019 20:03:39 -0700 Subject: [PATCH 1/6] Increase timeout to 90m, unset PARALLEL_COUNT --- hack/jenkins/common.sh | 5 +---- hack/jenkins/linux_integration_tests_kvm.sh | 1 - hack/jenkins/linux_integration_tests_none.sh | 1 - hack/jenkins/linux_integration_tests_virtualbox.sh | 1 - hack/jenkins/osx_integration_tests_virtualbox.sh | 1 - 5 files changed, 1 insertion(+), 8 deletions(-) diff --git a/hack/jenkins/common.sh b/hack/jenkins/common.sh index fb4ae49338..e127d5972c 100755 --- a/hack/jenkins/common.sh +++ b/hack/jenkins/common.sh @@ -23,8 +23,6 @@ # EXTRA_START_ARGS: additional flags to pass into minikube start # EXTRA_ARGS: additional flags to pass into minikube # JOB_NAME: the name of the logfile and check name to update on github -# PARALLEL_COUNT: number of tests to run in parallel - readonly TEST_ROOT="${HOME}/minikube-integration" readonly TEST_HOME="${TEST_ROOT}/${OS_ARCH}-${VM_DRIVER}-${MINIKUBE_LOCATION}-$$-${COMMIT}" @@ -260,8 +258,7 @@ set -x ${SUDO_PREFIX}${E2E_BIN} \ -minikube-start-args="--vm-driver=${VM_DRIVER} ${EXTRA_START_ARGS}" \ -expected-default-driver="${EXPECTED_DEFAULT_DRIVER}" \ - -test.timeout=60m \ - -test.parallel=${PARALLEL_COUNT} \ + -test.timeout=90m \ -binary="${MINIKUBE_BIN}" && result=$? || result=$? set +x echo ">> ${E2E_BIN} exited with ${result} at $(date)" diff --git a/hack/jenkins/linux_integration_tests_kvm.sh b/hack/jenkins/linux_integration_tests_kvm.sh index fe40576f65..723688b3a7 100755 --- a/hack/jenkins/linux_integration_tests_kvm.sh +++ b/hack/jenkins/linux_integration_tests_kvm.sh @@ -28,7 +28,6 @@ set -e OS_ARCH="linux-amd64" VM_DRIVER="kvm2" JOB_NAME="KVM_Linux" -PARALLEL_COUNT=4 EXPECTED_DEFAULT_DRIVER="kvm2" # Download files and set permissions diff --git a/hack/jenkins/linux_integration_tests_none.sh b/hack/jenkins/linux_integration_tests_none.sh index 767e6867a0..aebb714073 100755 --- a/hack/jenkins/linux_integration_tests_none.sh +++ b/hack/jenkins/linux_integration_tests_none.sh @@ -30,7 +30,6 @@ OS_ARCH="linux-amd64" VM_DRIVER="none" JOB_NAME="none_Linux" EXTRA_ARGS="--bootstrapper=kubeadm" -PARALLEL_COUNT=1 EXPECTED_DEFAULT_DRIVER="kvm2" SUDO_PREFIX="sudo -E " diff --git a/hack/jenkins/linux_integration_tests_virtualbox.sh b/hack/jenkins/linux_integration_tests_virtualbox.sh index 6f624eeead..d159afef5c 100755 --- a/hack/jenkins/linux_integration_tests_virtualbox.sh +++ b/hack/jenkins/linux_integration_tests_virtualbox.sh @@ -28,7 +28,6 @@ set -e OS_ARCH="linux-amd64" VM_DRIVER="virtualbox" JOB_NAME="VirtualBox_Linux" -PARALLEL_COUNT=4 EXPECTED_DEFAULT_DRIVER="kvm2" # Download files and set permissions diff --git a/hack/jenkins/osx_integration_tests_virtualbox.sh b/hack/jenkins/osx_integration_tests_virtualbox.sh index 8f7c6e3dd0..8ebdb846c1 100755 --- a/hack/jenkins/osx_integration_tests_virtualbox.sh +++ b/hack/jenkins/osx_integration_tests_virtualbox.sh @@ -29,7 +29,6 @@ OS_ARCH="darwin-amd64" VM_DRIVER="virtualbox" JOB_NAME="VirtualBox_macOS" EXTRA_ARGS="--bootstrapper=kubeadm" -PARALLEL_COUNT=3 # hyperkit behaves better, so it has higher precedence. # Assumes that hyperkit is also installed on the VirtualBox CI host. EXPECTED_DEFAULT_DRIVER="hyperkit" From 78d4406f88a062f18a50a23e349ba5ab277737a1 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 29 Oct 2019 20:09:39 -0700 Subject: [PATCH 2/6] Increase time penalty to 30s to further offset test runs --- test/integration/helpers.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/integration/helpers.go b/test/integration/helpers.go index 7c3688bac3..03c247c05e 100644 --- a/test/integration/helpers.go +++ b/test/integration/helpers.go @@ -345,7 +345,8 @@ func MaybeSlowParallel(t *testing.T) { if antiRaceCounter > 0 { // Slow enough to offset start, but not slow to be a major source of delay - penalty := time.Duration(5*antiRaceCounter) * time.Second + // TODO: Remove or minimize once #5353 is resolved + penalty := time.Duration(30*antiRaceCounter) * time.Second t.Logf("MaybeSlowParallel: Sleeping %s to avoid start race ...", penalty) time.Sleep(penalty) } From 2c960a1435d5d1d4ed9c10460d5f26e73eb114be Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 29 Oct 2019 20:15:35 -0700 Subject: [PATCH 3/6] Remove PARALLEL_COUNT --- hack/jenkins/osx_integration_tests_hyperkit.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/hack/jenkins/osx_integration_tests_hyperkit.sh b/hack/jenkins/osx_integration_tests_hyperkit.sh index aa9d5d69d1..8dc49ab367 100755 --- a/hack/jenkins/osx_integration_tests_hyperkit.sh +++ b/hack/jenkins/osx_integration_tests_hyperkit.sh @@ -31,7 +31,6 @@ VM_DRIVER="hyperkit" JOB_NAME="HyperKit_macOS" EXTRA_ARGS="--bootstrapper=kubeadm" EXTRA_START_ARGS="" -PARALLEL_COUNT=3 EXPECTED_DEFAULT_DRIVER="hyperkit" From 215534a487a88acf647e1c191dc969852051db6b Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 29 Oct 2019 20:15:42 -0700 Subject: [PATCH 4/6] Increase gvisor timeout --- test/integration/gvisor_addon_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/gvisor_addon_test.go b/test/integration/gvisor_addon_test.go index b75aa0bbb1..eb5786bde9 100644 --- a/test/integration/gvisor_addon_test.go +++ b/test/integration/gvisor_addon_test.go @@ -33,7 +33,7 @@ func TestGvisorAddon(t *testing.T) { MaybeSlowParallel(t) profile := UniqueProfileName("gvisor") - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute) + ctx, cancel := context.WithTimeout(context.Background(), 45*time.Minute) defer func() { if t.Failed() { rr, err := Run(t, exec.CommandContext(ctx, "kubectl", "--context", profile, "logs", "gvisor", "-n", "kube-system")) From a3c8299995ecdb8da294b91f7ed929d4d597d8d3 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Wed, 30 Oct 2019 07:47:09 -0700 Subject: [PATCH 5/6] Improve start offsets in MaybeSlowParallel by using a schedule --- test/integration/addons_test.go | 1 - test/integration/docker_test.go | 1 - test/integration/gvisor_addon_test.go | 1 - test/integration/helpers.go | 27 ++++++++++++++---------- test/integration/main.go | 1 + test/integration/version_upgrade_test.go | 2 +- 6 files changed, 18 insertions(+), 15 deletions(-) diff --git a/test/integration/addons_test.go b/test/integration/addons_test.go index e184b8e220..9fbc4c7641 100644 --- a/test/integration/addons_test.go +++ b/test/integration/addons_test.go @@ -37,7 +37,6 @@ import ( // TestAddons tests addons that require no special environment -- in parallel func TestAddons(t *testing.T) { MaybeSlowParallel(t) - profile := UniqueProfileName("addons") ctx, cancel := context.WithTimeout(context.Background(), 40*time.Minute) defer CleanupWithLogs(t, profile, cancel) diff --git a/test/integration/docker_test.go b/test/integration/docker_test.go index dd48ea5437..d6a50658b6 100644 --- a/test/integration/docker_test.go +++ b/test/integration/docker_test.go @@ -31,7 +31,6 @@ func TestDockerFlags(t *testing.T) { t.Skip("skipping: none driver does not support ssh or bundle docker") } MaybeSlowParallel(t) - profile := UniqueProfileName("docker-flags") ctx, cancel := context.WithTimeout(context.Background(), 20*time.Minute) defer CleanupWithLogs(t, profile, cancel) diff --git a/test/integration/gvisor_addon_test.go b/test/integration/gvisor_addon_test.go index 368273e975..e29ffd74b4 100644 --- a/test/integration/gvisor_addon_test.go +++ b/test/integration/gvisor_addon_test.go @@ -35,7 +35,6 @@ func TestGvisorAddon(t *testing.T) { } MaybeSlowParallel(t) - profile := UniqueProfileName("gvisor") ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute) defer func() { diff --git a/test/integration/helpers.go b/test/integration/helpers.go index 1b02973a29..dbfd1174eb 100644 --- a/test/integration/helpers.go +++ b/test/integration/helpers.go @@ -42,8 +42,8 @@ import ( ) var ( - antiRaceCounter = 0 - antiRaceMutex = &sync.Mutex{} + startSchedule = []time.Time{} + antiRaceMutex = &sync.Mutex{} ) // RunResult stores the result of an cmd.Run call @@ -330,24 +330,29 @@ func MaybeParallel(t *testing.T) { t.Parallel() } -// MaybeSlowParallel is a terrible workaround for tests which start clusters in a race-filled world -// TODO: Try removing this hack once certificates are deployed per-profile +// MaybeSlowParallel offsets cluster starts by the value of --start-offset +// TODO: Remove when possible func MaybeSlowParallel(t *testing.T) { // NoneDriver shouldn't parallelize "minikube start" if NoneDriver() { return } + wakeup := time.Now() antiRaceMutex.Lock() - antiRaceCounter++ + if len(startSchedule) == 0 { + startSchedule = append(startSchedule, wakeup) + } else { + wakeup = startSchedule[len(startSchedule)-1].Add(*startOffset) + startSchedule = append(startSchedule, wakeup) + } antiRaceMutex.Unlock() - if antiRaceCounter > 0 { - // Slow enough to offset start, but not slow to be a major source of delay - // TODO: Remove or minimize once #5353 is resolved - penalty := time.Duration(30*antiRaceCounter) * time.Second - t.Logf("MaybeSlowParallel: Sleeping %s to avoid start race ...", penalty) - time.Sleep(penalty) + if time.Now().Before(wakeup) { + d := time.Until(wakeup) + t.Logf("MaybeSlowParallel: Sleeping %s (until %s) to avoid start race ...", d, wakeup) + time.Sleep(d) + // Leave our entry in startSchedule, otherwise we can't guarantee a 30 second offset for the next caller } t.Parallel() } diff --git a/test/integration/main.go b/test/integration/main.go index bffb12f079..fe351e070d 100644 --- a/test/integration/main.go +++ b/test/integration/main.go @@ -33,6 +33,7 @@ var defaultDriver = flag.String("expected-default-driver", "", "Expected default var forceProfile = flag.String("profile", "", "force tests to run against a particular profile") var cleanup = flag.Bool("cleanup", true, "cleanup failed test run") var enableGvisor = flag.Bool("gvisor", false, "run gvisor integration test (slow)") +var startOffset = flag.Duration("start-offset", 30*time.Second, "how much time to offset between cluster starts") var postMortemLogs = flag.Bool("postmortem-logs", true, "show logs after a failed test run") // Paths to files - normally set for CI diff --git a/test/integration/version_upgrade_test.go b/test/integration/version_upgrade_test.go index 6ee49cb811..b386e98813 100644 --- a/test/integration/version_upgrade_test.go +++ b/test/integration/version_upgrade_test.go @@ -39,9 +39,9 @@ import ( // the odlest supported k8s version and then runs the current head minikube // and it tries to upgrade from the older supported k8s to news supported k8s func TestVersionUpgrade(t *testing.T) { + MaybeSlowParallel(t) profile := UniqueProfileName("vupgrade") ctx, cancel := context.WithTimeout(context.Background(), 55*time.Minute) - MaybeSlowParallel(t) defer CleanupWithLogs(t, profile, cancel) From 850ee4bf62a77159291b4408bd3e2afc95d08a08 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Wed, 30 Oct 2019 09:27:25 -0700 Subject: [PATCH 6/6] Refactor wait code into WaitForStartSlot to avoid parallel conflation --- test/integration/addons_test.go | 3 +- test/integration/docker_test.go | 4 ++- test/integration/guest_env_test.go | 4 ++- test/integration/gvisor_addon_test.go | 3 +- test/integration/helpers.go | 35 ++++++++++++---------- test/integration/start_stop_delete_test.go | 4 ++- test/integration/version_upgrade_test.go | 4 ++- 7 files changed, 35 insertions(+), 22 deletions(-) diff --git a/test/integration/addons_test.go b/test/integration/addons_test.go index 9fbc4c7641..a575e96eb5 100644 --- a/test/integration/addons_test.go +++ b/test/integration/addons_test.go @@ -36,7 +36,8 @@ import ( // TestAddons tests addons that require no special environment -- in parallel func TestAddons(t *testing.T) { - MaybeSlowParallel(t) + MaybeParallel(t) + WaitForStartSlot(t) profile := UniqueProfileName("addons") ctx, cancel := context.WithTimeout(context.Background(), 40*time.Minute) defer CleanupWithLogs(t, profile, cancel) diff --git a/test/integration/docker_test.go b/test/integration/docker_test.go index d6a50658b6..7acb0f9dcc 100644 --- a/test/integration/docker_test.go +++ b/test/integration/docker_test.go @@ -30,7 +30,9 @@ func TestDockerFlags(t *testing.T) { if NoneDriver() { t.Skip("skipping: none driver does not support ssh or bundle docker") } - MaybeSlowParallel(t) + MaybeParallel(t) + WaitForStartSlot(t) + profile := UniqueProfileName("docker-flags") ctx, cancel := context.WithTimeout(context.Background(), 20*time.Minute) defer CleanupWithLogs(t, profile, cancel) diff --git a/test/integration/guest_env_test.go b/test/integration/guest_env_test.go index e9f4925a27..0e7dfb6634 100644 --- a/test/integration/guest_env_test.go +++ b/test/integration/guest_env_test.go @@ -27,7 +27,9 @@ import ( ) func TestGuestEnvironment(t *testing.T) { - MaybeSlowParallel(t) + MaybeParallel(t) + WaitForStartSlot(t) + profile := UniqueProfileName("guest") ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute) defer CleanupWithLogs(t, profile, cancel) diff --git a/test/integration/gvisor_addon_test.go b/test/integration/gvisor_addon_test.go index e29ffd74b4..f8ffb4d2c0 100644 --- a/test/integration/gvisor_addon_test.go +++ b/test/integration/gvisor_addon_test.go @@ -34,7 +34,8 @@ func TestGvisorAddon(t *testing.T) { t.Skip("skipping test because --gvisor=false") } - MaybeSlowParallel(t) + MaybeParallel(t) + WaitForStartSlot(t) profile := UniqueProfileName("gvisor") ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute) defer func() { diff --git a/test/integration/helpers.go b/test/integration/helpers.go index dbfd1174eb..654748250f 100644 --- a/test/integration/helpers.go +++ b/test/integration/helpers.go @@ -42,8 +42,10 @@ import ( ) var ( - startSchedule = []time.Time{} - antiRaceMutex = &sync.Mutex{} + // startTimes is a list of startup times, to guarantee --start-offset + startTimes = []time.Time{} + // startTimesMutex is a lock to update startTimes without a race condition + startTimesMutex = &sync.Mutex{} ) // RunResult stores the result of an cmd.Run call @@ -330,31 +332,32 @@ func MaybeParallel(t *testing.T) { t.Parallel() } -// MaybeSlowParallel offsets cluster starts by the value of --start-offset -// TODO: Remove when possible -func MaybeSlowParallel(t *testing.T) { - // NoneDriver shouldn't parallelize "minikube start" +// WaitForStartSlot enforces --start-offset to avoid startup race conditions +func WaitForStartSlot(t *testing.T) { + // Not parallel if NoneDriver() { return } wakeup := time.Now() - antiRaceMutex.Lock() - if len(startSchedule) == 0 { - startSchedule = append(startSchedule, wakeup) - } else { - wakeup = startSchedule[len(startSchedule)-1].Add(*startOffset) - startSchedule = append(startSchedule, wakeup) + startTimesMutex.Lock() + if len(startTimes) > 0 { + nextStart := startTimes[len(startTimes)-1].Add(*startOffset) + // Ignore nextStart if it is in the past - to guarantee offset for next caller + if time.Now().Before(nextStart) { + wakeup = nextStart + } } - antiRaceMutex.Unlock() + startTimes = append(startTimes, wakeup) + startTimesMutex.Unlock() if time.Now().Before(wakeup) { d := time.Until(wakeup) - t.Logf("MaybeSlowParallel: Sleeping %s (until %s) to avoid start race ...", d, wakeup) + t.Logf("Waiting for start slot at %s (sleeping %s) ...", wakeup, d) time.Sleep(d) - // Leave our entry in startSchedule, otherwise we can't guarantee a 30 second offset for the next caller + } else { + t.Logf("No need to wait for start slot, it is already %s", time.Now()) } - t.Parallel() } // killProcessFamily kills a pid and all of its children diff --git a/test/integration/start_stop_delete_test.go b/test/integration/start_stop_delete_test.go index ada918f480..52ac7a4202 100644 --- a/test/integration/start_stop_delete_test.go +++ b/test/integration/start_stop_delete_test.go @@ -75,7 +75,8 @@ func TestStartStop(t *testing.T) { for _, tc := range tests { tc := tc t.Run(tc.name, func(t *testing.T) { - MaybeSlowParallel(t) + MaybeParallel(t) + WaitForStartSlot(t) if !strings.Contains(tc.name, "docker") && NoneDriver() { t.Skipf("skipping %s - incompatible with none driver", t.Name()) @@ -136,6 +137,7 @@ func TestStartStop(t *testing.T) { t.Errorf("status = %q; want = %q", got, state.Stopped) } + WaitForStartSlot(t) rr, err = Run(t, exec.CommandContext(ctx, Target(), startArgs...)) if err != nil { // Explicit fatal so that failures don't move directly to deletion diff --git a/test/integration/version_upgrade_test.go b/test/integration/version_upgrade_test.go index b386e98813..78bc9ef712 100644 --- a/test/integration/version_upgrade_test.go +++ b/test/integration/version_upgrade_test.go @@ -39,7 +39,8 @@ import ( // the odlest supported k8s version and then runs the current head minikube // and it tries to upgrade from the older supported k8s to news supported k8s func TestVersionUpgrade(t *testing.T) { - MaybeSlowParallel(t) + MaybeParallel(t) + WaitForStartSlot(t) profile := UniqueProfileName("vupgrade") ctx, cancel := context.WithTimeout(context.Background(), 55*time.Minute) @@ -89,6 +90,7 @@ func TestVersionUpgrade(t *testing.T) { t.Errorf("status = %q; want = %q", got, state.Stopped.String()) } + WaitForStartSlot(t) args = append([]string{"start", "-p", profile, fmt.Sprintf("--kubernetes-version=%s", constants.NewestKubernetesVersion), "--alsologtostderr", "-v=1"}, StartArgs()...) rr, err = Run(t, exec.CommandContext(ctx, Target(), args...)) if err != nil {