Fix waiting for all pods having specified labels to be Ready (#20315)

* Fix waiting for all kube-system pods having one of specified labels to be Ready
Predrag Rogic 2025-04-28 19:58:02 +01:00 committed by GitHub
parent 9797ec1b10
commit c8a63fa11f
7 changed files with 154 additions and 153 deletions
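The commit message is terse, so here is a minimal, self-contained sketch of the behaviour it describes (names and the poll interval are illustrative assumptions, not the minikube code itself): for each label, list the kube-system pods matching it, require that at least one exists, and wait until every match reports the Ready condition, with the overall timeout carried by a context instead of being re-checked by hand inside the loop.

```go
// Hedged sketch only - waitLabelledPodsReady and podReady are assumed names.
package waitsketch

import (
	"context"
	"fmt"
	"time"

	core "k8s.io/api/core/v1"
	meta "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

func waitLabelledPodsReady(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error {
	// The deadline lives in the context; the poller stops when it expires.
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	for _, label := range labels {
		ready := func(ctx context.Context) (bool, error) {
			pods, err := cs.CoreV1().Pods(meta.NamespaceSystem).List(ctx, meta.ListOptions{LabelSelector: label})
			if err != nil || len(pods.Items) == 0 {
				return false, nil // nothing matched (yet) - retry on the next interval
			}
			for _, pod := range pods.Items {
				if !podReady(&pod) {
					return false, nil // at least one pod not Ready - retry
				}
			}
			return true, nil
		}
		// 1s interval is an assumption; immediate=true checks once before sleeping.
		if err := wait.PollUntilContextCancel(ctx, time.Second, true, ready); err != nil {
			return fmt.Errorf("waiting for %q pods: %w", label, err)
		}
	}
	return nil
}

// podReady reports whether the pod has the Ready condition set to True.
func podReady(pod *core.Pod) bool {
	for _, c := range pod.Status.Conditions {
		if c.Type == core.PodReady {
			return c.Status == core.ConditionTrue
		}
	}
	return false
}
```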

View File

@ -47,15 +47,14 @@ import (
"github.com/spf13/viper"
"golang.org/x/text/cases"
"golang.org/x/text/language"
"k8s.io/minikube/pkg/minikube/command"
"k8s.io/minikube/pkg/minikube/firewall"
netutil "k8s.io/minikube/pkg/network"
"k8s.io/klog/v2"
cmdcfg "k8s.io/minikube/cmd/minikube/cmd/config"
"k8s.io/minikube/pkg/drivers/kic/oci"
"k8s.io/minikube/pkg/minikube/bootstrapper/bsutil"
"k8s.io/minikube/pkg/minikube/bootstrapper/bsutil/kverify"
"k8s.io/minikube/pkg/minikube/bootstrapper/images"
"k8s.io/minikube/pkg/minikube/command"
"k8s.io/minikube/pkg/minikube/config"
"k8s.io/minikube/pkg/minikube/constants"
"k8s.io/minikube/pkg/minikube/cruntime"
@ -64,6 +63,7 @@ import (
"k8s.io/minikube/pkg/minikube/driver"
"k8s.io/minikube/pkg/minikube/driver/auxdriver"
"k8s.io/minikube/pkg/minikube/exit"
"k8s.io/minikube/pkg/minikube/firewall"
"k8s.io/minikube/pkg/minikube/kubeconfig"
"k8s.io/minikube/pkg/minikube/localpath"
"k8s.io/minikube/pkg/minikube/machine"
@ -74,13 +74,14 @@ import (
"k8s.io/minikube/pkg/minikube/out/register"
"k8s.io/minikube/pkg/minikube/pause"
"k8s.io/minikube/pkg/minikube/reason"
"k8s.io/minikube/pkg/minikube/style"
pkgtrace "k8s.io/minikube/pkg/trace"
"k8s.io/minikube/pkg/minikube/registry"
"k8s.io/minikube/pkg/minikube/style"
"k8s.io/minikube/pkg/minikube/translate"
netutil "k8s.io/minikube/pkg/network"
pkgtrace "k8s.io/minikube/pkg/trace"
"k8s.io/minikube/pkg/util"
"k8s.io/minikube/pkg/version"
kconst "k8s.io/minikube/third_party/kubeadm/app/constants"
)
type versionJSON struct {
@ -287,6 +288,12 @@ func runStart(cmd *cobra.Command, _ []string) {
exit.Error(reason.GuestStart, "failed to start node", err)
}
if starter.Cfg.VerifyComponents[kverify.ExtraKey] {
if err := kverify.WaitExtra(ClusterFlagValue(), kverify.CorePodsLabels, kconst.DefaultControlPlaneTimeout); err != nil {
exit.Message(reason.GuestStart, "extra waiting: {{.error}}", out.V{"error": err})
}
}
if err := showKubectlInfo(kubeconfig, starter.Node.KubernetesVersion, starter.Node.ContainerRuntime, starter.Cfg.Name); err != nil {
klog.Errorf("kubectl info: %v", err)
}

View File

@ -52,7 +52,7 @@ var (
// DefaultWaitList is list of all default components to wait for. only names to be used for start flags.
DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey}
// AllComponentsList list of all valid component keys to wait for. only names to be used for start flags.
AllComponentsList = []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey, AppsRunningKey, NodeReadyKey, KubeletKey}
AllComponentsList = []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey, AppsRunningKey, NodeReadyKey, KubeletKey, ExtraKey}
// AppsRunningList running list are valid k8s-app components to wait for them to be running
AppsRunningList = []string{
"kube-dns", // coredns

View File

@ -38,43 +38,47 @@ func WaitNodeCondition(cs *kubernetes.Clientset, name string, condition core.Nod
klog.Infof("duration metric: took %s for node %q to be %q ...", time.Since(start), name, condition)
}()
lap := time.Now()
checkCondition := func(_ context.Context) (bool, error) {
if time.Since(start) > timeout {
return false, fmt.Errorf("timed out waiting %v for node %q to be %q (will not retry!)", timeout, name, condition)
}
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
status, reason := nodeConditionStatus(cs, name, condition)
lap := time.Now()
checkCondition := func(ctx context.Context) (bool, error) {
status, err := nodeConditionStatus(ctx, cs, name, condition)
// done if node has condition
if status == core.ConditionTrue {
klog.Info(reason)
klog.Infof("node %q is %q", name, condition)
return true, nil
}
// retry in all other cases, decrease log spam
if time.Since(lap) > (2 * time.Second) {
klog.Info(reason)
if err != nil {
klog.Warningf("error getting node %q condition %q status (will retry): %v", name, condition, err)
} else {
klog.Warningf("node %q has %q:%q status (will retry)", name, condition, status)
}
lap = time.Now()
}
return false, nil
}
if err := wait.PollUntilContextTimeout(context.Background(), kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, true, checkCondition); err != nil {
return fmt.Errorf("waitNodeCondition: %w", err)
if err := wait.PollUntilContextCancel(ctx, kconst.APICallRetryInterval, true, checkCondition); err != nil {
return fmt.Errorf("WaitNodeCondition: %w", err)
}
return nil
}
// nodeConditionStatus returns if node is in specified condition and verbose reason.
func nodeConditionStatus(cs *kubernetes.Clientset, name string, condition core.NodeConditionType) (status core.ConditionStatus, reason string) {
node, err := cs.CoreV1().Nodes().Get(context.Background(), name, meta.GetOptions{})
// nodeConditionStatus checks if node exists and returns condition status.
func nodeConditionStatus(ctx context.Context, cs *kubernetes.Clientset, name string, condition core.NodeConditionType) (core.ConditionStatus, error) {
node, err := cs.CoreV1().Nodes().Get(ctx, name, meta.GetOptions{})
if err != nil {
return core.ConditionUnknown, fmt.Sprintf("error getting node %q: %v", name, err)
return core.ConditionUnknown, err
}
// check if node has the condition
for _, c := range node.Status.Conditions {
if c.Type == condition {
return c.Status, fmt.Sprintf("node %q has status %q:%q", node.Name, condition, c.Status)
return c.Status, nil
}
}
// assume transient condition
return core.ConditionFalse, fmt.Sprintf("node %q doesn't have %q status: %+v", node.Name, condition, node.Status)
// assume transient error
return core.ConditionUnknown, fmt.Errorf("node %q does not have %q condition type: %+v", name, condition, node.Status)
}
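Both the node waiter above and the pod waiters below now hand their deadline to the poller through a context instead of re-checking elapsed time inside the callback. A minimal sketch of that contract, with illustrative names and an assumed 2-second interval: wait.PollUntilContextCancel stops on (true, nil), retries on (false, nil), aborts on a non-nil error, and also returns once ctx is cancelled or its deadline passes.

```go
// Hedged sketch only - pollWithTimeout is an assumed name.
package waitsketch

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// pollWithTimeout runs probe until it reports done, returns an error, or the
// timeout carried by ctx expires; immediate=true runs probe once before the
// first interval sleep.
func pollWithTimeout(timeout time.Duration, probe func(ctx context.Context) (bool, error)) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	return wait.PollUntilContextCancel(ctx, 2*time.Second, true, probe)
}
```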

View File

@ -19,140 +19,138 @@ package kverify
import (
"context"
"errors"
"fmt"
"time"
core "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"k8s.io/minikube/pkg/kapi"
kconst "k8s.io/minikube/third_party/kubeadm/app/constants"
)
// WaitExtra calls waitPodCondition for all system-critical pods including those with specified labels.
func WaitExtra(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error {
klog.Infof("extra waiting up to %v for all system-critical pods including labels %v to be %q ...", timeout, labels, core.PodReady)
// WaitExtra calls waitPodCondition for all (at least one) kube-system pods having one of specified labels to be "Ready" on profile cluster.
func WaitExtra(profile string, labels []string, timeout time.Duration) error {
klog.Infof("extra waiting up to %v for all %q pods having one of %v labels to be %q ...", timeout, meta.NamespaceSystem, labels, core.PodReady)
start := time.Now()
defer func() {
klog.Infof("duration metric: took %s for extra waiting for all system-critical and pods with labels %v to be %q ...", time.Since(start), labels, core.PodReady)
klog.Infof("duration metric: took %s for extra waiting for all %q pods having one of %v labels to be %q ...", time.Since(start), meta.NamespaceSystem, labels, core.PodReady)
}()
pods, err := cs.CoreV1().Pods(meta.NamespaceSystem).List(context.Background(), meta.ListOptions{})
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
cs, err := kapi.Client(profile)
if err != nil {
return fmt.Errorf("error listing pods in %q namespace: %w", meta.NamespaceSystem, err)
return fmt.Errorf("failed to get kube client: %v", err)
}
for _, pod := range pods.Items {
if time.Since(start) > timeout {
return fmt.Errorf("timed out waiting %v for all system-critical and pods with labels %v to be %q", timeout, labels, core.NodeReady)
// podsReady poll function checks if all (at least one) pods in the namespace having the label are Ready
var label string
podsReady := func(ctx context.Context) (bool, error) {
pods, err := cs.CoreV1().Pods(meta.NamespaceSystem).List(ctx, meta.ListOptions{LabelSelector: label})
if err != nil {
klog.Warningf("error listing pods in %q namespace with %q label, will retry: %v", meta.NamespaceSystem, label, err)
return false, nil
}
for k, v := range pod.Labels {
label := fmt.Sprintf("%s=%s", k, v)
match := false
for _, l := range labels {
if l == label {
match = true
break
}
}
// ignore system-critical pods' non-essential labels
if !match && pod.Namespace != meta.NamespaceSystem && k != "k8s-app" && k != "component" {
continue
}
if match || pod.Spec.PriorityClassName == "system-cluster-critical" || pod.Spec.PriorityClassName == "system-node-critical" {
if err := waitPodCondition(cs, pod.Name, pod.Namespace, core.PodReady, timeout); err != nil {
klog.Errorf("WaitExtra: %v", err)
}
break
if len(pods.Items) == 0 {
klog.Warningf("no pods in %q namespace with %q label found, will retry", meta.NamespaceSystem, label)
return false, nil
}
for _, pod := range pods.Items {
if err := waitPodCondition(ctx, cs, pod.Name, pod.Namespace, core.PodReady); err != nil {
klog.Warningf("not all pods in %q namespace with %q label are %q, will retry: %v", meta.NamespaceSystem, label, core.PodReady, err)
return false, nil
}
}
return true, nil
}
for _, l := range labels {
label = l
if err := wait.PollUntilContextCancel(ctx, kconst.APICallRetryInterval, true, podsReady); err != nil {
return fmt.Errorf("WaitExtra: %w", err)
}
}
return nil
}
// waitPodCondition waits for specified condition of podName in a namespace.
func waitPodCondition(cs *kubernetes.Clientset, name, namespace string, condition core.PodConditionType, timeout time.Duration) error {
klog.Infof("waiting up to %v for pod %q in %q namespace to be %q ...", timeout, name, namespace, condition)
// waitPodCondition waits for specified condition of pod name in namespace.
func waitPodCondition(ctx context.Context, cs *kubernetes.Clientset, name, namespace string, condition core.PodConditionType) error {
klog.Infof("waiting for pod %q in %q namespace to be %q or be gone ...", name, namespace, condition)
start := time.Now()
defer func() {
klog.Infof("duration metric: took %s for pod %q in %q namespace to be %q ...", time.Since(start), name, namespace, condition)
klog.Infof("duration metric: took %s for pod %q in %q namespace to be %q or be gone ...", time.Since(start), name, namespace, condition)
}()
lap := time.Now()
checkCondition := func(_ context.Context) (bool, error) {
if time.Since(start) > timeout {
return false, fmt.Errorf("timed out waiting %v for pod %q in %q namespace to be %q (will not retry!)", timeout, name, namespace, condition)
}
status, reason := podConditionStatus(cs, name, namespace, condition)
checkCondition := func(ctx context.Context) (bool, error) {
status, err := podConditionStatus(ctx, cs, name, namespace, condition)
// done if pod has condition
if status == core.ConditionTrue {
klog.Info(reason)
klog.Infof("pod %q is %q", name, condition)
return true, nil
}
// return immediately: status == core.ConditionUnknown
if status == core.ConditionUnknown {
klog.Info(reason)
return false, errors.New(reason)
// back off if pod or node is gone
if kerrors.IsNotFound(err) || status == core.TaintNodeUnreachable {
klog.Infof("pod %q in %q namespace is gone: %v", name, namespace, err)
return true, nil
}
// reduce log spam
// retry in all other cases, decrease log spam
if time.Since(lap) > (2 * time.Second) {
klog.Info(reason)
klog.Warningf("pod %q is not %q, error: %v", name, condition, err)
lap = time.Now()
}
// return immediately: status == core.ConditionFalse
return false, nil
}
if err := wait.PollUntilContextTimeout(context.Background(), kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, true, checkCondition); err != nil {
if err := wait.PollUntilContextCancel(ctx, kconst.APICallRetryInterval, true, checkCondition); err != nil {
return fmt.Errorf("waitPodCondition: %w", err)
}
return nil
}
// podConditionStatus returns if pod is in specified condition and verbose reason.
func podConditionStatus(cs *kubernetes.Clientset, name, namespace string, condition core.PodConditionType) (status core.ConditionStatus, reason string) {
pod, err := cs.CoreV1().Pods(namespace).Get(context.Background(), name, meta.GetOptions{})
// podConditionStatus returns if pod is in specified condition.
func podConditionStatus(ctx context.Context, cs *kubernetes.Clientset, name, namespace string, condition core.PodConditionType) (core.ConditionStatus, error) {
pod, err := cs.CoreV1().Pods(namespace).Get(ctx, name, meta.GetOptions{})
if err != nil {
return core.ConditionUnknown, fmt.Sprintf("error getting pod %q in %q namespace (skipping!): %v", name, namespace, err)
return core.ConditionUnknown, fmt.Errorf("getting pod %q in %q namespace (will retry): %w", name, namespace, err)
}
// check if underlying node is Ready - in case we got stale data about the pod
if pod.Spec.NodeName != "" {
if status, reason := nodeConditionStatus(cs, pod.Spec.NodeName, core.NodeReady); status != core.ConditionTrue {
return core.ConditionUnknown, fmt.Sprintf("node %q hosting pod %q in %q namespace is currently not %q (skipping!): %v", pod.Spec.NodeName, name, namespace, core.NodeReady, reason)
// check if pod is scheduled on any node
if pod.Spec.NodeName == "" {
return core.ConditionUnknown, fmt.Errorf("pod %q in %q namespace is not scheduled on any node (will retry): %+v", name, namespace, pod.Status)
}
// check if node exists and is Ready (KubeAPI)
nodeReadyStatus, err := nodeConditionStatus(ctx, cs, pod.Spec.NodeName, core.NodeReady)
if err != nil {
if kerrors.IsNotFound(err) {
return core.TaintNodeUnreachable, fmt.Errorf("node %q hosting pod %q is not found/running (skipping!): %v", pod.Spec.NodeName, name, err)
}
return core.ConditionUnknown, fmt.Errorf("node %q hosting pod %q is not %q (will retry): %v", pod.Spec.NodeName, name, core.NodeReady, err)
}
if pod.Status.Phase != core.PodRunning && pod.Status.Phase != core.PodPending {
return core.ConditionUnknown, fmt.Sprintf("pod %q in %q namespace has status phase %q (skipping!): %+v", pod.Name, pod.Namespace, pod.Status.Phase, pod.Status)
if nodeReadyStatus != core.ConditionTrue {
return core.ConditionUnknown, fmt.Errorf("node %q hosting pod %q is not %q (will retry)", pod.Spec.NodeName, name, core.NodeReady)
}
// check if pod has the condition
for _, c := range pod.Status.Conditions {
if c.Type == condition {
return c.Status, fmt.Sprintf("pod %q in %q namespace has status %q:%q", pod.Name, pod.Namespace, condition, c.Status)
return c.Status, nil
}
}
// assume transient condition
return core.ConditionFalse, fmt.Sprintf("pod %q in %q namespace doesn't have %q status: %+v", pod.Name, pod.Namespace, core.PodReady, pod.Status)
// assume transient error
return core.ConditionUnknown, fmt.Errorf("pod %q does not have %q condition type: %+v", name, condition, pod.Status)
}
// IsPodReady returns if pod is Ready and verbose reason.
func IsPodReady(pod *core.Pod) (ready bool, reason string) {
if pod.Status.Phase != core.PodRunning {
return false, fmt.Sprintf("pod %q in %q namespace is not Running: %+v", pod.Name, pod.Namespace, pod.Status)
}
// IsPodReady returns if pod is Ready.
func IsPodReady(pod *core.Pod) bool {
for _, c := range pod.Status.Conditions {
if c.Type == core.PodReady {
if c.Status != core.ConditionTrue {
return false, fmt.Sprintf("pod %q in %q namespace is not Ready: %+v", pod.Name, pod.Namespace, c)
}
return true, fmt.Sprintf("pod %q in %q namespace is Ready: %+v", pod.Name, pod.Namespace, c)
return c.Status == core.ConditionTrue
}
}
return false, fmt.Sprintf("pod %q in %q namespace does not have %q status: %+v", pod.Name, pod.Namespace, core.PodReady, pod.Status)
// assume transient error
return false
}
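One detail of the rewritten podConditionStatus above is the stale-data guard: a pod's own conditions are only trusted after confirming that the node it is scheduled on still exists and is Ready, and a NotFound node is treated as "the pod is gone" rather than something to retry. A self-contained sketch of that check, with assumed names:

```go
// Hedged sketch only - hostNodeReady is an assumed name, not the diff code.
package waitsketch

import (
	"context"
	"fmt"

	core "k8s.io/api/core/v1"
	kerrors "k8s.io/apimachinery/pkg/api/errors"
	meta "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// hostNodeReady reports whether the pod's node exists and is Ready.
// gone=true means the node no longer exists, so retrying is pointless.
func hostNodeReady(ctx context.Context, cs *kubernetes.Clientset, pod *core.Pod) (ready, gone bool, err error) {
	if pod.Spec.NodeName == "" {
		return false, false, fmt.Errorf("pod %q is not scheduled on any node yet", pod.Name)
	}
	node, err := cs.CoreV1().Nodes().Get(ctx, pod.Spec.NodeName, meta.GetOptions{})
	if err != nil {
		if kerrors.IsNotFound(err) {
			return false, true, nil // node deleted - give up on this pod
		}
		return false, false, err // transient lookup error - caller may retry
	}
	for _, c := range node.Status.Conditions {
		if c.Type == core.NodeReady {
			return c.Status == core.ConditionTrue, false, nil
		}
	}
	return false, false, nil
}
```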

View File

@ -490,7 +490,8 @@ func (k *Bootstrapper) client(ip string, port int) (*kubernetes.Clientset, error
return c, err
}
// WaitForNode blocks until the node appears to be healthy
// WaitForNode blocks until the node appears to be healthy.
// It should not be called for [re]started primary control-plane node in HA clusters.
func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, timeout time.Duration) error {
start := time.Now()
register.Reg.SetStep(register.VerifyingKubernetes)
@ -525,19 +526,14 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time
return nil
}
if cfg.VerifyComponents[kverify.NodeReadyKey] {
// if extra waiting for system pods to be ready is required, we need node to be ready beforehand
if cfg.VerifyComponents[kverify.NodeReadyKey] || cfg.VerifyComponents[kverify.ExtraKey] {
name := bsutil.KubeNodeName(cfg, n)
if err := kverify.WaitNodeCondition(client, name, core.NodeReady, timeout); err != nil {
return errors.Wrap(err, "waiting for node to be ready")
}
}
if cfg.VerifyComponents[kverify.ExtraKey] {
if err := kverify.WaitExtra(client, kverify.CorePodsLabels, timeout); err != nil {
return errors.Wrap(err, "extra waiting")
}
}
cr, err := cruntime.New(cruntime.Config{Type: cfg.KubernetesConfig.ContainerRuntime, Runner: k.c})
if err != nil {
return errors.Wrapf(err, "create runtme-manager %s", cfg.KubernetesConfig.ContainerRuntime)
@ -589,7 +585,7 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time
}
// restartPrimaryControlPlane restarts the kubernetes cluster configured by kubeadm.
func (k *Bootstrapper) restartPrimaryControlPlane(cfg config.ClusterConfig) error {
func (k *Bootstrapper) restartPrimaryControlPlane(cfg config.ClusterConfig) error { //nolint: gocyclo
klog.Infof("restartPrimaryControlPlane start ...")
start := time.Now()
@ -729,7 +725,7 @@ func (k *Bootstrapper) restartPrimaryControlPlane(cfg config.ClusterConfig) erro
return err
}
for _, pod := range pods.Items {
if ready, _ := kverify.IsPodReady(&pod); !ready {
if !kverify.IsPodReady(&pod) {
return nil
}
}
@ -738,10 +734,6 @@ func (k *Bootstrapper) restartPrimaryControlPlane(cfg config.ClusterConfig) erro
_ = retry.Expo(wait, 250*time.Millisecond, 1*time.Minute)
klog.Infof("kubelet initialised")
klog.Infof("duration metric: took %s waiting for restarted kubelet to initialise ...", time.Since(start))
if err := kverify.WaitExtra(client, kverify.CorePodsLabels, kconst.DefaultControlPlaneTimeout); err != nil {
return errors.Wrap(err, "extra")
}
}
if err := bsutil.AdjustResourceLimits(k.c); err != nil {

View File

@ -97,14 +97,14 @@ func TestMultiControlPlane(t *testing.T) {
// validateHAStartCluster ensures ha (multi-control plane) cluster can start.
func validateHAStartCluster(ctx context.Context, t *testing.T, profile string) {
// start ha (multi-control plane) cluster
startArgs := append([]string{"start", "-p", profile, "--wait=true", "--memory=2200", "--ha", "-v=7", "--alsologtostderr"}, StartArgs()...)
startArgs := append([]string{"-p", profile, "start", "--ha", "--memory", "2200", "--wait", "true", "--alsologtostderr", "-v", "5"}, StartArgs()...)
rr, err := Run(t, exec.CommandContext(ctx, Target(), startArgs...))
if err != nil {
t.Fatalf("failed to fresh-start ha (multi-control plane) cluster. args %q : %v", rr.Command(), err)
}
// ensure minikube status shows 3 operational control-plane nodes
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
if err != nil {
t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
}
@ -125,19 +125,19 @@ func validateHAStartCluster(ctx context.Context, t *testing.T, profile string) {
// validateHADeployApp deploys an app to ha (multi-control plane) cluster and ensures all nodes can serve traffic.
func validateHADeployApp(ctx context.Context, t *testing.T, profile string) {
// Create a deployment for app
_, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "apply", "-f", "./testdata/ha/ha-pod-dns-test.yaml"))
_, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "apply", "-f", "./testdata/ha/ha-pod-dns-test.yaml"))
if err != nil {
t.Errorf("failed to create busybox deployment to ha (multi-control plane) cluster")
}
_, err = Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "rollout", "status", "deployment/busybox"))
_, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "rollout", "status", "deployment/busybox"))
if err != nil {
t.Errorf("failed to deploy busybox to ha (multi-control plane) cluster")
}
// resolve Pod IPs
resolvePodIPs := func() error {
rr, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "get", "pods", "-o", "jsonpath='{.items[*].status.podIP}'"))
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "get", "pods", "-o", "jsonpath='{.items[*].status.podIP}'"))
if err != nil {
err := fmt.Errorf("failed to retrieve Pod IPs (may be temporary): %v", err)
t.Log(err.Error())
@ -160,7 +160,7 @@ func validateHADeployApp(ctx context.Context, t *testing.T, profile string) {
}
// get Pod names
rr, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "get", "pods", "-o", "jsonpath='{.items[*].metadata.name}'"))
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "get", "pods", "-o", "jsonpath='{.items[*].metadata.name}'"))
if err != nil {
t.Errorf("failed get Pod names")
}
@ -168,7 +168,7 @@ func validateHADeployApp(ctx context.Context, t *testing.T, profile string) {
// verify all Pods could resolve a public DNS
for _, name := range podNames {
_, err = Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "nslookup", "kubernetes.io"))
_, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "exec", name, "--", "nslookup", "kubernetes.io"))
if err != nil {
t.Errorf("Pod %s could not resolve 'kubernetes.io': %v", name, err)
}
@ -178,7 +178,7 @@ func validateHADeployApp(ctx context.Context, t *testing.T, profile string) {
// this one is also checked by k8s e2e node conformance tests:
// https://github.com/kubernetes/kubernetes/blob/f137c4777095b3972e2dd71a01365d47be459389/test/e2e_node/environment/conformance.go#L125-L179
for _, name := range podNames {
_, err = Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "nslookup", "kubernetes.default"))
_, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "exec", name, "--", "nslookup", "kubernetes.default"))
if err != nil {
t.Errorf("Pod %s could not resolve 'kubernetes.default': %v", name, err)
}
@ -186,7 +186,7 @@ func validateHADeployApp(ctx context.Context, t *testing.T, profile string) {
// verify all pods could resolve to a local service.
for _, name := range podNames {
_, err = Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "nslookup", "kubernetes.default.svc.cluster.local"))
_, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "exec", name, "--", "nslookup", "kubernetes.default.svc.cluster.local"))
if err != nil {
t.Errorf("Pod %s could not resolve local service (kubernetes.default.svc.cluster.local): %v", name, err)
}
@ -196,7 +196,7 @@ func validateHADeployApp(ctx context.Context, t *testing.T, profile string) {
// validateHAPingHostFromPods uses app previously deployed by validateDeployAppToHACluster to verify its pods, located on different nodes, can resolve "host.minikube.internal".
func validateHAPingHostFromPods(ctx context.Context, t *testing.T, profile string) {
// get Pod names
rr, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "get", "pods", "-o", "jsonpath='{.items[*].metadata.name}'"))
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "get", "pods", "-o", "jsonpath='{.items[*].metadata.name}'"))
if err != nil {
t.Fatalf("failed to get Pod names: %v", err)
}
@ -204,7 +204,7 @@ func validateHAPingHostFromPods(ctx context.Context, t *testing.T, profile strin
for _, name := range podNames {
// get host.minikube.internal ip as resolved by nslookup
out, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "sh", "-c", "nslookup host.minikube.internal | awk 'NR==5' | cut -d' ' -f3"))
out, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "exec", name, "--", "sh", "-c", "nslookup host.minikube.internal | awk 'NR==5' | cut -d' ' -f3"))
if err != nil {
t.Errorf("Pod %s could not resolve 'host.minikube.internal': %v", name, err)
continue
@ -215,7 +215,7 @@ func validateHAPingHostFromPods(ctx context.Context, t *testing.T, profile strin
}
// try pinging host from pod
ping := fmt.Sprintf("ping -c 1 %s", hostIP)
if _, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "sh", "-c", ping)); err != nil {
if _, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "exec", name, "--", "sh", "-c", ping)); err != nil {
t.Errorf("Failed to ping host (%s) from pod (%s): %v", hostIP, name, err)
}
}
@ -224,14 +224,14 @@ func validateHAPingHostFromPods(ctx context.Context, t *testing.T, profile strin
// validateHAAddWorkerNode uses the minikube node add command to add a worker node to an existing ha (multi-control plane) cluster.
func validateHAAddWorkerNode(ctx context.Context, t *testing.T, profile string) {
// add a node to the current ha (multi-control plane) cluster
addArgs := []string{"node", "add", "-p", profile, "-v=7", "--alsologtostderr"}
addArgs := []string{"-p", profile, "node", "add", "--alsologtostderr", "-v", "5"}
rr, err := Run(t, exec.CommandContext(ctx, Target(), addArgs...))
if err != nil {
t.Fatalf("failed to add worker node to current ha (multi-control plane) cluster. args %q : %v", rr.Command(), err)
}
// ensure minikube status shows 3 operational control-plane nodes and 1 worker node
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
if err != nil {
t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
}
@ -325,7 +325,7 @@ func validateHACopyFile(ctx context.Context, t *testing.T, profile string) {
t.Skipf("skipping: cp is unsupported by none driver")
}
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--output", "json", "-v=7", "--alsologtostderr"))
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--output", "json", "--alsologtostderr", "-v", "5"))
if err != nil && rr.ExitCode != 7 {
t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
}
@ -362,13 +362,13 @@ func validateHACopyFile(ctx context.Context, t *testing.T, profile string) {
// validateHAStopSecondaryNode tests ha (multi-control plane) cluster by stopping a secondary control-plane node using minikube node stop command.
func validateHAStopSecondaryNode(ctx context.Context, t *testing.T, profile string) {
// run minikube node stop on secondary control-plane node
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "stop", SecondNodeName, "-v=7", "--alsologtostderr"))
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "stop", SecondNodeName, "--alsologtostderr", "-v", "5"))
if err != nil {
t.Errorf("secondary control-plane node stop returned an error. args %q: %v", rr.Command(), err)
}
// ensure minikube status shows 3 running nodes and 1 stopped node
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
// exit code 7 means a host is stopped, which we are expecting
if err != nil && rr.ExitCode != 7 {
t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
@ -419,7 +419,7 @@ func validateHAStatusDegraded(ctx context.Context, t *testing.T, profile string)
// validateHARestartSecondaryNode tests the minikube node start command on existing stopped secondary node.
func validateHARestartSecondaryNode(ctx context.Context, t *testing.T, profile string) {
// start stopped node(s) back up
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "start", SecondNodeName, "-v=7", "--alsologtostderr"))
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "start", SecondNodeName, "--alsologtostderr", "-v", "5"))
if err != nil {
t.Log(rr.Stderr.String())
t.Errorf("secondary control-plane node start returned an error. args %q: %v", rr.Command(), err)
@ -427,7 +427,7 @@ func validateHARestartSecondaryNode(ctx context.Context, t *testing.T, profile s
// ensure minikube status shows all 4 nodes running, waiting for ha (multi-control plane) cluster/apiservers to stabilise
minikubeStatus := func() error {
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
return err
}
if err := retry.Expo(minikubeStatus, 1*time.Second, 60*time.Second); err != nil {
@ -455,23 +455,23 @@ func validateHARestartSecondaryNode(ctx context.Context, t *testing.T, profile s
// validateHARestartClusterKeepsNodes restarts minikube cluster and checks if the reported node list is unchanged.
func validateHARestartClusterKeepsNodes(ctx context.Context, t *testing.T, profile string) {
rr, err := Run(t, exec.CommandContext(ctx, Target(), "node", "list", "-p", profile, "-v=7", "--alsologtostderr"))
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "list", "--alsologtostderr", "-v", "5"))
if err != nil {
t.Errorf("failed to run node list. args %q : %v", rr.Command(), err)
}
nodeList := rr.Stdout.String()
_, err = Run(t, exec.CommandContext(ctx, Target(), "stop", "-p", profile, "-v=7", "--alsologtostderr"))
_, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "stop", "--alsologtostderr", "-v", "5"))
if err != nil {
t.Errorf("failed to run minikube stop. args %q : %v", rr.Command(), err)
}
_, err = Run(t, exec.CommandContext(ctx, Target(), "start", "-p", profile, "--wait=true", "-v=7", "--alsologtostderr"))
_, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "start", "--wait", "true", "--alsologtostderr", "-v", "5"))
if err != nil {
t.Errorf("failed to run minikube start. args %q : %v", rr.Command(), err)
}
rr, err = Run(t, exec.CommandContext(ctx, Target(), "node", "list", "-p", profile))
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "list", "--alsologtostderr", "-v", "5"))
if err != nil {
t.Errorf("failed to run node list. args %q : %v", rr.Command(), err)
}
@ -486,13 +486,13 @@ func validateHARestartClusterKeepsNodes(ctx context.Context, t *testing.T, profi
// note: currently, 'minikube status' subcommand relies on primary control-plane node and storage-provisioner only runs on a primary control-plane node.
func validateHADeleteSecondaryNode(ctx context.Context, t *testing.T, profile string) {
// delete the other secondary control-plane node
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "delete", ThirdNodeName, "-v=7", "--alsologtostderr"))
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "delete", ThirdNodeName, "--alsologtostderr", "-v", "5"))
if err != nil {
t.Errorf("node delete returned an error. args %q: %v", rr.Command(), err)
}
// ensure status is back down to 3 hosts
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
if err != nil {
t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
}
@ -530,13 +530,13 @@ func validateHADeleteSecondaryNode(ctx context.Context, t *testing.T, profile st
// validateHAStopCluster runs minikube stop on a ha (multi-control plane) cluster.
func validateHAStopCluster(ctx context.Context, t *testing.T, profile string) {
// Run minikube stop on the cluster
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "stop", "-v=7", "--alsologtostderr"))
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "stop", "--alsologtostderr", "-v", "5"))
if err != nil {
t.Errorf("failed to stop cluster. args %q: %v", rr.Command(), err)
}
// ensure minikube status shows all 3 nodes stopped
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
// exit code 7 means a host is stopped, which we are expecting
if err != nil && rr.ExitCode != 7 {
t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
@ -558,14 +558,14 @@ func validateHAStopCluster(ctx context.Context, t *testing.T, profile string) {
// validateHARestartCluster verifies a soft restart on a ha (multi-control plane) cluster works.
func validateHARestartCluster(ctx context.Context, t *testing.T, profile string) {
// restart cluster with minikube start
startArgs := append([]string{"start", "-p", profile, "--wait=true", "-v=7", "--alsologtostderr"}, StartArgs()...)
startArgs := append([]string{"-p", profile, "start", "--wait", "true", "--alsologtostderr", "-v", "5"}, StartArgs()...)
rr, err := Run(t, exec.CommandContext(ctx, Target(), startArgs...))
if err != nil {
t.Fatalf("failed to start cluster. args %q : %v", rr.Command(), err)
}
// ensure minikube status shows all 3 nodes running
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
if err != nil {
t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
}
@ -603,14 +603,14 @@ func validateHARestartCluster(ctx context.Context, t *testing.T, profile string)
// validateHAAddSecondaryNode uses the minikube node add command to add a secondary control-plane node to an existing ha (multi-control plane) cluster.
func validateHAAddSecondaryNode(ctx context.Context, t *testing.T, profile string) {
// add a node to the current ha (multi-control plane) cluster
addArgs := []string{"node", "add", "-p", profile, "--control-plane", "-v=7", "--alsologtostderr"}
addArgs := []string{"-p", profile, "node", "add", "--control-plane", "--alsologtostderr", "-v", "5"}
rr, err := Run(t, exec.CommandContext(ctx, Target(), addArgs...))
if err != nil {
t.Fatalf("failed to add control-plane node to current ha (multi-control plane) cluster. args %q : %v", rr.Command(), err)
}
// ensure minikube status shows 3 operational control-plane nodes and 1 worker node
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
if err != nil {
t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
}

View File

@ -92,7 +92,7 @@ func TestMultiNode(t *testing.T) {
// validateMultiNodeStart makes sure a 2 node cluster can start
func validateMultiNodeStart(ctx context.Context, t *testing.T, profile string) {
// Start a 2 node cluster with the --nodes param
startArgs := append([]string{"start", "-p", profile, "--wait=true", "--memory=2200", "--nodes=2", "-v=8", "--alsologtostderr"}, StartArgs()...)
startArgs := append([]string{"start", "-p", profile, "--wait=true", "--memory=2200", "--nodes=2", "-v=5", "--alsologtostderr"}, StartArgs()...)
rr, err := Run(t, exec.CommandContext(ctx, Target(), startArgs...))
if err != nil {
t.Fatalf("failed to start cluster. args %q : %v", rr.Command(), err)
@ -117,7 +117,7 @@ func validateMultiNodeStart(ctx context.Context, t *testing.T, profile string) {
// validateAddNodeToMultiNode uses the minikube node add command to add a node to an existing cluster
func validateAddNodeToMultiNode(ctx context.Context, t *testing.T, profile string) {
// Add a node to the current cluster
addArgs := []string{"node", "add", "-p", profile, "-v", "3", "--alsologtostderr"}
addArgs := []string{"node", "add", "-p", profile, "-v=5", "--alsologtostderr"}
rr, err := Run(t, exec.CommandContext(ctx, Target(), addArgs...))
if err != nil {
t.Fatalf("failed to add node to current cluster. args %q : %v", rr.Command(), err)
@ -279,7 +279,7 @@ func validateStopRunningNode(ctx context.Context, t *testing.T, profile string)
// validateStartNodeAfterStop tests the minikube node start command on an existing stopped node
func validateStartNodeAfterStop(ctx context.Context, t *testing.T, profile string) {
// Start the node back up
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "start", ThirdNodeName, "-v=7", "--alsologtostderr"))
rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "start", ThirdNodeName, "-v=5", "--alsologtostderr"))
if err != nil {
t.Log(rr.Stderr.String())
t.Errorf("node start returned an error. args %q: %v", rr.Command(), err)
@ -287,7 +287,7 @@ func validateStartNodeAfterStop(ctx context.Context, t *testing.T, profile strin
// Make sure minikube status shows 3 running hosts
minikubeStatus := func() error {
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=5", "--alsologtostderr"))
return err
}
if err := retry.Expo(minikubeStatus, 1*time.Second, 60*time.Second); err != nil {
@ -323,7 +323,7 @@ func validateRestartKeepsNodes(ctx context.Context, t *testing.T, profile string
t.Errorf("failed to run minikube stop. args %q : %v", rr.Command(), err)
}
_, err = Run(t, exec.CommandContext(ctx, Target(), "start", "-p", profile, "--wait=true", "-v=8", "--alsologtostderr"))
_, err = Run(t, exec.CommandContext(ctx, Target(), "start", "-p", profile, "--wait=true", "-v=5", "--alsologtostderr"))
if err != nil {
t.Errorf("failed to run minikube start. args %q : %v", rr.Command(), err)
}
@ -372,7 +372,7 @@ func validateStopMultiNodeCluster(ctx context.Context, t *testing.T, profile str
// validateRestartMultiNodeCluster verifies a soft restart on a multinode cluster works
func validateRestartMultiNodeCluster(ctx context.Context, t *testing.T, profile string) {
// Restart a full cluster with minikube start
startArgs := append([]string{"start", "-p", profile, "--wait=true", "-v=8", "--alsologtostderr"}, StartArgs()...)
startArgs := append([]string{"start", "-p", profile, "--wait=true", "-v=5", "--alsologtostderr"}, StartArgs()...)
rr, err := Run(t, exec.CommandContext(ctx, Target(), startArgs...))
if err != nil {
t.Fatalf("failed to start cluster. args %q : %v", rr.Command(), err)