diff --git a/cmd/minikube/cmd/start.go b/cmd/minikube/cmd/start.go
index c31dd21f43..cd6e786d23 100644
--- a/cmd/minikube/cmd/start.go
+++ b/cmd/minikube/cmd/start.go
@@ -47,15 +47,14 @@ import (
 	"github.com/spf13/viper"
 	"golang.org/x/text/cases"
 	"golang.org/x/text/language"
-	"k8s.io/minikube/pkg/minikube/command"
-	"k8s.io/minikube/pkg/minikube/firewall"
-	netutil "k8s.io/minikube/pkg/network"
-
 	"k8s.io/klog/v2"
+
 	cmdcfg "k8s.io/minikube/cmd/minikube/cmd/config"
 	"k8s.io/minikube/pkg/drivers/kic/oci"
 	"k8s.io/minikube/pkg/minikube/bootstrapper/bsutil"
+	"k8s.io/minikube/pkg/minikube/bootstrapper/bsutil/kverify"
 	"k8s.io/minikube/pkg/minikube/bootstrapper/images"
+	"k8s.io/minikube/pkg/minikube/command"
 	"k8s.io/minikube/pkg/minikube/config"
 	"k8s.io/minikube/pkg/minikube/constants"
 	"k8s.io/minikube/pkg/minikube/cruntime"
@@ -64,6 +63,7 @@ import (
 	"k8s.io/minikube/pkg/minikube/driver"
 	"k8s.io/minikube/pkg/minikube/driver/auxdriver"
 	"k8s.io/minikube/pkg/minikube/exit"
+	"k8s.io/minikube/pkg/minikube/firewall"
 	"k8s.io/minikube/pkg/minikube/kubeconfig"
 	"k8s.io/minikube/pkg/minikube/localpath"
 	"k8s.io/minikube/pkg/minikube/machine"
@@ -74,13 +74,14 @@ import (
 	"k8s.io/minikube/pkg/minikube/out/register"
 	"k8s.io/minikube/pkg/minikube/pause"
 	"k8s.io/minikube/pkg/minikube/reason"
-	"k8s.io/minikube/pkg/minikube/style"
-	pkgtrace "k8s.io/minikube/pkg/trace"
-
 	"k8s.io/minikube/pkg/minikube/registry"
+	"k8s.io/minikube/pkg/minikube/style"
 	"k8s.io/minikube/pkg/minikube/translate"
+	netutil "k8s.io/minikube/pkg/network"
+	pkgtrace "k8s.io/minikube/pkg/trace"
 	"k8s.io/minikube/pkg/util"
 	"k8s.io/minikube/pkg/version"
+	kconst "k8s.io/minikube/third_party/kubeadm/app/constants"
 )

 type versionJSON struct {
@@ -287,6 +288,12 @@ func runStart(cmd *cobra.Command, _ []string) {
 		exit.Error(reason.GuestStart, "failed to start node", err)
 	}

+	if starter.Cfg.VerifyComponents[kverify.ExtraKey] {
+		if err := kverify.WaitExtra(ClusterFlagValue(), kverify.CorePodsLabels, kconst.DefaultControlPlaneTimeout); err != nil {
+			exit.Message(reason.GuestStart, "extra waiting: {{.error}}", out.V{"error": err})
+		}
+	}
+
 	if err := showKubectlInfo(kubeconfig, starter.Node.KubernetesVersion, starter.Node.ContainerRuntime, starter.Cfg.Name); err != nil {
 		klog.Errorf("kubectl info: %v", err)
 	}
diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go b/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go
index b4d068646a..6b27d76a60 100644
--- a/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go
+++ b/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go
@@ -52,7 +52,7 @@ var (
 	// DefaultWaitList is list of all default components to wait for. only names to be used for start flags.
 	DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey}
 	// AllComponentsList list of all valid components keys to wait for. only names to be used used for start flags.
-	AllComponentsList = []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey, AppsRunningKey, NodeReadyKey, KubeletKey}
+	AllComponentsList = []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey, AppsRunningKey, NodeReadyKey, KubeletKey, ExtraKey}
 	// AppsRunningList running list are valid k8s-app components to wait for them to be running
 	AppsRunningList = []string{
 		"kube-dns", // coredns
diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/node_ready.go b/pkg/minikube/bootstrapper/bsutil/kverify/node_ready.go
index 17d5172b03..999942d062 100644
--- a/pkg/minikube/bootstrapper/bsutil/kverify/node_ready.go
+++ b/pkg/minikube/bootstrapper/bsutil/kverify/node_ready.go
@@ -38,43 +38,47 @@ func WaitNodeCondition(cs *kubernetes.Clientset, name string, condition core.Nod
 		klog.Infof("duration metric: took %s for node %q to be %q ...", time.Since(start), name, condition)
 	}()

-	lap := time.Now()
-	checkCondition := func(_ context.Context) (bool, error) {
-		if time.Since(start) > timeout {
-			return false, fmt.Errorf("timed out waiting %v for node %q to be %q (will not retry!)", timeout, name, condition)
-		}
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	defer cancel()

-		status, reason := nodeConditionStatus(cs, name, condition)
+	lap := time.Now()
+	checkCondition := func(ctx context.Context) (bool, error) {
+		status, err := nodeConditionStatus(ctx, cs, name, condition)
+		// done if node has condition
 		if status == core.ConditionTrue {
-			klog.Info(reason)
+			klog.Infof("node %q is %q", name, condition)
 			return true, nil
 		}
+		// retry in all other cases, decrease log spam
 		if time.Since(lap) > (2 * time.Second) {
-			klog.Info(reason)
+			if err != nil {
+				klog.Warningf("error getting node %q condition %q status (will retry): %v", name, condition, err)
+			} else {
+				klog.Warningf("node %q has %q:%q status (will retry)", name, condition, status)
+			}
 			lap = time.Now()
 		}
 		return false, nil
 	}
-	if err := wait.PollUntilContextTimeout(context.Background(), kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, true, checkCondition); err != nil {
-		return fmt.Errorf("waitNodeCondition: %w", err)
+	if err := wait.PollUntilContextCancel(ctx, kconst.APICallRetryInterval, true, checkCondition); err != nil {
+		return fmt.Errorf("WaitNodeCondition: %w", err)
 	}

 	return nil
 }

-// nodeConditionStatus returns if node is in specified condition and verbose reason.
-func nodeConditionStatus(cs *kubernetes.Clientset, name string, condition core.NodeConditionType) (status core.ConditionStatus, reason string) {
-	node, err := cs.CoreV1().Nodes().Get(context.Background(), name, meta.GetOptions{})
+// nodeConditionStatus checks if node exists and returns condition status.
+func nodeConditionStatus(ctx context.Context, cs *kubernetes.Clientset, name string, condition core.NodeConditionType) (core.ConditionStatus, error) {
+	node, err := cs.CoreV1().Nodes().Get(ctx, name, meta.GetOptions{})
 	if err != nil {
-		return core.ConditionUnknown, fmt.Sprintf("error getting node %q: %v", name, err)
+		return core.ConditionUnknown, err
 	}
-
+	// check if node has the condition
 	for _, c := range node.Status.Conditions {
 		if c.Type == condition {
-			return c.Status, fmt.Sprintf("node %q has status %q:%q", node.Name, condition, c.Status)
+			return c.Status, nil
 		}
 	}
-
-	// assume transient condition
-	return core.ConditionFalse, fmt.Sprintf("node %q doesn't have %q status: %+v", node.Name, condition, node.Status)
+	// assume transient error
+	return core.ConditionUnknown, fmt.Errorf("node %q does not have %q condition type: %+v", name, condition, node.Status)
 }
diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go
index 6cbe791ee8..666c24731c 100644
--- a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go
+++ b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go
@@ -19,140 +19,138 @@ package kverify

 import (
 	"context"
-	"errors"
 	"fmt"
 	"time"

 	core "k8s.io/api/core/v1"
+	kerrors "k8s.io/apimachinery/pkg/api/errors"
 	meta "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/klog/v2"
+	"k8s.io/minikube/pkg/kapi"
 	kconst "k8s.io/minikube/third_party/kubeadm/app/constants"
 )

-// WaitExtra calls waitPodCondition for all system-critical pods including those with specified labels.
-func WaitExtra(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error {
-	klog.Infof("extra waiting up to %v for all system-critical pods including labels %v to be %q ...", timeout, labels, core.PodReady)
+// WaitExtra calls waitPodCondition for all (at least one) kube-system pods having one of specified labels to be "Ready" on profile cluster.
+func WaitExtra(profile string, labels []string, timeout time.Duration) error {
+	klog.Infof("extra waiting up to %v for all %q pods having one of %v labels to be %q ...", timeout, meta.NamespaceSystem, labels, core.PodReady)

 	start := time.Now()
 	defer func() {
-		klog.Infof("duration metric: took %s for extra waiting for all system-critical and pods with labels %v to be %q ...", time.Since(start), labels, core.PodReady)
+		klog.Infof("duration metric: took %s for extra waiting for all %q pods having one of %v labels to be %q ...", time.Since(start), meta.NamespaceSystem, labels, core.PodReady)
 	}()

-	pods, err := cs.CoreV1().Pods(meta.NamespaceSystem).List(context.Background(), meta.ListOptions{})
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	defer cancel()
+
+	cs, err := kapi.Client(profile)
 	if err != nil {
-		return fmt.Errorf("error listing pods in %q namespace: %w", meta.NamespaceSystem, err)
+		return fmt.Errorf("failed to get kube client: %v", err)
 	}

-	for _, pod := range pods.Items {
-		if time.Since(start) > timeout {
-			return fmt.Errorf("timed out waiting %v for all system-critical and pods with labels %v to be %q", timeout, labels, core.NodeReady)
+	// podsReady poll function checks if all (at least one) pods in the namespace having the label is Ready
+	var label string
+	podsReady := func(ctx context.Context) (bool, error) {
+		pods, err := cs.CoreV1().Pods(meta.NamespaceSystem).List(ctx, meta.ListOptions{LabelSelector: label})
+		if err != nil {
+			klog.Warningf("error listing pods in %q namespace with %q label, will retry: %v", meta.NamespaceSystem, label, err)
+			return false, nil
 		}
-
-		for k, v := range pod.Labels {
-			label := fmt.Sprintf("%s=%s", k, v)
-			match := false
-			for _, l := range labels {
-				if l == label {
-					match = true
-					break
-				}
-			}
-			// ignore system-critical pods' non-essential labels
-			if !match && pod.Namespace != meta.NamespaceSystem && k != "k8s-app" && k != "component" {
-				continue
-			}
-			if match || pod.Spec.PriorityClassName == "system-cluster-critical" || pod.Spec.PriorityClassName == "system-node-critical" {
-				if err := waitPodCondition(cs, pod.Name, pod.Namespace, core.PodReady, timeout); err != nil {
-					klog.Errorf("WaitExtra: %v", err)
-				}
-				break
+		if len(pods.Items) == 0 {
+			klog.Warningf("no pods in %q namespace with %q label found, will retry", meta.NamespaceSystem, label)
+			return false, nil
+		}
+		for _, pod := range pods.Items {
+			if err := waitPodCondition(ctx, cs, pod.Name, pod.Namespace, core.PodReady); err != nil {
+				klog.Warningf("not all pods in %q namespace with %q label are %q, will retry: %v", meta.NamespaceSystem, label, core.PodReady, err)
+				return false, nil
 			}
 		}
+		return true, nil
+	}
+	for _, l := range labels {
+		label = l
+		if err := wait.PollUntilContextCancel(ctx, kconst.APICallRetryInterval, true, podsReady); err != nil {
+			return fmt.Errorf("WaitExtra: %w", err)
+		}
 	}

 	return nil
 }

-// waitPodCondition waits for specified condition of podName in a namespace.
-func waitPodCondition(cs *kubernetes.Clientset, name, namespace string, condition core.PodConditionType, timeout time.Duration) error {
-	klog.Infof("waiting up to %v for pod %q in %q namespace to be %q ...", timeout, name, namespace, condition)
+// waitPodCondition waits for specified condition of pod name in namespace.
+func waitPodCondition(ctx context.Context, cs *kubernetes.Clientset, name, namespace string, condition core.PodConditionType) error {
+	klog.Infof("waiting for pod %q in %q namespace to be %q or be gone ...", name, namespace, condition)

 	start := time.Now()
 	defer func() {
-		klog.Infof("duration metric: took %s for pod %q in %q namespace to be %q ...", time.Since(start), name, namespace, condition)
+		klog.Infof("duration metric: took %s for pod %q in %q namespace to be %q or be gone ...", time.Since(start), name, namespace, condition)
 	}()

 	lap := time.Now()
-	checkCondition := func(_ context.Context) (bool, error) {
-		if time.Since(start) > timeout {
-			return false, fmt.Errorf("timed out waiting %v for pod %q in %q namespace to be %q (will not retry!)", timeout, name, namespace, condition)
-		}
-
-		status, reason := podConditionStatus(cs, name, namespace, condition)
+	checkCondition := func(ctx context.Context) (bool, error) {
+		status, err := podConditionStatus(ctx, cs, name, namespace, condition)
+		// done if pod has condition
 		if status == core.ConditionTrue {
-			klog.Info(reason)
+			klog.Infof("pod %q is %q", name, condition)
 			return true, nil
 		}
-		// return immediately: status == core.ConditionUnknown
-		if status == core.ConditionUnknown {
-			klog.Info(reason)
-			return false, errors.New(reason)
+		// back off if pod or node is gone
+		if kerrors.IsNotFound(err) || status == core.TaintNodeUnreachable {
+			klog.Infof("pod %q in %q namespace is gone: %v", name, namespace, err)
+			return true, nil
 		}
-		// reduce log spam
+		// retry in all other cases, decrease log spam
 		if time.Since(lap) > (2 * time.Second) {
-			klog.Info(reason)
+			klog.Warningf("pod %q is not %q, error: %v", name, condition, err)
 			lap = time.Now()
 		}
-		// return immediately: status == core.ConditionFalse
 		return false, nil
 	}

-	if err := wait.PollUntilContextTimeout(context.Background(), kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, true, checkCondition); err != nil {
+	if err := wait.PollUntilContextCancel(ctx, kconst.APICallRetryInterval, true, checkCondition); err != nil {
 		return fmt.Errorf("waitPodCondition: %w", err)
 	}

 	return nil
 }

-// podConditionStatus returns if pod is in specified condition and verbose reason.
-func podConditionStatus(cs *kubernetes.Clientset, name, namespace string, condition core.PodConditionType) (status core.ConditionStatus, reason string) {
-	pod, err := cs.CoreV1().Pods(namespace).Get(context.Background(), name, meta.GetOptions{})
+// podConditionStatus returns if pod is in specified condition.
+func podConditionStatus(ctx context.Context, cs *kubernetes.Clientset, name, namespace string, condition core.PodConditionType) (core.ConditionStatus, error) {
+	pod, err := cs.CoreV1().Pods(namespace).Get(ctx, name, meta.GetOptions{})
 	if err != nil {
-		return core.ConditionUnknown, fmt.Sprintf("error getting pod %q in %q namespace (skipping!): %v", name, namespace, err)
+		return core.ConditionUnknown, fmt.Errorf("getting pod %q in %q namespace (will retry): %w", name, namespace, err)
 	}
-
-	// check if undelying node is Ready - in case we got stale data about the pod
-	if pod.Spec.NodeName != "" {
-		if status, reason := nodeConditionStatus(cs, pod.Spec.NodeName, core.NodeReady); status != core.ConditionTrue {
-			return core.ConditionUnknown, fmt.Sprintf("node %q hosting pod %q in %q namespace is currently not %q (skipping!): %v", pod.Spec.NodeName, name, namespace, core.NodeReady, reason)
+	// check if pod is scheduled on any node
+	if pod.Spec.NodeName == "" {
+		return core.ConditionUnknown, fmt.Errorf("pod %q in %q namespace is not scheduled on any node (will retry): %+v", name, namespace, pod.Status)
+	}
+	// check if node exists and is Ready (KubeAPI)
+	nodeReadyStatus, err := nodeConditionStatus(ctx, cs, pod.Spec.NodeName, core.NodeReady)
+	if err != nil {
+		if kerrors.IsNotFound(err) {
+			return core.TaintNodeUnreachable, fmt.Errorf("node %q hosting pod %q is not found/running (skipping!): %v", pod.Spec.NodeName, name, err)
 		}
+		return core.ConditionUnknown, fmt.Errorf("node %q hosting pod %q is not %q (will retry): %v", pod.Spec.NodeName, name, core.NodeReady, err)
 	}
-
-	if pod.Status.Phase != core.PodRunning && pod.Status.Phase != core.PodPending {
-		return core.ConditionUnknown, fmt.Sprintf("pod %q in %q namespace has status phase %q (skipping!): %+v", pod.Name, pod.Namespace, pod.Status.Phase, pod.Status)
+	if nodeReadyStatus != core.ConditionTrue {
+		return core.ConditionUnknown, fmt.Errorf("node %q hosting pod %q is not %q (will retry)", pod.Spec.NodeName, name, core.NodeReady)
 	}
-
+	// check if pod has the condition
 	for _, c := range pod.Status.Conditions {
 		if c.Type == condition {
-			return c.Status, fmt.Sprintf("pod %q in %q namespace has status %q:%q", pod.Name, pod.Namespace, condition, c.Status)
+			return c.Status, nil
 		}
 	}
-
-	// assume transient condition
-	return core.ConditionFalse, fmt.Sprintf("pod %q in %q namespace doesn't have %q status: %+v", pod.Name, pod.Namespace, core.PodReady, pod.Status)
+	// assume transient error
+	return core.ConditionUnknown, fmt.Errorf("pod %q does not have %q condition type: %+v", name, condition, pod.Status)
 }

-// IsPodReady returns if pod is Ready and verbose reason.
-func IsPodReady(pod *core.Pod) (ready bool, reason string) {
-	if pod.Status.Phase != core.PodRunning {
-		return false, fmt.Sprintf("pod %q in %q namespace is not Running: %+v", pod.Name, pod.Namespace, pod.Status)
-	}
+// IsPodReady returns if pod is Ready.
+func IsPodReady(pod *core.Pod) bool {
 	for _, c := range pod.Status.Conditions {
 		if c.Type == core.PodReady {
-			if c.Status != core.ConditionTrue {
-				return false, fmt.Sprintf("pod %q in %q namespace is not Ready: %+v", pod.Name, pod.Namespace, c)
-			}
-			return true, fmt.Sprintf("pod %q in %q namespace is Ready: %+v", pod.Name, pod.Namespace, c)
+			return c.Status == core.ConditionTrue
 		}
 	}
-	return false, fmt.Sprintf("pod %q in %q namespace does not have %q status: %+v", pod.Name, pod.Namespace, core.PodReady, pod.Status)
+	// assume transient error
+	return false
 }
diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go
index 325ec08176..fc6c25de15 100644
--- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go
+++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go
@@ -490,7 +490,8 @@ func (k *Bootstrapper) client(ip string, port int) (*kubernetes.Clientset, error
 	return c, err
 }

-// WaitForNode blocks until the node appears to be healthy
+// WaitForNode blocks until the node appears to be healthy.
+// It should not be called for [re]started primary control-plane node in HA clusters.
 func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, timeout time.Duration) error {
 	start := time.Now()
 	register.Reg.SetStep(register.VerifyingKubernetes)
@@ -525,19 +526,14 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time
 		return nil
 	}

-	if cfg.VerifyComponents[kverify.NodeReadyKey] {
+	// if extra waiting for system pods to be ready is required, we need node to be ready beforehand
+	if cfg.VerifyComponents[kverify.NodeReadyKey] || cfg.VerifyComponents[kverify.ExtraKey] {
 		name := bsutil.KubeNodeName(cfg, n)
 		if err := kverify.WaitNodeCondition(client, name, core.NodeReady, timeout); err != nil {
 			return errors.Wrap(err, "waiting for node to be ready")
 		}
 	}

-	if cfg.VerifyComponents[kverify.ExtraKey] {
-		if err := kverify.WaitExtra(client, kverify.CorePodsLabels, timeout); err != nil {
-			return errors.Wrap(err, "extra waiting")
-		}
-	}
-
 	cr, err := cruntime.New(cruntime.Config{Type: cfg.KubernetesConfig.ContainerRuntime, Runner: k.c})
 	if err != nil {
 		return errors.Wrapf(err, "create runtme-manager %s", cfg.KubernetesConfig.ContainerRuntime)
@@ -589,7 +585,7 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time
 }

 // restartPrimaryControlPlane restarts the kubernetes cluster configured by kubeadm.
-func (k *Bootstrapper) restartPrimaryControlPlane(cfg config.ClusterConfig) error {
+func (k *Bootstrapper) restartPrimaryControlPlane(cfg config.ClusterConfig) error { //nolint: gocyclo
 	klog.Infof("restartPrimaryControlPlane start ...")

 	start := time.Now()
@@ -729,7 +725,7 @@ func (k *Bootstrapper) restartPrimaryControlPlane(cfg config.ClusterConfig) erro
 			return err
 		}
 		for _, pod := range pods.Items {
-			if ready, _ := kverify.IsPodReady(&pod); !ready {
+			if !kverify.IsPodReady(&pod) {
 				return nil
 			}
 		}
@@ -738,10 +734,6 @@ func (k *Bootstrapper) restartPrimaryControlPlane(cfg config.ClusterConfig) erro
 		_ = retry.Expo(wait, 250*time.Millisecond, 1*time.Minute)
 		klog.Infof("kubelet initialised")
 		klog.Infof("duration metric: took %s waiting for restarted kubelet to initialise ...", time.Since(start))
-
-		if err := kverify.WaitExtra(client, kverify.CorePodsLabels, kconst.DefaultControlPlaneTimeout); err != nil {
-			return errors.Wrap(err, "extra")
-		}
 	}

 	if err := bsutil.AdjustResourceLimits(k.c); err != nil {
diff --git a/test/integration/ha_test.go b/test/integration/ha_test.go
index 3d7f7adc9b..02c5a55889 100644
--- a/test/integration/ha_test.go
+++ b/test/integration/ha_test.go
@@ -97,14 +97,14 @@ func TestMultiControlPlane(t *testing.T) {
 // validateHAStartCluster ensures ha (multi-control plane) cluster can start.
 func validateHAStartCluster(ctx context.Context, t *testing.T, profile string) {
 	// start ha (multi-control plane) cluster
-	startArgs := append([]string{"start", "-p", profile, "--wait=true", "--memory=2200", "--ha", "-v=7", "--alsologtostderr"}, StartArgs()...)
+	startArgs := append([]string{"-p", profile, "start", "--ha", "--memory", "2200", "--wait", "true", "--alsologtostderr", "-v", "5"}, StartArgs()...)
 	rr, err := Run(t, exec.CommandContext(ctx, Target(), startArgs...))
 	if err != nil {
 		t.Fatalf("failed to fresh-start ha (multi-control plane) cluster. args %q : %v", rr.Command(), err)
 	}

 	// ensure minikube status shows 3 operational control-plane nodes
-	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
+	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
 	if err != nil {
 		t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
 	}
@@ -125,19 +125,19 @@ func validateHAStartCluster(ctx context.Context, t *testing.T, profile string) {
 // validateHADeployApp deploys an app to ha (multi-control plane) cluster and ensures all nodes can serve traffic.
 func validateHADeployApp(ctx context.Context, t *testing.T, profile string) {
 	// Create a deployment for app
-	_, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "apply", "-f", "./testdata/ha/ha-pod-dns-test.yaml"))
+	_, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "apply", "-f", "./testdata/ha/ha-pod-dns-test.yaml"))
 	if err != nil {
 		t.Errorf("failed to create busybox deployment to ha (multi-control plane) cluster")
 	}

-	_, err = Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "rollout", "status", "deployment/busybox"))
+	_, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "rollout", "status", "deployment/busybox"))
 	if err != nil {
 		t.Errorf("failed to deploy busybox to ha (multi-control plane) cluster")
 	}

 	// resolve Pod IPs
 	resolvePodIPs := func() error {
-		rr, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "get", "pods", "-o", "jsonpath='{.items[*].status.podIP}'"))
+		rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "get", "pods", "-o", "jsonpath='{.items[*].status.podIP}'"))
 		if err != nil {
 			err := fmt.Errorf("failed to retrieve Pod IPs (may be temporary): %v", err)
 			t.Log(err.Error())
@@ -160,7 +160,7 @@ func validateHADeployApp(ctx context.Context, t *testing.T, profile string) {
 	}

 	// get Pod names
-	rr, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "get", "pods", "-o", "jsonpath='{.items[*].metadata.name}'"))
+	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "get", "pods", "-o", "jsonpath='{.items[*].metadata.name}'"))
 	if err != nil {
 		t.Errorf("failed get Pod names")
 	}
@@ -168,7 +168,7 @@ func validateHADeployApp(ctx context.Context, t *testing.T, profile string) {

 	// verify all Pods could resolve a public DNS
 	for _, name := range podNames {
-		_, err = Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "nslookup", "kubernetes.io"))
+		_, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "exec", name, "--", "nslookup", "kubernetes.io"))
 		if err != nil {
 			t.Errorf("Pod %s could not resolve 'kubernetes.io': %v", name, err)
 		}
@@ -178,7 +178,7 @@ func validateHADeployApp(ctx context.Context, t *testing.T, profile string) {
 	// this one is also checked by k8s e2e node conformance tests:
 	// https://github.com/kubernetes/kubernetes/blob/f137c4777095b3972e2dd71a01365d47be459389/test/e2e_node/environment/conformance.go#L125-L179
 	for _, name := range podNames {
-		_, err = Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "nslookup", "kubernetes.default"))
+		_, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "exec", name, "--", "nslookup", "kubernetes.default"))
 		if err != nil {
 			t.Errorf("Pod %s could not resolve 'kubernetes.default': %v", name, err)
 		}
@@ -186,7 +186,7 @@ func validateHADeployApp(ctx context.Context, t *testing.T, profile string) {

 	// verify all pods could resolve to a local service.
 	for _, name := range podNames {
-		_, err = Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "nslookup", "kubernetes.default.svc.cluster.local"))
+		_, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "exec", name, "--", "nslookup", "kubernetes.default.svc.cluster.local"))
 		if err != nil {
 			t.Errorf("Pod %s could not resolve local service (kubernetes.default.svc.cluster.local): %v", name, err)
 		}
@@ -196,7 +196,7 @@ func validateHADeployApp(ctx context.Context, t *testing.T, profile string) {
 // validateHAPingHostFromPods uses app previously deplyed by validateDeployAppToHACluster to verify its pods, located on different nodes, can resolve "host.minikube.internal".
 func validateHAPingHostFromPods(ctx context.Context, t *testing.T, profile string) {
 	// get Pod names
-	rr, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "get", "pods", "-o", "jsonpath='{.items[*].metadata.name}'"))
+	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "get", "pods", "-o", "jsonpath='{.items[*].metadata.name}'"))
 	if err != nil {
 		t.Fatalf("failed to get Pod names: %v", err)
 	}
@@ -204,7 +204,7 @@ func validateHAPingHostFromPods(ctx context.Context, t *testing.T, profile strin

 	for _, name := range podNames {
 		// get host.minikube.internal ip as resolved by nslookup
-		out, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "sh", "-c", "nslookup host.minikube.internal | awk 'NR==5' | cut -d' ' -f3"))
+		out, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "exec", name, "--", "sh", "-c", "nslookup host.minikube.internal | awk 'NR==5' | cut -d' ' -f3"))
 		if err != nil {
 			t.Errorf("Pod %s could not resolve 'host.minikube.internal': %v", name, err)
 			continue
@@ -215,7 +215,7 @@ func validateHAPingHostFromPods(ctx context.Context, t *testing.T, profile strin
 		}
 		// try pinging host from pod
 		ping := fmt.Sprintf("ping -c 1 %s", hostIP)
-		if _, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "sh", "-c", ping)); err != nil {
+		if _, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "kubectl", "--", "exec", name, "--", "sh", "-c", ping)); err != nil {
 			t.Errorf("Failed to ping host (%s) from pod (%s): %v", hostIP, name, err)
 		}
 	}
@@ -224,14 +224,14 @@ func validateHAPingHostFromPods(ctx context.Context, t *testing.T, profile strin
 // validateHAAddWorkerNode uses the minikube node add command to add a worker node to an existing ha (multi-control plane) cluster.
 func validateHAAddWorkerNode(ctx context.Context, t *testing.T, profile string) {
 	// add a node to the current ha (multi-control plane) cluster
-	addArgs := []string{"node", "add", "-p", profile, "-v=7", "--alsologtostderr"}
+	addArgs := []string{"-p", profile, "node", "add", "--alsologtostderr", "-v", "5"}
 	rr, err := Run(t, exec.CommandContext(ctx, Target(), addArgs...))
 	if err != nil {
 		t.Fatalf("failed to add worker node to current ha (multi-control plane) cluster. args %q : %v", rr.Command(), err)
 	}

 	// ensure minikube status shows 3 operational control-plane nodes and 1 worker node
-	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
+	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
 	if err != nil {
 		t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
 	}
@@ -325,7 +325,7 @@ func validateHACopyFile(ctx context.Context, t *testing.T, profile string) {
 		t.Skipf("skipping: cp is unsupported by none driver")
 	}

-	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--output", "json", "-v=7", "--alsologtostderr"))
+	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--output", "json", "--alsologtostderr", "-v", "5"))
 	if err != nil && rr.ExitCode != 7 {
 		t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
 	}
@@ -362,13 +362,13 @@ func validateHACopyFile(ctx context.Context, t *testing.T, profile string) {
 // validateHAStopSecondaryNode tests ha (multi-control plane) cluster by stopping a secondary control-plane node using minikube node stop command.
 func validateHAStopSecondaryNode(ctx context.Context, t *testing.T, profile string) {
 	// run minikube node stop on secondary control-plane node
-	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "stop", SecondNodeName, "-v=7", "--alsologtostderr"))
+	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "stop", SecondNodeName, "--alsologtostderr", "-v", "5"))
 	if err != nil {
 		t.Errorf("secondary control-plane node stop returned an error. args %q: %v", rr.Command(), err)
 	}
 	// ensure minikube status shows 3 running nodes and 1 stopped node
-	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
+	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
 	// exit code 7 means a host is stopped, which we are expecting
 	if err != nil && rr.ExitCode != 7 {
 		t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
 	}
@@ -419,7 +419,7 @@ func validateHAStatusDegraded(ctx context.Context, t *testing.T, profile string)
 // validateHARestartSecondaryNode tests the minikube node start command on existing stopped secondary node.
 func validateHARestartSecondaryNode(ctx context.Context, t *testing.T, profile string) {
 	// start stopped node(s) back up
-	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "start", SecondNodeName, "-v=7", "--alsologtostderr"))
+	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "start", SecondNodeName, "--alsologtostderr", "-v", "5"))
 	if err != nil {
 		t.Log(rr.Stderr.String())
 		t.Errorf("secondary control-plane node start returned an error. args %q: %v", rr.Command(), err)
@@ -427,7 +427,7 @@ func validateHARestartSecondaryNode(ctx context.Context, t *testing.T, profile s

 	// ensure minikube status shows all 4 nodes running, waiting for ha (multi-control plane) cluster/apiservers to stabilise
 	minikubeStatus := func() error {
-		rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
+		rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
 		return err
 	}
 	if err := retry.Expo(minikubeStatus, 1*time.Second, 60*time.Second); err != nil {
@@ -455,23 +455,23 @@ func validateHARestartSecondaryNode(ctx context.Context, t *testing.T, profile s
 // validateHARestartClusterKeepsNodes restarts minikube cluster and checks if the reported node list is unchanged.
 func validateHARestartClusterKeepsNodes(ctx context.Context, t *testing.T, profile string) {
-	rr, err := Run(t, exec.CommandContext(ctx, Target(), "node", "list", "-p", profile, "-v=7", "--alsologtostderr"))
+	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "list", "--alsologtostderr", "-v", "5"))
 	if err != nil {
 		t.Errorf("failed to run node list. args %q : %v", rr.Command(), err)
 	}

 	nodeList := rr.Stdout.String()

-	_, err = Run(t, exec.CommandContext(ctx, Target(), "stop", "-p", profile, "-v=7", "--alsologtostderr"))
+	_, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "stop", "--alsologtostderr", "-v", "5"))
 	if err != nil {
 		t.Errorf("failed to run minikube stop. args %q : %v", rr.Command(), err)
 	}

-	_, err = Run(t, exec.CommandContext(ctx, Target(), "start", "-p", profile, "--wait=true", "-v=7", "--alsologtostderr"))
+	_, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "start", "--wait", "true", "--alsologtostderr", "-v", "5"))
 	if err != nil {
 		t.Errorf("failed to run minikube start. args %q : %v", rr.Command(), err)
 	}

-	rr, err = Run(t, exec.CommandContext(ctx, Target(), "node", "list", "-p", profile))
+	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "list", "--alsologtostderr", "-v", "5"))
 	if err != nil {
 		t.Errorf("failed to run node list. args %q : %v", rr.Command(), err)
 	}
@@ -486,13 +486,13 @@ func validateHARestartClusterKeepsNodes(ctx context.Context, t *testing.T, profi
 // note: currently, 'minikube status' subcommand relies on primary control-plane node and storage-provisioner only runs on a primary control-plane node.
 func validateHADeleteSecondaryNode(ctx context.Context, t *testing.T, profile string) {
 	// delete the other secondary control-plane node
-	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "delete", ThirdNodeName, "-v=7", "--alsologtostderr"))
+	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "delete", ThirdNodeName, "--alsologtostderr", "-v", "5"))
 	if err != nil {
 		t.Errorf("node delete returned an error. args %q: %v", rr.Command(), err)
 	}

 	// ensure status is back down to 3 hosts
-	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
+	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
 	if err != nil {
 		t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
 	}
@@ -530,13 +530,13 @@ func validateHADeleteSecondaryNode(ctx context.Context, t *testing.T, profile st
 // validateHAStopCluster runs minikube stop on a ha (multi-control plane) cluster.
 func validateHAStopCluster(ctx context.Context, t *testing.T, profile string) {
 	// Run minikube stop on the cluster
-	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "stop", "-v=7", "--alsologtostderr"))
+	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "stop", "--alsologtostderr", "-v", "5"))
 	if err != nil {
 		t.Errorf("failed to stop cluster. args %q: %v", rr.Command(), err)
 	}
 	// ensure minikube status shows all 3 nodes stopped
-	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
+	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
 	// exit code 7 means a host is stopped, which we are expecting
 	if err != nil && rr.ExitCode != 7 {
 		t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
 	}
@@ -558,14 +558,14 @@ func validateHAStopCluster(ctx context.Context, t *testing.T, profile string) {
 // validateHARestartCluster verifies a soft restart on a ha (multi-control plane) cluster works.
 func validateHARestartCluster(ctx context.Context, t *testing.T, profile string) {
 	// restart cluster with minikube start
-	startArgs := append([]string{"start", "-p", profile, "--wait=true", "-v=7", "--alsologtostderr"}, StartArgs()...)
+	startArgs := append([]string{"-p", profile, "start", "--wait", "true", "--alsologtostderr", "-v", "5"}, StartArgs()...)
 	rr, err := Run(t, exec.CommandContext(ctx, Target(), startArgs...))
 	if err != nil {
 		t.Fatalf("failed to start cluster. args %q : %v", rr.Command(), err)
 	}

 	// ensure minikube status shows all 3 nodes running
-	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
+	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
 	if err != nil {
 		t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
 	}
@@ -603,14 +603,14 @@ func validateHARestartCluster(ctx context.Context, t *testing.T, profile string)
 // validateHAAddSecondaryNode uses the minikube node add command to add a secondary control-plane node to an existing ha (multi-control plane) cluster.
 func validateHAAddSecondaryNode(ctx context.Context, t *testing.T, profile string) {
 	// add a node to the current ha (multi-control plane) cluster
-	addArgs := []string{"node", "add", "-p", profile, "--control-plane", "-v=7", "--alsologtostderr"}
+	addArgs := []string{"-p", profile, "node", "add", "--control-plane", "--alsologtostderr", "-v", "5"}
 	rr, err := Run(t, exec.CommandContext(ctx, Target(), addArgs...))
 	if err != nil {
 		t.Fatalf("failed to add control-plane node to current ha (multi-control plane) cluster. args %q : %v", rr.Command(), err)
 	}

 	// ensure minikube status shows 3 operational control-plane nodes and 1 worker node
-	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
+	rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--alsologtostderr", "-v", "5"))
 	if err != nil {
 		t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err)
 	}
diff --git a/test/integration/multinode_test.go b/test/integration/multinode_test.go
index e2c902e1b4..89696e7995 100644
--- a/test/integration/multinode_test.go
+++ b/test/integration/multinode_test.go
@@ -92,7 +92,7 @@ func TestMultiNode(t *testing.T) {
 // validateMultiNodeStart makes sure a 2 node cluster can start
 func validateMultiNodeStart(ctx context.Context, t *testing.T, profile string) {
 	// Start a 2 node cluster with the --nodes param
-	startArgs := append([]string{"start", "-p", profile, "--wait=true", "--memory=2200", "--nodes=2", "-v=8", "--alsologtostderr"}, StartArgs()...)
+	startArgs := append([]string{"start", "-p", profile, "--wait=true", "--memory=2200", "--nodes=2", "-v=5", "--alsologtostderr"}, StartArgs()...)
 	rr, err := Run(t, exec.CommandContext(ctx, Target(), startArgs...))
 	if err != nil {
 		t.Fatalf("failed to start cluster. args %q : %v", rr.Command(), err)
@@ -117,7 +117,7 @@ func validateMultiNodeStart(ctx context.Context, t *testing.T, profile string) {
 // validateAddNodeToMultiNode uses the minikube node add command to add a node to an existing cluster
 func validateAddNodeToMultiNode(ctx context.Context, t *testing.T, profile string) {
 	// Add a node to the current cluster
-	addArgs := []string{"node", "add", "-p", profile, "-v", "3", "--alsologtostderr"}
+	addArgs := []string{"node", "add", "-p", profile, "-v=5", "--alsologtostderr"}
 	rr, err := Run(t, exec.CommandContext(ctx, Target(), addArgs...))
 	if err != nil {
 		t.Fatalf("failed to add node to current cluster. args %q : %v", rr.Command(), err)
 	}
@@ -279,7 +279,7 @@ func validateStopRunningNode(ctx context.Context, t *testing.T, profile string)
 // validateStartNodeAfterStop tests the minikube node start command on an existing stopped node
 func validateStartNodeAfterStop(ctx context.Context, t *testing.T, profile string) {
 	// Start the node back up
-	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "start", ThirdNodeName, "-v=7", "--alsologtostderr"))
+	rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "start", ThirdNodeName, "-v=5", "--alsologtostderr"))
 	if err != nil {
 		t.Log(rr.Stderr.String())
 		t.Errorf("node start returned an error. args %q: %v", rr.Command(), err)
@@ -287,7 +287,7 @@ func validateStartNodeAfterStop(ctx context.Context, t *testing.T, profile strin

 	// Make sure minikube status shows 3 running hosts
 	minikubeStatus := func() error {
-		rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr"))
+		rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=5", "--alsologtostderr"))
 		return err
 	}
 	if err := retry.Expo(minikubeStatus, 1*time.Second, 60*time.Second); err != nil {
@@ -323,7 +323,7 @@ func validateRestartKeepsNodes(ctx context.Context, t *testing.T, profile string
 		t.Errorf("failed to run minikube stop. args %q : %v", rr.Command(), err)
 	}

-	_, err = Run(t, exec.CommandContext(ctx, Target(), "start", "-p", profile, "--wait=true", "-v=8", "--alsologtostderr"))
+	_, err = Run(t, exec.CommandContext(ctx, Target(), "start", "-p", profile, "--wait=true", "-v=5", "--alsologtostderr"))
 	if err != nil {
 		t.Errorf("failed to run minikube start. args %q : %v", rr.Command(), err)
 	}
@@ -372,7 +372,7 @@ func validateStopMultiNodeCluster(ctx context.Context, t *testing.T, profile str
 // validateRestartMultiNodeCluster verifies a soft restart on a multinode cluster works
 func validateRestartMultiNodeCluster(ctx context.Context, t *testing.T, profile string) {
 	// Restart a full cluster with minikube start
-	startArgs := append([]string{"start", "-p", profile, "--wait=true", "-v=8", "--alsologtostderr"}, StartArgs()...)
+	startArgs := append([]string{"start", "-p", profile, "--wait=true", "-v=5", "--alsologtostderr"}, StartArgs()...)
 	rr, err := Run(t, exec.CommandContext(ctx, Target(), startArgs...))
 	if err != nil {
 		t.Fatalf("failed to start cluster. args %q : %v", rr.Command(), err)