diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go b/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go index f6abca315f..a9f9ec6638 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/kverify.go @@ -37,8 +37,8 @@ const ( NodeReadyKey = "node_ready" // KubeletKey is the name used in the flags for waiting for the kubelet status to be ready KubeletKey = "kubelet" - // OperationalKey is the name used for waiting for pods in CorePodsList to be Ready - OperationalKey = "operational" + // ExtraKey is the name used for extra waiting for pods in CorePodsList to be Ready + ExtraKey = "extra" ) // vars related to the --wait flag @@ -46,9 +46,9 @@ var ( // DefaultComponents is map of the the default components to wait for DefaultComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true} // NoWaitComponents is map of componets to wait for if specified 'none' or 'false' - NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false, KubeletKey: false, OperationalKey: false} + NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false, KubeletKey: false, ExtraKey: false} // AllComponents is map for waiting for all components. - AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, NodeReadyKey: true, KubeletKey: true, OperationalKey: true} + AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, NodeReadyKey: true, KubeletKey: true, ExtraKey: true} // DefaultWaitList is list of all default components to wait for. only names to be used for start flags. 
DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey} // AllComponentsList list of all valid components keys to wait for. only names to be used used for start flags. @@ -62,7 +62,7 @@ var ( "kube-proxy", "kube-scheduler", } - // CorePodsList is a list of essential pods for running kurnetes to wait for them to be operational ("Ready") + // CorePodsList is a list of essential pods for running Kubernetes to extra wait for them to be Ready CorePodsList = []string{ "kube-dns", // coredns "etcd", diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go index 29115b66a5..125a2e5a6f 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go @@ -31,12 +31,12 @@ import ( kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants" ) -// WaitOperational calls WaitForPodReadyByLabel for each pod in labels list and returns any errors occurred. -func WaitOperational(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error { - klog.Info("waiting for kube-system core pods %s to be Ready ...", labels) - pStart := time.Now() +// WaitExtra calls WaitForPodReadyByLabel for each pod in labels list and returns any errors that occurred. 
+func WaitExtra(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error { + klog.Infof("extra waiting for kube-system core pods %s to be Ready ...", labels) + start := time.Now() defer func() { - klog.Infof("duration metric: took %s for waiting for kube-system core pods to be Ready ...", time.Since(pStart)) + klog.Infof("duration metric: took %s for extra waiting for kube-system core pods to be Ready ...", time.Since(start)) }() var errs []string @@ -84,7 +84,6 @@ func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, t if time.Since(start) > timeout { return false, fmt.Errorf("wait for pod with %q label in %q namespace to be Ready timed out", label, namespace) } - pods, err := cs.CoreV1().Pods(namespace).List(meta.ListOptions{}) if err != nil { klog.Infof("error listing pods in %q namespace, will retry: %v", namespace, err) @@ -110,7 +109,6 @@ func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, t klog.Infof("pod with %q label in %q namespace was not found, will retry", label, namespace) return false, nil } - if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil { return errors.Wrapf(err, "wait pod Ready") } @@ -136,7 +134,6 @@ func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, tim if time.Since(start) > timeout { return false, fmt.Errorf("wait for pod %q in %q namespace to be Ready timed out", name, namespace) } - pod, err := cs.CoreV1().Pods(namespace).Get(name, meta.GetOptions{}) if err != nil { klog.Infof("error getting pod %q in %q namespace, will retry: %v", name, namespace, err) @@ -154,7 +151,6 @@ func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, tim } return false, nil } - if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil { return errors.Wrapf(err, "wait pod Ready") } diff --git 
a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index dea2a80b81..34194b9540 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -36,6 +36,7 @@ import ( "github.com/docker/machine/libmachine" "github.com/docker/machine/libmachine/state" "github.com/pkg/errors" + meta "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" @@ -470,9 +471,9 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time return nil } - if cfg.VerifyComponents[kverify.OperationalKey] { - if err := kverify.WaitOperational(client, kverify.CorePodsList, timeout); err != nil { - return errors.Wrap(err, "waiting for operational status") + if cfg.VerifyComponents[kverify.ExtraKey] { + if err := kverify.WaitExtra(client, kverify.CorePodsList, timeout); err != nil { + return errors.Wrap(err, "extra waiting") } } @@ -664,14 +665,32 @@ func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error { } } - if cfg.VerifyComponents[kverify.OperationalKey] { + if cfg.VerifyComponents[kverify.ExtraKey] { // after kubelet is restarted (with 'kubeadm init phase kubelet-start' above), - // it appears to be immediately Ready as are all kube-system pods - // then (after ~10sec) it realises it has some changes to apply, implying also pods restarts - // so we wait for kubelet to initialise itself... 
- time.Sleep(10 * time.Second) - if err := kverify.WaitOperational(client, kverify.CorePodsList, kconst.DefaultControlPlaneTimeout); err != nil { - return errors.Wrap(err, "operational status") + // it appears to be immediately Ready, as do all kube-system pods, + // then (after ~10sec) it realises it has some changes to apply, which also implies pod restarts, + // and by that time we would exit completely, so we wait until kubelet begins restarting pods + klog.Info("waiting for restarted kubelet to initialise ...") + start := time.Now() + wait := func() error { + pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{}) + if err != nil { + return err + } + for _, pod := range pods.Items { + if pod.Labels["tier"] == "control-plane" { + if ready, _ := kverify.IsPodReady(&pod); !ready { + return nil + } + } + } + return fmt.Errorf("kubelet not initialised") + } + _ = retry.Expo(wait, 250*time.Millisecond, 1*time.Minute) + klog.Infof("kubelet initialised") + klog.Infof("duration metric: took %s waiting for restarted kubelet to initialise ...", time.Since(start)) + if err := kverify.WaitExtra(client, kverify.CorePodsList, kconst.DefaultControlPlaneTimeout); err != nil { + return errors.Wrap(err, "extra") } } diff --git a/test/integration/functional_test.go b/test/integration/functional_test.go index 0f90a5cbc7..d29a8f7916 100644 --- a/test/integration/functional_test.go +++ b/test/integration/functional_test.go @@ -448,6 +448,7 @@ func imageID(image string) string { } // validateComponentHealth asserts that all Kubernetes components are healthy +// note: it expects all components to be Ready, so it only makes sense to run it right after tests that include the '--wait=all' start flag (i.e., with extra wait) func validateComponentHealth(ctx context.Context, t *testing.T, profile string) { defer PostMortemLogs(t, profile)