replace sleep with retry.Expo()

pull/10424/head
Predrag Rogic 2021-02-16 22:27:26 +00:00
parent 90cd9c3a60
commit b8052fe33d
No known key found for this signature in database
GPG Key ID: F1FF5748C4855229
4 changed files with 40 additions and 24 deletions

View File

@ -37,8 +37,8 @@ const (
NodeReadyKey = "node_ready" NodeReadyKey = "node_ready"
// KubeletKey is the name used in the flags for waiting for the kubelet status to be ready // KubeletKey is the name used in the flags for waiting for the kubelet status to be ready
KubeletKey = "kubelet" KubeletKey = "kubelet"
// OperationalKey is the name used for waiting for pods in CorePodsList to be Ready // ExtraKey is the name used for extra waiting for pods in CorePodsList to be Ready
OperationalKey = "operational" ExtraKey = "extra"
) )
// vars related to the --wait flag // vars related to the --wait flag
@ -46,9 +46,9 @@ var (
// DefaultComponents is a map of the default components to wait for // DefaultComponents is a map of the default components to wait for
DefaultComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true} DefaultComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true}
// NoComponents is a map of components to wait for if 'none' or 'false' is specified // NoComponents is a map of components to wait for if 'none' or 'false' is specified
NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false, KubeletKey: false, OperationalKey: false} NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false, KubeletKey: false, ExtraKey: false}
// AllComponents is map for waiting for all components. // AllComponents is map for waiting for all components.
AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, NodeReadyKey: true, KubeletKey: true, OperationalKey: true} AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, NodeReadyKey: true, KubeletKey: true, ExtraKey: true}
// DefaultWaitList is list of all default components to wait for. only names to be used for start flags. // DefaultWaitList is list of all default components to wait for. only names to be used for start flags.
DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey} DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey}
// AllComponentsList is a list of all valid component keys to wait for. only names to be used for start flags. // AllComponentsList is a list of all valid component keys to wait for. only names to be used for start flags.
@ -62,7 +62,7 @@ var (
"kube-proxy", "kube-proxy",
"kube-scheduler", "kube-scheduler",
} }
// CorePodsList is a list of essential pods for running kubernetes to wait for them to be operational ("Ready") // CorePodsList is a list of essential pods for running kubernetes to extra wait for them to be Ready
CorePodsList = []string{ CorePodsList = []string{
"kube-dns", // coredns "kube-dns", // coredns
"etcd", "etcd",

View File

@ -31,12 +31,12 @@ import (
kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants" kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants"
) )
// WaitOperational calls WaitForPodReadyByLabel for each pod in labels list and returns any errors occurred. // WaitExtra calls WaitForPodReadyByLabel for each pod in labels list and returns any errors occurred.
func WaitOperational(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error { func WaitExtra(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error {
klog.Info("waiting for kube-system core pods %s to be Ready ...", labels) klog.Infof("extra waiting for kube-system core pods %s to be Ready ...", labels)
pStart := time.Now() start := time.Now()
defer func() { defer func() {
klog.Infof("duration metric: took %s for waiting for kube-system core pods to be Ready ...", time.Since(pStart)) klog.Infof("duration metric: took %s for extra waiting for kube-system core pods to be Ready ...", time.Since(start))
}() }()
var errs []string var errs []string
@ -84,7 +84,6 @@ func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, t
if time.Since(start) > timeout { if time.Since(start) > timeout {
return false, fmt.Errorf("wait for pod with %q label in %q namespace to be Ready timed out", label, namespace) return false, fmt.Errorf("wait for pod with %q label in %q namespace to be Ready timed out", label, namespace)
} }
pods, err := cs.CoreV1().Pods(namespace).List(meta.ListOptions{}) pods, err := cs.CoreV1().Pods(namespace).List(meta.ListOptions{})
if err != nil { if err != nil {
klog.Infof("error listing pods in %q namespace, will retry: %v", namespace, err) klog.Infof("error listing pods in %q namespace, will retry: %v", namespace, err)
@ -110,7 +109,6 @@ func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, t
klog.Infof("pod with %q label in %q namespace was not found, will retry", label, namespace) klog.Infof("pod with %q label in %q namespace was not found, will retry", label, namespace)
return false, nil return false, nil
} }
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil { if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil {
return errors.Wrapf(err, "wait pod Ready") return errors.Wrapf(err, "wait pod Ready")
} }
@ -136,7 +134,6 @@ func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, tim
if time.Since(start) > timeout { if time.Since(start) > timeout {
return false, fmt.Errorf("wait for pod %q in %q namespace to be Ready timed out", name, namespace) return false, fmt.Errorf("wait for pod %q in %q namespace to be Ready timed out", name, namespace)
} }
pod, err := cs.CoreV1().Pods(namespace).Get(name, meta.GetOptions{}) pod, err := cs.CoreV1().Pods(namespace).Get(name, meta.GetOptions{})
if err != nil { if err != nil {
klog.Infof("error getting pod %q in %q namespace, will retry: %v", name, namespace, err) klog.Infof("error getting pod %q in %q namespace, will retry: %v", name, namespace, err)
@ -154,7 +151,6 @@ func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, tim
} }
return false, nil return false, nil
} }
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil { if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil {
return errors.Wrapf(err, "wait pod Ready") return errors.Wrapf(err, "wait pod Ready")
} }

View File

@ -36,6 +36,7 @@ import (
"github.com/docker/machine/libmachine" "github.com/docker/machine/libmachine"
"github.com/docker/machine/libmachine/state" "github.com/docker/machine/libmachine/state"
"github.com/pkg/errors" "github.com/pkg/errors"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2" "k8s.io/klog/v2"
@ -470,9 +471,9 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time
return nil return nil
} }
if cfg.VerifyComponents[kverify.OperationalKey] { if cfg.VerifyComponents[kverify.ExtraKey] {
if err := kverify.WaitOperational(client, kverify.CorePodsList, timeout); err != nil { if err := kverify.WaitExtra(client, kverify.CorePodsList, timeout); err != nil {
return errors.Wrap(err, "waiting for operational status") return errors.Wrap(err, "extra waiting")
} }
} }
@ -664,14 +665,32 @@ func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error {
} }
} }
if cfg.VerifyComponents[kverify.OperationalKey] { if cfg.VerifyComponents[kverify.ExtraKey] {
// after kubelet is restarted (with 'kubeadm init phase kubelet-start' above), // after kubelet is restarted (with 'kubeadm init phase kubelet-start' above),
// it appears to be immediately Ready as are all kube-system pods // it appears to be immediately Ready, as do all kube-system pods,
// then (after ~10sec) it realises it has some changes to apply, implying also pods restarts // then (after ~10sec) it realises it has some changes to apply, implying also pods restarts,
// so we wait for kubelet to initialise itself... // and by that time we would exit completely, so we wait until kubelet begins restarting pods
time.Sleep(10 * time.Second) klog.Info("waiting for restarted kubelet to initialise ...")
if err := kverify.WaitOperational(client, kverify.CorePodsList, kconst.DefaultControlPlaneTimeout); err != nil { start := time.Now()
return errors.Wrap(err, "operational status") wait := func() error {
pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{})
if err != nil {
return err
}
for _, pod := range pods.Items {
if pod.Labels["tier"] == "control-plane" {
if ready, _ := kverify.IsPodReady(&pod); !ready {
return nil
}
}
}
return fmt.Errorf("kubelet not initialised")
}
_ = retry.Expo(wait, 250*time.Millisecond, 1*time.Minute)
klog.Infof("kubelet initialised")
klog.Infof("duration metric: took %s waiting for restarted kubelet to initialise ...", time.Since(start))
if err := kverify.WaitExtra(client, kverify.CorePodsList, kconst.DefaultControlPlaneTimeout); err != nil {
return errors.Wrap(err, "extra")
} }
} }

View File

@ -448,6 +448,7 @@ func imageID(image string) string {
} }
// validateComponentHealth asserts that all Kubernetes components are healthy // validateComponentHealth asserts that all Kubernetes components are healthy
// note: it expects all components to be Ready, so it only makes sense to run it right after tests that include the '--wait=all' start flag (i.e., with extra wait)
func validateComponentHealth(ctx context.Context, t *testing.T, profile string) { func validateComponentHealth(ctx context.Context, t *testing.T, profile string) {
defer PostMortemLogs(t, profile) defer PostMortemLogs(t, profile)