replace sleep with retry.Expo()
parent
90cd9c3a60
commit
b8052fe33d
|
@ -37,8 +37,8 @@ const (
|
|||
NodeReadyKey = "node_ready"
|
||||
// KubeletKey is the name used in the flags for waiting for the kubelet status to be ready
|
||||
KubeletKey = "kubelet"
|
||||
// OperationalKey is the name used for waiting for pods in CorePodsList to be Ready
|
||||
OperationalKey = "operational"
|
||||
// ExtraKey is the name used for extra waiting for pods in CorePodsList to be Ready
|
||||
ExtraKey = "extra"
|
||||
)
|
||||
|
||||
// vars related to the --wait flag
|
||||
|
@ -46,9 +46,9 @@ var (
|
|||
// DefaultComponents is a map of the default components to wait for
|
||||
DefaultComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true}
|
||||
// NoComponents is a map of components to wait for if specified 'none' or 'false'
|
||||
NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false, KubeletKey: false, OperationalKey: false}
|
||||
NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false, KubeletKey: false, ExtraKey: false}
|
||||
// AllComponents is map for waiting for all components.
|
||||
AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, NodeReadyKey: true, KubeletKey: true, OperationalKey: true}
|
||||
AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, NodeReadyKey: true, KubeletKey: true, ExtraKey: true}
|
||||
// DefaultWaitList is a list of all default components to wait for. only names to be used for start flags.
|
||||
DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey}
|
||||
// AllComponentsList is a list of all valid component keys to wait for. only names to be used for start flags.
|
||||
|
@ -62,7 +62,7 @@ var (
|
|||
"kube-proxy",
|
||||
"kube-scheduler",
|
||||
}
|
||||
// CorePodsList is a list of essential pods for running kubernetes to wait for them to be operational ("Ready")
|
||||
// CorePodsList is a list of essential pods for running kubernetes to extra wait for them to be Ready
|
||||
CorePodsList = []string{
|
||||
"kube-dns", // coredns
|
||||
"etcd",
|
||||
|
|
|
@ -31,12 +31,12 @@ import (
|
|||
kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants"
|
||||
)
|
||||
|
||||
// WaitOperational calls WaitForPodReadyByLabel for each pod in labels list and returns any errors occurred.
|
||||
func WaitOperational(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error {
|
||||
klog.Info("waiting for kube-system core pods %s to be Ready ...", labels)
|
||||
pStart := time.Now()
|
||||
// WaitExtra calls WaitForPodReadyByLabel for each pod in labels list and returns any errors occurred.
|
||||
func WaitExtra(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error {
|
||||
klog.Infof("extra waiting for kube-system core pods %s to be Ready ...", labels)
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
klog.Infof("duration metric: took %s for waiting for kube-system core pods to be Ready ...", time.Since(pStart))
|
||||
klog.Infof("duration metric: took %s for extra waiting for kube-system core pods to be Ready ...", time.Since(start))
|
||||
}()
|
||||
|
||||
var errs []string
|
||||
|
@ -84,7 +84,6 @@ func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, t
|
|||
if time.Since(start) > timeout {
|
||||
return false, fmt.Errorf("wait for pod with %q label in %q namespace to be Ready timed out", label, namespace)
|
||||
}
|
||||
|
||||
pods, err := cs.CoreV1().Pods(namespace).List(meta.ListOptions{})
|
||||
if err != nil {
|
||||
klog.Infof("error listing pods in %q namespace, will retry: %v", namespace, err)
|
||||
|
@ -110,7 +109,6 @@ func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, t
|
|||
klog.Infof("pod with %q label in %q namespace was not found, will retry", label, namespace)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil {
|
||||
return errors.Wrapf(err, "wait pod Ready")
|
||||
}
|
||||
|
@ -136,7 +134,6 @@ func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, tim
|
|||
if time.Since(start) > timeout {
|
||||
return false, fmt.Errorf("wait for pod %q in %q namespace to be Ready timed out", name, namespace)
|
||||
}
|
||||
|
||||
pod, err := cs.CoreV1().Pods(namespace).Get(name, meta.GetOptions{})
|
||||
if err != nil {
|
||||
klog.Infof("error getting pod %q in %q namespace, will retry: %v", name, namespace, err)
|
||||
|
@ -154,7 +151,6 @@ func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, tim
|
|||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil {
|
||||
return errors.Wrapf(err, "wait pod Ready")
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ import (
|
|||
"github.com/docker/machine/libmachine"
|
||||
"github.com/docker/machine/libmachine/state"
|
||||
"github.com/pkg/errors"
|
||||
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
"k8s.io/klog/v2"
|
||||
|
@ -470,9 +471,9 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time
|
|||
return nil
|
||||
}
|
||||
|
||||
if cfg.VerifyComponents[kverify.OperationalKey] {
|
||||
if err := kverify.WaitOperational(client, kverify.CorePodsList, timeout); err != nil {
|
||||
return errors.Wrap(err, "waiting for operational status")
|
||||
if cfg.VerifyComponents[kverify.ExtraKey] {
|
||||
if err := kverify.WaitExtra(client, kverify.CorePodsList, timeout); err != nil {
|
||||
return errors.Wrap(err, "extra waiting")
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -664,14 +665,32 @@ func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error {
|
|||
}
|
||||
}
|
||||
|
||||
if cfg.VerifyComponents[kverify.OperationalKey] {
|
||||
if cfg.VerifyComponents[kverify.ExtraKey] {
|
||||
// after kubelet is restarted (with 'kubeadm init phase kubelet-start' above),
|
||||
// it appears to be immediately Ready as are all kube-system pods
|
||||
// then (after ~10sec) it realises it has some changes to apply, implying also pods restarts
|
||||
// so we wait for kubelet to initialise itself...
|
||||
time.Sleep(10 * time.Second)
|
||||
if err := kverify.WaitOperational(client, kverify.CorePodsList, kconst.DefaultControlPlaneTimeout); err != nil {
|
||||
return errors.Wrap(err, "operational status")
|
||||
// it appears as to be immediately Ready as well as all kube-system pods,
|
||||
// then (after ~10sec) it realises it has some changes to apply, implying also pods restarts,
|
||||
// and by that time we would exit completely, so we wait until kubelet begins restarting pods
|
||||
klog.Info("waiting for restarted kubelet to initialise ...")
|
||||
start := time.Now()
|
||||
wait := func() error {
|
||||
pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, pod := range pods.Items {
|
||||
if pod.Labels["tier"] == "control-plane" {
|
||||
if ready, _ := kverify.IsPodReady(&pod); !ready {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("kubelet not initialised")
|
||||
}
|
||||
_ = retry.Expo(wait, 250*time.Millisecond, 1*time.Minute)
|
||||
klog.Infof("kubelet initialised")
|
||||
klog.Infof("duration metric: took %s waiting for restarted kubelet to initialise ...", time.Since(start))
|
||||
if err := kverify.WaitExtra(client, kverify.CorePodsList, kconst.DefaultControlPlaneTimeout); err != nil {
|
||||
return errors.Wrap(err, "extra")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -448,6 +448,7 @@ func imageID(image string) string {
|
|||
}
|
||||
|
||||
// validateComponentHealth asserts that all Kubernetes components are healthy
|
||||
// note: it expects all components to be Ready, so it makes sense to run it close after only those tests that include '--wait=all' start flag (ie, with extra wait)
|
||||
func validateComponentHealth(ctx context.Context, t *testing.T, profile string) {
|
||||
defer PostMortemLogs(t, profile)
|
||||
|
||||
|
|
Loading…
Reference in New Issue