replace sleep with retry.Expo()
parent
90cd9c3a60
commit
b8052fe33d
|
@ -37,8 +37,8 @@ const (
|
||||||
NodeReadyKey = "node_ready"
|
NodeReadyKey = "node_ready"
|
||||||
// KubeletKey is the name used in the flags for waiting for the kubelet status to be ready
|
// KubeletKey is the name used in the flags for waiting for the kubelet status to be ready
|
||||||
KubeletKey = "kubelet"
|
KubeletKey = "kubelet"
|
||||||
// OperationalKey is the name used for waiting for pods in CorePodsList to be Ready
|
// ExtraKey is the name used for extra waiting for pods in CorePodsList to be Ready
|
||||||
OperationalKey = "operational"
|
ExtraKey = "extra"
|
||||||
)
|
)
|
||||||
|
|
||||||
// vars related to the --wait flag
|
// vars related to the --wait flag
|
||||||
|
@ -46,9 +46,9 @@ var (
|
||||||
// DefaultComponents is a map of the default components to wait for
|
// DefaultComponents is a map of the default components to wait for
|
||||||
DefaultComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true}
|
DefaultComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true}
|
||||||
// NoComponents is the map of components to wait for if 'none' or 'false' is specified
|
// NoComponents is the map of components to wait for if 'none' or 'false' is specified
|
||||||
NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false, KubeletKey: false, OperationalKey: false}
|
NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false, KubeletKey: false, ExtraKey: false}
|
||||||
// AllComponents is map for waiting for all components.
|
// AllComponents is map for waiting for all components.
|
||||||
AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, NodeReadyKey: true, KubeletKey: true, OperationalKey: true}
|
AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true, NodeReadyKey: true, KubeletKey: true, ExtraKey: true}
|
||||||
// DefaultWaitList is list of all default components to wait for. only names to be used for start flags.
|
// DefaultWaitList is list of all default components to wait for. only names to be used for start flags.
|
||||||
DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey}
|
DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey}
|
||||||
// AllComponentsList is the list of all valid component keys to wait for. Only names to be used for start flags.
|
// AllComponentsList is the list of all valid component keys to wait for. Only names to be used for start flags.
|
||||||
|
@ -62,7 +62,7 @@ var (
|
||||||
"kube-proxy",
|
"kube-proxy",
|
||||||
"kube-scheduler",
|
"kube-scheduler",
|
||||||
}
|
}
|
||||||
// CorePodsList is a list of essential pods for running Kubernetes to wait for them to be operational ("Ready")
|
// CorePodsList is a list of essential pods for running Kubernetes to extra wait for them to be Ready
|
||||||
CorePodsList = []string{
|
CorePodsList = []string{
|
||||||
"kube-dns", // coredns
|
"kube-dns", // coredns
|
||||||
"etcd",
|
"etcd",
|
||||||
|
|
|
@ -31,12 +31,12 @@ import (
|
||||||
kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants"
|
kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants"
|
||||||
)
|
)
|
||||||
|
|
||||||
// WaitOperational calls WaitForPodReadyByLabel for each pod in labels list and returns any errors occurred.
|
// WaitExtra calls WaitForPodReadyByLabel for each pod in labels list and returns any errors occurred.
|
||||||
func WaitOperational(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error {
|
func WaitExtra(cs *kubernetes.Clientset, labels []string, timeout time.Duration) error {
|
||||||
klog.Info("waiting for kube-system core pods %s to be Ready ...", labels)
|
klog.Infof("extra waiting for kube-system core pods %s to be Ready ...", labels)
|
||||||
pStart := time.Now()
|
start := time.Now()
|
||||||
defer func() {
|
defer func() {
|
||||||
klog.Infof("duration metric: took %s for waiting for kube-system core pods to be Ready ...", time.Since(pStart))
|
klog.Infof("duration metric: took %s for extra waiting for kube-system core pods to be Ready ...", time.Since(start))
|
||||||
}()
|
}()
|
||||||
|
|
||||||
var errs []string
|
var errs []string
|
||||||
|
@ -84,7 +84,6 @@ func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, t
|
||||||
if time.Since(start) > timeout {
|
if time.Since(start) > timeout {
|
||||||
return false, fmt.Errorf("wait for pod with %q label in %q namespace to be Ready timed out", label, namespace)
|
return false, fmt.Errorf("wait for pod with %q label in %q namespace to be Ready timed out", label, namespace)
|
||||||
}
|
}
|
||||||
|
|
||||||
pods, err := cs.CoreV1().Pods(namespace).List(meta.ListOptions{})
|
pods, err := cs.CoreV1().Pods(namespace).List(meta.ListOptions{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Infof("error listing pods in %q namespace, will retry: %v", namespace, err)
|
klog.Infof("error listing pods in %q namespace, will retry: %v", namespace, err)
|
||||||
|
@ -110,7 +109,6 @@ func WaitForPodReadyByLabel(cs *kubernetes.Clientset, label, namespace string, t
|
||||||
klog.Infof("pod with %q label in %q namespace was not found, will retry", label, namespace)
|
klog.Infof("pod with %q label in %q namespace was not found, will retry", label, namespace)
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil {
|
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil {
|
||||||
return errors.Wrapf(err, "wait pod Ready")
|
return errors.Wrapf(err, "wait pod Ready")
|
||||||
}
|
}
|
||||||
|
@ -136,7 +134,6 @@ func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, tim
|
||||||
if time.Since(start) > timeout {
|
if time.Since(start) > timeout {
|
||||||
return false, fmt.Errorf("wait for pod %q in %q namespace to be Ready timed out", name, namespace)
|
return false, fmt.Errorf("wait for pod %q in %q namespace to be Ready timed out", name, namespace)
|
||||||
}
|
}
|
||||||
|
|
||||||
pod, err := cs.CoreV1().Pods(namespace).Get(name, meta.GetOptions{})
|
pod, err := cs.CoreV1().Pods(namespace).Get(name, meta.GetOptions{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Infof("error getting pod %q in %q namespace, will retry: %v", name, namespace, err)
|
klog.Infof("error getting pod %q in %q namespace, will retry: %v", name, namespace, err)
|
||||||
|
@ -154,7 +151,6 @@ func WaitForPodReadyByName(cs *kubernetes.Clientset, name, namespace string, tim
|
||||||
}
|
}
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil {
|
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil {
|
||||||
return errors.Wrapf(err, "wait pod Ready")
|
return errors.Wrapf(err, "wait pod Ready")
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,6 +36,7 @@ import (
|
||||||
"github.com/docker/machine/libmachine"
|
"github.com/docker/machine/libmachine"
|
||||||
"github.com/docker/machine/libmachine/state"
|
"github.com/docker/machine/libmachine/state"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
|
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
"k8s.io/client-go/kubernetes"
|
"k8s.io/client-go/kubernetes"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
|
@ -470,9 +471,9 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if cfg.VerifyComponents[kverify.OperationalKey] {
|
if cfg.VerifyComponents[kverify.ExtraKey] {
|
||||||
if err := kverify.WaitOperational(client, kverify.CorePodsList, timeout); err != nil {
|
if err := kverify.WaitExtra(client, kverify.CorePodsList, timeout); err != nil {
|
||||||
return errors.Wrap(err, "waiting for operational status")
|
return errors.Wrap(err, "extra waiting")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -664,14 +665,32 @@ func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if cfg.VerifyComponents[kverify.OperationalKey] {
|
if cfg.VerifyComponents[kverify.ExtraKey] {
|
||||||
// after kubelet is restarted (with 'kubeadm init phase kubelet-start' above),
|
// after kubelet is restarted (with 'kubeadm init phase kubelet-start' above),
|
||||||
// it appears to be immediately Ready as are all kube-system pods
|
// it appears to be immediately Ready, as do all kube-system pods,
|
||||||
// then (after ~10sec) it realises it has some changes to apply, implying also pods restarts
|
// then (after ~10sec) it realises it has some changes to apply, implying also pods restarts,
|
||||||
// so we wait for kubelet to initialise itself...
|
// and by that time we would exit completely, so we wait until kubelet begins restarting pods
|
||||||
time.Sleep(10 * time.Second)
|
klog.Info("waiting for restarted kubelet to initialise ...")
|
||||||
if err := kverify.WaitOperational(client, kverify.CorePodsList, kconst.DefaultControlPlaneTimeout); err != nil {
|
start := time.Now()
|
||||||
return errors.Wrap(err, "operational status")
|
wait := func() error {
|
||||||
|
pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, pod := range pods.Items {
|
||||||
|
if pod.Labels["tier"] == "control-plane" {
|
||||||
|
if ready, _ := kverify.IsPodReady(&pod); !ready {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fmt.Errorf("kubelet not initialised")
|
||||||
|
}
|
||||||
|
_ = retry.Expo(wait, 250*time.Millisecond, 1*time.Minute)
|
||||||
|
klog.Infof("kubelet initialised")
|
||||||
|
klog.Infof("duration metric: took %s waiting for restarted kubelet to initialise ...", time.Since(start))
|
||||||
|
if err := kverify.WaitExtra(client, kverify.CorePodsList, kconst.DefaultControlPlaneTimeout); err != nil {
|
||||||
|
return errors.Wrap(err, "extra")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -448,6 +448,7 @@ func imageID(image string) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
// validateComponentHealth asserts that all Kubernetes components are healthy
|
// validateComponentHealth asserts that all Kubernetes components are healthy
|
||||||
|
// note: it expects all components to be Ready, so it only makes sense to run it right after tests that include the '--wait=all' start flag (i.e., with extra wait)
|
||||||
func validateComponentHealth(ctx context.Context, t *testing.T, profile string) {
|
func validateComponentHealth(ctx context.Context, t *testing.T, profile string) {
|
||||||
defer PostMortemLogs(t, profile)
|
defer PostMortemLogs(t, profile)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue