test pr 11106 and pr 11185

pull/11209/head
Predrag Rogic 2021-04-26 21:29:13 +01:00
parent fe0a1774a6
commit d0cc7f09fc
2 changed files with 35 additions and 24 deletions


@@ -20,6 +20,8 @@ package kverify
import (
	"context"
	"fmt"
	"regexp"
	"strings"
	"time"

	core "k8s.io/api/core/v1"
@@ -43,6 +45,7 @@ func WaitExtra(cs *kubernetes.Clientset, labels []string, timeout time.Duration)
		return fmt.Errorf("error listing pods in %q namespace: %w", meta.NamespaceSystem, err)
	}
	unstartedMinion := regexp.MustCompile(`node "(.*-m[0-9]+)" has status "Ready":"Unknown"`)
	for _, pod := range pods.Items {
		if time.Since(start) > timeout {
			return fmt.Errorf("timed out waiting %v for all system-critical and pods with labels %v to be %q", timeout, labels, core.NodeReady)
@@ -63,7 +66,14 @@ func WaitExtra(cs *kubernetes.Clientset, labels []string, timeout time.Duration)
		}
		if match || pod.Spec.PriorityClassName == "system-cluster-critical" || pod.Spec.PriorityClassName == "system-node-critical" {
			if err := waitPodCondition(cs, pod.Name, pod.Namespace, core.PodReady, timeout); err != nil {
				klog.Errorf("WaitExtra: %v", err)
				// ignore unstarted minion nodes and terminated pods, eg:
				// error: `waitPodCondition: node "docker-containerd-multinode-wait-m02" hosting pod "kube-proxy-frz7b" in "kube-system" namespace is currently not "Ready": node "docker-containerd-multinode-wait-m02" has status "Ready":"Unknown"`, or
				// error: `waitPodCondition: error getting pod "coredns-74ff55c5b-57fhw" in "kube-system" namespace: pods "coredns-74ff55c5b-57fhw" not found`
				terminatedPod := strings.HasSuffix(err.Error(), "not found")
				if !unstartedMinion.MatchString(err.Error()) && !terminatedPod {
					klog.Errorf("WaitExtra: %v", err)
				}
				continue
			}
			break
		}
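
For reference, a minimal standalone sketch (not part of the commit) of the filter added above: it applies the same unstartedMinion regexp and "not found" suffix check to the two error strings quoted in the code comments, plus a third, made-up message that would still be logged.

package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	// same pattern as in WaitExtra above
	unstartedMinion := regexp.MustCompile(`node "(.*-m[0-9]+)" has status "Ready":"Unknown"`)

	samples := []string{
		// the two errors quoted in the comments above
		`waitPodCondition: node "docker-containerd-multinode-wait-m02" hosting pod "kube-proxy-frz7b" in "kube-system" namespace is currently not "Ready": node "docker-containerd-multinode-wait-m02" has status "Ready":"Unknown"`,
		`waitPodCondition: error getting pod "coredns-74ff55c5b-57fhw" in "kube-system" namespace: pods "coredns-74ff55c5b-57fhw" not found`,
		// hypothetical error that is neither an unstarted minion nor a terminated pod
		`waitPodCondition: pod "etcd-minikube" in "kube-system" namespace is currently not "Ready"`,
	}

	for _, msg := range samples {
		terminatedPod := strings.HasSuffix(msg, "not found")
		if !unstartedMinion.MatchString(msg) && !terminatedPod {
			fmt.Println("would log:   ", msg)
		} else {
			fmt.Println("would ignore:", msg)
		}
	}
}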


@@ -676,6 +676,30 @@ func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error {
		}
	}
	cr, err := cruntime.New(cruntime.Config{Type: cfg.KubernetesConfig.ContainerRuntime, Runner: k.c})
	if err != nil {
		return errors.Wrap(err, "runtime")
	}
	// We must ensure that the apiserver is healthy before proceeding
	if err := kverify.WaitForAPIServerProcess(cr, k, cfg, k.c, time.Now(), kconst.DefaultControlPlaneTimeout); err != nil {
		return errors.Wrap(err, "apiserver healthz")
	}
	if err := kverify.WaitForHealthyAPIServer(cr, k, cfg, k.c, client, time.Now(), hostname, port, kconst.DefaultControlPlaneTimeout); err != nil {
		return errors.Wrap(err, "apiserver health")
	}
	// because reboots clear /etc/cni
	if err := k.applyCNI(cfg); err != nil {
		return errors.Wrap(err, "apply cni")
	}
	if err := kverify.WaitForSystemPods(cr, k, cfg, k.c, client, time.Now(), kconst.DefaultControlPlaneTimeout); err != nil {
		return errors.Wrap(err, "system pods")
	}
	// must be called after applyCNI
	if cfg.VerifyComponents[kverify.ExtraKey] {
		// after kubelet is restarted (with 'kubeadm init phase kubelet-start' above),
		// it appears to be immediately Ready, as do all kube-system pods (last observed state),
@@ -704,29 +728,6 @@ func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error {
		}
	}
	cr, err := cruntime.New(cruntime.Config{Type: cfg.KubernetesConfig.ContainerRuntime, Runner: k.c})
	if err != nil {
		return errors.Wrap(err, "runtime")
	}
	// We must ensure that the apiserver is healthy before proceeding
	if err := kverify.WaitForAPIServerProcess(cr, k, cfg, k.c, time.Now(), kconst.DefaultControlPlaneTimeout); err != nil {
		return errors.Wrap(err, "apiserver healthz")
	}
	if err := kverify.WaitForHealthyAPIServer(cr, k, cfg, k.c, client, time.Now(), hostname, port, kconst.DefaultControlPlaneTimeout); err != nil {
		return errors.Wrap(err, "apiserver health")
	}
	// because reboots clear /etc/cni
	if err := k.applyCNI(cfg); err != nil {
		return errors.Wrap(err, "apply cni")
	}
	if err := kverify.WaitForSystemPods(cr, k, cfg, k.c, client, time.Now(), kconst.DefaultControlPlaneTimeout); err != nil {
		return errors.Wrap(err, "system pods")
	}
	if err := kverify.NodePressure(client); err != nil {
		adviseNodePressure(err, cfg.Name, cfg.Driver)
	}
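
For orientation, a hypothetical self-contained sketch (stubbed step helper, not the real minikube API) of the ordering this change establishes in restartControlPlane: the container-runtime and apiserver checks, the CNI re-apply and the system-pods wait all run before the optional extra-components wait, which is only meaningful after applyCNI; node-pressure problems are advised on rather than returned.

package main

import "fmt"

// step stands in for the real calls (kverify.WaitForAPIServerProcess,
// kverify.WaitForHealthyAPIServer, k.applyCNI, kverify.WaitForSystemPods, ...).
func step(name string) error {
	fmt.Println("running:", name)
	return nil
}

func restartControlPlane(verifyExtra bool) error {
	// fixed order, mirroring the block added at line 676 above
	for _, name := range []string{
		"apiserver process", // kverify.WaitForAPIServerProcess
		"apiserver health",  // kverify.WaitForHealthyAPIServer
		"apply cni",         // k.applyCNI: reboots clear /etc/cni
		"system pods",       // kverify.WaitForSystemPods
	} {
		if err := step(name); err != nil {
			return fmt.Errorf("%s: %w", name, err)
		}
	}
	if verifyExtra {
		// must be called after "apply cni"
		if err := step("extra pods"); err != nil {
			return fmt.Errorf("extra: %w", err)
		}
	}
	// node pressure problems only produce advice, they do not fail the restart
	if err := step("node pressure"); err != nil {
		fmt.Println("advise:", err)
	}
	return nil
}

func main() {
	if err := restartControlPlane(true); err != nil {
		fmt.Println("restart failed:", err)
	}
}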