Split WaitForCluster up into smaller functions, be more conservative

pull/5894/head
Thomas Stromberg 2019-11-12 21:14:37 -08:00
parent f5736fd9af
commit 7e5ab0417b
1 changed files with 43 additions and 16 deletions

View File

@ -148,12 +148,13 @@ func (k *Bootstrapper) GetAPIServerStatus(ip net.IP, apiserverPort int) (string,
} }
client := &http.Client{Transport: tr} client := &http.Client{Transport: tr}
resp, err := client.Get(url) resp, err := client.Get(url)
glog.Infof("%s response: %v %+v", url, err, resp)
// Connection refused, usually. // Connection refused, usually.
if err != nil { if err != nil {
glog.Warningf("%s response: %v %+v", url, err, resp)
return state.Stopped.String(), nil return state.Stopped.String(), nil
} }
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
glog.Warningf("%s response: %v %+v", url, err, resp)
return state.Error.String(), nil return state.Error.String(), nil
} }
return state.Running.String(), nil return state.Running.String(), nil
@ -350,13 +351,9 @@ func (k *Bootstrapper) client(k8s config.KubernetesConfig) (*kubernetes.Clientse
return kubernetes.NewForConfig(config) return kubernetes.NewForConfig(config)
} }
// WaitForCluster blocks until the cluster appears to be healthy func (k *Bootstrapper) waitForApiServerProcess(start time.Time, timeout time.Duration) error {
func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time.Duration) error {
start := time.Now()
out.T(out.Waiting, "Waiting for cluster to come online ...")
glog.Infof("waiting for apiserver process to appear ...") glog.Infof("waiting for apiserver process to appear ...")
err := wait.PollImmediate(time.Second*1, time.Minute*5, func() (bool, error) { err := wait.PollImmediate(time.Second*1, timeout, func() (bool, error) {
if time.Since(start) > timeout { if time.Since(start) > timeout {
return false, fmt.Errorf("cluster wait timed out during process check") return false, fmt.Errorf("cluster wait timed out during process check")
} }
@ -371,7 +368,10 @@ func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time.
return fmt.Errorf("apiserver process never appeared") return fmt.Errorf("apiserver process never appeared")
} }
glog.Infof("duration metric: took %s to wait for apiserver process to appear ...", time.Since(start)) glog.Infof("duration metric: took %s to wait for apiserver process to appear ...", time.Since(start))
return nil
}
func (k *Bootstrapper) waitForApiServerHealthz(start time.Time, k8s config.KubernetesConfig, timeout time.Duration) error {
glog.Infof("waiting for apiserver healthz status ...") glog.Infof("waiting for apiserver healthz status ...")
hStart := time.Now() hStart := time.Now()
healthz := func() (bool, error) { healthz := func() (bool, error) {
@ -390,32 +390,46 @@ func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time.
return true, nil return true, nil
} }
if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, healthz); err != nil { if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, healthz); err != nil {
return fmt.Errorf("apiserver healthz never reported healthy") return fmt.Errorf("apiserver healthz never reported healthy")
} }
glog.Infof("duration metric: took %s to wait for apiserver healthz status ...", time.Since(hStart)) glog.Infof("duration metric: took %s to wait for apiserver healthz status ...", time.Since(hStart))
return nil
}
glog.Infof("waiting for pod list to contain data ...") func (k *Bootstrapper) waitForSystemPods(start time.Time, k8s config.KubernetesConfig, timeout time.Duration) error {
glog.Infof("waiting for kube-system pods to appear ...")
pStart := time.Now() pStart := time.Now()
client, err := k.client(k8s) client, err := k.client(k8s)
if err != nil { if err != nil {
return errors.Wrap(err, "client") return errors.Wrap(err, "client")
} }
podStart := time.Time{}
podList := func() (bool, error) { podList := func() (bool, error) {
if time.Since(start) > timeout { if time.Since(start) > timeout {
return false, fmt.Errorf("cluster wait timed out during pod check") return false, fmt.Errorf("cluster wait timed out during pod check")
} }
// Wait for any system pod, as waiting for apiserver may block until etcd // Wait for any system pod, as waiting for apiserver may block until etcd
pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{}) pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{})
if len(pods.Items) == 0 { if len(pods.Items) < 2 {
return true, nil podStart = time.Time{}
}
if err != nil {
return false, nil return false, nil
} }
glog.Infof("%d kube-system pods found", len(pods.Items)) if err != nil {
return true, nil podStart = time.Time{}
return false, nil
}
if podStart.IsZero() {
podStart = time.Now()
}
glog.Infof("%d kube-system pods found since %s", len(pods.Items), podStart)
if time.Since(podStart) > 2*kconst.APICallRetryInterval {
glog.Infof("stability requirement met, returning")
return true, nil
}
return false, nil
} }
if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, podList); err != nil { if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, podList); err != nil {
return fmt.Errorf("apiserver never returned a pod list") return fmt.Errorf("apiserver never returned a pod list")
@ -424,6 +438,19 @@ func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time.
return nil return nil
} }
// WaitForCluster blocks until the cluster appears to be healthy.
//
// It announces the wait to the user, then runs three checks in order:
// waitForApiServerProcess, waitForApiServerHealthz and waitForSystemPods.
// Every check receives the same start time and the same timeout, so the
// timeout is measured from the moment WaitForCluster was entered rather
// than being restarted for each check. The first check that fails ends the
// sequence and its error is returned unchanged; nil is returned only when
// all three checks succeed.
func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time.Duration) error {
	began := time.Now()
	out.T(out.Waiting, "Waiting for cluster to come online ...")

	// The checks are order-dependent, so run them strictly one after another.
	checks := []func() error{
		func() error { return k.waitForApiServerProcess(began, timeout) },
		func() error { return k.waitForApiServerHealthz(began, k8s, timeout) },
		func() error { return k.waitForSystemPods(began, k8s, timeout) },
	}
	for _, check := range checks {
		if err := check(); err != nil {
			return err
		}
	}
	return nil
}
// RestartCluster restarts the Kubernetes cluster configured by kubeadm // RestartCluster restarts the Kubernetes cluster configured by kubeadm
func (k *Bootstrapper) RestartCluster(k8s config.KubernetesConfig) error { func (k *Bootstrapper) RestartCluster(k8s config.KubernetesConfig) error {
glog.Infof("RestartCluster start") glog.Infof("RestartCluster start")