From 3fb1cded917f805c74a2d1b4bb89379e347b5f99 Mon Sep 17 00:00:00 2001
From: Thomas Stromberg
Date: Thu, 25 Jun 2020 09:43:45 -0700
Subject: [PATCH] Add retry.Local + retry logging

retry.Local retries a callback with exponential backoff (250ms initial
interval, 1.25 multiplier, 0.25 randomization) and stops once maxTime
has elapsed. Local and Expo both report each retry through
backoff.RetryNotify. WaitForSystemPods and WaitForAppsRunning now use
retry.Local; since WaitForSystemPods no longer checks the timeout
itself, Local must honor maxTime via MaxElapsedTime.
---
 .../bsutil/kverify/system_pods.go             | 25 ++++++++-----------
 pkg/util/retry/retry.go                       | 21 +++++++++++++++-
 2 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go b/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go
index bd03ecec5b..d5c6dc21b0 100644
--- a/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go
+++ b/pkg/minikube/bootstrapper/bsutil/kverify/system_pods.go
@@ -27,7 +27,6 @@ import (
 	"github.com/pkg/errors"
 	core "k8s.io/api/core/v1"
 	meta "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/kubernetes"
 	kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants"
 	"k8s.io/minikube/pkg/minikube/bootstrapper"
@@ -44,10 +43,7 @@ func WaitForSystemPods(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg con
 	glog.Info("waiting for kube-system pods to appear ...")
 	pStart := time.Now()
 
-	podList := func() (bool, error) {
-		if time.Since(start) > timeout {
-			return false, fmt.Errorf("cluster wait timed out during pod check")
-		}
+	podList := func() error {
 		if time.Since(start) > minLogCheckTime {
 			announceProblems(r, bs, cfg, cr)
 			time.Sleep(kconst.APICallRetryInterval * 5)
@@ -57,19 +53,22 @@ func WaitForSystemPods(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg con
 		pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{})
 		if err != nil {
 			glog.Warningf("pod list returned error: %v", err)
-			return false, nil
+			return err
 		}
+
 		glog.Infof("%d kube-system pods found", len(pods.Items))
 		for _, pod := range pods.Items {
 			glog.Infof(podStatusMsg(pod))
 		}
 
 		if len(pods.Items) < 2 {
-			return false, nil
+			return fmt.Errorf("only %d pod(s) have shown up", len(pods.Items))
 		}
-		return true, nil
+
+		return nil
 	}
-	if err := wait.PollImmediate(kconst.APICallRetryInterval, timeout, podList); err != nil {
+
+	if err := retry.Local(podList, timeout); err != nil {
 		return fmt.Errorf("apiserver never returned a pod list")
 	}
 	glog.Infof("duration metric: took %s to wait for pod list to return data ...", time.Since(pStart))
@@ -118,14 +117,10 @@ func WaitForAppsRunning(cs *kubernetes.Clientset, expected []string, timeout tim
 	start := time.Now()
 
 	checkRunning := func() error {
-		err := ExpectAppsRunning(cs, expected)
-		if err != nil {
-			glog.Warningf("expect apps running failed: %v", err)
-		}
-		return err
+		return ExpectAppsRunning(cs, expected)
 	}
 
-	if err := retry.Expo(checkRunning, kconst.APICallRetryInterval, timeout); err != nil {
+	if err := retry.Local(checkRunning, timeout); err != nil {
 		return errors.Wrapf(err, "expected k8s-apps")
 	}
 	glog.Infof("duration metric: took %s to wait for k8s-apps to be running ...", time.Since(start))
diff --git a/pkg/util/retry/retry.go b/pkg/util/retry/retry.go
index 18298459b3..8bdacf4610 100644
--- a/pkg/util/retry/retry.go
+++ b/pkg/util/retry/retry.go
@@ -14,16 +14,35 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
+// Package retry implements wrappers to retry function calls
 package retry
 
 import (
 	"time"
 
 	"github.com/cenkalti/backoff"
+	"github.com/golang/glog"
 )
 
 const defaultMaxRetries = 113
 
+// notify logs the error and the delay before the next retry attempt.
+func notify(err error, d time.Duration) {
+	glog.Infof("will retry after %s: %v", d, err)
+}
+
+// Local is back-off retry for local connections
+// maxTime is the max time allowed to spend on all the retries.
+func Local(callback func() error, maxTime time.Duration) error {
+	b := backoff.NewExponentialBackOff()
+	b.InitialInterval = 250 * time.Millisecond
+	b.RandomizationFactor = 0.25
+	b.Multiplier = 1.25
+	// Without this, maxTime is ignored and backoff's default MaxElapsedTime applies.
+	b.MaxElapsedTime = maxTime
+	return backoff.RetryNotify(callback, b, notify)
+}
+
 // Expo is exponential backoff retry.
 // initInterval is the initial waiting time to start with.
 // maxTime is the max time allowed to spend on the all the retries.
@@ -40,7 +59,7 @@ func Expo(callback func() error, initInterval time.Duration, maxTime time.Durati
 	b.RandomizationFactor = 0.5
 	b.Multiplier = 1.5
 	bm := backoff.WithMaxRetries(b, maxRetry)
-	return backoff.Retry(callback, bm)
+	return backoff.RetryNotify(callback, bm, notify)
 }
 
 // RetriableError is an error that can be tried again