From dc3a50e026b00cee569abb9dd1b074d7248b1bb7 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 12 Nov 2019 13:38:48 -0800 Subject: [PATCH] Make --wait=false wait for nothing again, change default to --wait=true --- cmd/minikube/cmd/start.go | 20 +- pkg/minikube/bootstrapper/bootstrapper.go | 2 +- pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 203 +++++++------------ pkg/minikube/out/style.go | 1 - 4 files changed, 78 insertions(+), 148 deletions(-) diff --git a/cmd/minikube/cmd/start.go b/cmd/minikube/cmd/start.go index 6b14c8c4e5..abef3ca25f 100644 --- a/cmd/minikube/cmd/start.go +++ b/cmd/minikube/cmd/start.go @@ -170,7 +170,7 @@ func initMinikubeFlags() { startCmd.Flags().String(criSocket, "", "The cri socket path to be used.") startCmd.Flags().String(networkPlugin, "", "The name of the network plugin.") startCmd.Flags().Bool(enableDefaultCNI, false, "Enable the default CNI plugin (/etc/cni/net.d/k8s.conf). Used in conjunction with \"--network-plugin=cni\".") - startCmd.Flags().Bool(waitUntilHealthy, false, "Wait until Kubernetes core services are healthy before exiting.") + startCmd.Flags().Bool(waitUntilHealthy, true, "Wait until apiserver API calls return content before exiting.") startCmd.Flags().Duration(waitTimeout, 6*time.Minute, "max time to wait per Kubernetes core services to be healthy.") startCmd.Flags().Bool(nativeSSH, true, "Use native Golang SSH client (default true). Set to 'false' to use the command line 'ssh' command when accessing the docker machine. Useful for the machine drivers when they will not start with 'Waiting for SSH'.") startCmd.Flags().Bool(autoUpdate, true, "If set, automatically updates drivers to the latest version. Defaults to true.") @@ -365,7 +365,11 @@ func runStart(cmd *cobra.Command, args []string) { if driverName == driver.None { prepareNone() } - waitCluster(bs, config) + if viper.GetBool(waitUntilHealthy) { + if err := bs.WaitForCluster(config.KubernetesConfig, viper.GetDuration(waitTimeout)); err != nil { + exit.WithError("Wait failed", err) + } + } if err := showKubectlInfo(kubeconfig, k8sVersion, config.Name); err != nil { glog.Errorf("kubectl info: %v", err) } @@ -389,18 +393,6 @@ func enableAddons() { } } -func waitCluster(bs bootstrapper.Bootstrapper, config cfg.MachineConfig) { - var podsToWaitFor []string - - if !viper.GetBool(waitUntilHealthy) { - // only wait for apiserver if wait=false - podsToWaitFor = []string{"apiserver"} - } - if err := bs.WaitForPods(config.KubernetesConfig, viper.GetDuration(waitTimeout), podsToWaitFor); err != nil { - exit.WithError("Wait failed", err) - } -} - func displayVersion(version string) { prefix := "" if viper.GetString(cfg.MachineProfile) != constants.DefaultMachineName { diff --git a/pkg/minikube/bootstrapper/bootstrapper.go b/pkg/minikube/bootstrapper/bootstrapper.go index f0b244f3e7..f54c0b4f41 100644 --- a/pkg/minikube/bootstrapper/bootstrapper.go +++ b/pkg/minikube/bootstrapper/bootstrapper.go @@ -41,7 +41,7 @@ type Bootstrapper interface { UpdateCluster(config.KubernetesConfig) error RestartCluster(config.KubernetesConfig) error DeleteCluster(config.KubernetesConfig) error - WaitForPods(config.KubernetesConfig, time.Duration, []string) error + WaitForCluster(config.KubernetesConfig, time.Duration) error // LogCommands returns a map of log type to a command which will display that log. LogCommands(LogOptions) map[string]string SetupCerts(cfg config.KubernetesConfig) error diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index 7e9c27e41b..2982551731 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -39,7 +39,6 @@ import ( "github.com/spf13/viper" "golang.org/x/sync/errgroup" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants" @@ -66,7 +65,6 @@ const ( defaultCNIConfigPath = "/etc/cni/net.d/k8s.conf" kubeletServiceFile = "/lib/systemd/system/kubelet.service" kubeletSystemdConfFile = "/etc/systemd/system/kubelet.service.d/10-kubeadm.conf" - AllPods = "ALL_PODS" ) const ( @@ -96,22 +94,6 @@ var KubeadmExtraArgsWhitelist = map[int][]string{ }, } -type pod struct { - // Human friendly name - name string - key string - value string -} - -// PodsByLayer are queries we run when health checking, sorted roughly by dependency layer -var PodsByLayer = []pod{ - {"proxy", "k8s-app", "kube-proxy"}, - {"etcd", "component", "etcd"}, - {"scheduler", "component", "kube-scheduler"}, - {"controller", "component", "kube-controller-manager"}, - {"dns", "k8s-app", "kube-dns"}, -} - // yamlConfigPath is the path to the kubeadm configuration var yamlConfigPath = path.Join(vmpath.GuestEphemeralDir, "kubeadm.yaml") @@ -354,7 +336,6 @@ func addAddons(files *[]assets.CopyableFile, data interface{}) error { // client returns a Kubernetes client to use to speak to a kubeadm launched apiserver func (k *Bootstrapper) client(k8s config.KubernetesConfig) (*kubernetes.Clientset, error) { - // Catch case if WaitForPods was called with a stale ~/.kube/config config, err := kapi.ClientConfig(k.contextName) if err != nil { return nil, errors.Wrap(err, "client config") @@ -369,69 +350,71 @@ func (k *Bootstrapper) client(k8s config.KubernetesConfig) (*kubernetes.Clientse return kubernetes.NewForConfig(config) } -// WaitForPods blocks until pods specified in podsToWaitFor appear to be healthy. -func (k *Bootstrapper) WaitForPods(k8s config.KubernetesConfig, timeout time.Duration, podsToWaitFor []string) error { - // Do not wait for "k8s-app" pods in the case of CNI, as they are managed - // by a CNI plugin which is usually started after minikube has been brought - // up. Otherwise, minikube won't start, as "k8s-app" pods are not ready. - componentsOnly := k8s.NetworkPlugin == "cni" - out.T(out.WaitingPods, "Waiting for:") +// WaitForCluster blocks until the cluster appears to be healthy +func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time.Duration) error { + start := time.Now() + out.T(out.Waiting, "Waiting for cluster to come online ...") - // Wait until the apiserver can answer queries properly. We don't care if the apiserver - // pod shows up as registered, but need the webserver for all subsequent queries. - - if shouldWaitForPod("apiserver", podsToWaitFor) { - out.String(" apiserver") - if err := k.waitForAPIServer(k8s); err != nil { - return errors.Wrap(err, "waiting for apiserver") + glog.Infof("waiting for apiserver process to appear ...") + err := wait.PollImmediate(time.Second*1, time.Minute*5, func() (bool, error) { + if time.Since(start) > timeout { + return false, fmt.Errorf("cluster wait timed out during process check") } - } - - client, err := k.client(k8s) + rr, ierr := k.c.RunCmd(exec.Command("sudo", "pgrep", "kube-apiserver")) + if ierr != nil { + glog.Warningf("pgrep apiserver: %v cmd: %s", ierr, rr.Command()) + return false, nil + } + return true, nil + }) if err != nil { - return errors.Wrap(err, "client") + return fmt.Errorf("apiserver process never appeared") + } + glog.Infof("duration metric: took %s to wait for apiserver process to appear ...", time.Since(start)) + + glog.Infof("waiting for apiserver healthz status ...") + hStart := time.Now() + healthz := func() (bool, error) { + if time.Since(start) > timeout { + return false, fmt.Errorf("cluster wait timed out during healthz check") + } + + status, err := k.GetAPIServerStatus(net.ParseIP(k8s.NodeIP), k8s.NodePort) + if err != nil { + glog.Warningf("status: %v", err) + return false, nil + } + if status != "Running" { + return false, nil + } + return true, nil } - for _, p := range PodsByLayer { - if componentsOnly && p.key != "component" { // skip component check if network plugin is cni - continue - } - if !shouldWaitForPod(p.name, podsToWaitFor) { - continue - } - out.String(" %s", p.name) - selector := labels.SelectorFromSet(labels.Set(map[string]string{p.key: p.value})) - if err := kapi.WaitForPodsWithLabelRunning(client, "kube-system", selector, timeout); err != nil { - return errors.Wrap(err, fmt.Sprintf("waiting for %s=%s", p.key, p.value)) - } + if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, healthz); err != nil { + return fmt.Errorf("apiserver healthz never reported healthy") } - out.Ln("") + glog.Infof("duration metric: took %s to wait for apiserver healthz status ...", time.Since(hStart)) + + glog.Infof("waiting for apiserver to appear in pod list ...") + pStart := time.Now() + client, err := k.client(k8s) + podList := func() (bool, error) { + if time.Since(start) > timeout { + return false, fmt.Errorf("cluster wait timed out during pod check") + } + _, err := client.CoreV1().Pods("kube-system").Get("kube-apiserver-minikube", metav1.GetOptions{}) + if err != nil { + return false, nil + } + return true, nil + } + if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, podList); err != nil { + return fmt.Errorf("apiserver never returned a pod list") + } + glog.Infof("duration metric: took %s to wait for pod list to return data ...", time.Since(pStart)) return nil } -// shouldWaitForPod returns true if: -// 1. podsToWaitFor is nil -// 2. name is in podsToWaitFor -// 3. ALL_PODS is in podsToWaitFor -// else, return false -func shouldWaitForPod(name string, podsToWaitFor []string) bool { - if podsToWaitFor == nil { - return true - } - if len(podsToWaitFor) == 0 { - return false - } - for _, p := range podsToWaitFor { - if p == AllPods { - return true - } - if p == name { - return true - } - } - return false -} - // RestartCluster restarts the Kubernetes cluster configured by kubeadm func (k *Bootstrapper) RestartCluster(k8s config.KubernetesConfig) error { glog.Infof("RestartCluster start") @@ -472,8 +455,21 @@ func (k *Bootstrapper) RestartCluster(k8s config.KubernetesConfig) error { } } - if err := k.waitForAPIServer(k8s); err != nil { - return errors.Wrap(err, "waiting for apiserver") + // We must ensure that the apiserver is healthy before proceeding + glog.Infof("waiting for apiserver healthz ...") + healthz := func() (bool, error) { + status, err := k.GetAPIServerStatus(net.ParseIP(k8s.NodeIP), k8s.NodePort) + if err != nil { + glog.Warningf("status: %v", err) + return false, nil + } + if status != "Running" { + return false, nil + } + return true, nil + } + if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, healthz); err != nil { + return fmt.Errorf("apiserver healthz never reported healthy") } // restart the proxy and coredns @@ -487,63 +483,6 @@ func (k *Bootstrapper) RestartCluster(k8s config.KubernetesConfig) error { return nil } -// waitForAPIServer waits for the apiserver to start up -func (k *Bootstrapper) waitForAPIServer(k8s config.KubernetesConfig) error { - start := time.Now() - defer func() { - glog.Infof("duration metric: took %s to wait for apiserver status ...", time.Since(start)) - }() - - glog.Infof("Waiting for apiserver process ...") - // To give a better error message, first check for process existence via ssh - // Needs minutes in case the image isn't cached (such as with v1.10.x) - err := wait.PollImmediate(time.Millisecond*300, time.Minute*3, func() (bool, error) { - rr, ierr := k.c.RunCmd(exec.Command("sudo", "pgrep", "kube-apiserver")) - if ierr != nil { - glog.Warningf("pgrep apiserver: %v cmd: %s", ierr, rr.Command()) - return false, nil - } - return true, nil - }) - if err != nil { - return fmt.Errorf("apiserver process never appeared") - } - - glog.Infof("Waiting for apiserver to port healthy status ...") - var client *kubernetes.Clientset - f := func() (bool, error) { - status, err := k.GetAPIServerStatus(net.ParseIP(k8s.NodeIP), k8s.NodePort) - glog.Infof("apiserver status: %s, err: %v", status, err) - if err != nil { - glog.Warningf("status: %v", err) - return false, nil - } - if status != "Running" { - return false, nil - } - // Make sure apiserver pod is retrievable - if client == nil { - // We only want to get the clientset once, because this line takes ~1 second to complete - client, err = k.client(k8s) - if err != nil { - glog.Warningf("get kubernetes client: %v", err) - return false, nil - } - } - - _, err = client.CoreV1().Pods("kube-system").Get("kube-apiserver-minikube", metav1.GetOptions{}) - if err != nil { - return false, nil - } - - return true, nil - // TODO: Check apiserver/kubelet logs for fatal errors so that users don't - // need to wait minutes to find out their flag didn't work. - } - err = wait.PollImmediate(kconst.APICallRetryInterval, 2*kconst.DefaultControlPlaneTimeout, f) - return err -} - // DeleteCluster removes the components that were started earlier func (k *Bootstrapper) DeleteCluster(k8s config.KubernetesConfig) error { version, err := parseKubernetesVersion(k8s.KubernetesVersion) diff --git a/pkg/minikube/out/style.go b/pkg/minikube/out/style.go index 0af52ed464..b14c9d5883 100644 --- a/pkg/minikube/out/style.go +++ b/pkg/minikube/out/style.go @@ -65,7 +65,6 @@ var styles = map[StyleEnum]style{ Stopped: {Prefix: "🛑 "}, WarningType: {Prefix: "⚠️ ", LowPrefix: lowWarning}, Waiting: {Prefix: "⌛ "}, - WaitingPods: {Prefix: "⌛ ", OmitNewline: true}, Usage: {Prefix: "💡 "}, Launch: {Prefix: "🚀 "}, Sad: {Prefix: "😿 "},