Merge pull request #11209 from prezha/test-wait-cni-cr

improve how cni and cruntimes work together v2
Medya Ghazizadeh 2021-04-30 10:31:44 -07:00 committed by GitHub
commit ecc3a914cb
4 changed files with 58 additions and 41 deletions


@@ -249,6 +249,12 @@ func generateClusterConfig(cmd *cobra.Command, existing *config.ClusterConfig, k
     var cc config.ClusterConfig
     if existing != nil {
         cc = updateExistingConfigFromFlags(cmd, existing)
+
+        // identify appropriate cni then configure cruntime accordingly
+        _, err := cni.New(&cc)
+        if err != nil {
+            return cc, config.Node{}, errors.Wrap(err, "cni")
+        }
     } else {
         klog.Info("no existing cluster config was found, will generate one from the flags ")
         cc = generateNewConfigFromFlags(cmd, k8sVersion, drvName)
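
Running cni.New even when an existing cluster config is reused is what lets the chosen CNI feed back into the container-runtime settings before anything starts. A minimal sketch of that selector-with-side-effects pattern (ClusterConfig, Manager, bridge and pickManager are hypothetical stand-ins for minikube's config.ClusterConfig and cni.New, not the real API):

package main

import (
    "errors"
    "fmt"
)

// ClusterConfig is a trimmed-down, hypothetical stand-in for minikube's
// config.ClusterConfig.
type ClusterConfig struct {
    CNI              string
    ContainerRuntime string
    NetworkPlugin    string // set as a side effect by the selector below
}

// Manager is a stand-in for the cni.Manager interface.
type Manager interface {
    String() string
}

type bridge struct{}

func (bridge) String() string { return "bridge" }

// pickManager plays the role of cni.New: besides choosing a manager, it
// configures the runtime-facing fields of the config in place.
func pickManager(cc *ClusterConfig) (Manager, error) {
    switch cc.CNI {
    case "", "auto", "bridge":
        cc.NetworkPlugin = "cni" // "configure cruntime accordingly"
        return bridge{}, nil
    default:
        return nil, errors.New("unknown CNI: " + cc.CNI)
    }
}

func main() {
    cc := ClusterConfig{CNI: "auto", ContainerRuntime: "containerd"}
    m, err := pickManager(&cc)
    if err != nil {
        fmt.Println("cni:", err)
        return
    }
    fmt.Printf("chose %s, network plugin now %q\n", m, cc.NetworkPlugin)
}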


@@ -92,6 +92,7 @@ func waitPodCondition(cs *kubernetes.Clientset, name, namespace string, conditio
            klog.Info(reason)
            return true, nil
        }
+       // return immediately: status == core.ConditionUnknown
        if status == core.ConditionUnknown {
            klog.Info(reason)
            return false, fmt.Errorf(reason)
@@ -101,6 +102,7 @@ func waitPodCondition(cs *kubernetes.Clientset, name, namespace string, conditio
            klog.Info(reason)
            lap = time.Now()
        }
+       // return immediately: status == core.ConditionFalse
        return false, nil
    }
    if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkCondition); err != nil {
@@ -114,13 +116,13 @@ func waitPodCondition(cs *kubernetes.Clientset, name, namespace string, conditio
 func podConditionStatus(cs *kubernetes.Clientset, name, namespace string, condition core.PodConditionType) (status core.ConditionStatus, reason string) {
    pod, err := cs.CoreV1().Pods(namespace).Get(context.Background(), name, meta.GetOptions{})
    if err != nil {
-       return core.ConditionUnknown, fmt.Sprintf("error getting pod %q in %q namespace: %v", name, namespace, err)
+       return core.ConditionUnknown, fmt.Sprintf("error getting pod %q in %q namespace (skipping!): %v", name, namespace, err)
    }
    // check if underlying node is Ready - in case we got stale data about the pod
    if pod.Spec.NodeName != "" {
        if status, reason := nodeConditionStatus(cs, pod.Spec.NodeName, core.NodeReady); status != core.ConditionTrue {
-           return core.ConditionUnknown, fmt.Sprintf("node %q hosting pod %q in %q namespace is currently not %q: %v", pod.Spec.NodeName, name, namespace, core.NodeReady, reason)
+           return core.ConditionUnknown, fmt.Sprintf("node %q hosting pod %q in %q namespace is currently not %q (skipping!): %v", pod.Spec.NodeName, name, namespace, core.NodeReady, reason)
        }
    }
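
The three early returns follow the (done, err) contract of the poll loop driving checkCondition: (true, nil) ends the wait successfully (ConditionTrue), (false, err) aborts it immediately (ConditionUnknown, e.g. a missing pod or a not-Ready hosting node), and (false, nil) just schedules another attempt (ConditionFalse). A self-contained sketch of those semantics, with a toy pollImmediate standing in for apimachinery's wait.PollImmediate:

package main

import (
    "errors"
    "fmt"
    "time"
)

// conditionFunc mirrors the contract used by wait.PollImmediate:
// (true, nil) = done, (false, err) = abort, (false, nil) = retry.
type conditionFunc func() (done bool, err error)

func pollImmediate(interval, timeout time.Duration, cond conditionFunc) error {
    deadline := time.Now().Add(timeout)
    for {
        done, err := cond()
        if err != nil {
            return err // abort immediately, e.g. ConditionUnknown
        }
        if done {
            return nil // success, e.g. ConditionTrue
        }
        if time.Now().After(deadline) {
            return errors.New("timed out waiting for the condition")
        }
        time.Sleep(interval) // not done yet, e.g. ConditionFalse
    }
}

func main() {
    attempts := 0
    err := pollImmediate(10*time.Millisecond, time.Second, func() (bool, error) {
        attempts++
        return attempts >= 3, nil // becomes Ready on the third check
    })
    fmt.Println(attempts, err)
}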


@@ -676,34 +676,6 @@ func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error {
        }
    }
-   if cfg.VerifyComponents[kverify.ExtraKey] {
-       // after kubelet is restarted (with 'kubeadm init phase kubelet-start' above),
-       // it appears to be immediately Ready, as well as all kube-system pods (last observed state),
-       // then (after ~10sec) it realises it has some changes to apply, implying also pod restarts,
-       // and by that time we would exit completely, so we wait until kubelet begins restarting pods
-       klog.Info("waiting for restarted kubelet to initialise ...")
-       start := time.Now()
-       wait := func() error {
-           pods, err := client.CoreV1().Pods(meta.NamespaceSystem).List(context.Background(), meta.ListOptions{LabelSelector: "tier=control-plane"})
-           if err != nil {
-               return err
-           }
-           for _, pod := range pods.Items {
-               if ready, _ := kverify.IsPodReady(&pod); !ready {
-                   return nil
-               }
-           }
-           return fmt.Errorf("kubelet not initialised")
-       }
-       _ = retry.Expo(wait, 250*time.Millisecond, 1*time.Minute)
-       klog.Infof("kubelet initialised")
-       klog.Infof("duration metric: took %s waiting for restarted kubelet to initialise ...", time.Since(start))
-       if err := kverify.WaitExtra(client, kverify.CorePodsLabels, kconst.DefaultControlPlaneTimeout); err != nil {
-           return errors.Wrap(err, "extra")
-       }
-   }
    cr, err := cruntime.New(cruntime.Config{Type: cfg.KubernetesConfig.ContainerRuntime, Runner: k.c})
    if err != nil {
        return errors.Wrap(err, "runtime")
@@ -741,6 +713,35 @@ func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error {
        return errors.Wrap(err, "addons")
    }
+   // must be called after applyCNI and `kubeadm phase addon all` (i.e., coredns redeploy)
+   if cfg.VerifyComponents[kverify.ExtraKey] {
+       // after kubelet is restarted (with 'kubeadm init phase kubelet-start' above),
+       // it appears to be immediately Ready, as well as all kube-system pods (last observed state),
+       // then (after ~10sec) it realises it has some changes to apply, implying also pod restarts,
+       // and by that time we would exit completely, so we wait until kubelet begins restarting pods
+       klog.Info("waiting for restarted kubelet to initialise ...")
+       start := time.Now()
+       wait := func() error {
+           pods, err := client.CoreV1().Pods(meta.NamespaceSystem).List(context.Background(), meta.ListOptions{LabelSelector: "tier=control-plane"})
+           if err != nil {
+               return err
+           }
+           for _, pod := range pods.Items {
+               if ready, _ := kverify.IsPodReady(&pod); !ready {
+                   return nil
+               }
+           }
+           return fmt.Errorf("kubelet not initialised")
+       }
+       _ = retry.Expo(wait, 250*time.Millisecond, 1*time.Minute)
+       klog.Infof("kubelet initialised")
+       klog.Infof("duration metric: took %s waiting for restarted kubelet to initialise ...", time.Since(start))
+       if err := kverify.WaitExtra(client, kverify.CorePodsLabels, kconst.DefaultControlPlaneTimeout); err != nil {
+           return errors.Wrap(err, "extra")
+       }
+   }
    if err := bsutil.AdjustResourceLimits(k.c); err != nil {
        klog.Warningf("unable to adjust resource limits: %v", err)
    }
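
Moving this block after applyCNI and the addon phase is the heart of the fix: WaitExtra now observes pods that have already been redeployed with the final CNI, instead of the pre-restart "last observed state". The inverted wait is easy to misread, so here is a self-contained sketch: the callback returns an error while every control-plane pod still looks Ready, and nil once any pod goes not-Ready, i.e. once the restarted kubelet has actually begun re-creating pods (retryExpo is a toy stand-in for minikube's retry.Expo):

package main

import (
    "errors"
    "fmt"
    "time"
)

// retryExpo retries f with exponential backoff until it returns nil or the
// total time budget is exhausted (a simplified stand-in for retry.Expo).
func retryExpo(f func() error, initial, budget time.Duration) error {
    deadline := time.Now().Add(budget)
    delay := initial
    for {
        err := f()
        if err == nil {
            return nil
        }
        if time.Now().After(deadline) {
            return err
        }
        time.Sleep(delay)
        delay *= 2
    }
}

func main() {
    // Simulated "last observed state": pods look Ready until the kubelet
    // starts restarting them a few checks in.
    checks := 0
    allPodsReady := func() bool { checks++; return checks < 3 }

    wait := func() error {
        if allPodsReady() {
            // all pods still Ready: kubelet has not begun restarting them yet
            return errors.New("kubelet not initialised")
        }
        return nil // a pod went not-Ready: restarts have begun
    }
    err := retryExpo(wait, 250*time.Millisecond, time.Minute)
    fmt.Println("kubelet initialised:", err == nil, "after", checks, "checks")
}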
@@ -776,9 +777,17 @@ func (k *Bootstrapper) GenerateToken(cc config.ClusterConfig) (string, error) {
    joinCmd := r.Stdout.String()
    joinCmd = strings.Replace(joinCmd, "kubeadm", bsutil.InvokeKubeadm(cc.KubernetesConfig.KubernetesVersion), 1)
    joinCmd = fmt.Sprintf("%s --ignore-preflight-errors=all", strings.TrimSpace(joinCmd))
-   if cc.KubernetesConfig.CRISocket != "" {
-       joinCmd = fmt.Sprintf("%s --cri-socket %s", joinCmd, cc.KubernetesConfig.CRISocket)
-   }
+   // avoid "Found multiple CRI sockets, please use --cri-socket to select one: /var/run/dockershim.sock, /var/run/crio/crio.sock" error
+   cr, err := cruntime.New(cruntime.Config{Type: cc.KubernetesConfig.ContainerRuntime, Runner: k.c, Socket: cc.KubernetesConfig.CRISocket})
+   if err != nil {
+       klog.Errorf("cruntime: %v", err)
+   }
+   sp := cr.SocketPath()
+   if sp == "" {
+       sp = kconst.DefaultDockerCRISocket
+   }
+   joinCmd = fmt.Sprintf("%s --cri-socket %s", joinCmd, sp)
    return joinCmd, nil
 }
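
The join command now always derives --cri-socket from the configured container runtime, falling back to the default Docker socket, rather than only appending the flag when CRISocket was explicitly set; that is what sidesteps kubeadm's "Found multiple CRI sockets" detection. A hedged sketch of the resolution logic (criSocketFor is a hypothetical helper; the socket paths are the conventional defaults):

package main

import "fmt"

// criSocketFor resolves the --cri-socket value for `kubeadm join`: a
// simplified stand-in for cruntime.New(...).SocketPath() plus the
// kconst.DefaultDockerCRISocket fallback.
func criSocketFor(runtime, configured string) string {
    if configured != "" {
        return configured
    }
    switch runtime {
    case "containerd":
        return "/run/containerd/containerd.sock"
    case "crio", "cri-o":
        return "/var/run/crio/crio.sock"
    default:
        // mirrors the default-Docker fallback
        return "/var/run/dockershim.sock"
    }
}

func main() {
    joinCmd := "kubeadm join 192.168.49.2:8443 --token abcdef.0123456789abcdef"
    joinCmd = fmt.Sprintf("%s --cri-socket %s", joinCmd, criSocketFor("crio", ""))
    fmt.Println(joinCmd)
}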


@@ -143,15 +143,6 @@ func chooseDefault(cc config.ClusterConfig) Manager {
        return Bridge{}
    }
-   if cc.KubernetesConfig.ContainerRuntime != "docker" {
-       if driver.IsKIC(cc.Driver) {
-           klog.Infof("%q driver + %s runtime found, recommending kindnet", cc.Driver, cc.KubernetesConfig.ContainerRuntime)
-           return KindNet{cc: cc}
-       }
-       klog.Infof("%q driver + %s runtime found, recommending bridge", cc.Driver, cc.KubernetesConfig.ContainerRuntime)
-       return Bridge{cc: cc}
-   }
    if driver.BareMetal(cc.Driver) {
        klog.Infof("Driver %s used, CNI unnecessary in this configuration, recommending no CNI", cc.Driver)
        return Disabled{cc: cc}
@@ -164,6 +155,15 @@ func chooseDefault(cc config.ClusterConfig) Manager {
        return KindNet{cc: cc}
    }
+   if cc.KubernetesConfig.ContainerRuntime != "docker" {
+       if driver.IsKIC(cc.Driver) {
+           klog.Infof("%q driver + %s runtime found, recommending kindnet", cc.Driver, cc.KubernetesConfig.ContainerRuntime)
+           return KindNet{cc: cc}
+       }
+       klog.Infof("%q driver + %s runtime found, recommending bridge", cc.Driver, cc.KubernetesConfig.ContainerRuntime)
+       return Bridge{cc: cc}
+   }
    klog.Infof("CNI unnecessary in this configuration, recommending no CNI")
    return Disabled{cc: cc}
 }
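
Reordering chooseDefault means the driver-specific rules (the bare-metal check and the kindnet recommendation above it) now win over the runtime-based rule, which only applies once those have been ruled out. A trimmed-down sketch of the resulting decision order (clusterConfig and the inline driver checks are simplified stand-ins for minikube's config.ClusterConfig, driver.BareMetal and driver.IsKIC):

package main

import "fmt"

// clusterConfig is a simplified stand-in for minikube's config.ClusterConfig.
type clusterConfig struct {
    Driver           string
    Nodes            int
    ContainerRuntime string
}

// chooseDefault mirrors the new rule ordering: driver-specific rules first,
// the runtime-based rule only afterwards.
func chooseDefault(cc clusterConfig) string {
    if cc.Driver == "none" { // stand-in for driver.BareMetal(cc.Driver)
        return "disabled" // CNI unnecessary in this configuration
    }
    if cc.Nodes > 1 { // multinode clusters get kindnet
        return "kindnet"
    }
    if cc.ContainerRuntime != "docker" {
        if cc.Driver == "docker" || cc.Driver == "podman" { // stand-in for driver.IsKIC(cc.Driver)
            return "kindnet"
        }
        return "bridge"
    }
    return "disabled"
}

func main() {
    // bare metal + containerd: the driver rule now takes precedence
    fmt.Println(chooseDefault(clusterConfig{Driver: "none", ContainerRuntime: "containerd"}))
    // KIC driver + crio: still recommends kindnet
    fmt.Println(chooseDefault(clusterConfig{Driver: "docker", Nodes: 1, ContainerRuntime: "crio"}))
}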