fix cri-o: work with selected cni and detected cgroup driver

pull/15463/head
Predrag Rogic 2023-01-18 02:41:24 +00:00
parent e2f2bc923a
commit 4c18f9d264
No known key found for this signature in database
GPG Key ID: F1FF5748C4855229
5 changed files with 84 additions and 57 deletions

View File

@ -48,14 +48,6 @@ const (
DefaultConfDir = "/etc/cni/net.d"
)
var (
// Network is the network name that CNI should use (eg, "kindnet").
// Currently, only crio (and podman) can use it, so that setting custom ConfDir is not necessary.
// ref: https://github.com/cri-o/cri-o/issues/2121 (and https://github.com/containers/podman/issues/2370)
// ref: https://github.com/cri-o/cri-o/blob/master/docs/crio.conf.5.md#crionetwork-table
Network = ""
)
// Runner is the subset of command.Runner this package consumes
type Runner interface {
RunCmd(cmd *exec.Cmd) (*command.RunResult, error)
@ -200,25 +192,41 @@ func applyManifest(cc config.ClusterConfig, r Runner, f assets.CopyableFile) err
return nil
}
// ConfigureLoopback ensures loopback has expected version ("1.0.0") and valid name ("loopback") in its config file in /etc/cni/net.d
// cri-o is leaving name out atm (https://github.com/cri-o/cri-o/pull/6273)
// avoid errors like:
// ConfigureLoopbackCNI configures loopback cni.
// If disable is true, renames its config file in /etc/cni/net.d by appending the ".mk_disabled" suffix to its name.
// Otherwise, ensures loopback cni has expected version ("1.0.0") and valid name ("loopback") in its config file in /etc/cni/net.d.
// Note: cri-o currently leaves the name out of its loopback config (https://github.com/cri-o/cri-o/pull/6273).
// Avoid errors like:
// - Failed to create pod sandbox: rpc error: code = Unknown desc = [failed to set up sandbox container "..." network for pod "...": networkPlugin cni failed to set up pod "..." network: missing network name:,
// - failed to clean up sandbox container "..." network for pod "...": networkPlugin cni failed to teardown pod "..." network: missing network name]
// It is the caller's responsibility to restart the container runtime for these changes to take effect.
func ConfigureLoopback(r Runner) error {
func ConfigureLoopbackCNI(r Runner, disable bool) error {
loopback := "/etc/cni/net.d/*loopback.conf*" // usually: 200-loopback.conf
// turn { "cniVersion": "0.3.1", "type": "loopback" }
// into { "cniVersion": "0.3.1", "name": "loopback", "type": "loopback" }
if _, err := r.RunCmd(exec.Command("sh", "-c", fmt.Sprintf("stat %s", loopback))); err != nil {
klog.Warningf("%q not found, skipping patching loopback config step", loopback)
klog.Warningf("loopback cni configuration skipped: %q not found", loopback)
return nil
}
if _, err := r.RunCmd(exec.Command(
"sudo", "find", filepath.Dir(loopback), "-maxdepth", "1", "-type", "f", "-name", filepath.Base(loopback), "-exec", "sh", "-c",
`grep -q loopback {} && ( grep -q name {} || sudo sed -i '/"type": "loopback"/i \ \ \ \ "name": "loopback",' {} ) && sudo sed -i 's|"cniVersion": ".*"|"cniVersion": "1.0.0"|g' {}`, ";")); err != nil {
return fmt.Errorf("unable to patch loopback config %q: %v", loopback, err)
findExec := []string{"find", filepath.Dir(loopback), "-maxdepth", "1", "-type", "f", "-name", filepath.Base(loopback), "-exec", "sh", "-c"}
if disable {
if _, err := r.RunCmd(exec.Command(
"sudo", append(findExec,
`sudo mv {} {}.mk_disabled`, ";")...)); err != nil {
return fmt.Errorf("unable to disable loopback cni %q: %v", loopback, err)
}
klog.Infof("loopback cni configuration disabled: %q found", loopback)
return nil
}
if _, err := r.RunCmd(exec.Command(
"sudo", append(findExec,
`grep -q loopback {} && ( grep -q name {} || sudo sed -i '/"type": "loopback"/i \ \ \ \ "name": "loopback",' {} ) && sudo sed -i 's|"cniVersion": ".*"|"cniVersion": "1.0.0"|g' {}`, ";")...)); err != nil {
return fmt.Errorf("unable to patch loopback cni config %q: %v", loopback, err)
}
klog.Infof("loopback cni configuration patched: %q found", loopback)
return nil
}
@ -239,7 +247,8 @@ func disableAllBridgeCNIs(r Runner) error {
path := "/etc/cni/net.d"
out, err := r.RunCmd(exec.Command(
"sudo", "find", path, "-maxdepth", "1", "-type", "f", "-name", "*bridge*", "-not", "-name", "*.mk_disabled", "-printf", "%p, ", "-exec", "sh", "-c",
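// for cri-o, we also disable 87-podman.conflist (that does not have 'bridge' in its name)
// NOTE(review): in find, "-and" binds tighter than "-or", so the expression below is evaluated as
// '*bridge*' OR ('*podman*' AND NOT '*.mk_disabled') - already disabled '*bridge*.mk_disabled' files still match and get renamed again;
// grouping the two name tests, ie "(", "(", "-name", "*bridge*", "-or", "-name", "*podman*", ")", "-and", "-not", "-name", "*.mk_disabled", ")", would avoid that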
"sudo", "find", path, "-maxdepth", "1", "-type", "f", "(", "-name", "*bridge*", "-or", "-name", "*podman*", "-and", "-not", "-name", "*.mk_disabled", ")", "-printf", "%p, ", "-exec", "sh", "-c",
`sudo mv {} {}.mk_disabled`, ";"))
if err != nil {
return fmt.Errorf("failed to disable all bridge cni configs in %q: %v", path, err)
@ -257,7 +266,7 @@ func disableAllBridgeCNIs(r Runner) error {
// ref: https://github.com/containernetworking/cni/blob/main/libcni/conf.go
// ref: https://kubernetes.io/docs/tasks/administer-cluster/migrating-from-dockershim/troubleshooting-cni-plugin-related-errors/
func configureAllBridgeCNIs(r Runner, cidr string) error {
// non-podman configs:
// non-podman bridge configs:
out, err := r.RunCmd(exec.Command(
"sudo", "find", DefaultConfDir, "-maxdepth", "1", "-type", "f", "-name", "*bridge*", "-not", "-name", "*podman*", "-not", "-name", "*.mk_disabled", "-printf", "%p, ", "-exec", "sh", "-c",
// remove ipv6 entries to avoid "failed to set bridge addr: could not add IP address to \"cni0\": permission denied"
@ -273,7 +282,8 @@ func configureAllBridgeCNIs(r Runner, cidr string) error {
}
configs := out.Stdout.String()
// podman config(s):
// podman bridge config(s):
// could be eg, 87-podman-bridge.conflist or 87-podman.conflist
// ref: https://github.com/containers/podman/blob/main/cni/87-podman-bridge.conflist
ip, ipnet, err := net.ParseCIDR(cidr)
if err != nil || ip.To4() == nil {
@ -282,7 +292,7 @@ func configureAllBridgeCNIs(r Runner, cidr string) error {
gateway := ip.Mask(ipnet.Mask)
gateway[3]++
out, err = r.RunCmd(exec.Command(
"sudo", "find", DefaultConfDir, "-maxdepth", "1", "-type", "f", "-name", "*bridge*", "-name", "*podman*", "-not", "-name", "*.mk_disabled", "-printf", "%p, ", "-exec", "sh", "-c",
"sudo", "find", DefaultConfDir, "-maxdepth", "1", "-type", "f", "-name", "*podman*", "-not", "-name", "*.mk_disabled", "-printf", "%p, ", "-exec", "sh", "-c",
fmt.Sprintf(`sudo sed -i -r -e 's|^(.*)"subnet": ".*"(.*)$|\1"subnet": "%s"\2|g' -e 's|^(.*)"gateway": ".*"(.*)$|\1"gateway": "%s"\2|g' {}`, cidr, gateway), ";"))
if err != nil {
return fmt.Errorf("failed to configure podman bridge cni configs in %q: %v", DefaultConfDir, err)

View File

@ -137,34 +137,41 @@ func generateContainerdConfig(cr CommandRunner, imageRepository string, kv semve
if _, err := cr.RunCmd(exec.Command("sh", "-c", fmt.Sprintf(`sudo sed -i -r 's|^( *)restrict_oom_score_adj = .*$|\1restrict_oom_score_adj = %t|' %s`, inUserNamespace, containerdConfigFile))); err != nil {
return errors.Wrap(err, "update restrict_oom_score_adj")
}
// configure cgroup driver
if cgroupDriver != constants.UnknownCgroupDriver {
klog.Infof("configuring containerd to use %q as cgroup driver...", cgroupDriver)
useSystemd := cgroupDriver == constants.SystemdCgroupDriver
if _, err := cr.RunCmd(exec.Command("sh", "-c", fmt.Sprintf(`sudo sed -i -r 's|^( *)SystemdCgroup = .*$|\1SystemdCgroup = %t|g' %s`, useSystemd, containerdConfigFile))); err != nil {
return errors.Wrap(err, "configuring SystemdCgroup")
}
if cgroupDriver == constants.UnknownCgroupDriver {
klog.Warningf("unable to configure containerd to use unknown cgroup driver, will use default %q instead", constants.DefaultCgroupDriver)
cgroupDriver = constants.DefaultCgroupDriver
}
klog.Infof("configuring containerd to use %q as cgroup driver...", cgroupDriver)
useSystemd := cgroupDriver == constants.SystemdCgroupDriver
if _, err := cr.RunCmd(exec.Command("sh", "-c", fmt.Sprintf(`sudo sed -i -r 's|^( *)SystemdCgroup = .*$|\1SystemdCgroup = %t|g' %s`, useSystemd, containerdConfigFile))); err != nil {
return errors.Wrap(err, "configuring SystemdCgroup")
}
// handle deprecated/removed features
// ref: https://github.com/containerd/containerd/blob/main/RELEASES.md#deprecated-features
if _, err := cr.RunCmd(exec.Command("sh", "-c", fmt.Sprintf(`sudo sed -i 's|"io.containerd.runtime.v1.linux"|"io.containerd.runc.v2"|g' %s`, containerdConfigFile))); err != nil {
return errors.Wrap(err, "configuring io.containerd.runtime version")
}
// avoid containerd v1.6.14+ "failed to load plugin io.containerd.grpc.v1.cri" error="invalid plugin config: `systemd_cgroup` only works for runtime io.containerd.runtime.v1.linux" error
// that then leads to crictl "getting the runtime version: rpc error: code = Unimplemented desc = unknown service runtime.v1alpha2.RuntimeService" error
// ref: https://github.com/containerd/containerd/issues/4203
if _, err := cr.RunCmd(exec.Command("sh", "-c", fmt.Sprintf(`sudo sed -i '/systemd_cgroup/d' %s`, containerdConfigFile))); err != nil {
return errors.Wrap(err, "removing deprecated systemd_cgroup param")
}
// "runtime_type" has to be specified and it should be "io.containerd.runc.v2"
// ref: https://github.com/containerd/containerd/issues/6964#issuecomment-1132378279
if _, err := cr.RunCmd(exec.Command("sh", "-c", fmt.Sprintf(`sudo sed -i 's|"io.containerd.runc.v1"|"io.containerd.runc.v2"|g' %s`, containerdConfigFile))); err != nil {
return errors.Wrap(err, "configuring io.containerd.runc version")
}
// ensure conf_dir is using '/etc/cni/net.d'
// we might still want to try removing '/etc/cni/net.mk' in case of upgrade from previous minikube version that had/used it
if _, err := cr.RunCmd(exec.Command("sh", "-c", `sudo rm -rf /etc/cni/net.mk`)); err != nil {
return fmt.Errorf("unable to remove /etc/cni/net.mk directory: %v", err)
klog.Warningf("unable to remove /etc/cni/net.mk directory: %v", err)
}
if _, err := cr.RunCmd(exec.Command("sh", "-c", fmt.Sprintf(`sudo sed -i -r 's|^( *)conf_dir = .*$|\1conf_dir = %q|g' %s`, cni.DefaultConfDir, containerdConfigFile))); err != nil {
return errors.Wrap(err, "update conf_dir")

View File

@ -31,7 +31,6 @@ import (
"k8s.io/klog/v2"
"k8s.io/minikube/pkg/minikube/assets"
"k8s.io/minikube/pkg/minikube/bootstrapper/images"
"k8s.io/minikube/pkg/minikube/cni"
"k8s.io/minikube/pkg/minikube/command"
"k8s.io/minikube/pkg/minikube/config"
"k8s.io/minikube/pkg/minikube/constants"
@ -41,7 +40,7 @@ import (
)
const (
// CRIOConfFile is the path to the CRI-O configuration
// crioConfigFile is the path to the CRI-O configuration
crioConfigFile = "/etc/crio/crio.conf.d/02-crio.conf"
)
@ -54,35 +53,37 @@ type CRIO struct {
Init sysinit.Manager
}
// generateCRIOConfig sets up /etc/crio/crio.conf
func generateCRIOConfig(cr CommandRunner, imageRepository string, kv semver.Version) error {
// generateCRIOConfig sets up pause image and cgroup manager for cri-o in crioConfigFile
func generateCRIOConfig(cr CommandRunner, imageRepository string, kv semver.Version, cgroupDriver string) error {
pauseImage := images.Pause(kv, imageRepository)
c := exec.Command("/bin/bash", "-c", fmt.Sprintf("sudo sed -e 's|^.*pause_image = .*$|pause_image = \"%s\"|' -i %s", pauseImage, crioConfigFile))
klog.Infof("configure cri-o to use %q pause image...", pauseImage)
c := exec.Command("sh", "-c", fmt.Sprintf(`sudo sed -i 's|^.*pause_image = .*$|pause_image = %q|' %s`, pauseImage, crioConfigFile))
if _, err := cr.RunCmd(c); err != nil {
return errors.Wrap(err, "generateCRIOConfig")
return errors.Wrap(err, "update pause_image")
}
if cni.Network != "" {
klog.Infof("Updating CRIO to use the custom CNI network %q", cni.Network)
if _, err := cr.RunCmd(exec.Command("/bin/bash", "-c", fmt.Sprintf("sudo sed -e 's|^.*cni_default_network = .*$|cni_default_network = \"%s\"|' -i %s", cni.Network, crioConfigFile))); err != nil {
return errors.Wrap(err, "update network_dir")
}
// configure cgroup driver
if cgroupDriver == constants.UnknownCgroupDriver {
klog.Warningf("unable to configure cri-o to use unknown cgroup driver, will use default %q instead", constants.DefaultCgroupDriver)
cgroupDriver = constants.DefaultCgroupDriver
}
return nil
}
func (r *CRIO) setCGroup(driver string) error {
if driver == constants.UnknownCgroupDriver {
return fmt.Errorf("unable to configure cri-o to use unknown cgroup driver")
}
klog.Infof("configuring cri-o to use %q as cgroup driver...", driver)
c := exec.Command("/bin/bash", "-c", fmt.Sprintf(`sudo sed -i 's|^.*cgroup_manager = .*$|cgroup_manager = %q|' %s`, driver, crioConfigFile))
if _, err := r.Runner.RunCmd(c); err != nil {
klog.Infof("configuring cri-o to use %q as cgroup driver...", cgroupDriver)
if _, err := cr.RunCmd(exec.Command("sh", "-c", fmt.Sprintf(`sudo sed -i 's|^.*cgroup_manager = .*$|cgroup_manager = %q|' %s`, cgroupDriver, crioConfigFile))); err != nil {
return errors.Wrap(err, "configuring cgroup_manager")
}
// avoid errors like:
// - level=fatal msg="Validating runtime config: conmon cgroup should be 'pod' or a systemd slice"
// - level=fatal msg="Validating runtime config: cgroupfs manager conmon cgroup should be 'pod' or empty"
// ref: https://github.com/cri-o/cri-o/pull/3940
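// ref: https://github.com/cri-o/cri-o/issues/6047
// NOTE(review): the sed 'a' (append) command below is not idempotent - each run adds another conmon_cgroup line after every cgroup_manager match;
// consider deleting existing conmon_cgroup lines first (eg, sed -i '/conmon_cgroup = .*/d') - TODO confirm whether crioConfigFile persists across restarts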
if _, err := cr.RunCmd(exec.Command("sh", "-c", fmt.Sprintf(`sudo sed -i '/cgroup_manager = .*/a conmon_cgroup = %q' %s`, "pod", crioConfigFile))); err != nil {
return errors.Wrap(err, "configuring conmon_cgroup")
}
// we might still want to try removing '/etc/cni/net.mk' in case of upgrade from previous minikube version that had/used it
if _, err := cr.RunCmd(exec.Command("sh", "-c", `sudo rm -rf /etc/cni/net.mk`)); err != nil {
klog.Warningf("unable to remove /etc/cni/net.mk directory: %v", err)
}
return nil
}
@ -200,15 +201,12 @@ func (r *CRIO) Enable(disOthers bool, cgroupDriver string, inUserNamespace bool)
if err := populateCRIConfig(r.Runner, r.SocketPath()); err != nil {
return err
}
if err := generateCRIOConfig(r.Runner, r.ImageRepository, r.KubernetesVersion); err != nil {
if err := generateCRIOConfig(r.Runner, r.ImageRepository, r.KubernetesVersion, cgroupDriver); err != nil {
return err
}
if err := enableIPForwarding(r.Runner); err != nil {
return err
}
if err := r.setCGroup(cgroupDriver); err != nil {
return err
}
if inUserNamespace {
if err := CheckKernelCompatibility(r.Runner, 5, 11); err != nil {
// For using overlayfs

View File

@ -381,9 +381,16 @@ func configureRuntimes(runner cruntime.CommandRunner, cc config.ClusterConfig, k
exit.Error(reason.InternalRuntime, "Failed runtime", err)
}
// 87-podman.conflist cni conf potentially conflicts with others and is created by podman on its first invocation,
// so we "provoke" it here to ensure it's generated and that we can disable it
// note: using 'help' or '--help' would be cheaper, but does not trigger that; 'version' seems to be next best option
if co.Type == constants.CRIO {
_, _ = runner.RunCmd(exec.Command("sudo", "sh", "-c", `podman version >/dev/null`))
}
// ensure loopback cni is properly configured; for cri-o, its config is disabled instead
// make sure container runtime is restarted afterwards for these changes to take effect
if err := cni.ConfigureLoopback(runner); err != nil {
disableLoopback := co.Type == constants.CRIO
if err := cni.ConfigureLoopbackCNI(runner, disableLoopback); err != nil {
klog.Warningf("unable to name loopback interface in dockerConfigureNetworkPlugin: %v", err)
}
if kv.GTE(semver.MustParse("1.24.0-alpha.2")) {

View File

@ -218,6 +218,11 @@ func TestNetworkPlugins(t *testing.T) {
func validateFalseCNI(ctx context.Context, t *testing.T, profile string) {
cr := ContainerRuntime()
// override cri-o name
if cr == "cri-o" {
cr = "crio"
}
startArgs := []string{"start", "-p", profile, "--memory=2048", "--alsologtostderr", "--cni=false"}
startArgs = append(startArgs, StartArgs()...)