Add support to docker runtime for OCI access to NVIDIA GPUs (#20959)
* Add support for --device=nvidia.com/gpu=all
* Add "nvidia.com" to valid options for gpus flag in TestValidateGPUs.
pull/21260/head
parent f4dd7b4fbf
commit fc70407316
|
@ -1479,8 +1479,8 @@ func validateGPUs(value, drvName, rtime string) error {
|
|||
if err := validateGPUsArch(); err != nil {
|
||||
return err
|
||||
}
|
||||
if value != "nvidia" && value != "all" && value != "amd" {
|
||||
return errors.Errorf(`The gpus flag must be passed a value of "nvidia", "amd" or "all"`)
|
||||
if value != "nvidia" && value != "all" && value != "amd" && value != "nvidia.com" {
|
||||
return errors.Errorf(`The gpus flag must be passed a value of "nvidia", "nvidia.com", "amd" or "all"`)
|
||||
}
|
||||
if drvName == constants.Docker && (rtime == constants.Docker || rtime == constants.DefaultContainerRuntime) {
|
||||
return nil
|
||||
|
|
|
@ -814,7 +814,7 @@ func TestValidateGPUs(t *testing.T) {
|
|||
{"nvidia", "docker", "", ""},
|
||||
{"all", "kvm", "docker", "The gpus flag can only be used with the docker driver and docker container-runtime"},
|
||||
{"nvidia", "docker", "containerd", "The gpus flag can only be used with the docker driver and docker container-runtime"},
|
||||
{"cat", "docker", "docker", `The gpus flag must be passed a value of "nvidia", "amd" or "all"`},
|
||||
{"cat", "docker", "docker", `The gpus flag must be passed a value of "nvidia", "nvidia.com", "amd" or "all"`},
|
||||
{"amd", "docker", "docker", ""},
|
||||
{"amd", "docker", "", ""},
|
||||
{"amd", "docker", "containerd", "The gpus flag can only be used with the docker driver and docker container-runtime"},
|
||||
|
|
|
@ -194,6 +194,8 @@ func CreateContainerNode(p CreateParams) error { //nolint to suppress cyclomatic
|
|||
switch p.GPUs {
|
||||
case "all", "nvidia":
|
||||
runArgs = append(runArgs, "--gpus", "all", "--env", "NVIDIA_DRIVER_CAPABILITIES=all")
|
||||
case "nvidia.com":
|
||||
runArgs = append(runArgs, "--device", "nvidia.com/gpu=all")
|
||||
case "amd":
|
||||
/* https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html
|
||||
* "--security-opt seccomp=unconfined" is also required but included above.
|
||||
|
|
|
@ -595,7 +595,7 @@ func (r *Docker) configureDocker(driver string) error {
|
|||
}
|
||||
|
||||
switch r.GPUs {
|
||||
case "all", "nvidia":
|
||||
case "all", "nvidia", "nvidia.com":
|
||||
assets.Addons["nvidia-device-plugin"].EnableByDefault()
|
||||
daemonConfig.DefaultRuntime = "nvidia"
|
||||
runtimes := &dockerDaemonRuntimes{}
|
||||
|
|
|
@ -57,7 +57,7 @@ minikube start [flags]
|
|||
--feature-gates string A set of key=value pairs that describe feature gates for alpha/experimental features.
|
||||
--force Force minikube to perform possibly dangerous operations
|
||||
--force-systemd If set, force the container runtime to use systemd as cgroup manager. Defaults to false.
|
||||
-g, --gpus string Allow pods to use your GPUs. Options include: [all,nvidia,amd] (Docker driver with Docker container-runtime only)
|
||||
-g, --gpus string Allow pods to use your GPUs. Options include: [all,nvidia,amd,nvidia.com] (Docker driver with Docker container-runtime only)
|
||||
--ha Create Highly Available Multi-Control Plane Cluster with a minimum of three control-plane nodes that will also be marked for work.
|
||||
--host-dns-resolver Enable host resolver for NAT DNS requests (virtualbox driver only) (default true)
|
||||
--host-only-cidr string The CIDR to be used for the minikube VM (virtualbox driver only) (default "192.168.59.1/24")
|
||||
|
|
|
@ -30,7 +30,9 @@ date: 2018-01-02
|
|||
sudo sysctl -p
|
||||
```
|
||||
|
||||
- Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) on your host machine
|
||||
- Install NVIDIA support using one of:
|
||||
- Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) on your host machine
|
||||
- Enable [NVIDIA CDI resources](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html) on your host machine.
|
||||
|
||||
- Configure Docker:
|
||||
```shell
|
||||
|
@ -45,10 +47,15 @@ date: 2018-01-02
|
|||
```
|
||||
This will make sure minikube does any required setup or addon installs now that the nvidia runtime is available.
|
||||
|
||||
- Start minikube:
|
||||
```shell
|
||||
minikube start --driver docker --container-runtime docker --gpus all
|
||||
```
|
||||
- Start minikube with one of:
|
||||
- The NVIDIA Container Toolkit
|
||||
```shell
|
||||
minikube start --driver docker --container-runtime docker --gpus all
|
||||
```
|
||||
- NVIDIA CDI resources
|
||||
```shell
|
||||
minikube start --driver docker --container-runtime docker --gpus nvidia.com
|
||||
```
|
||||
|
||||
{{% /tab %}}
|
||||
{{% tab none %}}
|
||||
|
|
Loading…
Reference in New Issue