Hardware:
Mobo = Intel® Server Board S2600CW2R
Processors = 2x E-2650L
Ram = 256GB
PVE = 7.4-3
GPU driver = ./NVIDIA-Linux-x86_64-530.30.02.run (installed with the --no-kernel-module flag inside the LXCs)
I am successfully passing through a RTX-3060 to multiple LXCs and using them.
However, I get errors if I install Docker and the NVIDIA Container Toolkit in the LXC.
I also tried installing Docker and the NVIDIA Container Toolkit on the host, with no errors [now uninstalled, since that setup is not supported].
I would be most grateful for any assistance — thanks in advance!
Mobo = Intel® Server Board S2600CW2R
Processors = 2x E-2650L
Ram = 256GB
PVE = 7.4-3
GPU driver = ./NVIDIA-Linux-x86_64-530.30.02.run (installed with the --no-kernel-module flag inside the LXCs)
I am successfully passing through a RTX-3060 to multiple LXCs and using them.
However, I get errors if I install Docker and the NVIDIA Container Toolkit in the LXC.
I also tried installing Docker and the NVIDIA Container Toolkit on the host, with no errors [now uninstalled, since that setup is not supported].
I would be most grateful for any assistance — thanks in advance!
root@pve001:~# cat /etc/pve/lxc/125.conf
# Allow cgroup access
# Pass through device files
#lxc.mount.entry: /dev/nvidia1 dev/nvidia1 none bind,optional,create=file
arch: amd64
cores: 4
features: nesting=1
hostname: docker-cuda
memory: 8192
nameserver: 1.1.1.1
net0: name=eth0,bridge=vmbr0,firewall=1,gw=192.168.1.1,hwaddr=32:D4:08:14:FA:98,ip=192.168.1.251/24,type=veth
ostype: debian
rootfs: local:125/vm-125-disk-0.raw,size=50G
swap: 8192
unprivileged: 1
lxc.cgroup.devices.allow: c 80:* rwm
lxc.cgroup.devices.allow: c 195:* rwm
lxc.cgroup.devices.allow: c 254:* rwm
lxc.cgroup.devices.allow: c 255:* rwm
lxc.cgroup.devices.allow: c 508:* rwm
lxc.cgroup.devices.allow: c 0:* rwm
lxc.cgroup.devices.allow: c 1:* rwm
lxc.mount.entry: /dev/nvidia0 dev/nvidia0 none bind,optional,create=file
lxc.mount.entry: /dev/nvidia-caps dev/nvidia-caps none bind,optional,create=file
lxc.mount.entry: /dev/nvidiactl dev/nvidiactl none bind,optional,create=file
lxc.mount.entry: /dev/nvidia-modeset dev/nvidia-modeset none bind,optional,create=file
lxc.mount.entry: /dev/nvidia-uvm dev/nvidia-uvm none bind,optional,create=file
lxc.mount.entry: /dev/nvidia-uvm-tools dev/nvidia-uvm-tools none bind,optional,create=file
root@docker-cuda:~# nvidia-smi
Thu Mar 30 12:47:34 2023
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.30.02 Driver Version: 530.30.02 CUDA Version: 12.1 |
|-----------------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+======================+======================|
| 0 NVIDIA GeForce RTX 3060 Off| 00000000:83:00.0 Off | N/A |
| 71% 63C P2 50W / 170W| 2957MiB / 12288MiB | 0% Default |
| | | N/A |
+-----------------------------------------+----------------------+----------------------+
+---------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=======================================================================================|
+---------------------------------------------------------------------------------------+
root@docker-cuda:~# sudo docker run --rm --runtime=nvidia --gpus all nvidia/cuda:11.6.2-base-ubuntu20.04 nvidia-smi
docker: Error response from daemon: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: error during container init: error running hook #0: error running hook: exit status 1, stdout: , stderr: Auto-detected mode as 'legacy'
nvidia-container-cli: mount error: failed to add device rules: unable to find any existing device filters attached to the cgroup: bpf_prog_query(BPF_CGROUP_DEVICE) failed: operation not permitted: unknown.