Hi support team, I have a 4-node Proxmox cluster.
One of the nodes has an NVIDIA L4 GPU, and its vGPU feature is used to give multiple Windows virtual machines stronger graphics capabilities. This has worked normally for the past year, but yesterday I tried to start a Windows 10 VM with a vGPU and 32GB of memory, and it failed to start. The error is as follows:
Feb 20 14:30:10 test11 pvedaemon[826880]: start failed: command '/usr/bin/kvm -id 188 -name 'testvm,debug-threads=on' -no-shutdown -uuid 00000000-0000-0000-0000-000000000188 -chardev 'socket,id=qmp,path=/var/run/qemu-server/188.qmp,server=on,wait=off' -mon 'chardev=qmp,mode=control' -chardev 'socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5' -mon 'chardev=qmp-event,mode=control' -pidfile /var/run/qemu-server/188.pid -daemonize -smbios 'type=1,uuid=4bbfa1e0-18c1-450f-be56-fee5dcdb948b' -smp '8,sockets=1,cores=8,maxcpus=8' -nodefaults -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' -vnc 'unix:/var/run/qemu-server/188.vnc,password=on' -no-hpet -cpu 'kvm64,enforce,hv_ipi,hv_relaxed,hv_reset,hv_runtime,hv_spinlocks=0x1fff,hv_stimer,hv_synic,hv_time,hv_vapic,hv_vpindex,+kvm_pv_eoi,+kvm_pv_unhalt,+lahf_lm,+sep' -m 32768 -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' -device 'vmgenid,guid=ab284ae5-ab4d-48ff-8ad8-97f7831b432c' -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' -device 'vfio-pci,sysfsdev=/sys/bus/mdev/devices/00000000-0000-0000-0000-000000000188,id=hostpci0,bus=pci.0,addr=0x10' -device 'qxl-vga,id=vga,bus=pci.0,addr=0x2' -chardev 'socket,path=/var/run/qemu-server/188.qga,server=on,wait=off,id=qga0' -device 'virtio-serial,id=qga0,bus=pci.0,addr=0x8' -device 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0' -device 'virtio-serial,id=spice,bus=pci.0,addr=0x9' -chardev 'spicevmc,id=vdagent,name=vdagent' -device 'virtserialport,chardev=vdagent,name=com.redhat.spice.0' -spice 'port=2111,addr=0.0.0.0,seamless-migration=on,disable-ticketing=on' -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3,free-page-reporting=on' -iscsi 'initiator-name=iqn.1993-08.org.debian:01:f0e79b183d4d' -device 'virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5' -drive 
'file=rbd:data/vm-188-disk-0:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/data.keyring,if=none,id=drive-scsi1,format=raw,cache=none,aio=io_uring,detect-zeroes=on' -device 'scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=1,drive=drive-scsi1,id=scsi1,bootindex=100' -netdev 'type=tap,id=net0,ifname=tap188i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=72:90:7F:AA:4A:61,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=101' -netdev 'type=tap,id=net1,ifname=tap188i1,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=5E:7C:E6:99:BE:1E,netdev=net1,bus=pci.0,addr=0x13,id=net1' -rtc 'driftfix=slew,base=localtime' -machine 'type=pc-i440fx-7.2+pve0' -global 'kvm-pit.lost_tick_policy=discard' -uuid 4bbfa1e0-18c1-450f-be56-fee5dcdb948b' failed: got timeout
Feb 20 14:30:10 test11 kernel: [8449112.703566] nvidia-vgpu-vfio 00000000-0000-0000-0000-000000000188: Removing from iommu group 329
Feb 20 14:30:10 test11 kernel: [8449112.703618] nvidia-vgpu-vfio 00000000-0000-0000-0000-000000000188: MDEV: detaching iommu
However, the VM with the vGPU starts successfully if I reduce its memory to 16GB. If the VM does not have a vGPU, it also starts normally with 32GB of memory.
This is the current memory usage:
One of the nodes has an NVIDIA L4 GPU, and its vGPU feature is used to give multiple Windows virtual machines stronger graphics capabilities. This has worked normally for the past year, but yesterday I tried to start a Windows 10 VM with a vGPU and 32GB of memory, and it failed to start. The error is as follows:
Feb 20 14:30:10 test11 pvedaemon[826880]: start failed: command '/usr/bin/kvm -id 188 -name 'testvm,debug-threads=on' -no-shutdown -uuid 00000000-0000-0000-0000-000000000188 -chardev 'socket,id=qmp,path=/var/run/qemu-server/188.qmp,server=on,wait=off' -mon 'chardev=qmp,mode=control' -chardev 'socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5' -mon 'chardev=qmp-event,mode=control' -pidfile /var/run/qemu-server/188.pid -daemonize -smbios 'type=1,uuid=4bbfa1e0-18c1-450f-be56-fee5dcdb948b' -smp '8,sockets=1,cores=8,maxcpus=8' -nodefaults -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' -vnc 'unix:/var/run/qemu-server/188.vnc,password=on' -no-hpet -cpu 'kvm64,enforce,hv_ipi,hv_relaxed,hv_reset,hv_runtime,hv_spinlocks=0x1fff,hv_stimer,hv_synic,hv_time,hv_vapic,hv_vpindex,+kvm_pv_eoi,+kvm_pv_unhalt,+lahf_lm,+sep' -m 32768 -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' -device 'vmgenid,guid=ab284ae5-ab4d-48ff-8ad8-97f7831b432c' -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' -device 'vfio-pci,sysfsdev=/sys/bus/mdev/devices/00000000-0000-0000-0000-000000000188,id=hostpci0,bus=pci.0,addr=0x10' -device 'qxl-vga,id=vga,bus=pci.0,addr=0x2' -chardev 'socket,path=/var/run/qemu-server/188.qga,server=on,wait=off,id=qga0' -device 'virtio-serial,id=qga0,bus=pci.0,addr=0x8' -device 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0' -device 'virtio-serial,id=spice,bus=pci.0,addr=0x9' -chardev 'spicevmc,id=vdagent,name=vdagent' -device 'virtserialport,chardev=vdagent,name=com.redhat.spice.0' -spice 'port=2111,addr=0.0.0.0,seamless-migration=on,disable-ticketing=on' -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3,free-page-reporting=on' -iscsi 'initiator-name=iqn.1993-08.org.debian:01:f0e79b183d4d' -device 'virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5' -drive 
'file=rbd:data/vm-188-disk-0:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/data.keyring,if=none,id=drive-scsi1,format=raw,cache=none,aio=io_uring,detect-zeroes=on' -device 'scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=1,drive=drive-scsi1,id=scsi1,bootindex=100' -netdev 'type=tap,id=net0,ifname=tap188i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=72:90:7F:AA:4A:61,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=101' -netdev 'type=tap,id=net1,ifname=tap188i1,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=5E:7C:E6:99:BE:1E,netdev=net1,bus=pci.0,addr=0x13,id=net1' -rtc 'driftfix=slew,base=localtime' -machine 'type=pc-i440fx-7.2+pve0' -global 'kvm-pit.lost_tick_policy=discard' -uuid 4bbfa1e0-18c1-450f-be56-fee5dcdb948b' failed: got timeout
Feb 20 14:30:10 test11 kernel: [8449112.703566] nvidia-vgpu-vfio 00000000-0000-0000-0000-000000000188: Removing from iommu group 329
Feb 20 14:30:10 test11 kernel: [8449112.703618] nvidia-vgpu-vfio 00000000-0000-0000-0000-000000000188: MDEV: detaching iommu
However, the VM with the vGPU starts successfully if I reduce its memory to 16GB. If the VM does not have a vGPU, it also starts normally with 32GB of memory.
This is the current memory usage:
Code:
~# free -h
total used free shared buff/cache available
Mem: 503Gi 312Gi 169Gi 71Mi 21Gi 188Gi
Swap: 8.0Gi 7.7Gi 271Mi
Last edited: