* does it still crash without the GPU passthrough in the VM? How can I debug this?
* which version of PVE are you on? What information can I provide here?
pveversion -v
qm config VMID
(replace VMID with yours) root@pve:~# pveversion -v
proxmox-ve: 7.1-1 (running kernel: 5.13.19-6-pve)
pve-manager: 7.1-12 (running version: 7.1-12/b3c09de3)
pve-kernel-helper: 7.1-14
pve-kernel-5.13: 7.1-9
pve-kernel-5.13.19-6-pve: 5.13.19-15
pve-kernel-5.13.19-2-pve: 5.13.19-4
ceph-fuse: 15.2.15-pve1
corosync: 3.1.5-pve2
criu: 3.15-1+pve-1
glusterfs-client: 9.2-1
ifupdown2: 3.1.0-1+pmx3
ksm-control-daemon: 1.4-1
libjs-extjs: 7.0.0-1
libknet1: 1.22-pve2
libproxmox-acme-perl: 1.4.1
libproxmox-backup-qemu0: 1.2.0-1
libpve-access-control: 7.1-7
libpve-apiclient-perl: 3.2-1
libpve-common-perl: 7.1-5
libpve-guest-common-perl: 4.1-1
libpve-http-server-perl: 4.1-1
libpve-storage-perl: 7.1-1
libspice-server1: 0.14.3-2.1
lvm2: 2.03.11-2.1
lxc-pve: 4.0.11-1
lxcfs: 4.0.11-pve1
novnc-pve: 1.3.0-2
proxmox-backup-client: 2.1.5-1
proxmox-backup-file-restore: 2.1.5-1
proxmox-mini-journalreader: 1.3-1
proxmox-widget-toolkit: 3.4-7
pve-cluster: 7.1-3
pve-container: 4.1-4
pve-docs: 7.1-2
pve-edk2-firmware: 3.20210831-2
pve-firewall: 4.2-5
pve-firmware: 3.3-6
pve-ha-manager: 3.3-3
pve-i18n: 2.6-2
pve-qemu-kvm: 6.1.1-2
pve-xtermjs: 4.16.0-1
qemu-server: 7.1-4
smartmontools: 7.2-1
spiceterm: 3.2-2
swtpm: 0.7.1~bpo11+1
vncterm: 1.7-1
zfsutils-linux: 2.1.4-pve1
agent: 1
args: -cpu 'host,+kvm_pv_unhalt,+kvm_pv_eoi,hv_vendor_id=NV43FIX,kvm=off'
audio0: device=intel-hda,driver=spice
bios: ovmf
boot: order=virtio0;net0
cores: 16
cpu: host,hidden=1,flags=+pcid
efidisk0: local-lvm:vm-100-disk-0,efitype=4m,pre-enrolled-keys=1,size=4M
hostpci0: 0b:00,pcie=1
machine: pc-q35-6.1
memory: 61440
meta: creation-qemu=6.1.1,ctime=1648993561
name: w11
net0: virtio=CA:24:EA:3F:AE:34,bridge=vmbr0,firewall=1
numa: 0
onboot: 1
ostype: win11
parent: install_jeux
scsihw: virtio-scsi-pci
smbios1: uuid=20e3d62d-bc73-43e4-b8d4-e4c68e4134e6
sockets: 1
tpmstate0: local-lvm:vm-100-disk-1,size=4M,version=v2.0
usb0: host=1b1c:1b62
usb1: host=1b1c:1b51
virtio0: local-lvm:vm-100-disk-2,size=200G
virtio1: local-lvm:vm-100-disk-3,backup=0,size=1000G
virtio2: local-lvm:vm-100-disk-4,backup=0,size=500G
vmgenid: ea8f360f-fc03-409a-a66f-71aa8bbb63fc
---- /etc/default/grub -------------------------------------
#GRUB_CMDLINE_LINUX_DEFAULT="quiet amd_iommu=on iommu=pt textonly video=astdrmfb video=efifb:off"
GRUB_CMDLINE_LINUX_DEFAULT="quiet amd_iommu=on iommu=pt pcie_acs_override=downstream,multifunction nofb nomodeset vieo=vesafb:off,efifb:off"
---- /etc/modules ------------------------------------------
# /etc/modules: kernel modules to load at boot time.
#
# This file contains the names of kernel modules that should be loaded
# at boot time, one per line. Lines beginning with "#" are ignored.
vfio
vfio_iommu_type1
vfio_pci
vfio_virqfd
---- /etc/modprobe.d/iommu_unsafe_interrupts.conf ----------
options vfio_iommu_type1 allow_unsafe_interrupts=1
---- /etc/modprobe.d/kvm.conf ------------------------------
options kvm ignore_msrs=1 report_ignored_msrs=0
---- /etc/modprobe.d/blacklist.conf ------------------------
blacklist radeon
blacklist nouveau
blacklist nvidia
---- lspci -v | grep NVIDIA --------------------------------
0b:00.0 VGA compatible controller: NVIDIA Corporation Device 2488 (rev a1) (prog-if 00 [VGA controller])
0b:00.1 Audio device: NVIDIA Corporation GA104 High Definition Audio Controller (rev a1)
---- spci -n -s 0b:00 -------------------------------------
0b:00.0 0300: 10de:2488 (rev a1)
0b:00.1 0403: 10de:228b (rev a1)
---- /etc/modprobe.d/vfio.conf -----------------------------
options vfio-pci ids=10de:2488,10de:228b disable_vga=1
---- /etc/default/grub -------------------------------------
GRUB_CMDLINE_LINUX_DEFAULT="quiet amd_iommu=on iommu=pt textonly video=astdrmfb video=efifb:off"
---- /etc/modules ------------------------------------------
vfio
vfio_iommu_type1
vfio_pci
vfio_virqfd
---- dmesg | grep remapping --------------------------------
[ 0.831432] AMD-Vi: Interrupt remapping enabled
---- /etc/modprobe.d/kvm.conf ------------------------------
options kvm ignore_msrs=1 report_ignored_msrs=0
---- /etc/modprobe.d/blacklist.conf ------------------------
blacklist radeon
blacklist nouveau
blacklist nvidia
blacklist nvidiafb
---- spci -n -s 0b:00 -------------------------------------
0b:00.0 0300: 10de:2488 (rev a1)
0b:00.1 0403: 10de:228b (rev a1)
---- lspci -nnk -d lspci -nnk -d 10de:2488 ----------------
0b:00.0 VGA compatible controller [0300]: NVIDIA Corporation Device [10de:2488] (rev a1)
Subsystem: Micro-Star International Co., Ltd. [MSI] Device [1462:3904]
Kernel driver in use: vfio-pci
Kernel modules: nvidiafb, nouveau
0b:00.1 Audio device [0403]: NVIDIA Corporation GA104 High Definition Audio Controller [10de:228b] (rev a1)
Subsystem: Micro-Star International Co., Ltd. [MSI] Device [1462:3904]
Kernel driver in use: vfio-pci
Kernel modules: snd_hda_intel
---- /etc/modprobe.d/vfio.conf -----------------------------
options vfio-pci ids=10de:2488,10de:228b disable_vga=1
---- qm config 100 ----------------------------------------
agent: 1
args: -cpu 'host,+kvm_pv_unhalt,+kvm_pv_eoi,hv_vendor_id=NV43FIX,kvm=off'
audio0: device=intel-hda,driver=spice
bios: ovmf
boot: order=virtio0
cores: 16
cpu: host,hidden=1,flags=+pcid
description: sans GPU %3A virtio GPU ok%0A%0Aavec GPU sans audio %3A display = default OK (d%C3%A9marrage long)
efidisk0: local-lvm:vm-100-disk-0,efitype=4m,pre-enrolled-keys=1,size=4M
hostpci0: 0000:0b:00,pcie=1,x-vga=1
machine: pc-q35-6.1
memory: 16384
meta: creation-qemu=6.1.1,ctime=1648993561
name: w11
net0: virtio=CA:24:EA:3F:AE:34,bridge=vmbr0,firewall=1
numa: 0
onboot: 1
ostype: win11
parent: ok_4
scsihw: virtio-scsi-pci
smbios1: uuid=20e3d62d-bc73-43e4-b8d4-e4c68e4134e6
sockets: 1
tpmstate0: local-lvm:vm-100-disk-1,size=4M,version=v2.0
usb0: host=1-3
usb1: host=1b1c:1b6e
usb2: host=1b1c:1b4b
vga: none
virtio0: local-lvm:vm-100-disk-2,discard=on,size=200G
virtio1: local-lvm:vm-100-disk-3,backup=0,discard=on,size=1000G
virtio2: local-lvm:vm-100-disk-4,backup=0,discard=on,size=500G
vmgenid: 97bca21e-6529-4d81-bbc6-2298b90031d4
---- pveversion -v ---------------------------------------
proxmox-ve: 7.1-1 (running kernel: 5.13.19-6-pve)
pve-manager: 7.1-12 (running version: 7.1-12/b3c09de3)
pve-kernel-helper: 7.1-14
pve-kernel-5.13: 7.1-9
pve-kernel-5.13.19-6-pve: 5.13.19-15
pve-kernel-5.13.19-2-pve: 5.13.19-4
ceph-fuse: 15.2.15-pve1
corosync: 3.1.5-pve2
criu: 3.15-1+pve-1
glusterfs-client: 9.2-1
ifupdown2: 3.1.0-1+pmx3
ksm-control-daemon: 1.4-1
libjs-extjs: 7.0.0-1
libknet1: 1.22-pve2
libproxmox-acme-perl: 1.4.1
libproxmox-backup-qemu0: 1.2.0-1
libpve-access-control: 7.1-7
libpve-apiclient-perl: 3.2-1
libpve-common-perl: 7.1-5
libpve-guest-common-perl: 4.1-1
libpve-http-server-perl: 4.1-1
libpve-storage-perl: 7.1-1
libspice-server1: 0.14.3-2.1
lvm2: 2.03.11-2.1
lxc-pve: 4.0.11-1
lxcfs: 4.0.11-pve1
novnc-pve: 1.3.0-2
proxmox-backup-client: 2.1.5-1
proxmox-backup-file-restore: 2.1.5-1
proxmox-mini-journalreader: 1.3-1
proxmox-widget-toolkit: 3.4-7
pve-cluster: 7.1-3
pve-container: 4.1-4
pve-docs: 7.1-2
pve-edk2-firmware: 3.20210831-2
pve-firewall: 4.2-5
pve-firmware: 3.3-6
pve-ha-manager: 3.3-3
pve-i18n: 2.6-2
pve-qemu-kvm: 6.1.1-2
pve-xtermjs: 4.16.0-1
qemu-server: 7.1-4
smartmontools: 7.2-1
spiceterm: 3.2-2
swtpm: 0.7.1~bpo11+1
vncterm: 1.7-1
zfsutils-linux: 2.1.4-pve1
---- MAX_USB_DEVICES -------------------------------------
#cat /usr/share/perl5/PVE/QemuServer.pm | grep -e MAX_USB_DEVICES
my $MAX_USB_DEVICES = 30;
for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
$conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES);
$conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder);
/config/workspace/perso/px/go-px.sh
# go-px.sh - runs on the workstation: uploads px.sh to the Proxmox host over
# SSH and executes it there in an interactive (tty) session.
clear
srv="px"
echo "---- LOCAL $srv --------------------------------------------"
# Connection settings for the remote host.
u="root"
ip="192.168.1.54"
port="22"
# Remote directory that receives the script (a relative path, so it is
# resolved by the remote side, normally against the login directory).
ddist="./code"
# Local script to upload and its path once uploaded.
scriptl="./perso/$srv/$srv.sh"
scriptd="$ddist/$srv.sh"
# Local / remote locations of the GPU ROM (only used by the optional scp below).
roml="./perso/$srv/gpu.rom"
romd="/usr/share/kvm/gpu.rom"
# Private key used for authentication; "$cert.pub" is the matching public key.
cert="./perso/ssh/id_vscode"
echo "---- LOCAL GO --------------------------------------------"
# One-time setup: push the public key so that plInitSSH can install it.
#scp -P "$port" "$cert.pub" "$u@$ip:$ddist/key.pub"
# Stop here if the upload fails; otherwise ssh would run a stale remote copy.
if ! scp -P "$port" -i "$cert" -r "$scriptl" "$u@$ip:$ddist"; then
  echo "go-px: upload of $scriptl failed" >&2
  exit 1
fi
#scp -P "$port" -i "$cert" -r "$roml" "$u@$ip:$romd" # copy gpu.rom file
# -t allocates a tty so that remote commands such as `clear` work.
ssh -p "$port" -t -i "$cert" "$u@$ip" "bash $scriptd"
echo "---- LOCAL $srv-FIN ----------------------------------------"
/config/workspace/perso/px/px.sh
#!/bin/bash
export TERM=xterm
#echo "---- FX ----------------------------------------"
#######################################
# Print every IOMMU group followed by the devices attached to it.
# Arguments:
#   $1 - (optional) directory holding the groups;
#        defaults to /sys/kernel/iommu_groups.
# Outputs:
#   One "IOMMU Group N:" line per group in version-sorted order, each followed
#   by one tab-indented `lspci -nns` line per device of that group.
#######################################
plIommuGroups() {
  local root=${1:-/sys/kernel/iommu_groups}
  local group dev

  # Read find's output line by line instead of word-splitting $(find ...).
  # Listing the children of "$root" (rather than expanding "$root"/*) means an
  # empty or missing directory yields no groups at all, and no global shell
  # option such as nullglob has to be changed.
  while IFS= read -r group; do
    echo "IOMMU Group ${group##*/}:"
    for dev in "$group"/devices/*; do
      # An unmatched glob stays literal: skip it.
      [[ -e "$dev" || -L "$dev" ]] || continue
      echo -e "\t$(lspci -nns "${dev##*/}")"
    done
  done < <(find "$root" -mindepth 1 -maxdepth 1 -type d | sort -V)
}
#######################################
# Dump the host configuration that matters for GPU passthrough, one banner
# per file/command, so the output can be pasted as a single report.
# The PCI address (0b:00), the PCI ids (10de:2488, 10de:228b) and the VM id
# (100) are hard-coded.
# The commented-out commands are the ones that were used to create each file.
# NOTE(review): the function ends with `exit`, so it terminates the whole
# script and nothing placed after the call runs - confirm this is intended.
#######################################
plGpuPassthroughInfo(){
  echo "---- DEBUG -------------------------------------"
  clear
  echo "---- /etc/default/grub -------------------------------------"
  #update-grub
  grep GRUB_CMDLINE_LINUX_DEFAULT /etc/default/grub
  echo "---- /etc/modules ------------------------------------------"
  cat /etc/modules
  echo "---- /etc/modprobe.d/iommu_unsafe_interrupts.conf ----------"
  # Interrupt-remapping status is printed under the same banner.
  dmesg | grep remapping
  #echo "options vfio_iommu_type1 allow_unsafe_interrupts=1" > /etc/modprobe.d/iommu_unsafe_interrupts.conf
  cat /etc/modprobe.d/iommu_unsafe_interrupts.conf
  echo "---- /etc/modprobe.d/kvm.conf ------------------------------"
  #echo "options kvm ignore_msrs=1" > /etc/modprobe.d/kvm.conf
  cat /etc/modprobe.d/kvm.conf
  echo "---- /etc/modprobe.d/blacklist.conf ------------------------"
  #echo "blacklist radeon" >> /etc/modprobe.d/blacklist.conf
  #echo "blacklist nouveau" >> /etc/modprobe.d/blacklist.conf
  #echo "blacklist nvidia" >> /etc/modprobe.d/blacklist.conf
  #echo "blacklist nvidiafb" >> /etc/modprobe.d/blacklist.conf
  cat /etc/modprobe.d/blacklist.conf
  echo "---- spci -n -s 0b:00 -------------------------------------"
  lspci -n -s 0b:00
  echo "---- lspci -nnk -d lspci -nnk -d 10de:2488 ----------------"
  # -k shows which kernel driver is bound to each function of the card.
  lspci -nnk -d 10de:2488
  lspci -nnk -d 10de:228b
  echo "---- /etc/modprobe.d/vfio.conf -----------------------------"
  #echo "options vfio-pci ids=10de:2488,10de:228b disable_vga=1"> /etc/modprobe.d/vfio.conf
  #update-initramfs -u
  #reset
  cat /etc/modprobe.d/vfio.conf
  echo "---- /etc/pve/qemu-server/100.conf -------------------------"
  # BAK : cp /etc/pve/qemu-server/100.conf /etc/pve/qemu-server/100.conf.bak
  # NEW :
  #machine: q35
  #hostpci0: 0b:00,pcie=1,romfile=gpu.rom
  #cpu: host,hidden=1,flags=+pcid
  #args: -cpu 'host,+kvm_pv_unhalt,+kvm_pv_eoi,hv_vendor_id=NV43FIX,kvm=off'
  #usb0: host=1-3,usb3=1
  grep -e machine -e cpu -e args /etc/pve/qemu-server/100.conf
  echo "---- qm config 100 ----------------------------------------"
  #qm config 100
  echo "---- pveversion -v ---------------------------------------"
  #pveversion -v
  echo "---- MAX_USB_DEVICES -------------------------------------"
  # 1/ qm monitor 100
  # info usbhost
  # info usb
  # 2/ lsusb -t
  grep -e MAX_USB_DEVICES /usr/share/perl5/PVE/QemuServer.pm
  echo "---- DEBUG-END ---------------------------------"
  exit
}
# List the ROM files matching /usr/share/kvm/g*.rom (the go-px.sh upload
# target is /usr/share/kvm/gpu.rom). Nothing is modified; the comments below
# record where the ROM came from and the hostpci0 line that references it.
plGpuInitRom() {
ls -l /usr/share/kvm/g*.rom
# Guide followed for the setup:
# https://www.reddit.com/r/homelab/comments/b5xpua/the_ultimate_beginners_guide_to_gpu_passthrough/
# gpu rom file : https://www.techpowerup.com/vgabios/234907/msi-rtx3070-8192-210518
# VM configuration file that refers to the ROM:
# /etc/pve/qemu-server/100.conf
# hostpci0: 0b:00,pcie=1,romfile=gpu.rom
}
#######################################
# Delete log files to free disk space, then list the remaining large files.
# Arguments:
#   $1 - (optional) root prefix without a trailing slash, e.g. a mounted or
#        test tree; defaults to the live system ("/").
# Outputs:
#   Paths of regular files larger than 100M, except those below a "dn"
#   directory and those under <root>/proc and <root>/sys.
# NOTE(review): a file that a running daemon still holds open keeps its disk
# blocks until the daemon closes or reopens it - confirm whether the logging
# services need a restart/rotate after this.
#######################################
plClean() {
  local root=${1:-}

  # apt install ncdu
  # ncdu /
  rm -f -- "${root}"/var/log/*.log
  rm -f -- "${root}/var/log/messages"
  rm -f -- "${root}/var/log/syslog"
  rm -rf -- "${root}"/var/log/journal/*
  rm -rf -- "${root}"/run/log/journal/*

  # Prune the kernel pseudo-filesystems: their entries are not disk files
  # (e.g. /proc/kcore reports a huge size) and walking them only adds
  # errors and false positives to the listing.
  find "${root:-/}" \
    \( -path "${root}/proc" -o -path "${root}/sys" \) -prune \
    -o -type f ! -path "*/dn/*" -size +100M -print
  #find / -type f ! -path "*/dn/*" -name '*.log'
}
#######################################
# Bring the host's packages up to date and install the ncdu disk-usage tool.
# Every package command runs non-interactively (-y). `sudo` is used only when
# the caller is not root, so the function also works on a root-only host
# where sudo is not installed.
# dist-upgrade is used instead of upgrade because it may install or remove
# packages when dependencies change, which Proxmox VE updates rely on.
# Returns: status of the final install command.
#######################################
plInstall() {
  local -a as_root=()
  (( EUID == 0 )) || as_root=(sudo)

  echo "---- UPDATE ------------------------------------"
  "${as_root[@]}" apt-get update && "${as_root[@]}" apt-get -y dist-upgrade
  "${as_root[@]}" apt-get -y autoremove
  echo "---- INSTALL -----------------------------------"
  "${as_root[@]}" apt-get -y install ncdu # free disk space tools : ncdu /
}
#######################################
# Install the uploaded public key for the current user, then show the result.
# Globals:
#   ddist (read) - directory that contains the uploaded key.pub
#   HOME  (read) - the key is added to $HOME/.ssh/authorized_keys
# Outputs:
#   Banners, a listing of $ddist and the resulting authorized_keys file.
# Returns:
#   0 on success; non-zero when ddist is unset, key.pub is unreadable or the
#   key cannot be stored (key.pub is then left in place).
#######################################
plInitSSH() {
  local key_file ssh_dir auth_file key

  echo "---- +SSH --------------------------------------"
  if [[ -z "${ddist:-}" ]]; then
    echo "plInitSSH: ddist is not set" >&2
    return 1
  fi
  key_file="$ddist/key.pub"
  ssh_dir="$HOME/.ssh"
  auth_file="$ssh_dir/authorized_keys"

  if [[ ! -r "$key_file" ]]; then
    echo "plInitSSH: cannot read $key_file" >&2
    return 1
  fi

  # Create ~/.ssh and authorized_keys with owner-only permissions if missing.
  if [[ ! -d "$ssh_dir" ]]; then
    mkdir -p -- "$ssh_dir" || return
    chmod 700 -- "$ssh_dir" || return
  fi
  if [[ ! -e "$auth_file" ]]; then
    ( umask 077 && : >> "$auth_file" ) || return
  fi

  # Append each key only if that exact line is not present yet, so running
  # the function again does not pile up duplicates.
  while IFS= read -r key || [[ -n "$key" ]]; do
    [[ -n "$key" ]] || continue
    if ! grep -qxF -- "$key" "$auth_file"; then
      printf '%s\n' "$key" >> "$auth_file" || return
    fi
  done < "$key_file"

  # The key is installed: the uploaded copy is no longer needed.
  rm -f -- "$key_file"

  echo "---- +SSH verif --------------------------------"
  ls -l "$ddist"
  cat "$auth_file"
  echo "---- +SSH-END ----------------------------------"
}
# Script-wide settings, assigned before any function is called.
#echo "---- VARS ----------------------------------------"
# srv: short host label, only used in the banner lines.
srv="px"
# ddist: directory (relative to the current directory) where plInitSSH
# looks for the uploaded key.pub.
ddist="./code"
echo "---- DIST $srv ---------------------------------"
#ls -l ./code
#echo "---- GRUB --------------------------------------"
<<'Comment'
https://pve.proxmox.com/wiki/Pci_passthrough#NVIDIA_Tips
Enable the IOMMU & PT Mode : OK
nano /etc/default/grub
GRUB_CMDLINE_LINUX_DEFAULT="quiet amd_iommu=on iommu=pt textonly video=astdrmfb video=efifb:off"
GRUB_CMDLINE_LINUX=""
update-grub
Verify IOMMU is enabled : OK
dmesg | grep -e DMAR -e IOMMU #There should be a line that looks like "DMAR: IOMMU enabled". If there is no output, something is wrong.
[ 0.830386] pci 0000:00:00.2: AMD-Vi: IOMMU performance counters supported
[ 0.835639] pci 0000:00:00.2: AMD-Vi: Found IOMMU cap 0x40
[ 0.836585] perf/amd_iommu: Detected AMD IOMMU #0 (2 banks, 4 counters/bank).
Required Modules : OK
nano /etc/modules
vfio
vfio_iommu_type1
vfio_pci
vfio_virqfd
IOMMU Interrupt Remapping : OK
dmesg | grep 'remapping'
[ 0.835646] AMD-Vi: Interrupt remapping enabled
Verify IOMMU Isolation : OK
BIOS : ACS (Access Control Services) ?
find /sys/kernel/iommu_groups/ -type l
/sys/kernel/iommu_groups/17/devices/0000:03:01.0
/sys/kernel/iommu_groups/7/devices/0000:00:05.0
/sys/kernel/iommu_groups/25/devices/0000:06:00.0
/sys/kernel/iommu_groups/15/devices/0000:02:00.0
...
#view the IOMMU groups and attached devices
plIommuGroups
Determine your PCI card address, and configure your VM
lspci | grep -i nvidia
0b:00.0 VGA compatible controller: NVIDIA Corporation Device 2488 (rev a1)
0b:00.1 Audio device: NVIDIA Corporation GA104 High Definition Audio Controller (rev a1)
Add Nvidia CARD to VM
nano /etc/pve/qemu-server/<vmid>.conf
nano /etc/pve/qemu-server/100.conf
hostpci0: 0b:00.0;0b:00.1
hostpci0: 0b:00,pcie=1
hostpci0: 0b:00,pcie=1,romfile=gpu.rom
First, find the device and vendor id of your vga card
lspci -n -s 0b:00
0b:00.0 0300: 10de:2488 (rev a1)
0b:00.1 0403: 10de:228b (rev a1)
Host doesn't try to use the GPU
echo "options vfio-pci ids=10de:2488,10de:228b" > /etc/modprobe.d/vfio.conf
Blacklist the drivers:
echo "blacklist radeon" >> /etc/modprobe.d/blacklist.conf
echo "blacklist nouveau" >> /etc/modprobe.d/blacklist.conf
echo "blacklist nvidia" >> /etc/modprobe.d/blacklist.conf
nano /etc/modprobe.d/blacklist.conf
REBOOT machine
reboot
VM CONFIG
1/ GPU OVMF PCI Passthrough (recommended)
Add disable_vga=1 to vfio-pci module, which try to to opt-out devices from vga arbitration if possible
echo "options vfio-pci ids=10de:2488,10de:228b disable_vga=1" > /etc/modprobe.d/vfio.conf
NVIDIA Tips
Some Windows applications like geforce experience, Passmark Performance Test and SiSoftware Sandra crash can crash the VM. You need to add:
echo "options kvm ignore_msrs=1" > /etc/modprobe.d/kvm.conf
If you see a lot of warning messages in your 'dmesg' system log, add the following instead:
echo "options kvm ignore_msrs=1 report_ignored_msrs=0" > /etc/modprobe.d/kvm.conf
Retirer le Display par défaut de la VM une fois qu'on a ajouté NVIDIA !!!
DEBUG
dmesg -T | grep -e BAR -e Intel -e iommu -e IOMMU -e passthrough -e DMAR -e bug -e Bug
dmesg -T | grep -e BAR -e AMD -e iommu -e IOMMU -e passthrough -e DMAR -e bug -e Bug
Comment
# Entry point: each step is enabled or disabled by (un)commenting its call.
echo "---- CLEAN -------------------------------------"
# One-time setup steps, currently disabled.
#plInitSSH
#plInstall
plClean
#echo "---- Gpu Passthrough ---------------------------"
# plGpuPassthroughInfo finishes with `exit`, so the script stops inside it and
# the closing banner below is not reached while this call is enabled.
plGpuPassthroughInfo
#plGpuInitRom
echo "---- DIST $srv-END -----------------------------"
I have the exact same problem, with Windows 10 in Proxmox. But the solution you explained doesn't work for me; I still crash every 11 minutes. OK, I managed to make it work.
I had:
- a VIDEO_DXGKRNL_FATAL_ERROR blue screen of death inside the Windows VM, with a crash/reboot every 5 minutes
- the local (pve) volume (25 GB), which normally uses 4 GB, filling up with logs and running out of space every 5-10 minutes, making the Windows VM crash and get corrupted
I tested many things and wrote an SSH script with a plClean function that cleans the logs, running it to try to delete the logs before the 5-minute limit.
The scripts live inside a code-server Docker container on my Synology.
From the VS Code web page served by Docker, I run go-px.sh, which copies the second script, px.sh, to the Proxmox host via SSH and executes it there.
But the used space just kept exploding every time I ran the Windows VM.
The Windows VM got corrupted every time.
So I made a clean Windows VM without GPU passthrough and took a snapshot before testing passthrough.
Then I tested adding/removing parameters one by one in all the files mentioned in different GPU passthrough tutorials,
and checked the files via the plGpuPassthroughInfo function.
My SOLUTION to the exploding logs in local (pve) and VIDEO_DXGKRNL_FATAL_ERROR:
I am not sure, but it looks like this line messed everything up:
echo "options vfio_iommu_type1 allow_unsafe_interrupts=1" > /etc/modprobe.d/iommu_unsafe_interrupts.conf
While testing, I commented out the
"options vfio_iommu_type1 allow_unsafe_interrupts=1" line
in the /etc/modprobe.d/iommu_unsafe_interrupts.conf file.
After disabling unsafe interrupts, local (pve) no longer filled up with growing logs and the Windows VM did not crash.
Now I will add USB devices one by one and install drivers such as lghub one by one,
snapshotting the VM before every install, then checking whether something weird happens before installing more things.
Now trying to add an entire USB hub via port passthrough ... not working.
My final config:
Code:---- /etc/default/grub ------------------------------------- GRUB_CMDLINE_LINUX_DEFAULT="quiet amd_iommu=on iommu=pt textonly video=astdrmfb video=efifb:off" ---- /etc/modules ------------------------------------------ vfio vfio_iommu_type1 vfio_pci vfio_virqfd ---- dmesg | grep remapping -------------------------------- [ 0.831432] AMD-Vi: Interrupt remapping enabled ---- /etc/modprobe.d/kvm.conf ------------------------------ options kvm ignore_msrs=1 report_ignored_msrs=0 ---- /etc/modprobe.d/blacklist.conf ------------------------ blacklist radeon blacklist nouveau blacklist nvidia blacklist nvidiafb ---- spci -n -s 0b:00 ------------------------------------- 0b:00.0 0300: 10de:2488 (rev a1) 0b:00.1 0403: 10de:228b (rev a1) ---- lspci -nnk -d lspci -nnk -d 10de:2488 ---------------- 0b:00.0 VGA compatible controller [0300]: NVIDIA Corporation Device [10de:2488] (rev a1) Subsystem: Micro-Star International Co., Ltd. [MSI] Device [1462:3904] Kernel driver in use: vfio-pci Kernel modules: nvidiafb, nouveau 0b:00.1 Audio device [0403]: NVIDIA Corporation GA104 High Definition Audio Controller [10de:228b] (rev a1) Subsystem: Micro-Star International Co., Ltd. 
[MSI] Device [1462:3904] Kernel driver in use: vfio-pci Kernel modules: snd_hda_intel ---- /etc/modprobe.d/vfio.conf ----------------------------- options vfio-pci ids=10de:2488,10de:228b disable_vga=1 ---- qm config 100 ---------------------------------------- agent: 1 args: -cpu 'host,+kvm_pv_unhalt,+kvm_pv_eoi,hv_vendor_id=NV43FIX,kvm=off' audio0: device=intel-hda,driver=spice bios: ovmf boot: order=virtio0 cores: 16 cpu: host,hidden=1,flags=+pcid description: sans GPU %3A virtio GPU ok%0A%0Aavec GPU sans audio %3A display = default OK (d%C3%A9marrage long) efidisk0: local-lvm:vm-100-disk-0,efitype=4m,pre-enrolled-keys=1,size=4M hostpci0: 0000:0b:00,pcie=1,x-vga=1 machine: pc-q35-6.1 memory: 16384 meta: creation-qemu=6.1.1,ctime=1648993561 name: w11 net0: virtio=CA:24:EA:3F:AE:34,bridge=vmbr0,firewall=1 numa: 0 onboot: 1 ostype: win11 parent: ok_4 scsihw: virtio-scsi-pci smbios1: uuid=20e3d62d-bc73-43e4-b8d4-e4c68e4134e6 sockets: 1 tpmstate0: local-lvm:vm-100-disk-1,size=4M,version=v2.0 usb0: host=1-3 usb1: host=1b1c:1b6e usb2: host=1b1c:1b4b vga: none virtio0: local-lvm:vm-100-disk-2,discard=on,size=200G virtio1: local-lvm:vm-100-disk-3,backup=0,discard=on,size=1000G virtio2: local-lvm:vm-100-disk-4,backup=0,discard=on,size=500G vmgenid: 97bca21e-6529-4d81-bbc6-2298b90031d4 ---- pveversion -v --------------------------------------- proxmox-ve: 7.1-1 (running kernel: 5.13.19-6-pve) pve-manager: 7.1-12 (running version: 7.1-12/b3c09de3) pve-kernel-helper: 7.1-14 pve-kernel-5.13: 7.1-9 pve-kernel-5.13.19-6-pve: 5.13.19-15 pve-kernel-5.13.19-2-pve: 5.13.19-4 ceph-fuse: 15.2.15-pve1 corosync: 3.1.5-pve2 criu: 3.15-1+pve-1 glusterfs-client: 9.2-1 ifupdown2: 3.1.0-1+pmx3 ksm-control-daemon: 1.4-1 libjs-extjs: 7.0.0-1 libknet1: 1.22-pve2 libproxmox-acme-perl: 1.4.1 libproxmox-backup-qemu0: 1.2.0-1 libpve-access-control: 7.1-7 libpve-apiclient-perl: 3.2-1 libpve-common-perl: 7.1-5 libpve-guest-common-perl: 4.1-1 libpve-http-server-perl: 4.1-1 libpve-storage-perl: 7.1-1 
libspice-server1: 0.14.3-2.1 lvm2: 2.03.11-2.1 lxc-pve: 4.0.11-1 lxcfs: 4.0.11-pve1 novnc-pve: 1.3.0-2 proxmox-backup-client: 2.1.5-1 proxmox-backup-file-restore: 2.1.5-1 proxmox-mini-journalreader: 1.3-1 proxmox-widget-toolkit: 3.4-7 pve-cluster: 7.1-3 pve-container: 4.1-4 pve-docs: 7.1-2 pve-edk2-firmware: 3.20210831-2 pve-firewall: 4.2-5 pve-firmware: 3.3-6 pve-ha-manager: 3.3-3 pve-i18n: 2.6-2 pve-qemu-kvm: 6.1.1-2 pve-xtermjs: 4.16.0-1 qemu-server: 7.1-4 smartmontools: 7.2-1 spiceterm: 3.2-2 swtpm: 0.7.1~bpo11+1 vncterm: 1.7-1 zfsutils-linux: 2.1.4-pve1 ---- MAX_USB_DEVICES ------------------------------------- #cat /usr/share/perl5/PVE/QemuServer.pm | grep -e MAX_USB_DEVICES my $MAX_USB_DEVICES = 30; for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) { $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES); $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder);
Test scripts :
Code:/config/workspace/perso/px/go-px.sh clear srv="px" echo "---- LOCAL $srv --------------------------------------------" u="root" ip="192.168.1.54" port="22" ddist="./code" scriptl="./perso/$srv/$srv.sh" roml="./perso/$srv/gpu.rom" romd="/usr/share/kvm/gpu.rom" scriptd="$ddist/$srv.sh" cert="./perso/ssh/id_vscode" echo "---- LOCAL GO --------------------------------------------" #scp -P $port $cert.pub $u@$ip:$ddist/key.pub scp -P $port -i $cert -r $scriptl "$u@$ip:$ddist" #scp -P $port -i $cert -r $roml "$u@$ip:$romd" # copy gpu.rom file ssh -p$port -t $u@$ip -i $cert "bash $scriptd" echo "---- LOCAL $srv-FIN ----------------------------------------" /config/workspace/perso/px/px.sh #!/bin/bash export TERM=xterm #echo "---- FX ----------------------------------------" plIommuGroups() { #view the IOMMU groups and attached devices shopt -s nullglob for g in $(find /sys/kernel/iommu_groups/* -maxdepth 0 -type d | sort -V); do echo "IOMMU Group ${g##*/}:" for d in $g/devices/*; do echo -e "\t$(lspci -nns ${d##*/})" done done } plGpuPassthroughInfo(){ echo "---- DEBUG -------------------------------------" clear echo "---- /etc/default/grub -------------------------------------" #update-grub cat /etc/default/grub | grep GRUB_CMDLINE_LINUX_DEFAULT echo "---- /etc/modules ------------------------------------------" cat /etc/modules echo "---- /etc/modprobe.d/iommu_unsafe_interrupts.conf ----------" dmesg | grep remapping #echo "options vfio_iommu_type1 allow_unsafe_interrupts=1" > /etc/modprobe.d/iommu_unsafe_interrupts.conf cat /etc/modprobe.d/iommu_unsafe_interrupts.conf echo "---- /etc/modprobe.d/kvm.conf ------------------------------" #echo "options kvm ignore_msrs=1" > /etc/modprobe.d/kvm.conf cat /etc/modprobe.d/kvm.conf echo "---- /etc/modprobe.d/blacklist.conf ------------------------" #echo "blacklist radeon" >> /etc/modprobe.d/blacklist.conf #echo "blacklist nouveau" >> /etc/modprobe.d/blacklist.conf #echo "blacklist nvidia" >> 
/etc/modprobe.d/blacklist.conf #echo "blacklist nvidiafb" >> /etc/modprobe.d/blacklist.conf cat /etc/modprobe.d/blacklist.conf echo "---- spci -n -s 0b:00 -------------------------------------" lspci -n -s 0b:00 echo "---- lspci -nnk -d lspci -nnk -d 10de:2488 ----------------" lspci -nnk -d 10de:2488 lspci -nnk -d 10de:228b echo "---- /etc/modprobe.d/vfio.conf -----------------------------" #echo "options vfio-pci ids=10de:2488,10de:228b disable_vga=1"> /etc/modprobe.d/vfio.conf #update-initramfs -u #reset cat /etc/modprobe.d/vfio.conf echo "---- /etc/pve/qemu-server/100.conf -------------------------" # BAK : cp /etc/pve/qemu-server/100.conf /etc/pve/qemu-server/100.conf.bak # NEW : #machine: q35 #hostpci0: 0b:00,pcie=1,romfile=gpu.rom #cpu: host,hidden=1,flags=+pcid #args: -cpu 'host,+kvm_pv_unhalt,+kvm_pv_eoi,hv_vendor_id=NV43FIX,kvm=off' #usb0: host=1-3,usb3=1 cat /etc/pve/qemu-server/100.conf | grep -e machine -e cpu -e args echo "---- qm config 100 ----------------------------------------" #qm config 100 echo "---- pveversion -v ---------------------------------------" #pveversion -v echo "---- MAX_USB_DEVICES -------------------------------------" # 1/ qm monitor 100 # info usbhost # info usb # 2/ lsusb -t cat /usr/share/perl5/PVE/QemuServer.pm | grep -e MAX_USB_DEVICES echo "---- DEBUG-END ---------------------------------" exit } plGpuInitRom() { ls -l /usr/share/kvm/g*.rom # https://www.reddit.com/r/homelab/comments/b5xpua/the_ultimate_beginners_guide_to_gpu_passthrough/ # gpu rom file : https://www.techpowerup.com/vgabios/234907/msi-rtx3070-8192-210518 # /etc/pve/qemu-server/100.conf # hostpci0: 0b:00,pcie=1,romfile=gpu.rom } plClean() { # apt install ncdu # ncdu / rm -f /var/log/*.log rm -f /var/log/messages rm -f /var/log/syslog rm -rf /var/log/journal/* rm -rf /run/log/journal/* find / -type f ! -path "*/dn/*" -size +100M #find / -type f ! 
-path "*/dn/*" -name '*.log' } plInstall() { echo "---- UPDATE ------------------------------------" sudo apt update && sudo apt -y upgrade apt autoremove -y echo "---- INSTALL -----------------------------------" apt install ncdu # free disk space tools : ncdu / } plInitSSH() { echo "---- +SSH --------------------------------------" cat $ddist/key.pub >> ~/.ssh/authorized_keys rm $ddist/key.pub echo "---- +SSH verif --------------------------------" ls -l $ddist cat ~/.ssh/authorized_keys echo "---- +SSH-END ----------------------------------" } #echo "---- VARS ----------------------------------------" srv="px" ddist="./code" echo "---- DIST $srv ---------------------------------" #ls -l ./code #echo "---- GRUB --------------------------------------" <<'Comment' https://pve.proxmox.com/wiki/Pci_passthrough#NVIDIA_Tips Enable the IOMMU & PT Mode : OK nano /etc/default/grub GRUB_CMDLINE_LINUX_DEFAULT="quiet amd_iommu=on iommu=pt textonly video=astdrmfb video=efifb:off" GRUB_CMDLINE_LINUX="" update-grub Verify IOMMU is enabled : OK dmesg | grep -e DMAR -e IOMMU #There should be a line that looks like "DMAR: IOMMU enabled". If there is no output, something is wrong. [ 0.830386] pci 0000:00:00.2: AMD-Vi: IOMMU performance counters supported [ 0.835639] pci 0000:00:00.2: AMD-Vi: Found IOMMU cap 0x40 [ 0.836585] perf/amd_iommu: Detected AMD IOMMU #0 (2 banks, 4 counters/bank). Required Modules : OK nano /etc/modules vfio vfio_iommu_type1 vfio_pci vfio_virqfd IOMMU Interrupt Remapping : OK dmesg | grep 'remapping' [ 0.835646] AMD-Vi: Interrupt remapping enabled Verify IOMMU Isolation : OK BIOS : ACS (Access Control Services) ? find /sys/kernel/iommu_groups/ -type l /sys/kernel/iommu_groups/17/devices/0000:03:01.0 /sys/kernel/iommu_groups/7/devices/0000:00:05.0 /sys/kernel/iommu_groups/25/devices/0000:06:00.0 /sys/kernel/iommu_groups/15/devices/0000:02:00.0 ... 
#view the IOMMU groups and attached devices plIommuGroups Determine your PCI card address, and configure your VM lspci | grep -i nvidia 0b:00.0 VGA compatible controller: NVIDIA Corporation Device 2488 (rev a1) 0b:00.1 Audio device: NVIDIA Corporation GA104 High Definition Audio Controller (rev a1) Add Nvidia CARD to VM nano /etc/pve/qemu-server/<vmid>.conf nano /etc/pve/qemu-server/100.conf hostpci0: 0b:00.0;0b:00.1 hostpci0: 0b:00,pcie=1 hostpci0: 0b:00,pcie=1,romfile=gpu.rom First, find the device and vendor id of your vga card lspci -n -s 0b:00 0b:00.0 0300: 10de:2488 (rev a1) 0b:00.1 0403: 10de:228b (rev a1) Host doesn't try to use the GPU echo "options vfio-pci ids=10de:2488,10de:228b" > /etc/modprobe.d/vfio.conf Blacklist the drivers: echo "blacklist radeon" >> /etc/modprobe.d/blacklist.conf echo "blacklist nouveau" >> /etc/modprobe.d/blacklist.conf echo "blacklist nvidia" >> /etc/modprobe.d/blacklist.conf nano /etc/modprobe.d/blacklist.conf REBOOT machine reboot VM CONFIG 1/ GPU OVMF PCI Passthrough (recommended) Add disable_vga=1 to vfio-pci module, which try to to opt-out devices from vga arbitration if possible echo "options vfio-pci ids=10de:2488,10de:228b disable_vga=1" > /etc/modprobe.d/vfio.conf NVIDIA Tips Some Windows applications like geforce experience, Passmark Performance Test and SiSoftware Sandra crash can crash the VM. You need to add: echo "options kvm ignore_msrs=1" > /etc/modprobe.d/kvm.conf If you see a lot of warning messages in your 'dmesg' system log, add the following instead: echo "options kvm ignore_msrs=1 report_ignored_msrs=0" > /etc/modprobe.d/kvm.conf Retirer le Display par défaut de la VM une fois qu'on a ajouté NVIDIA !!! 
DEBUG dmesg -T | grep -e BAR -e Intel -e iommu -e IOMMU -e passthrough -e DMAR -e bug -e Bug dmesg -T | grep -e BAR -e AMD -e iommu -e IOMMU -e passthrough -e DMAR -e bug -e Bug Comment echo "---- CLEAN -------------------------------------" #plInitSSH #plInstall plClean #echo "---- Gpu Passthrough ---------------------------" plGpuPassthroughInfo #plGpuInitRom echo "---- DIST $srv-END -----------------------------"