SDN VLAN VNets no longer work after an update from 9.1.9 to 9.2.2

Cookiefamily

Renowned Member
Jan 29, 2020
149
42
68
Germany
I was using a VLAN Zone with two VNets and it seemed to work well.

Yesterday evening I updated our staging environment to 9.2.2, and since then I can no longer get connectivity through those VNets. When I use the vmbr0 bridge directly and set the VLAN tag on the VM's network interface, it works fine.

This is the config:
Code:
root@node01:~# cat /etc/network/interfaces
# network interface settings; autogenerated
# Please do NOT modify this file directly, unless you know what
# you're doing.
#
# If you want to manage parts of the network configuration manually,
# please utilize the 'source' or 'source-directory' directives to do
# so.
# PVE will preserve these directives, but will NOT read its network
# configuration from sourced files, so do not attempt to move any of
# the PVE managed interfaces into external files!

auto lo
iface lo inet loopback

auto nic3
iface nic3 inet manual
#mgmt-B

auto nic8
iface nic8 inet manual
#frontend-A

auto nic9
iface nic9 inet manual
#mgmt-A

auto nic10
iface nic10 inet static
    address 172.16.91.11/24
#corosync-A ring0

auto nic0
iface nic0 inet manual
#ceph-A

auto nic1
iface nic1 inet manual
#ceph-B

auto nic2
iface nic2 inet manual
#frontend-B

auto nic4
iface nic4 inet static
    address 172.16.93.11/24
#corosync-B ring1

auto nic5
iface nic5 inet static
    address 10.10.10.141/25
    mtu 9000
#iscsi-B-1

iface nic6 inet manual

auto nic7
iface nic7 inet manual

iface idrac inet manual

auto nic11
iface nic11 inet static
    address 10.10.10.11/25
    mtu 9000
#iscsi-A-0

auto bond0
iface bond0 inet static
    address 172.16.90.11/24
    gateway 172.16.90.1
    bond-slaves nic3 nic9
    bond-miimon 100
    bond-mode active-backup
    bond-primary nic3
#Management

auto bond1
iface bond1 inet manual
    ovs_bonds nic2 nic8
    ovs_type OVSBond
    ovs_bridge vmbr0
    ovs_options lacp=active bond_mode=balance-tcp
#LACP Bond for Frontend

auto bond2
iface bond2 inet static
    address 172.16.92.11/24
    bond-slaves nic0 nic1
    bond-miimon 100
    bond-mode 802.3ad
    bond-xmit-hash-policy layer2+3
    mtu 9000
#LACP Bond for Ceph

auto bond0.60
iface bond0.60 inet static
    address 172.16.71.21/24
#For ESXi Migrations

auto vmbr0
iface vmbr0 inet manual
    ovs_type OVSBridge
    ovs_ports bond1
#Openvswitch Frontend

source /etc/network/interfaces.d/*

Code:
root@node01:~# cat /etc/network/interfaces.d/sdn
#version:16

auto ln_v0074
iface ln_v0074
    ovs_type OVSIntPort
    ovs_bridge vmbr0
    ovs_mtu 1500
    ovs_options tag=74

auto ln_v0083
iface ln_v0083
    ovs_type OVSIntPort
    ovs_bridge vmbr0
    ovs_mtu 1500
    ovs_options tag=83

auto ln_v0086
iface ln_v0086
    ovs_type OVSIntPort
    ovs_bridge vmbr0
    ovs_mtu 1500
    ovs_options tag=86

auto ln_v0087
iface ln_v0087
    ovs_type OVSIntPort
    ovs_bridge vmbr0
    ovs_mtu 1500
    ovs_options tag=87

auto ln_v0089
iface ln_v0089
    ovs_type OVSIntPort
    ovs_bridge vmbr0
    ovs_mtu 1500
    ovs_options tag=89

auto v0074
iface v0074
    bridge_ports ln_v0074
    bridge_stp off
    bridge_fd 0
    mtu 1500
    alias VLAN 74

auto v0083
iface v0083
    bridge_ports ln_v0083
    bridge_stp off
    bridge_fd 0
    mtu 1500
    alias VLAN 83

auto v0086
iface v0086
    bridge_ports ln_v0086
    bridge_stp off
    bridge_fd 0
    mtu 1500
    alias VLAN 86

auto v0087
iface v0087
    bridge_ports ln_v0087
    bridge_stp off
    bridge_fd 0
    mtu 1500
    alias VLAN 87

auto v0089
iface v0089
    bridge_ports ln_v0089
    bridge_stp off
    bridge_fd 0
    mtu 1500
    alias VLAN 89

auto vmbr0
iface vmbr0
    ovs_ports ln_v0074
    ovs_ports ln_v0083
    ovs_ports ln_v0086
    ovs_ports ln_v0087
    ovs_ports ln_v0089

Config of a VM that worked fine previously:
Code:
agent: 1
boot: order=scsi0;ide2;net0
cores: 4
cpu: x86-64-v2-AES
ide2: shared_iso:iso/proxmox-datacenter-manager_1.0-2.iso,media=cdrom,size=1445316K
memory: 4096
meta: creation-qemu=10.1.2,ctime=1778599956
name: pdm02
net0: virtio=BC:24:11:B8:B0:3A,bridge=v0074,mtu=1500
numa: 0
ostype: l26
scsi0: ceph01_vm:vm-131-disk-0,cache=writeback,discard=on,iothread=1,size=50G,ssd=1
scsihw: virtio-scsi-single
smbios1: uuid=9c774ce7-60cb-4555-b6cc-824d4cd5fa0f
sockets: 1
vmgenid: cbcf7176-5b08-470b-8270-65a4d77d5d9b

I already tried downgrading to an older kernel, since this update was also the upgrade to kernel 7.0 for me, but that did not help.
Code:
# pveversion --verbose
proxmox-ve: 9.2.0 (running kernel: 7.0.2-6-pve)
pve-manager: 9.2.2 (running version: 9.2.2/b9984c6d90a4bd80)
proxmox-kernel-helper: 9.2.0
proxmox-kernel-7.0: 7.0.2-6
proxmox-kernel-7.0.2-6-pve-signed: 7.0.2-6
proxmox-kernel-6.17: 6.17.13-11
proxmox-kernel-6.17.13-11-pve-signed: 6.17.13-11
proxmox-kernel-6.17.13-2-pve-signed: 6.17.13-2
proxmox-kernel-6.17.2-1-pve-signed: 6.17.2-1
amd64-microcode: 3.20251202.1~bpo13+1
ceph: 19.2.3-pve4
ceph-fuse: 19.2.3-pve4
corosync: 3.1.10-pve2
criu: 4.1.1-1
frr-pythontools: 10.6.1-1+pve2
ifupdown2: 3.3.0-1+pmx12
intel-microcode: 3.20251111.1~deb13u1
ksm-control-daemon: 1.5-1
libjs-extjs: 7.0.0-5
libproxmox-acme-perl: 1.7.1
libproxmox-backup-qemu0: 2.0.2
libproxmox-rs-perl: 0.4.1
libpve-access-control: 9.1.1
libpve-apiclient-perl: 3.4.2
libpve-cluster-api-perl: 9.1.5
libpve-cluster-perl: 9.1.5
libpve-common-perl: 9.1.12
libpve-guest-common-perl: 6.0.3
libpve-http-server-perl: 6.0.5
libpve-network-perl: 1.6.6
libpve-notify-perl: 9.1.5
libpve-rs-perl: 0.15.3
libpve-storage-perl: 9.1.5
libspice-server1: 0.15.2-1+b1
lvm2: 2.03.31-2+pmx1
lxc-pve: 7.0.0-2
lxcfs: 7.0.0-pve1
novnc-pve: 1.7.0-1
openvswitch-switch: 3.5.0-1+b1
proxmox-backup-client: 4.2.0-1
proxmox-backup-file-restore: 4.2.0-1
proxmox-backup-restore-image: 1.0.0
proxmox-firewall: 1.2.3
proxmox-kernel-helper: 9.2.0
proxmox-mail-forward: 1.0.3
proxmox-mini-journalreader: 1.6
proxmox-offline-mirror-helper: 0.7.4
proxmox-widget-toolkit: 5.2.2
pve-cluster: 9.1.5
pve-container: 6.1.10
pve-docs: 9.2.1
pve-edk2-firmware: 4.2025.05-2
pve-esxi-import-tools: 1.0.1
pve-firewall: 6.0.4
pve-firmware: 3.18-3
pve-ha-manager: 5.2.4
pve-i18n: 3.7.4
pve-qemu-kvm: 11.0.0-3
pve-xtermjs: 6.0.0-1
qemu-server: 9.1.15
smartmontools: 7.5-pve2
spiceterm: 3.4.2
swtpm: 0.8.0+pve3
vncterm: 1.9.2
zfsutils-linux: 2.4.2-pve1

Does someone have an idea what happened here?
 
Can you post the output of the following commands?

Code:
ip a

Can you try reloading the network configuration via the following command and paste the full output?

Code:
ifreload -avd

Does anything odd pop up in the journal?

Code:
journalctl -u openvswitch-switch.service -b
 
Hi Stefan,

I attached the ip a and ifreload output as it is too verbose for a normal post.

The journal looks like this:
Code:
root@node02:~# journalctl -u openvswitch-switch.service -b
May 21 18:03:33 node02 systemd[1]: Starting openvswitch-switch.service - Open vSwitch...
May 21 18:03:33 node02 systemd[1]: Finished openvswitch-switch.service - Open vSwitch.
 

Attachments

Hey,

Thanks for the output! I could not really find anything suspicious in it. Could you run tcpdump on the following interfaces and then try a ping:
- tap131i0
- ln_v0074
- bond1
(Same on the receiving side; there the tap interface changes to the one belonging to the target guest.)

How far do the ICMP packets get? That is, on which interfaces do they show up?
Also, could you try disabling LACP on the bond and changing the bond_mode? Does anything behave differently with that?