EVPN+VXLAN with multi exit nodes after upgrade to 9.1 : All VMs are no longer reachable

adofou

Active Member
Mar 14, 2020
25
1
43
Hello,

This topic is a spiritual sequel to EVPN+VPLS with multi exit nodes : firewall drop packet with asymetric routing (not VPLS but VXLAN, error in title).

Since then, I have set "nf_conntrack_allow_invalid: 1" in each host.fw in the cluster,
and also added this to sysctl.conf:
Code:
net.ipv4.conf.all.arp_filter = 0
net.ipv4.conf.all.rp_filter = 0

With the upgrade to Proxmox 9.0 (which removes the sysctl.conf file), I created the file /usr/lib/sysctl.d/zzz-network.conf
Code:
net.ipv4.conf.all.rp_filter = 0
net.ipv4.conf.default.rp_filter = 0
net.ipv4.conf.*.rp_filter = 0

The issue was resolved until I upgraded from Proxmox 9.0 to Proxmox 9.1.
Traffic arrives from the network at a node in the cluster, but is not forwarded to the EVPN or the VM.
All VMs in the EVPN can ping others and gateways on three nodes.

But the difference this time is that even with a single exit node, nothing works, not even the local VMs of the exit node!
It really seems that this is not a conntrack issue (which is disabled according to sysctl) but a firewall issue.
EDIT: With the firewall disabled on the host and the host + data center, the problem persists :-(

Code:
 sysctl -a | grep rp_filter
net.ipv4.conf.all.arp_filter = 0
net.ipv4.conf.all.rp_filter = 0
net.ipv4.conf.default.arp_filter = 0
net.ipv4.conf.default.rp_filter = 0
net.ipv4.conf.ens10f0np0.arp_filter = 0
net.ipv4.conf.ens10f0np0.rp_filter = 0
net.ipv4.conf.ens10f0np0/103.arp_filter = 0
net.ipv4.conf.ens10f0np0/103.rp_filter = 0
net.ipv4.conf.ens10f0np0/104.arp_filter = 0
net.ipv4.conf.ens10f0np0/104.rp_filter = 0
net.ipv4.conf.ens10f0np0/105.arp_filter = 0
net.ipv4.conf.ens10f0np0/105.rp_filter = 0
net.ipv4.conf.ens10f1np1.arp_filter = 0
net.ipv4.conf.ens10f1np1.rp_filter = 0
net.ipv4.conf.ens10f1np1/103.arp_filter = 0
net.ipv4.conf.ens10f1np1/103.rp_filter = 0
net.ipv4.conf.ens10f1np1/104.arp_filter = 0
net.ipv4.conf.ens10f1np1/104.rp_filter = 0
net.ipv4.conf.ens10f1np1/105.arp_filter = 0
net.ipv4.conf.ens10f1np1/105.rp_filter = 0
net.ipv4.conf.ens2f0np0.arp_filter = 0
net.ipv4.conf.ens2f0np0.rp_filter = 0
net.ipv4.conf.ens2f1np1.arp_filter = 0
net.ipv4.conf.ens2f1np1.rp_filter = 0
net.ipv4.conf.fwbr102i0.arp_filter = 0
net.ipv4.conf.fwbr102i0.rp_filter = 0
net.ipv4.conf.fwbr103i1.arp_filter = 0
net.ipv4.conf.fwbr103i1.rp_filter = 0
net.ipv4.conf.fwbr202i0.arp_filter = 0
net.ipv4.conf.fwbr202i0.rp_filter = 0
net.ipv4.conf.fwbr202i1.arp_filter = 0
net.ipv4.conf.fwbr202i1.rp_filter = 0
net.ipv4.conf.fwbr215i0.arp_filter = 0
net.ipv4.conf.fwbr215i0.rp_filter = 0
net.ipv4.conf.fwln102i0.arp_filter = 0
net.ipv4.conf.fwln102i0.rp_filter = 0
net.ipv4.conf.fwln103i1.arp_filter = 0
net.ipv4.conf.fwln103i1.rp_filter = 0
net.ipv4.conf.fwln202i0.arp_filter = 0
net.ipv4.conf.fwln202i0.rp_filter = 0
net.ipv4.conf.fwln202i1.arp_filter = 0
net.ipv4.conf.fwln202i1.rp_filter = 0
net.ipv4.conf.fwln215i0.arp_filter = 0
net.ipv4.conf.fwln215i0.rp_filter = 0
net.ipv4.conf.fwpr102p0.arp_filter = 0
net.ipv4.conf.fwpr102p0.rp_filter = 0
net.ipv4.conf.fwpr103p1.arp_filter = 0
net.ipv4.conf.fwpr103p1.rp_filter = 0
net.ipv4.conf.fwpr202p0.arp_filter = 0
net.ipv4.conf.fwpr202p0.rp_filter = 0
net.ipv4.conf.fwpr202p1.arp_filter = 0
net.ipv4.conf.fwpr202p1.rp_filter = 0
net.ipv4.conf.fwpr215p0.arp_filter = 0
net.ipv4.conf.fwpr215p0.rp_filter = 0
net.ipv4.conf.lo.arp_filter = 0
net.ipv4.conf.lo.rp_filter = 0
net.ipv4.conf.private.arp_filter = 0
net.ipv4.conf.private.rp_filter = 0
net.ipv4.conf.public.arp_filter = 0
net.ipv4.conf.public.rp_filter = 0
net.ipv4.conf.tap202i0.arp_filter = 0
net.ipv4.conf.tap202i0.rp_filter = 0
net.ipv4.conf.tap202i1.arp_filter = 0
net.ipv4.conf.tap202i1.rp_filter = 0
net.ipv4.conf.tap212i0.arp_filter = 0
net.ipv4.conf.tap212i0.rp_filter = 0
net.ipv4.conf.tap215i0.arp_filter = 0
net.ipv4.conf.tap215i0.rp_filter = 0
net.ipv4.conf.test.arp_filter = 0
net.ipv4.conf.test.rp_filter = 0
net.ipv4.conf.veth102i0.arp_filter = 0
net.ipv4.conf.veth102i0.rp_filter = 0
net.ipv4.conf.veth103i0.arp_filter = 0
net.ipv4.conf.veth103i0.rp_filter = 0
net.ipv4.conf.veth103i1.arp_filter = 0
net.ipv4.conf.veth103i1.rp_filter = 0
net.ipv4.conf.vmbr0.arp_filter = 0
net.ipv4.conf.vmbr0.rp_filter = 0
net.ipv4.conf.vmbr0/3.arp_filter = 0
net.ipv4.conf.vmbr0/3.rp_filter = 0
net.ipv4.conf.vmbr0/4.arp_filter = 0
net.ipv4.conf.vmbr0/4.rp_filter = 0
net.ipv4.conf.vmbr0/5.arp_filter = 0
net.ipv4.conf.vmbr0/5.rp_filter = 0
net.ipv4.conf.vmbr0/6.arp_filter = 0
net.ipv4.conf.vmbr0/6.rp_filter = 0
net.ipv4.conf.vmbr1.arp_filter = 0
net.ipv4.conf.vmbr1.rp_filter = 0
net.ipv4.conf.vmbr1/3.arp_filter = 0
net.ipv4.conf.vmbr1/3.rp_filter = 0
net.ipv4.conf.vmbr1/4.arp_filter = 0
net.ipv4.conf.vmbr1/4.rp_filter = 0
net.ipv4.conf.vmbr1/5.arp_filter = 0
net.ipv4.conf.vmbr1/5.rp_filter = 0
net.ipv4.conf.vmbr1/6.arp_filter = 0
net.ipv4.conf.vmbr1/6.rp_filter = 0
net.ipv4.conf.vmbr103.arp_filter = 0
net.ipv4.conf.vmbr103.rp_filter = 0
net.ipv4.conf.vmbr104.arp_filter = 0
net.ipv4.conf.vmbr104.rp_filter = 0
net.ipv4.conf.vmbr105.arp_filter = 0
net.ipv4.conf.vmbr105.rp_filter = 0
net.ipv4.conf.vrf_private.arp_filter = 0
net.ipv4.conf.vrf_private.rp_filter = 0
net.ipv4.conf.vrf_public.arp_filter = 0
net.ipv4.conf.vrf_public.rp_filter = 0
net.ipv4.conf.vrfbr_private.arp_filter = 0
net.ipv4.conf.vrfbr_private.rp_filter = 0
net.ipv4.conf.vrfbr_public.arp_filter = 0
net.ipv4.conf.vrfbr_public.rp_filter = 0
net.ipv4.conf.vrfvx_private.arp_filter = 0
net.ipv4.conf.vrfvx_private.rp_filter = 0
net.ipv4.conf.vrfvx_public.arp_filter = 0
net.ipv4.conf.vrfvx_public.rp_filter = 0
net.ipv4.conf.vxlan_private.arp_filter = 0
net.ipv4.conf.vxlan_private.rp_filter = 0
net.ipv4.conf.vxlan_public.arp_filter = 0
net.ipv4.conf.vxlan_public.rp_filter = 0
net.ipv4.conf.vxlan_test.arp_filter = 0
net.ipv4.conf.vxlan_test.rp_filter = 0


When I try to ping my VM from the internet (on the only exit node selected for this EVPN):

Code:
 tcpdump -i any -n "src 213.X.Y.243 or dst 213.X.Y.243"
tcpdump: WARNING: any: That device doesn't support promiscuous mode
(Promiscuous mode not supported on the "any" device)
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on any, link-type LINUX_SLL2 (Linux cooked v2), snapshot length 262144 bytes
17:57:47.167683 ens10f0np0 In  IP 54.X.Y.249 > 213.X.Y.243: ICMP echo request, id 3, seq 16252, length 48
17:57:47.167683 vmbr0 In  IP 54.X.Y.249 > 213.X.Y.243: ICMP echo request, id 3, seq 16252, length 48
17:57:47.167683 vmbr0.3 In  IP 54.X.Y.249 > 213.X.Y.243: ICMP echo request, id 3, seq 16252, length 48
17:57:47.574429 ens10f0np0 In  IP 176.X.Y.158 > 213.X.Y.243: ICMP echo request, id 7, seq 2169, length 40
17:57:47.574429 vmbr0 In  IP 176.X.Y.158 > 213.X.Y.243: ICMP echo request, id 7, seq 2169, length 40
17:57:47.574429 vmbr0.3 In  IP 176.X.Y.158 > 213.X.Y.243: ICMP echo request, id 7, seq 2169, length 40

ens10f0np0 (physical interface) -> vmbr0 -> vmbr0.3 (VLAN on bridge)... and nothing more.

But from a VM in the same EVPN on another node of the cluster:
Code:
18:00:49.984167 vxlan_public P   IP 213.X.Y.244 > 213.X.Y.243: ICMP echo request, id 38405, seq 1, length 64
18:00:49.984211 veth104i0 Out IP 213.X.Y.244 > 213.X.Y.243: ICMP echo request, id 38405, seq 1, length 64
18:00:49.984237 veth104i0 P   IP 213.X.Y.243 > 213.X.Y.244: ICMP echo reply, id 38405, seq 1, length 64
18:00:49.984243 vxlan_public Out IP 213.X.Y.243 > 213.X.Y.244: ICMP echo reply, id 38405, seq 1, length 64
18:00:51.041535 vxlan_public P   IP 213.X.Y.244 > 213.X.Y.243: ICMP echo request, id 38405, seq 2, length 64
18:00:51.041562 veth104i0 Out IP 213.X.Y.244 > 213.X.Y.243: ICMP echo request, id 38405, seq 2, length 64
18:00:51.041579 veth104i0 P   IP 213.X.Y.243 > 213.X.Y.244: ICMP echo reply, id 38405, seq 2, length 64
18:00:51.041590 vxlan_public Out IP 213.X.Y.243 > 213.X.Y.244: ICMP echo reply, id 38405, seq 2, length 64
18:00:52.065547 vxlan_public P   IP 213.X.Y.244 > 213.X.Y.243: ICMP echo request, id 38405, seq 3, length 64
18:00:52.065579 veth104i0 Out IP 213.X.Y.244 > 213.X.Y.243: ICMP echo request, id 38405, seq 3, length 64
18:00:52.065607 veth104i0 P   IP 213.X.Y.243 > 213.X.Y.244: ICMP echo reply, id 38405, seq 3, length 64
18:00:52.065612 vxlan_public Out IP 213.X.Y.243 > 213.X.Y.244: ICMP echo reply, id 38405, seq 3, length 64

And when I ping 8.8.8.8 from a local VM on the exit node:
Code:
18:02:42.031143 veth104i0 P   IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 1, length 64
18:02:42.031143 public In  IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 1, length 64
18:02:42.031169 vmbr0.3 Out IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 1, length 64
18:02:42.031171 vmbr0 Out IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 1, length 64
18:02:42.031175 ens10f0np0 Out IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 1, length 64
18:02:42.031725 ens10f1np1 In  IP 8.8.8.8 > 213.X.Y.243: ICMP echo reply, id 318, seq 1, length 64
18:02:42.031725 vmbr1 In  IP 8.8.8.8 > 213.X.Y.243: ICMP echo reply, id 318, seq 1, length 64
18:02:42.031725 vmbr1.3 In  IP 8.8.8.8 > 213.X.Y.243: ICMP echo reply, id 318, seq 1, length 64

18:02:43.039637 veth104i0 P   IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 2, length 64
18:02:43.039637 public In  IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 2, length 64
18:02:43.039657 vmbr0.3 Out IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 2, length 64
18:02:43.039659 vmbr0 Out IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 2, length 64
18:02:43.039663 ens10f0np0 Out IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 2, length 64
18:02:43.040213 ens10f1np1 In  IP 8.8.8.8 > 213.X.Y.243: ICMP echo reply, id 318, seq 2, length 64
18:02:43.040213 vmbr1 In  IP 8.8.8.8 > 213.X.Y.243: ICMP echo reply, id 318, seq 2, length 64
18:02:43.040213 vmbr1.3 In  IP 8.8.8.8 > 213.X.Y.243: ICMP echo reply, id 318, seq 2, length 64

18:02:44.063636 veth104i0 P   IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 3, length 64
18:02:44.063636 public In  IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 3, length 64
18:02:44.063658 vmbr0.3 Out IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 3, length 64
18:02:44.063660 vmbr0 Out IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 3, length 64
18:02:44.063664 ens10f0np0 Out IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 3, length 64
18:02:44.064227 ens10f1np1 In  IP 8.8.8.8 > 213.X.Y.243: ICMP echo reply, id 318, seq 3, length 64
18:02:44.064227 vmbr1 In  IP 8.8.8.8 > 213.X.Y.243: ICMP echo reply, id 318, seq 3, length 64
18:02:44.064227 vmbr1.3 In  IP 8.8.8.8 > 213.X.Y.243: ICMP echo reply, id 318, seq 3, length 64

18:02:45.087659 veth104i0 P   IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 4, length 64
18:02:45.087659 public In  IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 4, length 64
18:02:45.087684 vmbr0.3 Out IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 4, length 64
18:02:45.087686 vmbr0 Out IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 4, length 64
18:02:45.087692 ens10f0np0 Out IP 213.X.Y.243 > 8.8.8.8: ICMP echo request, id 318, seq 4, length 64
18:02:45.088251 ens10f1np1 In  IP 8.8.8.8 > 213.X.Y.243: ICMP echo reply, id 318, seq 4, length 64
18:02:45.088251 vmbr1 In  IP 8.8.8.8 > 213.X.Y.243: ICMP echo reply, id 318, seq 4, length 64
18:02:45.088251 vmbr1.3 In  IP 8.8.8.8 > 213.X.Y.243: ICMP echo reply, id 318, seq 4, length 64

The ping egresses from the VM to the local bridge of the EVPN, and then to the internet.
We receive the answer from 8.8.8.8, but this echo reply is never forwarded to the local VM in the EVPN:
Code:
# ping 8.8.8.8
PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.
^C
--- 8.8.8.8 ping statistics ---
157 packets transmitted, 0 received, 100% packet loss, time 159729ms

To be honest, I don't know where to look in the firewall, kernel, or Proxmox settings in general.
There seem to be a lot of changes going on with nftables (not enabled on all nodes).
Any help or clues would be appreciated.

EDIT: The problem doesn't seem to be caused by the firewall, because even after disabling all firewalls (verified with iptables -L), the problem persists :-/
So, here is the current conntrack configuration:
Code:
net.core.somaxconn = 4096
net.netfilter.nf_conntrack_acct = 0
net.netfilter.nf_conntrack_buckets = 262144
net.netfilter.nf_conntrack_checksum = 1
net.netfilter.nf_conntrack_count = 71
net.netfilter.nf_conntrack_events = 2
net.netfilter.nf_conntrack_expect_max = 4096
net.netfilter.nf_conntrack_frag6_high_thresh = 4194304
net.netfilter.nf_conntrack_frag6_low_thresh = 3145728
net.netfilter.nf_conntrack_frag6_timeout = 60
net.netfilter.nf_conntrack_generic_timeout = 600
net.netfilter.nf_conntrack_gre_timeout = 30
net.netfilter.nf_conntrack_gre_timeout_stream = 180
net.netfilter.nf_conntrack_icmp_timeout = 30
net.netfilter.nf_conntrack_icmpv6_timeout = 30
net.netfilter.nf_conntrack_log_invalid = 0
net.netfilter.nf_conntrack_max = 262144
net.netfilter.nf_conntrack_sctp_timeout_closed = 10
net.netfilter.nf_conntrack_sctp_timeout_cookie_echoed = 3
net.netfilter.nf_conntrack_sctp_timeout_cookie_wait = 3
net.netfilter.nf_conntrack_sctp_timeout_established = 210
net.netfilter.nf_conntrack_sctp_timeout_heartbeat_sent = 30
net.netfilter.nf_conntrack_sctp_timeout_shutdown_ack_sent = 3
net.netfilter.nf_conntrack_sctp_timeout_shutdown_recd = 3
net.netfilter.nf_conntrack_sctp_timeout_shutdown_sent = 3
net.netfilter.nf_conntrack_tcp_be_liberal = 0
net.netfilter.nf_conntrack_tcp_ignore_invalid_rst = 0
net.netfilter.nf_conntrack_tcp_loose = 1
net.netfilter.nf_conntrack_tcp_max_retrans = 3
net.netfilter.nf_conntrack_tcp_timeout_close = 10
net.netfilter.nf_conntrack_tcp_timeout_close_wait = 60
net.netfilter.nf_conntrack_tcp_timeout_established = 432000
net.netfilter.nf_conntrack_tcp_timeout_fin_wait = 120
net.netfilter.nf_conntrack_tcp_timeout_last_ack = 30
net.netfilter.nf_conntrack_tcp_timeout_max_retrans = 300
net.netfilter.nf_conntrack_tcp_timeout_syn_recv = 60
net.netfilter.nf_conntrack_tcp_timeout_syn_sent = 120
net.netfilter.nf_conntrack_tcp_timeout_time_wait = 120
net.netfilter.nf_conntrack_tcp_timeout_unacknowledged = 300
net.netfilter.nf_conntrack_timestamp = 0
net.netfilter.nf_conntrack_udp_timeout = 30
net.netfilter.nf_conntrack_udp_timeout_stream = 120
net.nf_conntrack_max = 262144

Many thanks!
 
Last edited: