It seems that cluster issues have been around for years, and I am not sure what to do anymore; I have tried all kinds of things. The cluster has a dedicated, very low-latency corosync network. It is a 14-node cluster with over 700 VMs/LXC containers, and the problems started after growing the cluster to 600+ VMs/LXCs. I have one node that I guess is not voting, even though it is up and reachable, but it is crazy if just one node brings down everything. Also, the one node that is not voting is saying "Activity blocked". The last pvecm status (along with pveversion and corosync.conf) is below:
root@nypve01:/var/log# pveversion -v
proxmox-ve: 8.2.0 (running kernel: 6.8.4-3-pve)
pve-manager: 8.2.2 (running version: 8.2.2/9355359cd7afbae4)
proxmox-kernel-helper: 8.1.0
pve-kernel-5.15: 7.4-9
proxmox-kernel-6.8: 6.8.4-3
proxmox-kernel-6.8.4-3-pve-signed: 6.8.4-3
proxmox-kernel-6.5.13-5-pve-signed: 6.5.13-5
proxmox-kernel-6.5: 6.5.13-5
proxmox-kernel-6.5.11-7-pve-signed: 6.5.11-7
pve-kernel-5.15.131-2-pve: 5.15.131-3
pve-kernel-5.15.116-1-pve: 5.15.116-1
pve-kernel-5.15.111-1-pve: 5.15.111-1
pve-kernel-5.15.107-2-pve: 5.15.107-2
pve-kernel-5.15.83-1-pve: 5.15.83-1
pve-kernel-5.15.64-1-pve: 5.15.64-1
pve-kernel-5.15.60-2-pve: 5.15.60-2
pve-kernel-5.15.30-2-pve: 5.15.30-3
ceph-fuse: 17.2.7-pve3
corosync: 3.1.7-pve3
criu: 3.17.1-2
glusterfs-client: 10.3-5
ifupdown2: 3.2.0-1+pmx8
ksm-control-daemon: 1.5-1
libjs-extjs: 7.0.0-4
libknet1: 1.28-pve1
libproxmox-acme-perl: 1.5.1
libproxmox-backup-qemu0: 1.4.1
libproxmox-rs-perl: 0.3.3
libpve-access-control: 8.1.4
libpve-apiclient-perl: 3.3.2
libpve-cluster-api-perl: 8.0.6
libpve-cluster-perl: 8.0.6
libpve-common-perl: 8.2.1
libpve-guest-common-perl: 5.1.1
libpve-http-server-perl: 5.1.0
libpve-network-perl: 0.9.8
libpve-rs-perl: 0.8.8
libpve-storage-perl: 8.2.1
libspice-server1: 0.15.1-1
lvm2: 2.03.16-2
lxc-pve: 6.0.0-1
lxcfs: 6.0.0-pve2
novnc-pve: 1.4.0-3
proxmox-backup-client: 3.2.2-1
proxmox-backup-file-restore: 3.2.2-1
proxmox-kernel-helper: 8.1.0
proxmox-mail-forward: 0.2.3
proxmox-mini-journalreader: 1.4.0
proxmox-offline-mirror-helper: 0.6.6
proxmox-widget-toolkit: 4.2.3
pve-cluster: 8.0.6
pve-container: 5.1.10
pve-docs: 8.2.2
pve-edk2-firmware: 4.2023.08-4
pve-esxi-import-tools: 0.7.0
pve-firewall: 5.0.7
pve-firmware: 3.11-1
pve-ha-manager: 4.0.4
pve-i18n: 3.2.2
pve-qemu-kvm: 8.1.5-6
pve-xtermjs: 5.3.0-3
qemu-server: 8.2.1
smartmontools: 7.3-pve1
spiceterm: 3.3.0
swtpm: 0.8.0+pve1
vncterm: 1.8.0
zfsutils-linux: 2.2.3-pve2
root@nypve01:/var/log# more /etc/corosync/corosync.conf
logging {
debug: off
to_syslog: yes
}
nodelist {
node {
name: nypve01
nodeid: 1
quorum_votes: 1
ring0_addr: 10.64.0.11
ring1_addr: 10.82.97.65
}
node {
name: nypve02
nodeid: 2
quorum_votes: 1
ring0_addr: 10.64.0.12
ring1_addr: 10.82.97.66
}
node {
name: nypve03
nodeid: 3
quorum_votes: 1
ring0_addr: 10.64.0.13
ring1_addr: 10.82.97.67
}
node {
name: nypve04
nodeid: 4
quorum_votes: 1
ring0_addr: 10.64.0.14
ring1_addr: 10.82.97.68
}
node {
name: nypve05
nodeid: 9
quorum_votes: 1
ring0_addr: 10.64.0.15
ring1_addr: 10.82.97.69
}
node {
name: nypve06
nodeid: 10
quorum_votes: 1
ring0_addr: 10.64.0.16
ring1_addr: 10.82.97.70
}
node {
name: nypve07
nodeid: 11
quorum_votes: 1
ring0_addr: 10.64.0.17
ring1_addr: 10.82.97.71
}
node {
name: sfpve01
nodeid: 5
quorum_votes: 1
ring0_addr: 10.64.0.21
ring1_addr: 10.82.31.9
}
node {
name: sfpve02
nodeid: 6
quorum_votes: 1
ring0_addr: 10.64.0.22
ring1_addr: 10.82.31.10
}
node {
name: sfpve03
nodeid: 7
quorum_votes: 1
ring0_addr: 10.64.0.23
ring1_addr: 10.82.31.11
}
node {
name: sfpve04
nodeid: 8
quorum_votes: 1
ring0_addr: 10.64.0.24
ring1_addr: 10.82.31.12
}
node {
name: sfpve05
nodeid: 12
quorum_votes: 1
ring0_addr: 10.64.0.25
ring1_addr: 10.82.31.13
}
node {
name: sfpve06
nodeid: 13
quorum_votes: 1
ring0_addr: 10.64.0.26
ring1_addr: 10.82.31.14
}
node {
name: sfpve07
nodeid: 14
quorum_votes: 1
ring0_addr: 10.64.0.27
ring1_addr: 10.82.31.15
}
}
quorum {
provider: corosync_votequorum
}
totem {
cluster_name: svl-b-labs
config_version: 16
interface {
linknumber: 0
}
ip_version: ipv4-6
link_mode: passive
secauth: on
version: 2
}
root@nypve01:/var/log# pvecm status
Cluster information
-------------------
Name: svl-b-labs
Config Version: 16
Transport: knet
Secure auth: on
Quorum information
------------------
Date: Wed May 22 11:46:50 2024
Quorum provider: corosync_votequorum
Nodes: 13
Node ID: 0x00000001
Ring ID: 1.beff
Quorate: Yes
Votequorum information
----------------------
Expected votes: 14
Highest expected: 14
Total votes: 13
Quorum: 8
Flags: Quorate
Membership information
----------------------
Nodeid Votes Name
0x00000001 1 10.64.0.11 (local)
0x00000002 1 10.64.0.12
0x00000003 1 10.64.0.13
0x00000004 1 10.64.0.14
0x00000005 1 10.64.0.21
0x00000006 1 10.64.0.22
0x00000007 1 10.64.0.23
0x00000008 1 10.64.0.24
0x00000009 1 10.64.0.15
0x0000000a 1 10.64.0.16
0x0000000c 1 10.64.0.25
0x0000000d 1 10.64.0.26
0x0000000e 1 10.64.0.27
root@nypve01:/var/log# pvecm nodes
Membership information
----------------------
Nodeid Votes Name
1 1 nypve01 (local)
2 1 nypve02
3 1 nypve03
4 1 nypve04
5 1 sfpve01
6 1 sfpve02
7 1 sfpve03
8 1 sfpve04
9 1 nypve05
10 1 nypve06
12 1 sfpve05
13 1 sfpve06
14 1 sfpve07
root@nypve07:/var/log# pvecm status
Cluster information
-------------------
Name: svl-b-labs
Config Version: 16
Transport: knet
Secure auth: on
Quorum information
------------------
Date: Wed May 22 11:36:03 2024
Quorum provider: corosync_votequorum
Nodes: 1
Node ID: 0x0000000b
Ring ID: 1.c23f
Quorate: No
Votequorum information
----------------------
Expected votes: 14
Highest expected: 14
Total votes: 1
Quorum: 8 Activity blocked
Flags:
Membership information
----------------------
Nodeid Votes Name
0x0000000b 1 10.64.0.17 (local)
root@nypve01:/var/log# pveversion -v
proxmox-ve: 8.2.0 (running kernel: 6.8.4-3-pve)
pve-manager: 8.2.2 (running version: 8.2.2/9355359cd7afbae4)
proxmox-kernel-helper: 8.1.0
pve-kernel-5.15: 7.4-9
proxmox-kernel-6.8: 6.8.4-3
proxmox-kernel-6.8.4-3-pve-signed: 6.8.4-3
proxmox-kernel-6.5.13-5-pve-signed: 6.5.13-5
proxmox-kernel-6.5: 6.5.13-5
proxmox-kernel-6.5.11-7-pve-signed: 6.5.11-7
pve-kernel-5.15.131-2-pve: 5.15.131-3
pve-kernel-5.15.116-1-pve: 5.15.116-1
pve-kernel-5.15.111-1-pve: 5.15.111-1
pve-kernel-5.15.107-2-pve: 5.15.107-2
pve-kernel-5.15.83-1-pve: 5.15.83-1
pve-kernel-5.15.64-1-pve: 5.15.64-1
pve-kernel-5.15.60-2-pve: 5.15.60-2
pve-kernel-5.15.30-2-pve: 5.15.30-3
ceph-fuse: 17.2.7-pve3
corosync: 3.1.7-pve3
criu: 3.17.1-2
glusterfs-client: 10.3-5
ifupdown2: 3.2.0-1+pmx8
ksm-control-daemon: 1.5-1
libjs-extjs: 7.0.0-4
libknet1: 1.28-pve1
libproxmox-acme-perl: 1.5.1
libproxmox-backup-qemu0: 1.4.1
libproxmox-rs-perl: 0.3.3
libpve-access-control: 8.1.4
libpve-apiclient-perl: 3.3.2
libpve-cluster-api-perl: 8.0.6
libpve-cluster-perl: 8.0.6
libpve-common-perl: 8.2.1
libpve-guest-common-perl: 5.1.1
libpve-http-server-perl: 5.1.0
libpve-network-perl: 0.9.8
libpve-rs-perl: 0.8.8
libpve-storage-perl: 8.2.1
libspice-server1: 0.15.1-1
lvm2: 2.03.16-2
lxc-pve: 6.0.0-1
lxcfs: 6.0.0-pve2
novnc-pve: 1.4.0-3
proxmox-backup-client: 3.2.2-1
proxmox-backup-file-restore: 3.2.2-1
proxmox-kernel-helper: 8.1.0
proxmox-mail-forward: 0.2.3
proxmox-mini-journalreader: 1.4.0
proxmox-offline-mirror-helper: 0.6.6
proxmox-widget-toolkit: 4.2.3
pve-cluster: 8.0.6
pve-container: 5.1.10
pve-docs: 8.2.2
pve-edk2-firmware: 4.2023.08-4
pve-esxi-import-tools: 0.7.0
pve-firewall: 5.0.7
pve-firmware: 3.11-1
pve-ha-manager: 4.0.4
pve-i18n: 3.2.2
pve-qemu-kvm: 8.1.5-6
pve-xtermjs: 5.3.0-3
qemu-server: 8.2.1
smartmontools: 7.3-pve1
spiceterm: 3.3.0
swtpm: 0.8.0+pve1
vncterm: 1.8.0
zfsutils-linux: 2.2.3-pve2
root@nypve01:/var/log# more /etc/corosync/corosync.conf
logging {
debug: off
to_syslog: yes
}
nodelist {
node {
name: nypve01
nodeid: 1
quorum_votes: 1
ring0_addr: 10.64.0.11
ring1_addr: 10.82.97.65
}
node {
name: nypve02
nodeid: 2
quorum_votes: 1
ring0_addr: 10.64.0.12
ring1_addr: 10.82.97.66
}
node {
name: nypve03
nodeid: 3
quorum_votes: 1
ring0_addr: 10.64.0.13
ring1_addr: 10.82.97.67
}
node {
name: nypve04
nodeid: 4
quorum_votes: 1
ring0_addr: 10.64.0.14
ring1_addr: 10.82.97.68
}
node {
name: nypve05
nodeid: 9
quorum_votes: 1
ring0_addr: 10.64.0.15
ring1_addr: 10.82.97.69
}
node {
name: nypve06
nodeid: 10
quorum_votes: 1
ring0_addr: 10.64.0.16
ring1_addr: 10.82.97.70
}
node {
name: nypve07
nodeid: 11
quorum_votes: 1
ring0_addr: 10.64.0.17
ring1_addr: 10.82.97.71
}
node {
name: sfpve01
nodeid: 5
quorum_votes: 1
ring0_addr: 10.64.0.21
ring1_addr: 10.82.31.9
}
node {
name: sfpve02
nodeid: 6
quorum_votes: 1
ring0_addr: 10.64.0.22
ring1_addr: 10.82.31.10
}
node {
name: sfpve03
nodeid: 7
quorum_votes: 1
ring0_addr: 10.64.0.23
ring1_addr: 10.82.31.11
}
node {
name: sfpve04
nodeid: 8
quorum_votes: 1
ring0_addr: 10.64.0.24
ring1_addr: 10.82.31.12
}
node {
name: sfpve05
nodeid: 12
quorum_votes: 1
ring0_addr: 10.64.0.25
ring1_addr: 10.82.31.13
}
node {
name: sfpve06
nodeid: 13
quorum_votes: 1
ring0_addr: 10.64.0.26
ring1_addr: 10.82.31.14
}
node {
name: sfpve07
nodeid: 14
quorum_votes: 1
ring0_addr: 10.64.0.27
ring1_addr: 10.82.31.15
}
}
quorum {
provider: corosync_votequorum
}
totem {
cluster_name: svl-b-labs
config_version: 16
interface {
linknumber: 0
}
ip_version: ipv4-6
link_mode: passive
secauth: on
version: 2
}
root@nypve01:/var/log# pvecm status
Cluster information
-------------------
Name: svl-b-labs
Config Version: 16
Transport: knet
Secure auth: on
Quorum information
------------------
Date: Wed May 22 11:46:50 2024
Quorum provider: corosync_votequorum
Nodes: 13
Node ID: 0x00000001
Ring ID: 1.beff
Quorate: Yes
Votequorum information
----------------------
Expected votes: 14
Highest expected: 14
Total votes: 13
Quorum: 8
Flags: Quorate
Membership information
----------------------
Nodeid Votes Name
0x00000001 1 10.64.0.11 (local)
0x00000002 1 10.64.0.12
0x00000003 1 10.64.0.13
0x00000004 1 10.64.0.14
0x00000005 1 10.64.0.21
0x00000006 1 10.64.0.22
0x00000007 1 10.64.0.23
0x00000008 1 10.64.0.24
0x00000009 1 10.64.0.15
0x0000000a 1 10.64.0.16
0x0000000c 1 10.64.0.25
0x0000000d 1 10.64.0.26
0x0000000e 1 10.64.0.27
root@nypve01:/var/log# pvecm nodes
Membership information
----------------------
Nodeid Votes Name
1 1 nypve01 (local)
2 1 nypve02
3 1 nypve03
4 1 nypve04
5 1 sfpve01
6 1 sfpve02
7 1 sfpve03
8 1 sfpve04
9 1 nypve05
10 1 nypve06
12 1 sfpve05
13 1 sfpve06
14 1 sfpve07
root@nypve07:/var/log# pvecm status
Cluster information
-------------------
Name: svl-b-labs
Config Version: 16
Transport: knet
Secure auth: on
Quorum information
------------------
Date: Wed May 22 11:36:03 2024
Quorum provider: corosync_votequorum
Nodes: 1
Node ID: 0x0000000b
Ring ID: 1.c23f
Quorate: No
Votequorum information
----------------------
Expected votes: 14
Highest expected: 14
Total votes: 1
Quorum: 8 Activity blocked
Flags:
Membership information
----------------------
Nodeid Votes Name
0x0000000b 1 10.64.0.17 (local)
Last edited: