One of 3 cluster nodes "offline"

xprs (New Member), May 10, 2020
I created a cluster with 2 nodes and everything worked fine. Then I followed the same procedure to add one more node, but got stuck with the node showing as "offline".
[Attached screenshot: Screenshot 2020-05-10 at 11.17.44.png]

I think it might be something in the "affected" node's config.
I will provide as many details as I can.

proxmox - main node
xprs-prod-01 - second WORKING node
xprs-stage-01 - third NON-WORKING node
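
For context, joining a node to an existing cluster is normally done with pvecm. A minimal sketch, assuming the cluster was created on proxmox and the join is run from the new node (the address is the redacted one from the configs below):

Bash:
# Run once on the new node (xprs-stage-01) to join the existing cluster
pvecm add 193.178.xxx.xxx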

/etc/pve/.members

Bash:
root@proxmox:/var/log/pve# cat /etc/pve/.members
{
"nodename": "proxmox",
"version": 6,
"cluster": { "name": "xprs", "version": 7, "nodes": 3, "quorate": 1 },
"nodelist": {
  "proxmox": { "id": 1, "online": 1, "ip": "192.168.100.2"},
  "xprs-prod-01": { "id": 2, "online": 1, "ip": "51.83.xxx.xx"},
  "xprs-stage-01": { "id": 3, "online": 0}
  }
}

Bash:
root@xprs-prod-01:~# cat /etc/pve/.members
{
"nodename": "xprs-prod-01",
"version": 4,
"cluster": { "name": "xprs", "version": 7, "nodes": 3, "quorate": 1 },
"nodelist": {
  "proxmox": { "id": 1, "online": 1, "ip": "192.168.100.2"},
  "xprs-prod-01": { "id": 2, "online": 1, "ip": "51.83.xxx.xx"},
  "xprs-stage-01": { "id": 3, "online": 0}
  }
}

Bash:
root@xprs-stage-01:~# cat /etc/pve/.members
{
"nodename": "xprs-stage-01",
"version": 3,
"cluster": { "name": "xprs", "version": 7, "nodes": 3, "quorate": 0 },
"nodelist": {
  "proxmox": { "id": 1, "online": 0},
  "xprs-prod-01": { "id": 2, "online": 0},
  "xprs-stage-01": { "id": 3, "online": 1, "ip": "192.168.1.210"}
  }
}
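
For anyone hitting the same split view, the membership that pmxcfs writes into /etc/pve/.members can be cross-checked against corosync directly. A sketch of the usual checks, assuming a stock PVE 6 install (run on the affected node):

Bash:
# Quorum, votes and membership as the cluster stack sees them
pvecm status

# Lower-level quorum view straight from corosync
corosync-quorumtool -s

# Per-link connectivity of the corosync transport
corosync-cfgtool -s

# Make sure the cluster services themselves are up
systemctl status corosync pve-cluster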

/etc/pve/corosync.conf

Bash:
logging {
  debug: off
  to_syslog: yes
}

nodelist {
  node {
    name: proxmox
    nodeid: 1
    quorum_votes: 1
    ring0_addr: 193.178.xxx.xxx
  }
  node {
    name: xprs-prod-01
    nodeid: 2
    quorum_votes: 1
    ring0_addr: 51.83.xxx.xx
  }
  node {
    name: xprs-stage-01
    nodeid: 3
    quorum_votes: 1
    ring0_addr: 91.232.xxx.x
  }
}

quorum {
  provider: corosync_votequorum
}

totem {
  cluster_name: xprs
  config_version: 7
  interface {
    linknumber: 0
  }
  ip_version: ipv4-6
  secauth: on
  version: 2
}

The file is identical on xprs-prod-01 and xprs-stage-01 (/etc/pve is the shared pmxcfs, so all three nodes hold the same copy).
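
If the ring addresses in this file need to change, the edit procedure from the PVE admin guide is worth following. A sketch; the config_version bump inside totem {} is what makes the change propagate:

Bash:
# Work on a copy so a half-edited file is never activated
cp /etc/pve/corosync.conf /etc/pve/corosync.conf.new
nano /etc/pve/corosync.conf.new

# Increment config_version in the totem section before saving, then activate:
mv /etc/pve/corosync.conf.new /etc/pve/corosync.conf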

/etc/hosts

On proxmox:

Bash:
127.0.0.1 localhost.localdomain localhost
192.168.100.2 proxmox.domain.name proxmox
192.168.100.7 mail.domain.name
51.83.xxx.xx xprs-prod-01.domain.name xprs-prod-01
91.232.xxx.x xprs-stage-01.domain.name xprs-stage-01
# The following lines are desirable for IPv6 capable hosts

::1     ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
ff02::3 ip6-allhosts

On xprs-prod-01:

Bash:
51.83.xxx.xx xprs-prod-01.domain.name xprs-prod-01
91.232.xxx.x xprs-stage-01.domain.name xprs-stage-01
193.178.xxx.xxx proxmox.domain.name proxmox

On xprs-stage-01:

Bash:
127.0.0.1 localhost.localdomain localhost
192.168.1.210 xprs-stage-01.domain.name xprs-stage-01
51.83.xxx.xx xprs-prod-01.domain.name xprs-prod-01
193.178.xxx.xxx proxmox.domain.name proxmox
# The following lines are desirable for IPv6 capable hosts

::1     ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
ff02::3 ip6-allhosts
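
Note that xprs-stage-01 resolves itself to the private 192.168.1.210 while its corosync ring0_addr is the public 91.232.xxx.x; this kind of mismatch is worth checking on every node. A sketch using the names from above:

Bash:
# Run on each node; all three should return the same, reachable addresses
getent hosts proxmox xprs-prod-01 xprs-stage-01

# Reachability of the stage node's corosync ring address
ping -c 3 91.232.xxx.x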

/etc/network/interfaces

On proxmox:

Bash:
auto lo
iface lo inet loopback

auto eno1
iface eno1 inet static
    address 193.178.xxx.xxx/23
    netmask 255.255.254.0
    gateway 193.178.xxx.1

iface eno2 inet manual

auto vmbr0
iface vmbr0 inet static
    address 192.168.100.2/24
    netmask 255.255.255.0
    bridge-ports none
    bridge-stp off
    bridge-fd 0
    post-up   echo 1 > /proc/sys/net/ipv4/ip_forward
    post-up   iptables -t nat -A POSTROUTING -s '192.168.100.0/24' -o eno1 -j MASQUERADE
    post-down iptables -t nat -D POSTROUTING -s '192.168.100.0/24' -o eno1 -j MASQUERADE
    post-up   iptables -t raw -I PREROUTING -i fwbr+ -j CT --zone 1
    post-down iptables -t raw -D PREROUTING -i fwbr+ -j CT --zone 1

On xprs-prod-01:

Bash:
auto lo
iface lo inet loopback

iface eno1 inet manual

iface eno2 inet manual

auto vmbr0
iface vmbr0 inet static
    address 192.168.1.2/24
    bridge-ports none
    bridge-stp off
    bridge-fd 0
    post-up echo 1 > /proc/sys/net/ipv4/ip_forward
        post-up iptables -t nat -A POSTROUTING -s '192.168.1.0/24' -o eno1 -j MASQUERADE
        post-down iptables -t nat -D POSTROUTING -s '192.168.1.0/24' -o eno1 -j MASQUERADE
        post-up   iptables -t raw -I PREROUTING -i fwbr+ -j CT --zone 1
        post-down iptables -t raw -D PREROUTING -i fwbr+ -j CT --zone 1

On xprs-stage-01:

Bash:
auto lo
iface lo inet loopback

iface eno1 inet manual

auto vmbr0
iface vmbr0 inet static
    address 192.168.1.210
    netmask 255.255.255.0
    gateway 192.168.1.1
        bridge-ports eno1
        bridge-stp off
        bridge-fd 0
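
After changing /etc/network/interfaces, the addresses have to be applied before corosync can bind to them. A sketch, assuming ifupdown2 is installed (otherwise a reboot is the safer route):

Bash:
# Apply interface changes in place (ifupdown2 only)
ifreload -a

# Confirm the expected addresses are actually configured
ip -br addr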

So my xprs-stage-01 node is showing offline with a connection error, and I need to fix that.
I will provide any further information ASAP if needed. Thanks in advance for your help.
 
Hi,

your network settings do not match your corosync IPs.
You are also using public IPs for corosync.
Corosync depends on low latency, so is your public network fast enough?
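
For anyone weighing a similar setup: corosync is designed for LAN-grade latency, so it is worth measuring round-trip times between the ring addresses before clustering over public links. A sketch:

Bash:
# Measure latency from one node to another node's ring0_addr
ping -c 20 51.83.xxx.xx

# Watch the corosync log for retransmits and link down/up events
journalctl -u corosync --since "-1h"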
 
Thank you very much for your answer. The main problem was in my hosts files and corosync files. After I synced all of them to the one (public) IP of my damaged node and restarted all Proxmox services, everything was fixed. Again, thank you for your effort.
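
The exact commands aren't listed above; on a standard install, restarting the clustering stack after a config fix typically looks like this sketch (run on the repaired node):

Bash:
# Restart the membership layer and the cluster filesystem
systemctl restart corosync pve-cluster

# Then confirm the node rejoined and the cluster is quorate
pvecm status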
 
