Hi,
I had a cluster with 7 nodes - one of them died so I removed it
Now, for whatever reason, I cannot join one of the remaining nodes to the cluster.
Running "clustat" on it reports "Inquorate", although the same command on any other node reports "Quorate".
Note:
I rebooted it a few times with no effect, and the line FENCE_JOIN="yes" is uncommented in /etc/default/redhat-cluster-pve.
Here is some information (blh02-11 is the node with the problem):
root@blh02-11:~# clustat
Cluster Status for bl02-cluster01 @ Fri Nov 9 16:02:48 2012
Member Status: Inquorate
Member Name ID Status
------ ---- ---- ------
blh02-14 1 Offline
blh02-10 3 Offline
blh02-11 4 Online, Local
blh02-12 5 Offline
blh02-08 6 Offline
blh02-07 7 Offline
root@blh02-07:~# fence_tool -n ls
fence domain
member count 5
victim count 0
victim now 0
master nodeid 1
wait state none
members 1 3 5 6 7
all nodes
nodeid 1 member 1 victim 0 last fence master 2 how agent
nodeid 2 member 0 victim 0 last fence master 1 how agent
nodeid 3 member 1 victim 0 last fence master 1 how agent
nodeid 4 member 0 victim 0 last fence master 2 how agent
nodeid 5 member 1 victim 0 last fence master 0 how none
nodeid 6 member 1 victim 0 last fence master 0 how none
nodeid 7 member 1 victim 0 last fence master 1 how agent
root@blh02-07:~# clustat
Cluster Status for bl02-cluster01 @ Fri Nov 9 16:03:05 2012
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
blh02-14 1 Online, rgmanager
blh02-10 3 Online, rgmanager
blh02-11 4 Online
blh02-12 5 Online, rgmanager
blh02-08 6 Online, rgmanager
blh02-07 7 Online, Local, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
pvevm:302 blh02-12 started
pvevm:304 blh02-10 started
pvevm:305 blh02-14 started
pvevm:306 blh02-10 started
pvevm:307 blh02-10 started
pvevm:308 blh02-10 started
pvevm:309 blh02-10 started
pvevm:310 blh02-10 started
pvevm:311 blh02-10 started
pvevm:312 blh02-10 started
pvevm:315 blh02-12 started
pvevm:316 blh02-10 started
pvevm:317 blh02-10 started
pvevm:318 blh02-10 started
pvevm:320 blh02-10 started
pvevm:321 blh02-08 started
pvevm:322 blh02-08 started
pvevm:324 blh02-07 started
pvevm:325 blh02-12 started
pvevm:327 blh02-08 started
pvevm:328 blh02-12 started
pvevm:329 blh02-14 started
pvevm:331 blh02-14 started
pvevm:335 blh02-12 started
pvevm:336 blh02-07 started
pvevm:338 (blh02-07) failed
pvevm:339 blh02-08 started
pvevm:340 blh02-07 started
pvevm:341 blh02-12 started
pvevm:342 blh02-12 started
pvevm:343 blh02-12 started
pvevm:346 blh02-12 started
pvevm:500 blh02-12 started
pvevm:501 blh02-12 started
pvevm:503 blh02-10 started
cat /etc/pve/cluster.conf
<?xml version="1.0"?>
<cluster config_version="68" name="bl02-cluster01">
<cman expected_votes="4" keyfile="/var/lib/pve-cluster/corosync.authkey"/>
<fence_daemon clean_start="0" post_fail_delay="0" post_join_delay="3"/>
<clusternodes>
<clusternode name="blh02-14" nodeid="1" votes="1">
<fence>
<method name="1">
<device action="reboot" name="bla02-14"/>
</method>
</fence>
</clusternode>
<clusternode name="blh02-10" nodeid="3" votes="1">
<fence>
<method name="1">
<device action="reboot" name="bla02-10"/>
</method>
</fence>
</clusternode>
<clusternode name="blh02-11" nodeid="4" votes="1">
<fence>
<method name="1">
<device action="reboot" name="bla02-11"/>
</method>
</fence>
</clusternode>
<clusternode name="blh02-12" nodeid="5" votes="1">
<fence>
<method name="1">
<device action="reboot" name="bla02-12"/>
</method>
</fence>
</clusternode>
<clusternode name="blh02-08" nodeid="6" votes="1">
<fence>
<method name="1">
<device action="reboot" name="bla02-08"/>
</method>
</fence>
</clusternode>
<clusternode name="blh02-07" nodeid="7" votes="1">
<fence>
<method name="1">
<device action="reboot" name="bla02-07"/>
</method>
</fence>
</clusternode>
</clusternodes>
<fencedevices>
.................................. removed ..........................
I had a cluster with 7 nodes - one of them died so I removed it
Now, for whatever reason, I cannot join one of the remaining nodes to the cluster.
Running "clustat" on it reports "Inquorate", although the same command on any other node reports "Quorate".
Note:
I rebooted it a few times with no effect, and the line FENCE_JOIN="yes" is uncommented in /etc/default/redhat-cluster-pve.
Here is some information (blh02-11 is the node with the problem):
root@blh02-11:~# clustat
Cluster Status for bl02-cluster01 @ Fri Nov 9 16:02:48 2012
Member Status: Inquorate
Member Name ID Status
------ ---- ---- ------
blh02-14 1 Offline
blh02-10 3 Offline
blh02-11 4 Online, Local
blh02-12 5 Offline
blh02-08 6 Offline
blh02-07 7 Offline
root@blh02-07:~# fence_tool -n ls
fence domain
member count 5
victim count 0
victim now 0
master nodeid 1
wait state none
members 1 3 5 6 7
all nodes
nodeid 1 member 1 victim 0 last fence master 2 how agent
nodeid 2 member 0 victim 0 last fence master 1 how agent
nodeid 3 member 1 victim 0 last fence master 1 how agent
nodeid 4 member 0 victim 0 last fence master 2 how agent
nodeid 5 member 1 victim 0 last fence master 0 how none
nodeid 6 member 1 victim 0 last fence master 0 how none
nodeid 7 member 1 victim 0 last fence master 1 how agent
root@blh02-07:~# clustat
Cluster Status for bl02-cluster01 @ Fri Nov 9 16:03:05 2012
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
blh02-14 1 Online, rgmanager
blh02-10 3 Online, rgmanager
blh02-11 4 Online
blh02-12 5 Online, rgmanager
blh02-08 6 Online, rgmanager
blh02-07 7 Online, Local, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
pvevm:302 blh02-12 started
pvevm:304 blh02-10 started
pvevm:305 blh02-14 started
pvevm:306 blh02-10 started
pvevm:307 blh02-10 started
pvevm:308 blh02-10 started
pvevm:309 blh02-10 started
pvevm:310 blh02-10 started
pvevm:311 blh02-10 started
pvevm:312 blh02-10 started
pvevm:315 blh02-12 started
pvevm:316 blh02-10 started
pvevm:317 blh02-10 started
pvevm:318 blh02-10 started
pvevm:320 blh02-10 started
pvevm:321 blh02-08 started
pvevm:322 blh02-08 started
pvevm:324 blh02-07 started
pvevm:325 blh02-12 started
pvevm:327 blh02-08 started
pvevm:328 blh02-12 started
pvevm:329 blh02-14 started
pvevm:331 blh02-14 started
pvevm:335 blh02-12 started
pvevm:336 blh02-07 started
pvevm:338 (blh02-07) failed
pvevm:339 blh02-08 started
pvevm:340 blh02-07 started
pvevm:341 blh02-12 started
pvevm:342 blh02-12 started
pvevm:343 blh02-12 started
pvevm:346 blh02-12 started
pvevm:500 blh02-12 started
pvevm:501 blh02-12 started
pvevm:503 blh02-10 started
cat /etc/pve/cluster.conf
<?xml version="1.0"?>
<cluster config_version="68" name="bl02-cluster01">
<cman expected_votes="4" keyfile="/var/lib/pve-cluster/corosync.authkey"/>
<fence_daemon clean_start="0" post_fail_delay="0" post_join_delay="3"/>
<clusternodes>
<clusternode name="blh02-14" nodeid="1" votes="1">
<fence>
<method name="1">
<device action="reboot" name="bla02-14"/>
</method>
</fence>
</clusternode>
<clusternode name="blh02-10" nodeid="3" votes="1">
<fence>
<method name="1">
<device action="reboot" name="bla02-10"/>
</method>
</fence>
</clusternode>
<clusternode name="blh02-11" nodeid="4" votes="1">
<fence>
<method name="1">
<device action="reboot" name="bla02-11"/>
</method>
</fence>
</clusternode>
<clusternode name="blh02-12" nodeid="5" votes="1">
<fence>
<method name="1">
<device action="reboot" name="bla02-12"/>
</method>
</fence>
</clusternode>
<clusternode name="blh02-08" nodeid="6" votes="1">
<fence>
<method name="1">
<device action="reboot" name="bla02-08"/>
</method>
</fence>
</clusternode>
<clusternode name="blh02-07" nodeid="7" votes="1">
<fence>
<method name="1">
<device action="reboot" name="bla02-07"/>
</method>
</fence>
</clusternode>
</clusternodes>
<fencedevices>
.................................. removed ..........................