Hi All
Sorry to bother you again, gents, but I have an issue that I have been researching on forums for the last 3 days trying to resolve. I had a power failure and all my servers went down at the same time. I ended up with a drbd setup like this:
root@jt1:~# /etc/init.d/drbd status
drbd driver loaded OK; device status:
version: 8.3.13 (api:88/proto:86-96)
GIT-hash: 83ca112086600faacab2f157bc5a9324f7bd7f77 build by root@sighted, 2012-10-09 12:47:51
m:res cs ro ds p mounted fstype
0:r0 WFConnection Secondary/Unknown Diskless/DUnknown C
root@jt2:~# /etc/init.d/drbd status
drbd driver loaded OK; device status:
version: 8.3.13 (api:88/proto:86-96)
GIT-hash: 83ca112086600faacab2f157bc5a9324f7bd7f77 build by root@sighted, 2012-10-09 12:47:51
m:res cs ro ds p mounted fstype
0:r0 Unconfigured
If I bring drbd up on the secondary node (jt2), then its output is exactly the same as jt1's. I have tried many things, and my next step would be to just recreate everything, since this is a test setup — but I would like to know how to fix this issue in case it happens again in production. Any help is appreciated. Here are my settings:
root@jt1:~# cat /etc/drbd.d/r0.res
resource r0 {
device /dev/drbd0;
disk /dev/sdb1;
meta-disk internal;
on jt1 {
address 10.10.10.1:7789;
}
on jt2 {
address 10.10.10.2:7789;
}
}
root@jt1:~# cat /etc/drbd.d/global_common.conf
global {
usage-count ask;
# minor-count dialog-refresh disable-ip-verification
}
common {
protocol C;
handlers {
split-brain "/usr/lib/drbd/notify-split-brain.sh root";
# The following 3 handlers were disabled due to #576511.
# Please check the DRBD manual and enable them, if they make sense in your setup.
# pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
# pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
# local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
# fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
# split-brain "/usr/lib/drbd/notify-split-brain.sh root";
# out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root";
# before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
# after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
}
startup {
wfc-timeout 60;
degr-wfc-timeout 60;
become-primary-on both;
# wfc-timeout degr-wfc-timeout outdated-wfc-timeout wait-after-sb
}
disk {
on-io-error detach;
# on-io-error fencing use-bmbv no-disk-barrier no-disk-flushes
# no-disk-drain no-md-flushes max-bio-bvecs
}
net {
cram-hmac-alg sha1;
shared-secret "3KkanieH00RN13w@ts3JY!?";
allow-two-primaries;
after-sb-0pri discard-zero-changes;
after-sb-1pri discard-secondary;
after-sb-2pri disconnect;
# sndbuf-size rcvbuf-size timeout connect-int ping-int ping-timeout max-buffers
# max-epoch-size ko-count allow-two-primaries cram-hmac-alg shared-secret
# after-sb-0pri after-sb-1pri after-sb-2pri data-integrity-alg no-tcp-cork
}
syncer {
rate 200M;
verify-alg md5;
# rate after al-extents use-rle cpu-mask verify-alg csums-alg
}
}
root@jt1:~# vgscan
Reading all physical volumes. This may take a while...
Found volume group "drbdr0" using metadata type lvm2
Found volume group "pve" using metadata type lvm2
I am happy to send other details through upon request. Thanks again.
Sorry to bother you again, gents, but I have an issue that I have been researching on forums for the last 3 days trying to resolve. I had a power failure and all my servers went down at the same time. I ended up with a drbd setup like this:
root@jt1:~# /etc/init.d/drbd status
drbd driver loaded OK; device status:
version: 8.3.13 (api:88/proto:86-96)
GIT-hash: 83ca112086600faacab2f157bc5a9324f7bd7f77 build by root@sighted, 2012-10-09 12:47:51
m:res cs ro ds p mounted fstype
0:r0 WFConnection Secondary/Unknown Diskless/DUnknown C
root@jt2:~# /etc/init.d/drbd status
drbd driver loaded OK; device status:
version: 8.3.13 (api:88/proto:86-96)
GIT-hash: 83ca112086600faacab2f157bc5a9324f7bd7f77 build by root@sighted, 2012-10-09 12:47:51
m:res cs ro ds p mounted fstype
0:r0 Unconfigured
If I bring drbd up on the secondary node (jt2), then its output is exactly the same as jt1's. I have tried many things, and my next step would be to just recreate everything, since this is a test setup — but I would like to know how to fix this issue in case it happens again in production. Any help is appreciated. Here are my settings:
root@jt1:~# cat /etc/drbd.d/r0.res
resource r0 {
device /dev/drbd0;
disk /dev/sdb1;
meta-disk internal;
on jt1 {
address 10.10.10.1:7789;
}
on jt2 {
address 10.10.10.2:7789;
}
}
root@jt1:~# cat /etc/drbd.d/global_common.conf
global {
usage-count ask;
# minor-count dialog-refresh disable-ip-verification
}
common {
protocol C;
handlers {
split-brain "/usr/lib/drbd/notify-split-brain.sh root";
# The following 3 handlers were disabled due to #576511.
# Please check the DRBD manual and enable them, if they make sense in your setup.
# pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
# pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
# local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
# fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
# split-brain "/usr/lib/drbd/notify-split-brain.sh root";
# out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root";
# before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
# after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
}
startup {
wfc-timeout 60;
degr-wfc-timeout 60;
become-primary-on both;
# wfc-timeout degr-wfc-timeout outdated-wfc-timeout wait-after-sb
}
disk {
on-io-error detach;
# on-io-error fencing use-bmbv no-disk-barrier no-disk-flushes
# no-disk-drain no-md-flushes max-bio-bvecs
}
net {
cram-hmac-alg sha1;
shared-secret "3KkanieH00RN13w@ts3JY!?";
allow-two-primaries;
after-sb-0pri discard-zero-changes;
after-sb-1pri discard-secondary;
after-sb-2pri disconnect;
# sndbuf-size rcvbuf-size timeout connect-int ping-int ping-timeout max-buffers
# max-epoch-size ko-count allow-two-primaries cram-hmac-alg shared-secret
# after-sb-0pri after-sb-1pri after-sb-2pri data-integrity-alg no-tcp-cork
}
syncer {
rate 200M;
verify-alg md5;
# rate after al-extents use-rle cpu-mask verify-alg csums-alg
}
}
root@jt1:~# vgscan
Reading all physical volumes. This may take a while...
Found volume group "drbdr0" using metadata type lvm2
Found volume group "pve" using metadata type lvm2
I am happy to send other details through upon request. Thanks again.