Hello!
I have several Proxmox clusters, separated geographically.
Each cluster contains pairs of servers sharing LVM over DRBD; the DRBD interlink uses Intel 10G Ethernet cards. All servers have top-level RAID controllers with BBU.
All servers have access to the pve-enterprise repository via a community subscription.
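For context, the storage on each pair is the usual LVM-on-top-of-DRBD layout; roughly like this (the VG name is just illustrative):
Code:
pvcreate /dev/drbd0            # DRBD device used as LVM physical volume
vgcreate drbdvg /dev/drbd0     # VG is then added in Proxmox as shared LVM storage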
Since an update in December, some guests sporadically get messages like the ones below, independent of filesystem type. Some VMs use xfs, some ext4.
Code:
[2015-02-14 16:37:06] end_request: I/O error, dev vda, sector 15763032
[2015-02-14 16:37:06] Buffer I/O error on device vda1, logical block 1970123
[2015-02-14 16:37:06] EXT4-fs warning (device vda1): ext4_end_bio:250: I/O error -5 writing to inode 398637 (offset 0 size 4096 starting block 1970380)
[2015-02-14 16:37:06] end_request: I/O error, dev vda, sector 15763064
[2015-02-14 16:37:06] Buffer I/O error on device vda1, logical block 1970127
[2015-02-14 16:37:06] EXT4-fs warning (device vda1): ext4_end_bio:250: I/O error -5 writing to inode 398637 (offset 16384 size 4096 starting block 1970384)
[2015-02-14 16:37:06] end_request: I/O error, dev vda, sector 15763144
[2015-02-14 16:37:06] Buffer I/O error on device vda1, logical block 1970137
[2015-02-14 16:37:06] EXT4-fs warning (device vda1): ext4_end_bio:250: I/O error -5 writing to inode 398637 (offset 57344 size 4096 starting block 1970394)
[2015-02-14 16:37:06] end_request: I/O error, dev vda, sector 15763176
[2015-02-14 16:37:06] Buffer I/O error on device vda1, logical block 1970141
[2015-02-14 16:37:06] EXT4-fs warning (device vda1): ext4_end_bio:250: I/O error -5 writing to inode 398637 (offset 73728 size 4096 starting block 1970398)
[2015-02-14 16:37:06] end_request: I/O error, dev vda, sector 15763256
[2015-02-14 16:37:06] Buffer I/O error on device vda1, logical block 1970151
[2015-02-14 16:37:06] EXT4-fs warning (device vda1): ext4_end_bio:250: I/O error -5 writing to inode 398637 (offset 114688 size 4096 starting block 1970408)
Filesystem checks don't find any corruption, yet some Windows VMs have suffered data loss. A filesystem check on the Windows VMs isn't able to find any inconsistencies either.
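If it is of any use, an online verify over the DRBD link can be triggered with the verify-alg from the config below, something like:
Code:
drbdadm verify r0                  # compares blocks between both nodes while online
cat /proc/drbd                     # shows verify progress and connection state
dmesg | grep -i "out of sync"      # mismatching blocks are logged by the kernel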
Code:
proxmox01:~# pveversion -v
proxmox-ve-2.6.32: 3.3-139 (running kernel: 2.6.32-34-pve)
pve-manager: 3.3-5 (running version: 3.3-5/bfebec03)
pve-kernel-2.6.32-32-pve: 2.6.32-136
pve-kernel-2.6.32-28-pve: 2.6.32-124
pve-kernel-2.6.32-30-pve: 2.6.32-130
pve-kernel-2.6.32-34-pve: 2.6.32-140
pve-kernel-2.6.32-26-pve: 2.6.32-114
pve-kernel-2.6.32-23-pve: 2.6.32-109
lvm2: 2.02.98-pve4
clvm: 2.02.98-pve4
corosync-pve: 1.4.7-1
openais-pve: 1.1.4-3
libqb0: 0.11.1-2
redhat-cluster-pve: 3.2.0-2
resource-agents-pve: 3.9.2-4
fence-agents-pve: 4.0.10-1
pve-cluster: 3.0-15
qemu-server: 3.3-3
pve-firmware: 1.1-3
libpve-common-perl: 3.0-19
libpve-access-control: 3.0-15
libpve-storage-perl: 3.0-25
pve-libspice-server1: 0.12.4-3
vncterm: 1.1-8
vzctl: 4.0-1pve6
vzprocps: 2.0.11-2
vzquota: 3.1-2
pve-qemu-kvm: 2.1-10
ksm-control-daemon: 1.1-1
glusterfs-client: 3.5.2-1
proxmox01:~# cat /etc/drbd.d/r0.res
resource r0 {
protocol C;
startup {
wfc-timeout 0; # non-zero wfc-timeout can be dangerous (http://forum.proxmox.com/threads/3465-Is-it-safe-to-use-wfc-timeout-in-DRBD-configuration)
degr-wfc-timeout 60;
become-primary-on both;
}
net {
cram-hmac-alg sha1;
shared-secret "lai8IezievuCh0eneiph0eetaigaiMee";
allow-two-primaries;
after-sb-0pri discard-zero-changes;
after-sb-1pri discard-secondary;
after-sb-2pri disconnect;
max-buffers 8000;
max-epoch-size 8000;
sndbuf-size 0;
}
syncer {
al-extents 3389;
verify-alg crc32c;
}
disk {
no-disk-barrier;
no-disk-flushes;
}
on proxmox01 {
device /dev/drbd0;
disk /dev/sdb;
address 192.168.1.147:7788;
meta-disk internal;
}
on proxmox02 {
device /dev/drbd0;
disk /dev/sdb;
address 192.168.1.148:7788;
meta-disk internal;
}
}
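One detail I keep wondering about: the disk section disables barriers and flushes because of the BBU-backed controllers. If that matters for debugging, the defaults (flushes enabled) can be restored by simply dropping the two options and running drbdadm adjust r0 on both nodes, i.e. the section would just become:
Code:
disk {
    # no-disk-barrier and no-disk-flushes removed for testing;
    # DRBD then falls back to its default flush/barrier handling
}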
Any ideas?