Hello, everyone.
After a lot of reading on the web and trying to tune Ceph, we were not able to make it HA. If one of the nodes is turned off, after some time we get partition corruption inside the VMs.
The idea is that if a node (2 OSDs) goes down, or if 2 OSDs on different nodes go down, the VMs would keep working without data loss.
We are running a Proxmox cluster with Ceph as storage. Our Ceph cluster currently has 7 nodes, each node with 2 OSDs (2 TB HDD), for a total of 14 OSDs.
Some of them have their journal on a DC SSD, the others use the default journal location on the OSD HDD.
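If it helps, these are the commands that show how the OSDs are spread over the hosts and which CRUSH rule is used (as far as we understand, the default replicated rule distributes the copies per host):
root@ceph07:~# ceph osd tree             # which OSDs sit on which host, and their weights
root@ceph07:~# ceph osd crush rule dump  # the rule should place replicas per host, not per OSD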
The software versions are:
root@ceph07:~# pveversion -v
proxmox-ve: 4.4-79 (running kernel: 4.4.35-2-pve)
pve-manager: 4.4-12 (running version: 4.4-12/e71b7a74)
pve-kernel-4.4.35-1-pve: 4.4.35-77
pve-kernel-4.2.6-1-pve: 4.2.6-36
pve-kernel-4.4.35-2-pve: 4.4.35-79
lvm2: 2.02.116-pve3
corosync-pve: 2.4.0-1
libqb0: 1.0-1
pve-cluster: 4.0-48
qemu-server: 4.0-107
pve-firmware: 1.1-10
libpve-common-perl: 4.0-90
libpve-access-control: 4.0-23
libpve-storage-perl: 4.0-73
pve-libspice-server1: 0.12.8-1
vncterm: 1.2-1
pve-docs: 4.4-3
pve-qemu-kvm: 2.7.1-1
pve-container: 1.0-93
pve-firewall: 2.0-33
pve-ha-manager: 1.0-40
ksm-control-daemon: 1.2-1
glusterfs-client: 3.5.2-2+deb8u3
lxc-pve: 2.0.7-1
lxcfs: 2.0.6-pve1
criu: 1.6.0-1
novnc-pve: 0.5-8
smartmontools: 6.5+svn4324-1~pve80
zfsutils: 0.6.5.8-pve14~bpo80
ceph: 10.2.5-1~bpo80+1
Ceph was upgraded from Hammer to Jewel.
The Ceph pool uses size 3, min_size 2, pg_num 512.
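To confirm these values on the running cluster (the pool name "rbd" below is only an example, ours may differ):
root@ceph07:~# ceph osd dump | grep pool      # shows size, min_size and pg_num for every pool
root@ceph07:~# ceph osd pool get rbd size
root@ceph07:~# ceph osd pool get rbd min_size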
Our ceph.conf contains:
[global]
auth client required = cephx
auth cluster required = cephx
auth service required = cephx
cluster network = 10.10.10.0/24
filestore xattr use omap = true
fsid = b959b08a-0827-4840-89b0-da9f40d6ff22
keyring = /etc/pve/priv/$cluster.$name.keyring
log max recent = 250000
osd journal size = 5120
osd map message max = 10
osd max object name len = 256
osd max object namespace len = 64
osd pool default min size = 2
public network = 10.10.10.0/24
[client]
rbd cache = true
rbd cache max dirty = 67108864
rbd cache max dirty age = 5
rbd cache size = 134217728
[osd]
osd disk thread ioprio class = idle
osd disk thread ioprio priority = 7
filestore max sync interval = 15
filestore min sync interval = 10
filestore op threads = 2
filestore queue committing max bytes = 10485760000
filestore queue committing max ops = 5000
filestore queue max bytes = 10485760
filestore queue max ops = 25000
filestore xattr use omap = true
keyring = /var/lib/ceph/osd/ceph-$id/keyring
max open files = 131072
osd client message size cap = 524288000
osd deep scrub stride = 1058576
osd disk threads = 2
osd map cache bl size = 50
osd map cache size = 500
osd map max advance = 10
osd map share max epochs = 10
osd max backfills = 1
osd max write size = 180
osd pg epoch persisted max stale = 10
osd recovery max active = 1
osd recovery max single start = 1
osd recovery op priority = 1
[mon.1]
host = ceph02
mon addr = 10.10.10.2:6789
[mon.2]
host = ceph04
mon addr = 10.10.10.4:6789
[mon.0]
host = ceph03
mon addr = 10.10.10.3:6789
[osd.9]
osd journal = /dev/disk/by-partlabel/journal-9
osd journal size = 10240
[osd.6]
osd journal = /dev/disk/by-partlabel/journal-6
osd journal size = 10240
[osd.2]
osd journal = /dev/disk/by-partlabel/journal-2
osd journal size = 10240
[osd.1]
osd journal = /dev/disk/by-partlabel/journal-1
osd journal size = 10240
[osd.0]
osd journal = /dev/disk/by-partlabel/journal-0
osd journal size = 10240
[osd.8]
osd journal = /dev/disk/by-partlabel/journal-8
osd journal size = 10240
[osd.7]
osd journal = /dev/disk/by-partlabel/journal-7
osd journal size = 10240
[osd.11]
osd journal = /dev/disk/by-partlabel/journal-11
osd journal size = 10240
[osd.10]
osd journal = /dev/disk/by-partlabel/journal-10
osd journal size = 10240
[osd.3]
osd journal = /dev/disk/by-partlabel/journal-3
osd journal size = 10240
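In case the running daemons picked up different values than the file above, the live configuration of a single OSD can be dumped via the admin socket (osd.0 is just an example), and the cluster can be watched while a node is powered off:
root@ceph07:~# ceph daemon osd.0 config show | grep -E 'journal|recovery|backfill'
root@ceph07:~# ceph -s               # overall health and degraded/recovering PGs during the test
root@ceph07:~# ceph health detail    # lists PGs that are degraded, stuck or undersized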
Can someone please give us some suggestions on how to make this work?
Thank you.