Hi.
We have a three-node cluster running Proxmox 7 with Ceph 16.
Each node has 4 SSD OSDs and 8 HDD OSDs.
We are doing a lot of testing right now to see how we can mitigate problems that arise, and one thing we noticed is that whenever we perform maintenance on one node we get write issues that cripple the VMs.
Pre-maintenance we run
ceph osd add-noout osd.X
for every OSD in that node.
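For reference, here is roughly how that step can be scripted per node. This is only a sketch: the host name pve-11 is just an example, and it assumes the ceph osd ls-tree subcommand is available (it lists the OSD ids under a CRUSH bucket); adjust to your own node names.
Code:
# node about to go into maintenance (example value)
HOST=pve-11

# set the per-OSD noout flag on every OSD that CRUSH places under that host
for OSD in $(ceph osd ls-tree "$HOST"); do
    ceph osd add-noout "osd.${OSD}"
done

# after maintenance, remove the flags again
for OSD in $(ceph osd ls-tree "$HOST"); do
    ceph osd rm-noout "osd.${OSD}"
done
Recent Ceph releases should also accept ceph osd set-group noout pve-11 / ceph osd unset-group noout pve-11 to flag a whole host in one go. With the OSDs flagged, one error message that we noticed was: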
Code:
pvestatd[3953]: unable to activate storage 'cephfs' - directory '/mnt/pve/cephfs' does not exist or is unreachable
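If anyone else runs into that message, the usual way to see whether CephFS itself is blocked or only the mount on that hypervisor is something along these lines (plain Ceph CLI plus standard tools, nothing specific to our setup):
Code:
# overall cluster health: undersized/degraded PGs, slow or blocked requests
ceph -s

# did an MDS standby take over, or is the filesystem without an active MDS?
ceph fs status

# is the kernel mount behind /mnt/pve/cephfs still present and responding?
findmnt /mnt/pve/cephfs
df -h /mnt/pve/cephfs
If df or findmnt hangs, the mount is still present but I/O to it is blocked, which would explain pvestatd reporting the directory as unreachable.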
This is how it looks in the Proxmox GUI a few minutes after a node has been brought down.
Here is the Ceph configuration and CRUSH map:
Code:
[global]
auth_client_required = cephx
auth_cluster_required = cephx
auth_service_required = cephx
cluster_network = 10.5.33.0/24
fsid = 776bf0f9-8aba-42c5-b1e4-e94824ba0b7a
mon_allow_pool_delete = true
mon_host = 10.5.33.113 10.5.33.112 10.5.33.111
ms_bind_ipv4 = true
ms_bind_ipv6 = false
osd_pool_default_min_size = 2
osd_pool_default_size = 2
public_network = 10.5.33.0/24
[client]
keyring = /etc/pve/priv/$cluster.$name.keyring
[mds]
keyring = /var/lib/ceph/mds/ceph-$id/keyring
[mds.pve-11]
host = pve-11
mds_standby_for_name = pve
[mds.pve-12]
host = pve-12
mds_standby_for_name = pve
[mds.pve-13]
host = pve-13
mds_standby_for_name = pve
[mon.pve-11]
public_addr = 10.5.33.111
[mon.pve-12]
public_addr = 10.5.33.112
[mon.pve-13]
public_addr = 10.5.33.113
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class hdd
device 10 osd.10 class hdd
device 11 osd.11 class hdd
device 12 osd.12 class hdd
device 13 osd.13 class ssd
device 14 osd.14 class ssd
device 15 osd.15 class ssd
device 16 osd.16 class ssd
device 17 osd.17 class ssd
device 18 osd.18 class ssd
device 19 osd.19 class ssd
device 20 osd.20 class ssd
device 21 osd.21 class hdd
device 22 osd.22 class hdd
device 23 osd.23 class hdd
device 24 osd.24 class ssd
device 25 osd.25 class ssd
device 26 osd.26 class ssd
device 27 osd.27 class ssd
device 28 osd.28 class hdd
device 29 osd.29 class hdd
device 30 osd.30 class hdd
device 31 osd.31 class hdd
device 32 osd.32 class hdd
device 33 osd.33 class hdd
device 34 osd.34 class hdd
device 35 osd.35 class hdd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root
# buckets
host pve-13 {
id -3 # do not change unnecessarily
id -4 class hdd # do not change unnecessarily
id -7 class ssd # do not change unnecessarily
# weight 11.644
alg straw2
hash 0 # rjenkins1
item osd.0 weight 1.092
item osd.1 weight 1.092
item osd.2 weight 1.092
item osd.3 weight 1.092
item osd.4 weight 1.092
item osd.5 weight 1.092
item osd.6 weight 1.092
item osd.7 weight 1.092
item osd.13 weight 0.728
item osd.18 weight 0.728
item osd.19 weight 0.728
item osd.20 weight 0.728
}
host pve-12 {
id -5 # do not change unnecessarily
id -6 class hdd # do not change unnecessarily
id -8 class ssd # do not change unnecessarily
# weight 11.644
alg straw2
hash 0 # rjenkins1
item osd.8 weight 1.092
item osd.9 weight 1.092
item osd.10 weight 1.092
item osd.11 weight 1.092
item osd.12 weight 1.092
item osd.14 weight 0.728
item osd.15 weight 0.728
item osd.16 weight 0.728
item osd.17 weight 0.728
item osd.21 weight 1.092
item osd.22 weight 1.092
item osd.23 weight 1.092
}
host pve-11 {
id -10 # do not change unnecessarily
id -11 class hdd # do not change unnecessarily
id -12 class ssd # do not change unnecessarily
# weight 11.644
alg straw2
hash 0 # rjenkins1
item osd.24 weight 0.728
item osd.25 weight 0.728
item osd.26 weight 0.728
item osd.27 weight 0.728
item osd.28 weight 1.092
item osd.29 weight 1.092
item osd.30 weight 1.092
item osd.31 weight 1.092
item osd.32 weight 1.092
item osd.33 weight 1.092
item osd.34 weight 1.092
item osd.35 weight 1.092
}
root default {
id -1 # do not change unnecessarily
id -2 class hdd # do not change unnecessarily
id -9 class ssd # do not change unnecessarily
# weight 34.931
alg straw2
hash 0 # rjenkins1
item pve-13 weight 11.644
item pve-12 weight 11.644
item pve-11 weight 11.644
}
# rules
rule replicated_rule {
id 0
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
rule replicated_ssd {
id 1
type replicated
min_size 1
max_size 10
step take default class ssd
step chooseleaf firstn 0 type host
step emit
}
rule replicated_hdd {
id 2
type replicated
min_size 1
max_size 10
step take default class hdd
step chooseleaf firstn 0 type host
step emit
}
# end crush map
EDIT: Noticed that the Ceph pool size was set to 2 instead of the default 3. After changing it back to 3 we seem to be able to take a node down for a longer period of time without the guests having any issues.
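For anyone wanting to do the same, this is roughly how the pool settings can be checked and put back; <pool> is a placeholder for the real pool name(s) shown by the first command:
Code:
# list all pools with their current size / min_size
ceph osd pool ls detail

# check and change a single pool (replace <pool> with the real name)
ceph osd pool get <pool> size
ceph osd pool set <pool> size 3
ceph osd pool set <pool> min_size 2
With size 3 and min_size 2, each PG still has two replicas available while one of the three hosts is down, so writes can continue, which lines up with what we are seeing after the change. Note that osd_pool_default_size = 2 in the ceph.conf above only affects newly created pools, so existing pools have to be changed explicitly.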