Hello,
We are running Proxmox VE 5.1 with Ceph here and are having serious performance issues.
Here are the results of running rados bench on our cluster:
Code:
root@XXXX~ # rados bench -p ceph-vm 10 write
hints = 1
Maintaining 16 concurrent writes of 4194304 bytes to objects of size 4194304 for up to 10 seconds or 0 objects
Object prefix: benchmark_data_XXXX_1506844
sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s)
0 0 0 0 0 0 - 0
1 16 44 28 111.991 112 0.214861 0.252659
2 16 59 43 85.9884 60 0.193858 0.23163
3 16 81 65 86.6555 88 0.34684 0.510268
4 16 94 78 77.99 52 0.147488 0.515977
5 16 105 89 71.191 44 0.146993 0.533072
6 16 138 122 81.3236 132 0.590707 0.704277
7 16 174 158 90.2751 144 0.300035 0.664261
8 16 191 175 87.4896 68 0.211359 0.620998
9 16 196 180 79.9903 20 0.145171 0.631462
10 16 233 217 86.7891 148 0.312222 0.683847
11 15 233 218 79.2631 4 0.14334 0.681367
Total time run: 11.421283
Total writes made: 233
Write size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 81.602
Stddev Bandwidth: 49.5037
Max bandwidth (MB/sec): 148
Min bandwidth (MB/sec): 4
Average IOPS: 20
Stddev IOPS: 12
Max IOPS: 37
Min IOPS: 1
Average Latency(s): 0.770668
Stddev Latency(s): 0.988464
Max latency(s): 5.16979
Min latency(s): 0.0893201
Cleaning up (deleting benchmark objects)
Removed 233 objects
Clean up completed and total clean up time :3.920963
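For comparison, we could also benchmark reads on the same pool; a rough sketch (assuming the pool name ceph-vm from above, and re-running the write test with --no-cleanup so there are objects left to read back):
Code:
rados bench -p ceph-vm 10 write --no-cleanup   # keep the benchmark objects
rados bench -p ceph-vm 10 seq                  # sequential reads of those objects
rados bench -p ceph-vm 10 rand                 # random reads
rados -p ceph-vm cleanup                       # remove the leftover benchmark objects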
We are using 8 TB 3.5" HDD drives: WDC WD80EFZX-68UW8N0
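To rule out a single slow disk dragging everything down, we could also benchmark individual OSDs; a sketch (OSD IDs taken from our CRUSH map below; as far as I know, ceph tell osd.N bench writes 1 GiB in 4 MiB blocks to that OSD and reports the rate):
Code:
ceph tell osd.1 bench     # one OSD from each host
ceph tell osd.6 bench
ceph tell osd.11 bench
# compare the reported bytes_per_sec values to spot an outlier disk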
The network connection between the servers is 10 GbE (iperf):
------------------------------------------------------------
Client connecting to 172.16.0.1, TCP port 5001
TCP window size: 85.0 KByte (default)
------------------------------------------------------------
[ 3] local 172.16.0.2 port 43056 connected with 172.16.0.1 port 5001
[ ID] Interval Transfer Bandwidth
[ 3] 0.0-10.0 sec 10.9 GBytes 9.37 Gbits/sec
(that is the Ceph network)
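Since replicated writes are latency-sensitive, round-trip time on that link matters as much as raw throughput; a quick sketch (same Ceph-network addresses as above, run from the second node):
Code:
ping -c 20 172.16.0.1    # expect well under a millisecond RTT and no loss on 10 GbE
ping -c 20 172.16.0.3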
Any tips on what to check or change?
We are using Ceph like this:
size 3 / min_size 2, pg_num 256
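For reference, those pool settings can be confirmed from the CLI; a sketch (assuming the pool name ceph-vm from the benchmark above):
Code:
ceph osd pool get ceph-vm size       # replica count (3)
ceph osd pool get ceph-vm min_size   # minimum replicas for I/O (2)
ceph osd pool get ceph-vm pg_num     # placement groups (256)
ceph osd df tree                     # per-OSD utilisation and PG counts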
Our ceph.conf:
Code:
[global]
auth client required = cephx
auth cluster required = cephx
auth service required = cephx
cluster network = 172.16.0.0/24
filestore xattr use omap = true
fsid = 217d7725-6ad8-4958-9ed6-94a39cd62482
keyring = /etc/pve/priv/$cluster.$name.keyring
osd journal size = 5120
osd pool default min size = 1
public network = 172.16.0.0/24
osd_max_object_name_len = 256
osd_max_object_namespace_len = 64
mon allow pool delete = true
[osd]
keyring = /var/lib/ceph/osd/ceph-$id/keyring
[mon.0]
host = XXXX
mon addr = 172.16.0.1:6789
[mon.1]
host = XXXXX
mon addr = 172.16.0.2:6789
[mon.2]
host = XXXXX
mon addr = 172.16.0.3:6789
#
[mds]
mds data = /var/lib/ceph/mds/mds.$id
keyring = /var/lib/ceph/mds/mds.$id/mds.$id.keyring
[mds.0]
host = 172.16.0.1
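The config still carries filestore-era options (filestore xattr use omap, osd journal size), so to be clear about what the OSDs actually run, something like this should show it; a sketch (OSD ID 1 as an example):
Code:
ceph osd metadata 1 | grep osd_objectstore      # "filestore" or "bluestore" for one OSD
ceph osd metadata | grep -E '"id"|objectstore'  # same for all OSDs at once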
Our CRUSH map:
Code:
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class hdd
device 10 osd.10 class hdd
device 11 osd.11 class hdd
device 12 osd.12 class hdd
device 13 osd.13 class hdd
device 14 osd.14 class hdd
device 15 osd.15 class hdd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root
# buckets
host XXXXXX {
    id -2    # do not change unnecessarily
    id -5 class hdd    # do not change unnecessarily
    # weight 36.361
    alg straw
    hash 0    # rjenkins1
    item osd.2 weight 7.271
    item osd.3 weight 7.271
    item osd.4 weight 7.271
    item osd.5 weight 7.271
    item osd.1 weight 7.277
}
host XXXXXX {
    id -3    # do not change unnecessarily
    id -6 class hdd    # do not change unnecessarily
    # weight 36.355
    alg straw
    hash 0    # rjenkins1
    item osd.6 weight 7.271
    item osd.7 weight 7.271
    item osd.8 weight 7.271
    item osd.9 weight 7.271
    item osd.10 weight 7.271
}
host XXXXXXXXXXXXXX {
    id -4    # do not change unnecessarily
    id -7 class hdd    # do not change unnecessarily
    # weight 36.355
    alg straw
    hash 0    # rjenkins1
    item osd.11 weight 7.271
    item osd.12 weight 7.271
    item osd.13 weight 7.271
    item osd.14 weight 7.271
    item osd.15 weight 7.271
}
root default {
    id -1    # do not change unnecessarily
    id -8 class hdd    # do not change unnecessarily
    # weight 109.067
    alg straw
    hash 0    # rjenkins1
    item XXXX weight 36.361
    item XXXX weight 36.353
    item XXXX weight 36.353
}
# rules
rule replicated_ruleset {
    id 0
    type replicated
    min_size 1
    max_size 10
    step take default
    step chooseleaf firstn 0 type host
    step emit
}
# end crush map
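(That map was dumped and decompiled roughly like this, in case anyone wants to suggest edits; the standard getcrushmap/crushtool workflow as far as I know:)
Code:
ceph osd getcrushmap -o crushmap.bin        # export the compiled CRUSH map
crushtool -d crushmap.bin -o crushmap.txt   # decompile to the text shown above
# after editing, recompile and inject it again:
# crushtool -c crushmap.txt -o crushmap.new
# ceph osd setcrushmap -i crushmap.new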
Thanks for any thoughts and help!