Hi,
I have the following setup:
4 x Dell R740xD
512G RAM
6x10TB SATA
2x10G LACP bond (20G aggregate) to a MikroTik CRS3xx - storage network
2x1G bond to a Juniper switch - VM traffic
The Ceph configuration is the default one:
[global]
auth_client_required = cephx
auth_cluster_required = cephx
auth_service_required = cephx
cluster_network = 10.3.3.1/24
fsid = 73b00f31-65ae-4c77-86fd-4a7791cacfa1
mon_allow_pool_delete = true
mon_host = 10.2.2.2 10.2.2.3 10.2.2.4 10.2.2.1
ms_bind_ipv4 = true
ms_bind_ipv6 = false
osd_pool_default_min_size = 2
osd_pool_default_size = 3
public_network = 10.2.2.1/24
[client]
keyring = /etc/pve/priv/$cluster.$name.keyring
[mon.pvedell1]
public_addr = 10.2.2.1
[mon.pvedell2]
public_addr = 10.2.2.2
[mon.pvedell3]
public_addr = 10.2.2.3
[mon.pvedell4]
public_addr = 10.2.2.4
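If it helps, I can also post the output of the commands below; they should show how the four hosts and their OSDs are laid out and whether anything deviates from the defaults above (standard Ceph CLI, nothing cluster-specific assumed):
ceph -s            # overall health and current client throughput
ceph osd tree      # host/OSD layout and CRUSH weights
ceph osd df        # per-OSD utilisation and PG counts
ceph config dump   # settings stored in the mon config database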
Network:
auto lo
iface lo inet loopback
auto eno3
iface eno3 inet manual
bond-master bond0
auto eno4
iface eno4 inet manual
bond-master bond0
auto eno1np0
iface eno1np0 inet manual
mtu 10218
auto eno2np1
iface eno2np1 inet manual
mtu 10218
auto bond0
iface bond0 inet manual
bond-slaves eno3 eno4
bond-miimon 100
bond-mode 802.3ad
bond-downdelay 200
bond-updelay 200
auto bond0.2302
iface bond0.2302 inet manual
auto bond0.2327
iface bond0.2327 inet static
address 10.2.2.1/24
#cluster backend
auto bond1
iface bond1 inet static
address 10.3.3.1/24
bond-slaves eno1np0 eno2np1
bond-miimon 100
bond-mode 802.3ad
bond-xmit-hash-policy layer2+3
mtu 10218
#storage
auto vmbr0
iface vmbr0 inet static
address 10.230.10.45/24
gateway 10.230.10.1
bridge-ports bond0.2302
bridge-stp off
bridge-fd 0
bridge-vlan-aware yes
bridge-vids 2-4094
#Cluj-servers1
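To rule out the storage network itself, I can also verify that the 2x10G LACP bond negotiated correctly and that the jumbo MTU works end to end. 10.3.3.2 below is just an example peer (another node's storage address), and 10190 is the largest ICMP payload that fits into an MTU of 10218:
cat /proc/net/bonding/bond1          # LACP partner state and per-slave link status
ip -d link show bond1                # effective MTU and bond details
ping -M do -s 10190 -c 3 10.3.3.2    # don't-fragment ping to test the jumbo path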
RADOS benchmark:
root@pvedell1:~# rados bench -p scbench 10 write --no-cleanup
hints = 1
Maintaining 16 concurrent writes of 4194304 bytes to objects of size 4194304 for up to 10 seconds or 0 objects
Object prefix: benchmark_data_pvedell1_711238
sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s)
0 0 0 0 0 0 - 0
1 16 46 30 119.994 120 0.573648 0.389335
2 16 77 61 121.991 124 0.2369 0.393485
3 16 111 95 126.656 136 1.5376 0.431647
4 16 147 131 130.988 144 0.280869 0.44987
5 16 187 171 136.787 160 0.237824 0.437398
6 16 220 204 135.987 132 0.394379 0.440604
7 16 250 234 133.702 120 0.763875 0.450026
8 16 280 264 131.987 120 0.32537 0.454567
9 16 317 301 133.765 148 0.433321 0.458329
10 16 354 338 135.187 148 0.468543 0.456669
Total time run: 10.4526
Total writes made: 354
Write size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 135.469
Stddev Bandwidth: 14.3357
Max bandwidth (MB/sec): 160
Min bandwidth (MB/sec): 120
Average IOPS: 33
Stddev IOPS: 3.58391
Max IOPS: 40
Min IOPS: 30
Average Latency(s): 0.463095
Stddev Latency(s): 0.370647
Max latency(s): 1.80769
Min latency(s): 0.0510852
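So far I only ran the write test; since --no-cleanup keeps the benchmark objects, the matching read tests (and the cleanup afterwards) would be:
rados bench -p scbench 10 seq    # sequential reads of the benchmark objects
rados bench -p scbench 10 rand   # random reads
rados -p scbench cleanup         # remove the benchmark objects when done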
Individual OSD Bench:
root@pvedell1:~# ceph tell osd.0 bench
{
"bytes_written": 1073741824,
"blocksize": 4194304,
"elapsed_sec": 19.446005580000001,
"bytes_per_sec": 55216574.919855595,
"iops": 13.164657335246943
}
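For comparison with the ~55 MB/s that the OSD bench reports, a raw sequential read from one of the underlying drives would show what the disk itself can do. It is read-only, so it should not harm a live OSD, though it does add load; /dev/sdX is a placeholder for the actual device:
fio --name=raw-read --filename=/dev/sdX --readonly --ioengine=libaio \
    --direct=1 --rw=read --bs=4M --iodepth=16 --runtime=30 --time_based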
VMs with their disks on Ceph are slow; everything feels sluggish.
How can I improve the cluster's performance? (Apart from using SSDs, which are already planned for an additional pool.)
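To put a number on "sluggish", I can also run a small fio test inside one of the affected VMs; the test file name and sizes below are arbitrary:
fio --name=vm-randwrite --filename=/root/fio-testfile --size=4G \
    --ioengine=libaio --direct=1 --rw=randwrite --bs=4k --iodepth=32 \
    --runtime=60 --time_based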