Unfortunately, I could not reproduce the tests with those numbers, but the new tests also show very poor performance.
root@dtln-ceph01:~# fio --filename=/dev/rbd0 --direct=1 --rw=randwrite --ioengine=libaio --bs=4k --iodepth=16 --runtime=60 --group_reporting --name=4ktest
4ktest: (g=0): rw=randwrite, bs=4K-4K/4K-4K/4K-4K, ioengine=libaio, iodepth=16
fio-2.16
Starting 1 process
Jobs: 1 (f=1): [w(1)] [100.0% done] [0KB/612KB/0KB /s] [0/153/0 iops] [eta 00m:00s]
4ktest: (groupid=0, jobs=1): err= 0: pid=46184: Tue Apr 16 13:28:49 2019
write: io=34212KB, bw=583865B/s, iops=142, runt= 60002msec
slat (usec): min=2, max=147, avg=17.29, stdev=18.21
clat (msec): min=2, max=252, avg=112.22, stdev=110.20
lat (msec): min=2, max=252, avg=112.24, stdev=110.20
clat percentiles (msec):
| 1.00th=[ 3], 5.00th=[ 3], 10.00th=[ 4], 20.00th=[ 4],
| 30.00th=[ 5], 40.00th=[ 6], 50.00th=[ 49], 60.00th=[ 176],
| 70.00th=[ 239], 80.00th=[ 249], 90.00th=[ 251], 95.00th=[ 251],
| 99.00th=[ 251], 99.50th=[ 251], 99.90th=[ 253], 99.95th=[ 253],
| 99.99th=[ 253]
lat (msec) : 4=29.57%, 10=11.13%, 20=1.80%, 50=7.75%, 100=4.85%
lat (msec) : 250=32.41%, 500=12.49%
cpu : usr=0.10%, sys=0.29%, ctx=5690, majf=0, minf=10
IO depths : 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=99.8%, 32=0.0%, >=64=0.0%
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.1%, 32=0.0%, 64=0.0%, >=64=0.0%
issued : total=r=0/w=8553/d=0, short=r=0/w=0/d=0, drop=r=0/w=0/d=0
latency : target=0, window=0, percentile=100.00%, depth=16
Run status group 0 (all jobs):
WRITE: io=34212KB, aggrb=570KB/s, minb=570KB/s, maxb=570KB/s, mint=60002msec, maxt=60002msec
Disk stats (read/write):
rbd0: ios=0/8530, merge=0/0, ticks=0/955324, in_queue=959632, util=99.89%
root@dtln-ceph01:~# fio --filename=/dev/rbd0 --direct=1 --rw=randwrite --bs=4k --iodepth=16 --runtime=60 --group_reporting --name=4ktest --ioengine=rbd --clientname=admin --pool=testpool --rbdname=testimage
4ktest: (g=0): rw=randwrite, bs=4K-4K/4K-4K/4K-4K, ioengine=rbd, iodepth=16
fio-2.16
Starting 1 process
rbd engine: RBD version: 1.12.0
Jobs: 1 (f=1): [w(1)] [100.0% done] [0KB/552KB/0KB /s] [0/138/0 iops] [eta 00m:00s]
4ktest: (groupid=0, jobs=1): err= 0: pid=48469: Tue Apr 16 13:30:18 2019
write: io=34032KB, bw=580638B/s, iops=141, runt= 60018msec
slat (usec): min=0, max=18, avg= 0.17, stdev= 0.48
clat (msec): min=2, max=504, avg=112.87, stdev=110.62
lat (msec): min=2, max=504, avg=112.87, stdev=110.62
clat percentiles (msec):
| 1.00th=[ 3], 5.00th=[ 4], 10.00th=[ 4], 20.00th=[ 4],
| 30.00th=[ 5], 40.00th=[ 29], 50.00th=[ 56], 60.00th=[ 176],
| 70.00th=[ 217], 80.00th=[ 247], 90.00th=[ 251], 95.00th=[ 251],
| 99.00th=[ 253], 99.50th=[ 465], 99.90th=[ 486], 99.95th=[ 498],
| 99.99th=[ 506]
lat (msec) : 4=25.85%, 10=11.95%, 20=1.41%, 50=10.48%, 100=5.81%
lat (msec) : 250=32.92%, 500=11.55%, 750=0.02%
cpu : usr=32.92%, sys=66.97%, ctx=6863, majf=0, minf=8410
IO depths : 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=99.8%, 32=0.0%, >=64=0.0%
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.1%, 32=0.0%, 64=0.0%, >=64=0.0%
issued : total=r=0/w=8508/d=0, short=r=0/w=0/d=0, drop=r=0/w=0/d=0
latency : target=0, window=0, percentile=100.00%, depth=16
Run status group 0 (all jobs):
WRITE: io=34032KB, aggrb=567KB/s, minb=567KB/s, maxb=567KB/s, mint=60018msec, maxt=60018msec
Disk stats (read/write):
rbd0: ios=0/0, merge=0/0, ticks=0/0, in_queue=0, util=0.00%
root@dtln-ceph01:~# bd bench --pool testpool --image testimage --io-pattern rand --io-type write
-su: bd: command not found
root@dtln-ceph01:~# rbd bench --pool testpool --image testimage --io-pattern rand --io-type write
bench type write io_size 4096 io_threads 16 bytes 1073741824 pattern random
SEC OPS OPS/SEC BYTES/SEC
1 5737 5213.48 21354414.69
2 7172 3474.09 14229863.04
3 8196 2689.16 11014809.43
4 9768 2437.96 9985884.04
5 11534 2157.69 8837910.42
6 12374 1340.67 5491399.29
7 13195 1218.54 4991129.98
8 14737 1314.60 5384592.49
9 16249 1296.48 5310392.07
10 17962 1294.63 5302812.04
11 18752 1286.35 5268876.86
12 20407 1364.81 5590248.15
13 21349 1325.99 5431245.62
14 23381 1414.37 5793249.86
15 24485 1386.11 5677489.99
16 26019 1426.56 5843169.33
17 27537 1487.71 6093663.45
18 29522 1463.57 5994786.24
19 30693 1462.38 5989919.22
20 31831 1378.43 5646066.85
21 32810 1359.09 5566832.45
22 34360 1374.07 5628197.07
23 36031 1369.10 5607813.86
24 36893 1229.59 5036396.44
25 37632 1222.37 5006812.55
26 38615 1161.05 4755670.59
27 39723 1072.61 4393412.48
28 40653 978.83 4009272.01
29 41936 1025.14 4198987.55
30 43478 1189.88 4873758.90
31 44640 1225.77 5020772.41
32 45273 1047.33 4289859.04
33 46442 1169.23 4789161.42
34 47934 1190.20 4875069.12
35 50238 1266.45 5187378.63
36 51244 1311.01 5369911.50
37 52717 1593.25 6525955.74
38 54002 1511.64 6191685.43
root@dtln-ceph01:~# rados bench -p testpool 10 write --no-cleanup
hints = 1
Maintaining 16 concurrent writes of 4194304 bytes to objects of size 4194304 for up to 10 seconds or 0 objects
Object prefix: benchmark_data_dtln-ceph01_52910
sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s)
0 0 0 0 0 0 - 0
1 16 32 16 63.9967 64 0.302328 0.510102
2 16 67 51 101.989 140 0.341044 0.513168
3 16 87 71 94.6556 80 0.338666 0.604806
4 16 115 99 98.9876 112 0.284461 0.584217
5 16 150 134 107.186 140 0.279213 0.542141
6 16 176 160 106.653 104 0.484844 0.55153
7 16 206 190 108.558 120 0.83091 0.55646
8 16 246 230 114.985 160 0.332884 0.543845
9 16 286 270 119.985 160 0.238217 0.527408
10 16 312 296 118.385 104 0.59233 0.518593
Total time run: 10.358236
Total writes made: 313
Write size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 120.87
Stddev Bandwidth: 32.122
Max bandwidth (MB/sec): 160
Min bandwidth (MB/sec): 64
Average IOPS: 30
Stddev IOPS: 8
Max IOPS: 40
Min IOPS: 16
Average Latency(s): 0.52931
Stddev Latency(s): 0.233903
Max latency(s): 1.21042
Min latency(s): 0.0258827
For comparison: on this same hardware, Gluster delivers about 10K IOPS for random writes to a mirrored (replicated) volume.
root@dtln-ceph01:~# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 3.49316 root default
-3 0.87329 host dtln-ceph01
0 ssd 0.87329 osd.0 up 1.00000 1.00000
-5 0.87329 host dtln-ceph02
1 ssd 0.87329 osd.1 up 1.00000 1.00000
-7 0.87329 host dtln-ceph03
2 ssd 0.87329 osd.2 up 1.00000 1.00000
-9 0.87329 host dtln-ceph04
3 ssd 0.87329 osd.3 up 1.00000 1.00000