Benchmarks from the nodes
From node1
root@node1:~# rados -p my-pool bench 10 write --no-cleanup
hints = 1
Maintaining 16 concurrent writes of 4194304 bytes to objects of size 4194304 for up to 10 seconds or 0 objects
Object prefix: benchmark_data_atlas_24983
sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s)
0 0 0 0 0 0 - 0
1 16 20 4 15.9961 16 0.770399 0.565837
2 16 34 18 35.9909 56 0.613186 1.18047
3 16 44 28 37.3242 40 1.53114 1.26343
4 16 59 43 42.99 60 1.84357 1.24766
5 16 71 55 43.9898 48 1.7848 1.29295
6 16 78 62 41.324 28 1.34471 1.31874
7 16 90 74 42.2761 48 1.88063 1.35202
8 16 103 87 43.4907 52 1.22835 1.34052
9 16 109 93 41.3246 24 1.81452 1.37555
10 16 120 104 41.5912 44 2.06756 1.42633
Total time run: 10.800131
Total writes made: 121
Write size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 44.8143
Stddev Bandwidth: 14.5082
Max bandwidth (MB/sec): 60
Min bandwidth (MB/sec): 16
Average IOPS: 11
Stddev IOPS: 3
Max IOPS: 15
Min IOPS: 4
Average Latency(s): 1.41582
Stddev Latency(s): 0.476247
Max latency(s): 2.30416
Min latency(s): 0.283322
root@node1:~#
root@node1:~# rados -p my-pool bench 60 seq --no-cleanup
hints = 1
sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s)
0 0 0 0 0 0 - 0
1 16 45 29 115.967 116 0.997326 0.378364
2 16 79 63 125.968 136 0.956501 0.405377
3 16 109 93 123.971 120 0.787081 0.45363
4 14 121 107 106.977 56 1.05066 0.459556
Total time run: 4.470531
Total reads made: 121
Read size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 108.265
Average IOPS: 27
Stddev IOPS: 8
Max IOPS: 34
Min IOPS: 14
Average Latency(s): 0.552363
Max latency(s): 1.72134
Min latency(s): 0.0343347
root@node1:~#
root@node-1:~# fio --filename=/dev/sdb --direct=1 --sync=1 --rw=write --bs=4k --numjobs=6 --iodepth=2 --runtime=60 --time_based --group_reporting --name=journal-test
journal-test: (g=0): rw=write, bs=4K-4K/4K-4K/4K-4K, ioengine=psync, iodepth=2
...
fio-2.16
Starting 6 processes
Jobs: 6 (f=6): [W(6)] [100.0% done] [0KB/1708KB/0KB /s] [0/427/0 iops] [eta 00m:00s]
journal-test: (groupid=0, jobs=6): err= 0: pid=18260: Mon Jan 15 15:56:07 2018
write: io=90952KB, bw=1514.6KB/s, iops=378, runt= 60053msec
clat (msec): min=1, max=493, avg=15.83, stdev=22.63
lat (msec): min=1, max=493, avg=15.83, stdev=22.63
clat percentiles (msec):
| 1.00th=[ 9], 5.00th=[ 9], 10.00th=[ 9], 20.00th=[ 9],
| 30.00th=[ 9], 40.00th=[ 9], 50.00th=[ 9], 60.00th=[ 13],
| 70.00th=[ 17], 80.00th=[ 17], 90.00th=[ 25], 95.00th=[ 33],
| 99.00th=[ 117], 99.50th=[ 186], 99.90th=[ 293], 99.95th=[ 396],
| 99.99th=[ 482]
lat (msec) : 2=0.01%, 4=0.01%, 10=56.29%, 20=27.07%, 50=14.33%
lat (msec) : 100=1.12%, 250=1.03%, 500=0.15%
cpu : usr=0.04%, sys=0.16%, ctx=22763, majf=0, minf=59
IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
issued : total=r=0/w=22738/d=0, short=r=0/w=0/d=0, drop=r=0/w=0/d=0
latency : target=0, window=0, percentile=100.00%, depth=2
Run status group 0 (all jobs):
WRITE: io=90952KB, aggrb=1514KB/s, minb=1514KB/s, maxb=1514KB/s, mint=60053msec, maxt=60053msec
Disk stats (read/write):
sdb: ios=1024/24305, merge=0/351, ticks=5264/600420, in_queue=605720, util=100.00%
Nagios check for CEPH interface: Traffic In : 3.36 Mb/s (0.3 %), Out : 4.48 Mb/s (0.4 %) - Link Speed : 1000000000
From node-2
root@node-2:~# rados -p my-pool bench 10 write --no-cleanup
hints = 1
Maintaining 16 concurrent writes of 4194304 bytes to objects of size 4194304 for up to 10 seconds or 0 objects
Object prefix: benchmark_data_cadcluster-2_23011
sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s)
0 0 0 0 0 0 - 0
1 16 18 2 7.99957 8 0.980511 0.837696
2 16 30 14 27.9964 48 0.713963 1.32265
3 16 33 17 22.6635 12 2.72949 1.47817
4 16 48 32 31.9953 60 0.85023 1.7088
5 16 53 37 29.5955 20 1.7041 1.68138
6 16 68 52 34.6613 60 1.62137 1.66006
7 16 77 61 34.8517 36 1.66339 1.60751
8 16 85 69 34.4946 32 1.21054 1.62791
9 16 92 76 33.7724 28 3.02591 1.64775
10 16 109 93 37.1941 68 1.56255 1.67116
11 16 110 94 34.1765 4 0.673507 1.66054
12 14 110 96 31.9951 8 2.21228 1.67381
Total time run: 12.142081
Total writes made: 110
Write size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 36.2376
Stddev Bandwidth: 22.6274
Max bandwidth (MB/sec): 68
Min bandwidth (MB/sec): 4
Average IOPS: 9
Stddev IOPS: 5
Max IOPS: 17
Min IOPS: 1
Average Latency(s): 1.76148
Stddev Latency(s): 0.60523
Max latency(s): 3.13594
Min latency(s): 0.38257
root@node-2:~# rados -p my-pool bench 60 seq --no-cleanup
hints = 1
sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s)
0 0 0 0 0 0 - 0
1 16 57 41 163.959 164 0.192498 0.302417
2 16 101 85 169.954 176 0.289278 0.329004
Total time run: 2.792823
Total reads made: 110
Read size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 157.547
Average IOPS: 39
Stddev IOPS: 2
Max IOPS: 44
Min IOPS: 41
Average Latency(s): 0.399894
Max latency(s): 1.57963
Min latency(s): 0.0412193
root@node-2:~# fio --filename=/dev/sdb --direct=1 --sync=1 --rw=write --bs=4k --numjobs=6 --iodepth=2 --runtime=60 --time_based --group_reporting --name=journal-test
journal-test: (g=0): rw=write, bs=4K-4K/4K-4K/4K-4K, ioengine=psync, iodepth=2
...
fio-2.16
Starting 6 processes
Jobs: 6 (f=6): [W(6)] [100.0% done] [0KB/1464KB/0KB /s] [0/366/0 iops] [eta 00m:00s]
journal-test: (groupid=0, jobs=6): err= 0: pid=31340: Mon Jan 15 16:05:11 2018
write: io=60184KB, bw=1003.6KB/s, iops=250, runt= 60001msec
clat (msec): min=4, max=568, avg=23.92, stdev=33.54
lat (msec): min=4, max=568, avg=23.92, stdev=33.54
clat percentiles (msec):
| 1.00th=[ 11], 5.00th=[ 11], 10.00th=[ 12], 20.00th=[ 12],
| 30.00th=[ 12], 40.00th=[ 20], 50.00th=[ 23], 60.00th=[ 23],
| 70.00th=[ 23], 80.00th=[ 23], 90.00th=[ 33], 95.00th=[ 49],
| 99.00th=[ 188], 99.50th=[ 302], 99.90th=[ 400], 99.95th=[ 441],
| 99.99th=[ 570]
lat (msec) : 10=0.99%, 20=39.04%, 50=55.05%, 100=3.25%, 250=0.88%
lat (msec) : 500=0.75%, 750=0.04%
cpu : usr=0.03%, sys=0.27%, ctx=30138, majf=0, minf=65
IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
issued : total=r=0/w=15046/d=0, short=r=0/w=0/d=0, drop=r=0/w=0/d=0
latency : target=0, window=0, percentile=100.00%, depth=2
Run status group 0 (all jobs):
WRITE: io=60184KB, aggrb=1003KB/s, minb=1003KB/s, maxb=1003KB/s, mint=60001msec, maxt=60001msec
Disk stats (read/write):
sdb: ios=318/31666, merge=0/209, ticks=6260/486752, in_queue=493228, util=99.87%
Nagios check for CEPH interface: Traffic In : 5.01 Mb/s (0.5 %), Out : 6.38 Mb/s (0.6 %) - Link Speed : 1000000000
From node-3
root@node-3:~# rados -p my-pool bench 10 write --no-cleanup
hints = 1
Maintaining 16 concurrent writes of 4194304 bytes to objects of size 4194304 for up to 10 seconds or 0 objects
Object prefix: benchmark_data_cadcluster-1_25994
sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s)
0 0 0 0 0 0 - 0
1 16 23 7 27.9981 28 0.8353 0.701256
2 16 34 18 35.996 44 0.622092 0.987845
3 16 46 30 39.9958 48 0.789123 1.10658
4 16 63 47 46.9952 68 1.29141 1.20888
5 16 70 54 43.1953 28 1.32297 1.23323
6 16 84 68 45.3281 56 1.94003 1.26118
7 16 93 77 43.9946 36 0.414177 1.27202
8 16 99 83 41.4949 24 1.96712 1.32592
9 16 110 94 41.7725 44 2.0745 1.38354
10 16 119 103 41.1947 36 1.14513 1.38374
Total time run: 10.972736
Total writes made: 120
Write size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 43.7448
Stddev Bandwidth: 13.734
Max bandwidth (MB/sec): 68
Min bandwidth (MB/sec): 24
Average IOPS: 10
Stddev IOPS: 3
Max IOPS: 17
Min IOPS: 6
Average Latency(s): 1.46007
Stddev Latency(s): 0.608293
Max latency(s): 2.70165
Min latency(s): 0.287395
root@node-3:~# rados -p my-pool bench 60 seq --no-cleanup
hints = 1
sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s)
0 0 0 0 0 0 - 0
1 16 49 33 131.972 132 0.166881 0.288893
2 16 74 58 115.976 100 0.808538 0.426684
3 16 107 91 121.311 132 0.100169 0.444951
4 15 120 105 104.982 56 0.687433 0.460856
Total time run: 4.196130
Total reads made: 120
Read size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 114.391
Average IOPS: 28
Stddev IOPS: 9
Max IOPS: 33
Min IOPS: 14
Average Latency(s): 0.545573
Max latency(s): 1.64453
Min latency(s): 0.0383955
root@node-3:~# fio --filename=/dev/sdb --direct=1 --sync=1 --rw=write --bs=4k --numjobs=6 --iodepth=2 --runtime=60 --time_based --group_reporting --name=journal-test
journal-test: (g=0): rw=write, bs=4K-4K/4K-4K/4K-4K, ioengine=psync, iodepth=2
...
fio-2.16
Starting 6 processes
Jobs: 6 (f=6): [W(6)] [100.0% done] [0KB/1512KB/0KB /s] [0/378/0 iops] [eta 00m:00s]
journal-test: (groupid=0, jobs=6): err= 0: pid=2376: Mon Jan 15 16:09:29 2018
write: io=28728KB, bw=490225B/s, iops=119, runt= 60008msec
clat (msec): min=8, max=610, avg=50.13, stdev=60.64
lat (msec): min=8, max=610, avg=50.13, stdev=60.64
clat percentiles (msec):
| 1.00th=[ 12], 5.00th=[ 12], 10.00th=[ 13], 20.00th=[ 24],
| 30.00th=[ 24], 40.00th=[ 25], 50.00th=[ 25], 60.00th=[ 43],
| 70.00th=[ 49], 80.00th=[ 61], 90.00th=[ 85], 95.00th=[ 157],
| 99.00th=[ 334], 99.50th=[ 420], 99.90th=[ 537], 99.95th=[ 611],
| 99.99th=[ 611]
lat (msec) : 10=0.08%, 20=12.03%, 50=59.26%, 100=20.75%, 250=5.22%
lat (msec) : 500=2.45%, 750=0.21%
cpu : usr=0.01%, sys=0.09%, ctx=14402, majf=0, minf=61
IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
issued : total=r=0/w=7182/d=0, short=r=0/w=0/d=0, drop=r=0/w=0/d=0
latency : target=0, window=0, percentile=100.00%, depth=2
Run status group 0 (all jobs):
WRITE: io=28728KB, aggrb=478KB/s, minb=478KB/s, maxb=478KB/s, mint=60008msec, maxt=60008msec
Disk stats (read/write):
sdb: ios=302/16859, merge=0/261, ticks=10192/593900, in_queue=604192, util=99.92%
Nagios check for CEPH interface: Traffic In : 5.12 Mb/s (0.5 %), Out : 4.17 Mb/s (0.4 %) - Link Speed : 1000000000
From node-4
root@node-4:~# rados -p my-pool bench 10 write --no-cleanup
hints = 1
Maintaining 16 concurrent writes of 4194304 bytes to objects of size 4194304 for up to 10 seconds or 0 objects
Object prefix: benchmark_data_zeus_9250
sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s)
0 0 0 0 0 0 - 0
1 16 17 1 3.99954 4 0.547193 0.547193
2 16 24 8 15.9976 28 1.57422 1.39662
3 16 33 17 22.6634 36 0.256374 1.71357
4 16 45 29 28.996 48 0.456743 1.71227
5 16 57 41 32.7955 48 1.33359 1.63479
6 16 69 53 35.3286 48 1.49434 1.56238
7 16 80 64 36.5666 44 0.581883 1.52677
8 16 93 77 38.495 52 0.569441 1.52224
9 16 103 87 38.6617 40 0.295466 1.45903
10 16 113 97 38.795 40 1.82926 1.50819
11 16 114 98 35.6317 4 1.40668 1.50715
Total time run: 11.251195
Total writes made: 114
Write size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 40.529
Stddev Bandwidth: 17.0134
Max bandwidth (MB/sec): 52
Min bandwidth (MB/sec): 4
Average IOPS: 10
Stddev IOPS: 4
Max IOPS: 13
Min IOPS: 1
Average Latency(s): 1.57879
Stddev Latency(s): 0.670161
Max latency(s): 3.88376
Min latency(s): 0.251962
root@node-4:~# rados -p my-pool bench 60 seq --no-cleanup
hints = 1
sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s)
0 0 0 0 0 0 - 0
1 16 40 24 95.9845 96 0.966508 0.361017
2 16 70 54 107.983 120 0.217577 0.45263
3 16 103 87 115.984 132 1.15813 0.491828
4 16 114 98 97.9868 44 0.201338 0.49361
Total time run: 4.737685
Total reads made: 114
Read size: 4194304
Object size: 4194304
Bandwidth (MB/sec): 96.2495
Average IOPS: 24
Stddev IOPS: 9
Max IOPS: 33
Min IOPS: 11
Average Latency(s): 0.64647
Max latency(s): 2.04525
Min latency(s): 0.00323975
root@node-4:~#
root@node-4:~# fio --filename=/dev/sdb --direct=1 --sync=1 --rw=write --bs=4k --numjobs=6 --iodepth=2 --runtime=60 --time_based --group_reporting --name=journal-test
journal-test: (g=0): rw=write, bs=4K-4K/4K-4K/4K-4K, ioengine=psync, iodepth=2
...
fio-2.16
Starting 6 processes
Jobs: 6 (f=6): [W(6)] [100.0% done] [0KB/589.8MB/0KB /s] [0/151K/0 iops] [eta 00m:00s]
journal-test: (groupid=0, jobs=6): err= 0: pid=30522: Mon Jan 15 16:21:25 2018
write: io=35203MB, bw=600786KB/s, iops=150196, runt= 60001msec
clat (usec): min=24, max=2722, avg=39.17, stdev= 9.17
lat (usec): min=24, max=2722, avg=39.28, stdev= 9.17
clat percentiles (usec):
| 1.00th=[ 30], 5.00th=[ 32], 10.00th=[ 34], 20.00th=[ 35],
| 30.00th=[ 36], 40.00th=[ 38], 50.00th=[ 39], 60.00th=[ 40],
| 70.00th=[ 41], 80.00th=[ 42], 90.00th=[ 44], 95.00th=[ 47],
| 99.00th=[ 58], 99.50th=[ 68], 99.90th=[ 76], 99.95th=[ 91],
| 99.99th=[ 185]
lat (usec) : 50=96.91%, 100=3.05%, 250=0.04%, 500=0.01%, 750=0.01%
lat (usec) : 1000=0.01%
lat (msec) : 2=0.01%, 4=0.01%
cpu : usr=7.21%, sys=17.14%, ctx=9014570, majf=0, minf=71
IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
issued : total=r=0/w=9011947/d=0, short=r=0/w=0/d=0, drop=r=0/w=0/d=0
latency : target=0, window=0, percentile=100.00%, depth=2
Run status group 0 (all jobs):
WRITE: io=35203MB, aggrb=600786KB/s, minb=600786KB/s, maxb=600786KB/s, mint=60001msec, maxt=60001msec
Disk stats (read/write):
sdb: ios=332/8977011, merge=0/18282, ticks=392/283780, in_queue=283984, util=100.00%
Benchmarks from VM
From a VM running on node-1 (CentOS Linux)
[root@vmcentos ~]# dd if=/dev/zero of=/tmp/test.data bs=1M count=1000 oflag=direct
1000+0 enregistrements lus
1000+0 enregistrements écrits
1048576000 octets (1.0 GB) copiés, 225.527 s, 4.6 MB/s
[root@vmcentos ~]# dd if=/dev/zero of=/tmp/test.data bs=1M count=1024 conv=fdatasync
1024+0 enregistrements lus
1024+0 enregistrements écrits
1073741824 octets (1.1 GB) copiés, 107.071 s, 10.0 MB/s
From another VM running on node-1 (Debian Linux)
moi@vmdebian-1:~$ dd if=/dev/zero of=/tmp/test.data bs=1M count=1024 conv=fdatasync
1024+0 enregistrements lus
1024+0 enregistrements écrits
1073741824 octets (1.1 GB) copiés, 26.5522 s, 40.4 MB/s
Questions
- What problems in my configuration could be causing this poor performance?
- How can I fix it?
Thanks!