Thanks Wolfgang.
As part of troubleshooting, zfs_arc_max is currently set to either 25% or 50% of system memory; zfs_arc_min is left at its dynamic default. The systems have plenty of free memory.
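For context, the cap is applied the usual way through the module option; the byte value below is illustrative for a box this size (25% of 128 GiB), not copied verbatim from the node:

# /etc/modprobe.d/zfs.conf: cap ARC at 32 GiB (25% of 128 GiB)
options zfs zfs_arc_max=34359738368
# apply live without a reboot:
echo 34359738368 > /sys/module/zfs/parameters/zfs_arc_max
# refresh the initramfs so the option survives a reboot (root is on ZFS):
update-initramfs -u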
This is the same behavior that drove me to upgrade from consumer SSDs to enterprise ones. Things improved a bit, but migrating any VM storage onto the box after the first causes the system to pretty much fall over until that migration is done. Its icon goes red in the GUI, the virtual machines on it sit at >95% IOWAIT, and system-wide IOWAIT spikes as well, to around 35%.
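Those numbers are what top reports at the node level; per-device behaviour during one of these migrations can be watched with the usual tools, e.g.:

# pool-level and per-vdev throughput, refreshed every 2 seconds
zpool iostat -v rpool 2
# per-disk utilisation and wait times (iostat is in the sysstat package)
iostat -x 2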
We're in our busy season, so I can't easily do hot testing and risk ire.
-J
Stats below. This is from our dual E5-2690 v2 node.
root@prx3:~# top -n 1
top - 14:15:20 up 41 days, 19:31, 3 users, load average: 5.49, 5.45, 5.63
Tasks: 714 total, 1 running, 484 sleeping, 0 stopped, 0 zombie
%Cpu(s): 17.4 us, 2.1 sy, 0.0 ni, 80.2 id, 0.3 wa, 0.0 hi, 0.1 si, 0.0 st
KiB Mem : 13198950+total, 3457748 free, 57405268 used, 71126488 buff/cache
KiB Swap: 0 total, 0 free, 0 used. 73204032 avail Mem
root@prx3:~# cat /proc/spl/kstat/zfs/arcstats
13 1 0x01 96 26112 3004685479 3612125006613757
name type data
hits 4 9274585099
misses 4 517734225
demand_data_hits 4 3008397593
demand_data_misses 4 415260513
demand_metadata_hits 4 6236180400
demand_metadata_misses 4 6000636
prefetch_data_hits 4 17106216
prefetch_data_misses 4 95416812
prefetch_metadata_hits 4 12900890
prefetch_metadata_misses 4 1056264
mru_hits 4 1675794272
mru_ghost_hits 4 52419145
mfu_hits 4 7571087972
mfu_ghost_hits 4 7225402
deleted 4 371134024
mutex_miss 4 80467
access_skip 4 19560
evict_skip 4 48773919
evict_not_enough 4 46441
evict_l2_cached 4 0
evict_l2_eligible 4 3463692487680
evict_l2_ineligible 4 664820148224
evict_l2_skip 4 0
hash_elements 4 961231
hash_elements_max 4 8138654
hash_collisions 4 136436885
hash_chains 4 27709
hash_chain_max 4 6
p 4 3441147657
c 4 4354750464
c_min 4 4223664128
c_max 4 67578626048
size 4 4206951832
compressed_size 4 3640339968
uncompressed_size 4 6415455232
overhead_size 4 226883072
hdr_size 4 316859592
data_size 4 3501989376
metadata_size 4 365233664
dbuf_size 4 6868880
dnode_size 4 12683840
bonus_size 4 3316480
anon_size 4 30963712
anon_evictable_data 4 0
anon_evictable_metadata 4 0
mru_size 4 3780403200
mru_evictable_data 4 3368808960
mru_evictable_metadata 4 176915456
mru_ghost_size 4 450988032
mru_ghost_evictable_data 4 170688512
mru_ghost_evictable_metadata 4 280299520
mfu_size 4 55856128
mfu_evictable_data 4 4468224
mfu_evictable_metadata 4 3896320
mfu_ghost_size 4 3489696768
mfu_ghost_evictable_data 4 1952574976
mfu_ghost_evictable_metadata 4 1537121792
l2_hits 4 0
l2_misses 4 0
l2_feeds 4 0
l2_rw_clash 4 0
l2_read_bytes 4 0
l2_write_bytes 4 0
l2_writes_sent 4 0
l2_writes_done 4 0
l2_writes_error 4 0
l2_writes_lock_retry 4 0
l2_evict_lock_retry 4 0
l2_evict_reading 4 0
l2_evict_l1cached 4 0
l2_free_on_write 4 0
l2_abort_lowmem 4 0
l2_cksum_bad 4 0
l2_io_error 4 0
l2_size 4 0
l2_asize 4 0
l2_hdr_size 4 0
memory_direct_count 4 917401
memory_indirect_count 4 922438
memory_all_bytes 4 135157252096
memory_free_bytes 4 62905028608
memory_available_bytes 3 60793196544
arc_no_grow 4 0
arc_tempreserve 4 0
arc_loaned_bytes 4 0
arc_prune 4 0
arc_meta_used 4 704962456
arc_meta_limit 4 50683969536
arc_dnode_limit 4 5068396953
arc_meta_max 4 3000373768
arc_meta_min 4 16777216
sync_wait_for_async 4 406270
demand_hit_predictive_prefetch 4 13311748
arc_need_free 4 0
arc_sys_free 4 2111832064
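For what it's worth, a quick hit-ratio check against the counters above (just awk over the same file):

# overall ARC hit ratio from the hits/misses counters
awk '/^hits / {h=$3} /^misses / {m=$3} END {printf "ARC hit ratio: %.1f%%\n", 100*h/(h+m)}' /proc/spl/kstat/zfs/arcstats

With the hits/misses values above that comes out to roughly 94.7%.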
root@prx3:~# zpool status
pool: rpool
state: ONLINE
scan: scrub repaired 0B in 0h32m with 0 errors on Sun Nov 11 00:56:21 2018
config:
NAME          STATE     READ WRITE CKSUM
rpool         ONLINE       0     0     0
  mirror-0    ONLINE       0     0     0
    sdb2      ONLINE       0     0     0
    sdd2      ONLINE       0     0     0
errors: No known data errors
root@prx3:~# zfs list
NAME                        USED  AVAIL  REFER  MOUNTPOINT
rpool                       230G   847G   104K  /rpool
rpool/ROOT                 1.05G   847G    96K  /rpool/ROOT
rpool/ROOT/pve-1           1.05G   847G  1.05G  /
rpool/data                  219G   847G    96K  /rpool/data
rpool/data/vm-115-disk-0    103G   847G  61.1G  -
rpool/data/vm-119-disk-0   72.7G   847G  52.8G  -
rpool/data/vm-133-disk-0   34.7G   847G  34.7G  -
rpool/data/vm-134-disk-0   2.32G   847G  2.32G  -
rpool/data/vm-302-disk-0    429M   847G   429M  -
rpool/data/vm-503-disk-0   5.66G   847G  5.66G  -
rpool/swap                 8.50G   855G   796M  -