Built a test cluster with Proxmox VE 4.0 and an HP MSA 1040 multipathed iSCSI SAN.
Got the iSCSI connections (one per target NIC/IP) and multipath on top of them working.
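For reference, the sessions were brought up with the usual iscsiadm sequence, roughly like this (a sketch; the portal IP is just one of the four shown in the session list further down):
# discover the target on one portal (the MSA advertises all four portals)
iscsiadm -m discovery -t sendtargets -p 10.2.0.1:3260
# log in to every discovered portal for the target
iscsiadm -m node --targetname iqn.1986-03.com.hp:storage.msa1040.151725e557 --login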
root@n1:~# awk '!/^#/ && NF>0{print}' /etc/iscsi/iscsid.conf
iscsid.startup = /usr/sbin/iscsid
node.startup = manual
node.leading_login = No
node.session.timeo.replacement_timeout = 15
node.conn[0].timeo.login_timeout = 5
node.conn[0].timeo.logout_timeout = 5
node.conn[0].timeo.noop_out_interval = 5
node.conn[0].timeo.noop_out_timeout = 5
node.session.err_timeo.abort_timeout = 15
node.session.err_timeo.lu_reset_timeout = 30
node.session.err_timeo.tgt_reset_timeout = 30
node.session.initial_login_retry_max = 8
node.session.cmds_max = 1024
node.session.queue_depth = 32
node.session.xmit_thread_priority = -20
node.session.iscsi.InitialR2T = No
node.session.iscsi.ImmediateData = Yes
node.session.iscsi.FirstBurstLength = 262144
node.session.iscsi.MaxBurstLength = 16776192
node.conn[0].iscsi.MaxRecvDataSegmentLength = 262144
node.conn[0].iscsi.MaxXmitDataSegmentLength = 0
discovery.sendtargets.iscsi.MaxRecvDataSegmentLength = 32768
node.session.nr_sessions = 1
node.session.iscsi.FastAbort = Yes
root@n1:~# cat /etc/multipath.conf
blacklist {
wwid "3600508b1001cb3a67cd9c48d02255ead"
wwid "3600508b1001c174c463bc1b1e2d53fa0"
wwid "3600508b1001c06d4d45874c12a8c2a2c"
wwid "3600508b1001cf513f53307e68bf534a8"
wwid "3600508b1001c0d670eb10b70a0ae5a03"
wwid "3600508b1001cb7a276020d5e79689301"
wwid "3600508b1001c6f882c71129103dd4bdd"
devnode "^fuse"
devnode "^(ram|raw|loop|fd|md|dm-|sr|scd|st)[0-9]*"
devnode "^hd[a-z][[0-9]*]"
devnode "^vd[a-z]"
devnode "^cciss!c[0-9]d[0-9]*[p[0-9]*]"
}
defaults {
polling_interval 5
max_polling_interval 60
path_grouping_policy multibus
path_selector "round-robin 0"
rr_min_io 100
rr_weight priorities
failback immediate
no_path_retry queue
#user_friendly_names yes
#features no_partions
}
devices {
device {
vendor "HP"
product "MSA 1040 SAN"
path_grouping_policy group_by_prio
uid_attribute "ID_SERIAL"
prio alua
detect_prio yes
#path_selector "service-time 0"
path_selector "round-robin 0"
#path_selector "queue-length 0"
path_checker tur
hardware_handler "1 alua"
rr_weight uniform
rr_min_io 100
rr_min_io_rq 1
no_path_retry 18
#queue_without_daemon no
}
}
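For completeness: after changing multipath.conf the maps need to be reloaded; something along these lines should do it (exact service handling may differ per distro):
systemctl restart multipathd   # pick up the new configuration
multipath -r                   # force reload of the existing multipath maps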
root@n1:~# iscsiadm -m session
tcp: [1] 10.2.1.1:3260,3 iqn.1986-03.com.hp:storage.msa1040.151725e557 (non-flash)
tcp: [2] 10.2.0.2:3260,2 iqn.1986-03.com.hp:storage.msa1040.151725e557 (non-flash)
tcp: [3] 10.2.0.1:3260,1 iqn.1986-03.com.hp:storage.msa1040.151725e557 (non-flash)
tcp: [4] 10.2.1.2:3260,4 iqn.1986-03.com.hp:storage.msa1040.151725e557 (non-flash)
# mapped 60 LUNs from each MSA controller
root@n1:~# dmsetup ls | grep -c 3600c
120
# each with 4 paths
root@n1:~# multipath -l /dev/dm-3
3600c0ff000258a36583c125601000000 dm-3 HP,MSA 1040 SAN
size=93G features='1 queue_if_no_path' hwhandler='1 alua' wp=rw
|-+- policy='round-robin 0' prio=0 status=active
| |- 1:0:0:0 sdb 8:16 active undef running
| `- 3:0:0:0 sdc 8:32 active undef running
`-+- policy='round-robin 0' prio=0 status=enabled
|- 4:0:0:0 sde 8:64 active undef running
`- 2:0:0:0 sdd 8:48 active undef running
The only issue: whenever PVE does something that involves handling images on the SAN, it starts with an iSCSI login. That login seems to wait for a timeout before failing with exit code 15 because the sessions are already in place, and only then does the operation continue and succeed. This creates log noise and prolongs operations such as live migrations.
Oct 10 00:19:51 n1 pvedaemon[18987]: command '/usr/bin/iscsiadm --mode node --targetname iqn.1986-03.com.hp:storage.msa1040.151725e557 --login' failed: exit code 15
Why attempt a login on every operation in the first place? Skipping (or pre-checking) the login would avoid spending time on logins that are bound to fail.
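What I had in mind is something along these lines, i.e. only logging in when no session exists yet (a hypothetical sketch, not what PVE actually does internally):
# hypothetical pre-check before logging in (not PVE's actual code)
TARGET=iqn.1986-03.com.hp:storage.msa1040.151725e557
if ! iscsiadm -m session 2>/dev/null | grep -q "$TARGET"; then
    iscsiadm -m node --targetname "$TARGET" --login
fi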
Also, operations/queries on VGs/LVs/PVs seem to take quite some time, which means pvestatd is constantly busy and some GUI operations (like creating a new HDD on a VM) time out even though they eventually succeed :/
root@n1:~# tail /var/log/daemon.log
Oct 12 09:19:58 n1 pvestatd[2757]: status update time (18.119 seconds)
Oct 12 09:20:18 n1 pvestatd[2757]: status update time (19.425 seconds)
Oct 12 09:20:36 n1 pvestatd[2757]: status update time (18.645 seconds)
Oct 12 09:20:55 n1 pvestatd[2757]: status update time (18.475 seconds)
Oct 12 09:21:14 n1 pvestatd[2757]: status update time (19.266 seconds)
Oct 12 09:21:33 n1 pvestatd[2757]: status update time (18.987 seconds)
Oct 12 09:21:52 n1 pvestatd[2757]: status update time (18.580 seconds)
Oct 12 09:22:11 n1 pvestatd[2757]: status update time (18.970 seconds)
Oct 12 09:22:30 n1 pvestatd[2757]: status update time (18.940 seconds)
Oct 12 09:22:49 n1 pvestatd[2757]: status update time (19.203 seconds)
Is this due to the number of PVs? I wouldn't think 120 LUNs is too much for a Linux box to handle.
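One thing I'm wondering about is whether LVM is scanning all of the raw sd* paths (120 LUNs x 4 paths = 480 block devices) on top of the multipath maps. If so, a filter in /etc/lvm/lvm.conf restricting scanning to the multipath devices might help; something like this (untested here, and the accept patterns would need to match the local disks on your boxes):
devices {
    # accept multipath maps and the local system disk, reject everything else (untested sketch)
    filter = [ "a|/dev/mapper/3600c.*|", "a|/dev/sda.*|", "r|.*|" ]
}
Timing a plain pvs/vgs before and after the change should show whether the device scan itself is the bottleneck.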