Hello,
I've encountered a problem with my Proxmox cluster.
I have 3 PVE servers with iSCSI connections to 2 storage arrays, with 8 iSCSI paths.
I added each iSCSI path to all 3 servers and created shared LVMs.
Attached is the /etc/pve/storage.cfg file:
dir: local
path /var/lib/vz
content iso,backup,vztmpl
lvmthin: local-lvm
thinpool data
vgname pve
content images,rootdir
esxi: VMW
server 172.22.0.202
username root
content import
skip-cert-verification 1
esxi: VMW1
server 172.22.0.201
username root
content import
skip-cert-verification 1
iscsi: ControllerCIV1
portal 172.21.0.31
target iqn.2006-08.com.huawei:oceanstor:2100a4178b235300::22004:172.21.0.31
content images
iscsi: ControllerCIV2
portal 172.21.0.32
target iqn.2006-08.com.huawei:oceanstor:2100a4178b235300::22005:172.21.0.32
content images
iscsi: ControllerCIV3
portal 172.21.0.33
target iqn.2006-08.com.huawei:oceanstor:2100a4178b235300::1022004:172.21.0.33
content images
iscsi: ControllerCIV4
portal 172.21.0.34
target iqn.2006-08.com.huawei:oceanstor:2100a4178b235300::1022005:172.21.0.34
content images
iscsi: ControllerDOU1
portal 172.21.0.42
target iqn.2006-08.com.huawei:oceanstor:2100a4178b2352dd::22004:172.21.0.42
content images
iscsi: ControllerDOU2
portal 172.21.0.43
target iqn.2006-08.com.huawei:oceanstor:2100a4178b2352dd::22005:172.21.0.43
content images
iscsi: ControllerDOU3
portal 172.21.0.44
target iqn.2006-08.com.huawei:oceanstor:2100a4178b2352dd::1022004:172.21.0.44
content images
iscsi: ControllerDOU4
portal 172.21.0.45
target iqn.2006-08.com.huawei:oceanstor:2100a4178b2352dd::1022005:172.21.0.45
content images
lvm: Data
vgname group
base ControllerCIV2:0.0.2.scsi-36a4178b1002353009bb9e17700000011
content rootdir,images
saferemove 0
shared 1
lvm: CMK-REP
vgname cmk_rep
base ControllerCIV2:0.0.5.scsi-36a4178b100235300bda1637a00000014
content images,rootdir
saferemove 0
shared 1
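In case it's useful, this is roughly how I check which physical devices the two shared volume groups sit on (standard LVM commands; the VG names are the ones from the config above):
pvs -o pv_name,vg_name,pv_size
vgs group cmk_rep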
And here is part of the multipath -ll output:
mpath1 (36a4178b1002353009bb9e17700000011) dm-9 HUAWEI,XSG1
size=600G features='1 queue_if_no_path' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
|- 11:0:0:2 sdb 8:16 active ready running
|- 12:0:0:2 sdg 8:96 active ready running
|- 13:0:0:2 sdm 8:192 active ready running
|- 14:0:0:2 sds 65:32 active ready running
|- 15:0:0:2 sdx 65:112 active ready running
|- 16:0:0:2 sdac 65:192 active ready running
|- 17:0:0:2 sdai 66:32 active ready running
`- 18:0:0:2 sdao 66:128 active ready running
mpath2 (36a4178b1002353009c0989bb00000012) dm-10 HUAWEI,XSG1
size=60G features='1 queue_if_no_path' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=1 status=active
|- 11:0:0:3 sdc 8:32 active ready running
|- 12:0:0:3 sdh 8:112 active ready running
|- 13:0:0:3 sdn 8:208 active ready running
|- 14:0:0:3 sdt 65:48 active ready running
|- 15:0:0:3 sdy 65:128 active ready running
|- 16:0:0:3 sdad 65:208 active ready running
|- 17:0:0:3 sdaj 66:48 active ready running
`- 18:0:0:3 sdap 66:144 active ready running
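For completeness, these are the commands I normally use to re-check the iSCSI sessions and the multipath maps (standard iscsiadm/multipath commands, nothing custom):
iscsiadm -m session
multipath -ll
multipath -r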
The file /etc/multipath.conf
root@HBGMT-MGT-3:~# cat /etc/multipath.conf
defaults {
user_friendly_names yes
find_multipaths yes
}
blacklist {
devnode "^sda$"
}
devices {
device {
vendor "HUAWEI"
product "XSG1"
path_grouping_policy multibus
path_selector "round-robin 0"
features "1 queue_if_no_path"
hardware_handler "0"
prio const
failback immediate
}
}
multipaths {
multipath {
wwid 36a4178b1002353009c0989bb00000012
alias mpath2
}
multipath {
wwid 36a4178b1002353009bb9e17700000011
alias mpath1
}
multipath {
wwid 36a4178b100235300bda1637a00000014
alias mpath3
}
multipath {
wwid 36a4178b100235300bda936c800000015
alias mpath4
}
}
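After editing this file I apply the configuration with something like the following (from memory, the exact reload procedure may differ slightly):
systemctl restart multipathd
multipath -r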
For resilience testing purposes, we shut down one of the storage arrays and restarted the servers. The "CMK-REP" storage was no longer available, and we had to bring the array back online to make the storage accessible again.
Before the server restart, everything was working fine.
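In case it helps, this is roughly what I look at to check the storage state after a reboot (pvesm is the standard Proxmox storage CLI):
pvesm status
vgs
lvs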
Do you know what the problem might be?
At the time, I followed the Proxmox wiki article on iSCSI multipath to do the configuration.
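From memory, the per-node steps from that article were roughly the following (the portal address is just one example from the config above; I may be forgetting an option):
apt install multipath-tools
iscsiadm -m discovery -t sendtargets -p 172.21.0.31
iscsiadm -m node --login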
Nicolas