Hello all,
I have a 4 node cluster with multiple OSDs, 9 for each node. I have pool set for 2/2.
Recently one of the node (master) failed. All vms migrated to node2 but none of them came up. More strangely my vm on node4 and node4 also stopped working. I must have some huge mistake in my setup.
This is not how it is supposed to work.
Here are logs relevant to event from syslog
Apr 17 23:41:11 hosting-b pve-ha-lrm[3963]: start failed: command '/usr/bin/kvm -id 101 -chardev 'socket,id=qmp,path=/var/run/qemu-server/101.qmp,server,nowait' -mon 'chardev=qmp,mode=control' -pidfile /var/run/qemu-server/101.pid -daemonize -smbios 'type=1,uuid=dda46807-1e54-492a-af87-c1a9aef58e7f' -name HybridITServices -smp '2,sockets=2,cores=1,maxcpus=2' -nodefaults -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' -vga std -vnc unix:/var/run/qemu-server/101.vnc,x509,password -cpu kvm64,+lahf_lm,+sep -m 20480 -k en-us -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' -device 'usb-tablet,id=tablet,bus=uhci.0,port=1' -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3' -iscsi 'initiator-name=iqn.1993-08.org.debian:01:303b856cc6' -drive 'if=none,id=drive-ide2,media=cdrom,aio=threads' -device 'ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=100' -device 'virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5' -drive 'file=rbd:rbd/vm-101-disk-1:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/rbd_vm.keyring,if=none,id=drive-scsi0,format=raw,cache=none,aio=native,detect-zeroes=on' -device 'scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0,bootindex=200' -netdev 'type=tap,id=net0,ifname=tap101i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=F6:1F:51:C8:F9:24,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300' -machine 'accel=tcg'' failed: got timeout
Apr 17 23:41:11 hosting-b pve-ha-lrm[3960]: <root@pam> end task UPID:hosting-b:00000F7B:00090AE8:5AD6E869:qmstart:101:root@pam: start failed: command '/usr/bin/kvm -id 101 -chardev 'socket,id=qmp,path=/var/run/qemu-server/101.qmp,server,nowait' -mon 'chardev=qmp,mode=control' -pidfile /var/run/qemu-server/101.pid -daemonize -smbios 'type=1,uuid=dda46807-1e54-492a-af87-c1a9aef58e7f' -name HybridITServices -smp '2,sockets=2,cores=1,maxcpus=2' -nodefaults -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' -vga std -vnc unix:/var/run/qemu-server/101.vnc,x509,password -cpu kvm64,+lahf_lm,+sep -m 20480 -k en-us -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' -device 'usb-tablet,id=tablet,bus=uhci.0,port=1' -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3' -iscsi 'initiator-name=iqn.1993-08.org.debian:01:303b856cc6' -drive 'if=none,id=drive-ide2,media=cdrom,aio=threads' -device 'ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=100' -device 'virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5' -drive 'file=rbd:rbd/vm-101-disk-1:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/rbd_vm.keyring,if=none,id=drive-scsi0,format=raw,cache=none,aio=native,detect-zeroes=on' -device 'scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0,bootindex=200' -netdev 'type=tap,id=net0,ifname=tap101i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=F6:1F:51:C8:F9:24,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300' -machine 'accel=tcg'' failed: got timeout
Apr 17 23:41:11 hosting-b pve-ha-lrm[3960]: service status vm:101 started
Apr 17 23:41:11 hosting-b pve-ha-lrm[3966]: start failed: command '/usr/bin/kvm -id 104 -chardev 'socket,id=qmp,path=/var/run/qemu-server/104.qmp,server,nowait' -mon 'chardev=qmp,mode=control' -pidfile /var/run/qemu-server/104.pid -daemonize -smbios 'type=1,uuid=68658ece-d058-411a-8944-be3b7bf8bc39' -name Hybrid-VPSv4-174-79-24-141 -smp '2,sockets=1,cores=2,maxcpus=2' -nodefaults -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' -vga std -vnc unix:/var/run/qemu-server/104.vnc,x509,password -cpu kvm64,+lahf_lm,+sep,+kvm_pv_unhalt,+kvm_pv_eoi,enforce -m 20480 -k en-us -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' -device 'usb-tablet,id=tablet,bus=uhci.0,port=1' -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3' -iscsi 'initiator-name=iqn.1993-08.org.debian:01:303b856cc6' -drive 'if=none,id=drive-ide2,media=cdrom,aio=threads' -device 'ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=200' -device 'virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5' -drive 'file=rbd:rbd/vm-104-disk-1:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/rbd_vm.keyring,if=none,id=drive-scsi0,format=raw,cache=none,aio=native,detect-zeroes=on' -device 'scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0,bootindex=100' -netdev 'type=tap,id=net0,ifname=tap104i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=6A:56:698:ED:89,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300' -netdev 'type=tap,id=net1,ifname=tap104i1,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=F6:F9:22:67:E2:0E,netdev=net1,bus=pci.0,addr=0x13,id=net1,bootindex=301'' failed: got timeout
Apr 17 23:41:11 hosting-b pve-ha-lrm[3964]: <root@pam> end task UPID:hosting-b:00000F7E:00090AEB:5AD6E869:qmstart:104:root@pam: start failed: command '/usr/bin/kvm -id 104 -chardev 'socket,id=qmp,path=/var/run/qemu-server/104.qmp,server,nowait' -mon 'chardev=qmp,mode=control' -pidfile /var/run/qemu-server/104.pid -daemonize -smbios 'type=1,uuid=68658ece-d058-411a-8944-be3b7bf8bc39' -name Hybrid-VPSv4-174-79-24-141 -smp '2,sockets=1,cores=2,maxcpus=2' -nodefaults -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' -vga std -vnc unix:/var/run/qemu-server/104.vnc,x509,password -cpu kvm64,+lahf_lm,+sep,+kvm_pv_unhalt,+kvm_pv_eoi,enforce -m 20480 -k en-us -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' -device 'usb-tablet,id=tablet,bus=uhci.0,port=1' -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3' -iscsi 'initiator-name=iqn.1993-08.org.debian:01:303b856cc6' -drive 'if=none,id=drive-ide2,media=cdrom,aio=threads' -device 'ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=200' -device 'virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5' -drive 'file=rbd:rbd/vm-104-disk-1:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/rbd_vm.keyring,if=none,id=drive-scsi0,format=raw,cache=none,aio=native,detect-zeroes=on' -device 'scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0,bootindex=100' -netdev 'type=tap,id=net0,ifname=tap104i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=6A:56:698:ED:89,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300' -netdev 'type=tap,id=net1,ifname=tap104i1,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=F6:F9:22:67:E2:0E,netdev=net1,bus=pci.0,addr=0x13,id=net1,bootindex=301'' failed: got timeout
I have a 4 node cluster with multiple OSDs, 9 for each node. I have pool set for 2/2.
Recently one of the node (master) failed. All vms migrated to node2 but none of them came up. More strangely my vm on node4 and node4 also stopped working. I must have some huge mistake in my setup.
This is not how it is supposed to work.
Here are logs relevant to event from syslog
Apr 17 23:41:11 hosting-b pve-ha-lrm[3963]: start failed: command '/usr/bin/kvm -id 101 -chardev 'socket,id=qmp,path=/var/run/qemu-server/101.qmp,server,nowait' -mon 'chardev=qmp,mode=control' -pidfile /var/run/qemu-server/101.pid -daemonize -smbios 'type=1,uuid=dda46807-1e54-492a-af87-c1a9aef58e7f' -name HybridITServices -smp '2,sockets=2,cores=1,maxcpus=2' -nodefaults -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' -vga std -vnc unix:/var/run/qemu-server/101.vnc,x509,password -cpu kvm64,+lahf_lm,+sep -m 20480 -k en-us -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' -device 'usb-tablet,id=tablet,bus=uhci.0,port=1' -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3' -iscsi 'initiator-name=iqn.1993-08.org.debian:01:303b856cc6' -drive 'if=none,id=drive-ide2,media=cdrom,aio=threads' -device 'ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=100' -device 'virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5' -drive 'file=rbd:rbd/vm-101-disk-1:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/rbd_vm.keyring,if=none,id=drive-scsi0,format=raw,cache=none,aio=native,detect-zeroes=on' -device 'scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0,bootindex=200' -netdev 'type=tap,id=net0,ifname=tap101i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=F6:1F:51:C8:F9:24,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300' -machine 'accel=tcg'' failed: got timeout
Apr 17 23:41:11 hosting-b pve-ha-lrm[3960]: <root@pam> end task UPID:hosting-b:00000F7B:00090AE8:5AD6E869:qmstart:101:root@pam: start failed: command '/usr/bin/kvm -id 101 -chardev 'socket,id=qmp,path=/var/run/qemu-server/101.qmp,server,nowait' -mon 'chardev=qmp,mode=control' -pidfile /var/run/qemu-server/101.pid -daemonize -smbios 'type=1,uuid=dda46807-1e54-492a-af87-c1a9aef58e7f' -name HybridITServices -smp '2,sockets=2,cores=1,maxcpus=2' -nodefaults -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' -vga std -vnc unix:/var/run/qemu-server/101.vnc,x509,password -cpu kvm64,+lahf_lm,+sep -m 20480 -k en-us -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' -device 'usb-tablet,id=tablet,bus=uhci.0,port=1' -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3' -iscsi 'initiator-name=iqn.1993-08.org.debian:01:303b856cc6' -drive 'if=none,id=drive-ide2,media=cdrom,aio=threads' -device 'ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=100' -device 'virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5' -drive 'file=rbd:rbd/vm-101-disk-1:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/rbd_vm.keyring,if=none,id=drive-scsi0,format=raw,cache=none,aio=native,detect-zeroes=on' -device 'scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0,bootindex=200' -netdev 'type=tap,id=net0,ifname=tap101i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=F6:1F:51:C8:F9:24,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300' -machine 'accel=tcg'' failed: got timeout
Apr 17 23:41:11 hosting-b pve-ha-lrm[3960]: service status vm:101 started
Apr 17 23:41:11 hosting-b pve-ha-lrm[3966]: start failed: command '/usr/bin/kvm -id 104 -chardev 'socket,id=qmp,path=/var/run/qemu-server/104.qmp,server,nowait' -mon 'chardev=qmp,mode=control' -pidfile /var/run/qemu-server/104.pid -daemonize -smbios 'type=1,uuid=68658ece-d058-411a-8944-be3b7bf8bc39' -name Hybrid-VPSv4-174-79-24-141 -smp '2,sockets=1,cores=2,maxcpus=2' -nodefaults -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' -vga std -vnc unix:/var/run/qemu-server/104.vnc,x509,password -cpu kvm64,+lahf_lm,+sep,+kvm_pv_unhalt,+kvm_pv_eoi,enforce -m 20480 -k en-us -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' -device 'usb-tablet,id=tablet,bus=uhci.0,port=1' -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3' -iscsi 'initiator-name=iqn.1993-08.org.debian:01:303b856cc6' -drive 'if=none,id=drive-ide2,media=cdrom,aio=threads' -device 'ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=200' -device 'virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5' -drive 'file=rbd:rbd/vm-104-disk-1:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/rbd_vm.keyring,if=none,id=drive-scsi0,format=raw,cache=none,aio=native,detect-zeroes=on' -device 'scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0,bootindex=100' -netdev 'type=tap,id=net0,ifname=tap104i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=6A:56:698:ED:89,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300' -netdev 'type=tap,id=net1,ifname=tap104i1,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=F6:F9:22:67:E2:0E,netdev=net1,bus=pci.0,addr=0x13,id=net1,bootindex=301'' failed: got timeout
Apr 17 23:41:11 hosting-b pve-ha-lrm[3964]: <root@pam> end task UPID:hosting-b:00000F7E:00090AEB:5AD6E869:qmstart:104:root@pam: start failed: command '/usr/bin/kvm -id 104 -chardev 'socket,id=qmp,path=/var/run/qemu-server/104.qmp,server,nowait' -mon 'chardev=qmp,mode=control' -pidfile /var/run/qemu-server/104.pid -daemonize -smbios 'type=1,uuid=68658ece-d058-411a-8944-be3b7bf8bc39' -name Hybrid-VPSv4-174-79-24-141 -smp '2,sockets=1,cores=2,maxcpus=2' -nodefaults -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' -vga std -vnc unix:/var/run/qemu-server/104.vnc,x509,password -cpu kvm64,+lahf_lm,+sep,+kvm_pv_unhalt,+kvm_pv_eoi,enforce -m 20480 -k en-us -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' -device 'usb-tablet,id=tablet,bus=uhci.0,port=1' -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3' -iscsi 'initiator-name=iqn.1993-08.org.debian:01:303b856cc6' -drive 'if=none,id=drive-ide2,media=cdrom,aio=threads' -device 'ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=200' -device 'virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5' -drive 'file=rbd:rbd/vm-104-disk-1:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/rbd_vm.keyring,if=none,id=drive-scsi0,format=raw,cache=none,aio=native,detect-zeroes=on' -device 'scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0,bootindex=100' -netdev 'type=tap,id=net0,ifname=tap104i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=6A:56:698:ED:89,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300' -netdev 'type=tap,id=net1,ifname=tap104i1,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown' -device 'e1000,mac=F6:F9:22:67:E2:0E,netdev=net1,bus=pci.0,addr=0x13,id=net1,bootindex=301'' failed: got timeout
Last edited: