Hallo,
wir haben unseren Cluster heute auf 7.1-8 aktualisiert und bekommen Coredumps vom Cluster-Service bzw. pmxcfs.
Wir hatten auch schon Coredumps vor dem Update auf 7.1.
Woher kommen diese Crashes?
Nach einem Kill von corosync und einem Neustart des Cluster-Services ist alles wieder ok.
Wir haben unseren Cluster heute auf 7.1-8 aktualisiert und bekommen Coredumps vom Cluster-Service bzw. pmxcfs:
[15413.683096] cfs_loop[1509]: segfault at 7f0b7784cdae ip 0000563dc0afcae0 sp 00007f0b154fb1b8 error 4 in pmxcfs[563dc0ae3000+1b000]
[15413.683113] Code: 83 c4 08 31 c0 5b 5d c3 66 90 48 8d 15 c0 28 00 00 48 8d 35 3a 74 00 00 31 ff e8 6b 6b fe ff e9 5c ff ff ff 66 0f 1f 44 00 00 <8b> 47 0c 8b 56 0c 39 d0 75 0d 48 8b 47 10 48 8b 56 10 48 39 d0 74
[15590.639801] INFO: task pvestatd:1542 blocked for more than 120 seconds.
[15590.639856] Tainted: P O 5.13.19-2-pve #1
[15590.639884] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[15590.639926] task:pvestatd state:D stack: 0 pid: 1542 ppid: 1 flags:0x00004000
[15590.639932] Call Trace:
[15590.639939] __schedule+0x2fa/0x910
[15590.639950] schedule+0x4f/0xc0
[15590.639954] rwsem_down_read_slowpath+0x318/0x380
[15590.639958] down_read+0x43/0x90
[15590.639961] walk_component+0x132/0x1b0
[15590.639966] link_path_walk.part.0+0x241/0x370
[15590.639969] ? path_init+0x2c1/0x3f0
[15590.639972] path_lookupat+0x43/0x1c0
[15590.639976] filename_lookup+0xbb/0x1c0
[15590.639982] ? __check_object_size+0x13f/0x150
[15590.639989] ? strncpy_from_user+0x44/0x150
[15590.639995] ? getname_flags.part.0+0x4c/0x1b0
[15590.640000] user_path_at_empty+0x59/0x90
[15590.640005] vfs_statx+0x7a/0x120
[15590.640009] __do_sys_newstat+0x3e/0x70
[15590.640014] ? handle_mm_fault+0xda/0x2c0
[15590.640020] __x64_sys_newstat+0x16/0x20
[15590.640023] do_syscall_64+0x61/0xb0
[15590.640027] ? irqentry_exit_to_user_mode+0x9/0x20
[15590.640030] ? irqentry_exit+0x19/0x30
[15590.640032] ? exc_page_fault+0x8f/0x170
[15590.640034] ? asm_exc_page_fault+0x8/0x30
[15590.640037] entry_SYSCALL_64_after_hwframe+0x44/0xae
[15590.640040] RIP: 0033:0x7f15249a13a6
[15590.640043] RSP: 002b:00007ffe0eb2d738 EFLAGS: 00000246 ORIG_RAX: 0000000000000004
[15590.640046] RAX: ffffffffffffffda RBX: 0000559ac27a1000 RCX: 00007f15249a13a6
[15590.640048] RDX: 0000559abd9d54b8 RSI: 0000559abd9d54b8 RDI: 0000559abee57e90
[15590.640050] RBP: 0000559abd9d52a0 R08: 0000000000000001 R09: 0000000000000111
[15590.640051] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
[15590.640053] R13: 0000559abc15b383 R14: 0000559abee57e90 R15: 0000000000000000
[15590.640177] INFO: task pvescheduler:822517 blocked for more than 120 seconds.
[15590.640210] Tainted: P O 5.13.19-2-pve #1
[15590.640236] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[15413.683113] Code: 83 c4 08 31 c0 5b 5d c3 66 90 48 8d 15 c0 28 00 00 48 8d 35 3a 74 00 00 31 ff e8 6b 6b fe ff e9 5c ff ff ff 66 0f 1f 44 00 00 <8b> 47 0c 8b 56 0c 39 d0 75 0d 48 8b 47 10 48 8b 56 10 48 39 d0 74
[15590.639801] INFO: task pvestatd:1542 blocked for more than 120 seconds.
[15590.639856] Tainted: P O 5.13.19-2-pve #1
[15590.639884] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[15590.639926] task:pvestatd state:D stack: 0 pid: 1542 ppid: 1 flags:0x00004000
[15590.639932] Call Trace:
[15590.639939] __schedule+0x2fa/0x910
[15590.639950] schedule+0x4f/0xc0
[15590.639954] rwsem_down_read_slowpath+0x318/0x380
[15590.639958] down_read+0x43/0x90
[15590.639961] walk_component+0x132/0x1b0
[15590.639966] link_path_walk.part.0+0x241/0x370
[15590.639969] ? path_init+0x2c1/0x3f0
[15590.639972] path_lookupat+0x43/0x1c0
[15590.639976] filename_lookup+0xbb/0x1c0
[15590.639982] ? __check_object_size+0x13f/0x150
[15590.639989] ? strncpy_from_user+0x44/0x150
[15590.639995] ? getname_flags.part.0+0x4c/0x1b0
[15590.640000] user_path_at_empty+0x59/0x90
[15590.640005] vfs_statx+0x7a/0x120
[15590.640009] __do_sys_newstat+0x3e/0x70
[15590.640014] ? handle_mm_fault+0xda/0x2c0
[15590.640020] __x64_sys_newstat+0x16/0x20
[15590.640023] do_syscall_64+0x61/0xb0
[15590.640027] ? irqentry_exit_to_user_mode+0x9/0x20
[15590.640030] ? irqentry_exit+0x19/0x30
[15590.640032] ? exc_page_fault+0x8f/0x170
[15590.640034] ? asm_exc_page_fault+0x8/0x30
[15590.640037] entry_SYSCALL_64_after_hwframe+0x44/0xae
[15590.640040] RIP: 0033:0x7f15249a13a6
[15590.640043] RSP: 002b:00007ffe0eb2d738 EFLAGS: 00000246 ORIG_RAX: 0000000000000004
[15590.640046] RAX: ffffffffffffffda RBX: 0000559ac27a1000 RCX: 00007f15249a13a6
[15590.640048] RDX: 0000559abd9d54b8 RSI: 0000559abd9d54b8 RDI: 0000559abee57e90
[15590.640050] RBP: 0000559abd9d52a0 R08: 0000000000000001 R09: 0000000000000111
[15590.640051] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
[15590.640053] R13: 0000559abc15b383 R14: 0000559abee57e90 R15: 0000000000000000
[15590.640177] INFO: task pvescheduler:822517 blocked for more than 120 seconds.
[15590.640210] Tainted: P O 5.13.19-2-pve #1
[15590.640236] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
root@prox03:~# coredumpctl 1508
Unknown command verb 1508.
root@prox03:~# coredumpctl gdb 1508
PID: 1508 (pmxcfs)
UID: 0 (root)
GID: 0 (root)
Signal: 11 (SEGV)
Timestamp: Sun 2021-12-12 18:44:57 CET (2h 31min ago)
Command Line: /usr/bin/pmxcfs
Executable: /usr/bin/pmxcfs
Control Group: /system.slice/pve-cluster.service
Unit: pve-cluster.service
Slice: system.slice
Boot ID: d3a72de5e2ae48f2a0adb8aa5a51def5
Machine ID: 311e72c1f57a48ab96d7ba5321633f70
Hostname: prox03
Storage: /var/lib/systemd/coredump/core.pmxcfs.0.d3a72de5e2ae48f2a0adb8aa5a51def5.1508.1639331097000000.zst
Message: Process 1508 (pmxcfs) of user 0 dumped core.
Stack trace of thread 1509:
#0 0x0000563dc0afcae0 n/a (pmxcfs + 0x1fae0)
#1 0x00007f0b1910e9dc n/a (libglib-2.0.so.0 + 0x819dc)
#2 0x0000563dc0afd3d3 n/a (pmxcfs + 0x203d3)
#3 0x0000563dc0afda67 n/a (pmxcfs + 0x20a67)
#4 0x0000563dc0af0e75 n/a (pmxcfs + 0x13e75)
#5 0x00007f0b19234b68 cpg_dispatch (libcpg.so.4 + 0x2b68)
#6 0x0000563dc0af241c n/a (pmxcfs + 0x1541c)
#7 0x0000563dc0af2ba3 n/a (pmxcfs + 0x15ba3)
#8 0x0000563dc0ae724b n/a (pmxcfs + 0xa24b)
#9 0x00007f0b191c75df n/a (libqb.so.100 + 0xb5df)
#10 0x00007f0b191c73fc qb_loop_run (libqb.so.100 + 0xb3fc)
#11 0x0000563dc0ae6d5d n/a (pmxcfs + 0x9d5d)
#12 0x00007f0b191080bd n/a (libglib-2.0.so.0 + 0x7b0bd)
#13 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#14 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Stack trace of thread 1517:
#0 0x00007f0b18e9b08c read (libpthread.so.0 + 0x1308c)
#1 0x00007f0b19060990 n/a (libfuse.so.2 + 0x14990)
#2 0x00007f0b1906248c n/a (libfuse.so.2 + 0x1648c)
#3 0x00007f0b19060fbf n/a (libfuse.so.2 + 0x14fbf)
#4 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#5 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Stack trace of thread 1585:
#0 0x00007f0b18e9b08c read (libpthread.so.0 + 0x1308c)
#1 0x00007f0b19060990 n/a (libfuse.so.2 + 0x14990)
#2 0x00007f0b1906248c n/a (libfuse.so.2 + 0x1648c)
#3 0x00007f0b19060fbf n/a (libfuse.so.2 + 0x14fbf)
#4 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#5 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Stack trace of thread 1516:
#0 0x00007f0b18e9b08c read (libpthread.so.0 + 0x1308c)
#1 0x00007f0b19060990 n/a (libfuse.so.2 + 0x14990)
#2 0x00007f0b1906248c n/a (libfuse.so.2 + 0x1648c)
#3 0x00007f0b19060fbf n/a (libfuse.so.2 + 0x14fbf)
#4 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#5 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Stack trace of thread 1540:
#0 0x00007f0b18e9b08c read (libpthread.so.0 + 0x1308c)
#1 0x00007f0b19060990 n/a (libfuse.so.2 + 0x14990)
#2 0x00007f0b1906248c n/a (libfuse.so.2 + 0x1648c)
#3 0x00007f0b19060fbf n/a (libfuse.so.2 + 0x14fbf)
#4 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#5 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Stack trace of thread 1515:
#0 0x00007f0b18dc1116 epoll_wait (libc.so.6 + 0xfe116)
#1 0x00007f0b191d88c8 n/a (libqb.so.100 + 0x1c8c8)
#2 0x00007f0b191c731f qb_loop_run (libqb.so.100 + 0xb31f)
#3 0x0000563dc0ae77f3 n/a (pmxcfs + 0xa7f3)
#4 0x00007f0b191080bd n/a (libglib-2.0.so.0 + 0x7b0bd)
#5 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#6 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Stack trace of thread 1508:
#0 0x00007f0b18e9a174 do_futex_wait.constprop.0 (libpthread.so.0 + 0x12174)
#1 0x00007f0b18e9a278 __new_sem_wait_slow.constprop.0 (libpthread.so.0 + 0x12278)
#2 0x00007f0b19061270 fuse_session_loop_mt (libfuse.so.2 + 0x15270)
#3 0x00007f0b19066c98 fuse_loop_mt (libfuse.so.2 + 0x1ac98)
#4 0x0000563dc0ae4b0d n/a (pmxcfs + 0x7b0d)
#5 0x00007f0b18ce9d0a __libc_start_main (libc.so.6 + 0x26d0a)
#6 0x0000563dc0ae4fda n/a (pmxcfs + 0x7fda)
Stack trace of thread 748395:
#0 0x00007f0b18e9b08c read (libpthread.so.0 + 0x1308c)
#1 0x00007f0b19060990 n/a (libfuse.so.2 + 0x14990)
#2 0x00007f0b1906248c n/a (libfuse.so.2 + 0x1648c)
#3 0x00007f0b19060fbf n/a (libfuse.so.2 + 0x14fbf)
#4 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#5 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Failed to invoke gdb: No such file or directory
Unknown command verb 1508.
root@prox03:~# coredumpctl gdb 1508
PID: 1508 (pmxcfs)
UID: 0 (root)
GID: 0 (root)
Signal: 11 (SEGV)
Timestamp: Sun 2021-12-12 18:44:57 CET (2h 31min ago)
Command Line: /usr/bin/pmxcfs
Executable: /usr/bin/pmxcfs
Control Group: /system.slice/pve-cluster.service
Unit: pve-cluster.service
Slice: system.slice
Boot ID: d3a72de5e2ae48f2a0adb8aa5a51def5
Machine ID: 311e72c1f57a48ab96d7ba5321633f70
Hostname: prox03
Storage: /var/lib/systemd/coredump/core.pmxcfs.0.d3a72de5e2ae48f2a0adb8aa5a51def5.1508.1639331097000000.zst
Message: Process 1508 (pmxcfs) of user 0 dumped core.
Stack trace of thread 1509:
#0 0x0000563dc0afcae0 n/a (pmxcfs + 0x1fae0)
#1 0x00007f0b1910e9dc n/a (libglib-2.0.so.0 + 0x819dc)
#2 0x0000563dc0afd3d3 n/a (pmxcfs + 0x203d3)
#3 0x0000563dc0afda67 n/a (pmxcfs + 0x20a67)
#4 0x0000563dc0af0e75 n/a (pmxcfs + 0x13e75)
#5 0x00007f0b19234b68 cpg_dispatch (libcpg.so.4 + 0x2b68)
#6 0x0000563dc0af241c n/a (pmxcfs + 0x1541c)
#7 0x0000563dc0af2ba3 n/a (pmxcfs + 0x15ba3)
#8 0x0000563dc0ae724b n/a (pmxcfs + 0xa24b)
#9 0x00007f0b191c75df n/a (libqb.so.100 + 0xb5df)
#10 0x00007f0b191c73fc qb_loop_run (libqb.so.100 + 0xb3fc)
#11 0x0000563dc0ae6d5d n/a (pmxcfs + 0x9d5d)
#12 0x00007f0b191080bd n/a (libglib-2.0.so.0 + 0x7b0bd)
#13 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#14 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Stack trace of thread 1517:
#0 0x00007f0b18e9b08c read (libpthread.so.0 + 0x1308c)
#1 0x00007f0b19060990 n/a (libfuse.so.2 + 0x14990)
#2 0x00007f0b1906248c n/a (libfuse.so.2 + 0x1648c)
#3 0x00007f0b19060fbf n/a (libfuse.so.2 + 0x14fbf)
#4 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#5 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Stack trace of thread 1585:
#0 0x00007f0b18e9b08c read (libpthread.so.0 + 0x1308c)
#1 0x00007f0b19060990 n/a (libfuse.so.2 + 0x14990)
#2 0x00007f0b1906248c n/a (libfuse.so.2 + 0x1648c)
#3 0x00007f0b19060fbf n/a (libfuse.so.2 + 0x14fbf)
#4 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#5 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Stack trace of thread 1516:
#0 0x00007f0b18e9b08c read (libpthread.so.0 + 0x1308c)
#1 0x00007f0b19060990 n/a (libfuse.so.2 + 0x14990)
#2 0x00007f0b1906248c n/a (libfuse.so.2 + 0x1648c)
#3 0x00007f0b19060fbf n/a (libfuse.so.2 + 0x14fbf)
#4 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#5 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Stack trace of thread 1540:
#0 0x00007f0b18e9b08c read (libpthread.so.0 + 0x1308c)
#1 0x00007f0b19060990 n/a (libfuse.so.2 + 0x14990)
#2 0x00007f0b1906248c n/a (libfuse.so.2 + 0x1648c)
#3 0x00007f0b19060fbf n/a (libfuse.so.2 + 0x14fbf)
#4 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#5 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Stack trace of thread 1515:
#0 0x00007f0b18dc1116 epoll_wait (libc.so.6 + 0xfe116)
#1 0x00007f0b191d88c8 n/a (libqb.so.100 + 0x1c8c8)
#2 0x00007f0b191c731f qb_loop_run (libqb.so.100 + 0xb31f)
#3 0x0000563dc0ae77f3 n/a (pmxcfs + 0xa7f3)
#4 0x00007f0b191080bd n/a (libglib-2.0.so.0 + 0x7b0bd)
#5 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#6 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Stack trace of thread 1508:
#0 0x00007f0b18e9a174 do_futex_wait.constprop.0 (libpthread.so.0 + 0x12174)
#1 0x00007f0b18e9a278 __new_sem_wait_slow.constprop.0 (libpthread.so.0 + 0x12278)
#2 0x00007f0b19061270 fuse_session_loop_mt (libfuse.so.2 + 0x15270)
#3 0x00007f0b19066c98 fuse_loop_mt (libfuse.so.2 + 0x1ac98)
#4 0x0000563dc0ae4b0d n/a (pmxcfs + 0x7b0d)
#5 0x00007f0b18ce9d0a __libc_start_main (libc.so.6 + 0x26d0a)
#6 0x0000563dc0ae4fda n/a (pmxcfs + 0x7fda)
Stack trace of thread 748395:
#0 0x00007f0b18e9b08c read (libpthread.so.0 + 0x1308c)
#1 0x00007f0b19060990 n/a (libfuse.so.2 + 0x14990)
#2 0x00007f0b1906248c n/a (libfuse.so.2 + 0x1648c)
#3 0x00007f0b19060fbf n/a (libfuse.so.2 + 0x14fbf)
#4 0x00007f0b18e90ea7 start_thread (libpthread.so.0 + 0x8ea7)
#5 0x00007f0b18dc0def __clone (libc.so.6 + 0xfddef)
Failed to invoke gdb: No such file or directory
Wir hatten auch schon Coredumps vor dem Update auf 7.1.
Woher kommen diese Crashes?
Nach einem Kill von corosync und einem Neustart des Cluster-Services ist alles wieder ok.
Last edited: