TL;DR:
Changing %i to the corresponding monitor name makes the mon service work.
One of my mons kept dying and restarting and could not start again, so I investigated.
It could not start because of a misconfiguration in the /etc/systemd/system/ceph-mon.target.wants/ceph-mon@pve2.service file: the "%i" specifier in ExecStart expands to '2', so I replaced it with 'pve2' in my case.
The same fix is demonstrated below for my other mon, pve3:
root@pve3:~# ps auxw|grep ceph-mon|grep -v grep
root@pve3:~# systemctl start ceph-mon@3.service
root@pve3:~# sleep 10
root@pve3:~# systemctl status ceph-mon@3.service
● ceph-mon@3.service - Ceph cluster monitor daemon
Loaded: loaded (/lib/systemd/system/ceph-mon@.service; disabled; preset: enabled)
Drop-In: /usr/lib/systemd/system/ceph-mon@.service.d
└─ceph-after-pve-cluster.conf
Active: activating (auto-restart) (Result: exit-code) since Sat 2024-08-24 11:19:22 CEST; 1s ago
Process: 2707566 ExecStart=/usr/bin/ceph-mon -f --cluster ${CLUSTER} --id 3 --setuser ceph --setgroup ceph (code=exited, status=1/FAILURE)
Main PID: 2707566 (code=exited, status=1/FAILURE)
CPU: 83ms
root@pve3:~# ps auxw|grep ceph-mon|grep -v grep
root@pve3:~# diff -u /etc/systemd/system/ceph-mon.target.wants/ceph-mon@pve3.service /tmp/ceph-mon@pve3.service
--- /etc/systemd/system/ceph-mon.target.wants/ceph-mon@pve3.service 2024-08-24 11:16:13.702182501 +0200
+++ /tmp/ceph-mon@pve3.service 2024-08-24 11:03:01.898301061 +0200
@@ -13,7 +13,7 @@
Environment=CLUSTER=ceph
EnvironmentFile=-/etc/default/ceph
ExecReload=/bin/kill -HUP $MAINPID
-ExecStart=/usr/bin/ceph-mon -f --cluster ${CLUSTER} --id %i --setuser ceph --setgroup ceph
+ExecStart=/usr/bin/ceph-mon -f --cluster ceph --id pve3 --setuser ceph --setgroup ceph
LimitNOFILE=1048576
LimitNPROC=1048576
LockPersonality=true
root@pve3:~# cp /tmp/ceph-mon@pve3.service /etc/systemd/system/ceph-mon.target.wants/ceph-mon@pve3.service
root@pve3:~# systemctl daemon-reload
root@pve3:~# systemctl start ceph-mon@3.service
root@pve3:~# sleep 5
root@pve3:~# ps auxw|grep ceph-mon|grep -v grep
ceph 2708209 2.9 0.6 322140 98676 ? Ssl 11:20 0:00 /usr/bin/ceph-mon -f --cluster ceph --id pve3 --setuser ceph --setgroup ceph
root@pve3:~# systemctl status ceph-mon@3.service
● ceph-mon@3.service - Ceph cluster monitor daemon
Loaded: loaded (/lib/systemd/system/ceph-mon@.service; disabled; preset: enabled)
Drop-In: /usr/lib/systemd/system/ceph-mon@.service.d
└─ceph-after-pve-cluster.conf
Active: active (running) since Sat 2024-08-24 11:20:02 CEST; 1min 56s ago
Main PID: 2708209 (ceph-mon)
Tasks: 24
Memory: 88.9M
CPU: 1.222s
CGroup: /system.slice/system-ceph\x2dmon.slice/ceph-mon@3.service
└─2708209 /usr/bin/ceph-mon -f --cluster ceph --id pve3 --setuser ceph --setgroup ceph
Changing %i to the corresponding monitor name makes the mon service work.
One of my mons kept dying and restarting and could not start again, so I investigated.
It could not start because of a misconfiguration in the /etc/systemd/system/ceph-mon.target.wants/ceph-mon@pve2.service file: the "%i" specifier in ExecStart expands to '2', so I replaced it with 'pve2' in my case.
The same fix is demonstrated below for my other mon, pve3:
root@pve3:~# ps auxw|grep ceph-mon|grep -v grep
root@pve3:~# systemctl start ceph-mon@3.service
root@pve3:~# sleep 10
root@pve3:~# systemctl status ceph-mon@3.service
● ceph-mon@3.service - Ceph cluster monitor daemon
Loaded: loaded (/lib/systemd/system/ceph-mon@.service; disabled; preset: enabled)
Drop-In: /usr/lib/systemd/system/ceph-mon@.service.d
└─ceph-after-pve-cluster.conf
Active: activating (auto-restart) (Result: exit-code) since Sat 2024-08-24 11:19:22 CEST; 1s ago
Process: 2707566 ExecStart=/usr/bin/ceph-mon -f --cluster ${CLUSTER} --id 3 --setuser ceph --setgroup ceph (code=exited, status=1/FAILURE)
Main PID: 2707566 (code=exited, status=1/FAILURE)
CPU: 83ms
root@pve3:~# ps auxw|grep ceph-mon|grep -v grep
root@pve3:~# diff -u /etc/systemd/system/ceph-mon.target.wants/ceph-mon@pve3.service /tmp/ceph-mon@pve3.service
--- /etc/systemd/system/ceph-mon.target.wants/ceph-mon@pve3.service 2024-08-24 11:16:13.702182501 +0200
+++ /tmp/ceph-mon@pve3.service 2024-08-24 11:03:01.898301061 +0200
@@ -13,7 +13,7 @@
Environment=CLUSTER=ceph
EnvironmentFile=-/etc/default/ceph
ExecReload=/bin/kill -HUP $MAINPID
-ExecStart=/usr/bin/ceph-mon -f --cluster ${CLUSTER} --id %i --setuser ceph --setgroup ceph
+ExecStart=/usr/bin/ceph-mon -f --cluster ceph --id pve3 --setuser ceph --setgroup ceph
LimitNOFILE=1048576
LimitNPROC=1048576
LockPersonality=true
root@pve3:~# cp /tmp/ceph-mon@pve3.service /etc/systemd/system/ceph-mon.target.wants/ceph-mon@pve3.service
root@pve3:~# systemctl daemon-reload
root@pve3:~# systemctl start ceph-mon@3.service
root@pve3:~# sleep 5
root@pve3:~# ps auxw|grep ceph-mon|grep -v grep
ceph 2708209 2.9 0.6 322140 98676 ? Ssl 11:20 0:00 /usr/bin/ceph-mon -f --cluster ceph --id pve3 --setuser ceph --setgroup ceph
root@pve3:~# systemctl status ceph-mon@3.service
● ceph-mon@3.service - Ceph cluster monitor daemon
Loaded: loaded (/lib/systemd/system/ceph-mon@.service; disabled; preset: enabled)
Drop-In: /usr/lib/systemd/system/ceph-mon@.service.d
└─ceph-after-pve-cluster.conf
Active: active (running) since Sat 2024-08-24 11:20:02 CEST; 1min 56s ago
Main PID: 2708209 (ceph-mon)
Tasks: 24
Memory: 88.9M
CPU: 1.222s
CGroup: /system.slice/system-ceph\x2dmon.slice/ceph-mon@3.service
└─2708209 /usr/bin/ceph-mon -f --cluster ceph --id pve3 --setuser ceph --setgroup ceph