Hi,
I finished the upgrade to Proxmox 6 + Ceph Nautilus on a 4-node cluster.
On 2 nodes I have identified that all directories /var/lib/ceph/osd/ceph-<id>/ are empty after rebooting.
Typically the content of this directory is this:
root@ld5508:~# ls -l /var/lib/ceph/osd/ceph-70/
insgesamt 60
-rw-r--r-- 1 root root 402 Jun 7 15:49 activate.monmap
-rw-r--r-- 1 ceph ceph 3 Jun 7 15:49 active
lrwxrwxrwx 1 ceph ceph 58 Jun 7 15:49 block -> /dev/disk/by-partuuid/d9c2755f-0542-4772-af7d-0942cf75be76
lrwxrwxrwx 1 ceph ceph 58 Jun 7 15:49 block.db -> /dev/disk/by-partuuid/e2fcac2c-d3c7-4672-84ec-20c0187d7d2a
-rw-r--r-- 1 ceph ceph 37 Jun 7 15:49 block.db_uuid
-rw-r--r-- 1 ceph ceph 37 Jun 7 15:49 block_uuid
-rw-r--r-- 1 ceph ceph 2 Jun 7 15:49 bluefs
-rw-r--r-- 1 ceph ceph 37 Jun 7 15:49 ceph_fsid
-rw-r--r-- 1 ceph ceph 37 Jun 7 15:49 fsid
-rw------- 1 ceph ceph 57 Jun 7 15:49 keyring
-rw-r--r-- 1 ceph ceph 8 Jun 7 15:49 kv_backend
-rw-r--r-- 1 ceph ceph 21 Jun 7 15:49 magic
-rw-r--r-- 1 ceph ceph 4 Jun 7 15:49 mkfs_done
-rw-r--r-- 1 ceph ceph 6 Jun 7 15:49 ready
-rw-r--r-- 1 ceph ceph 3 Aug 23 09:57 require_osd_release
-rw-r--r-- 1 ceph ceph 0 Aug 21 11:35 systemd
-rw-r--r-- 1 ceph ceph 10 Jun 7 15:49 type
-rw-r--r-- 1 ceph ceph 3 Jun 7 15:49 whoami
I'm concerned that I will lose the content on the other nodes, too.
In addition to this issue I have identified that no OSDs are displayed in WebUI -> Ceph -> OSD.
The screen is completely empty.
I assume that this issue could be related to ceph.conf; here's my current configuration file:
root@ld3955:/etc/ceph# more /etc/pve/ceph.conf
[global]
auth client required = cephx
auth cluster required = cephx
auth service required = cephx
cluster network = 192.168.1.0/27
debug ms = 0/0
fsid = 6b1b5117-6e08-4843-93d6-2da3cf8a6bae
mon allow pool delete = true
mon osd full ratio = .85
mon osd nearfull ratio = .75
osd crush update on start = false
osd journal size = 5120
osd pool default min size = 2
osd pool default size = 3
public network = 10.97.206.0/24
mon_host = 10.97.206.93,10.97.206.94,10.97.206.95
[client]
keyring = /etc/pve/priv/$cluster.$name.keyring
[osd]
osd journal size = 100
[mds.ld3955]
host = ld3955
mds standby for name = pve
keyring = /etc/pve/priv/ceph.mds.ld3955.keyring
[mds.ld3976]
host = ld3976
mds standby for name = pve
keyring = /etc/pve/priv/ceph.mds.ld3976.keyring
All services are running with exception of the OSDs where relevant /var/lib/ceph/osd/ceph-<id>/ are empty.
I'm re-creating the affected OSDs manually in order to fix this issue.
Can you please help me identify the root cause of these 2 issues?
THX
I finished the upgrade to Proxmox 6 + Ceph Nautilus on a 4-node cluster.
On 2 nodes I have identified that all directories /var/lib/ceph/osd/ceph-<id>/ are empty after rebooting.
Typically the content of this directory is this:
root@ld5508:~# ls -l /var/lib/ceph/osd/ceph-70/
insgesamt 60
-rw-r--r-- 1 root root 402 Jun 7 15:49 activate.monmap
-rw-r--r-- 1 ceph ceph 3 Jun 7 15:49 active
lrwxrwxrwx 1 ceph ceph 58 Jun 7 15:49 block -> /dev/disk/by-partuuid/d9c2755f-0542-4772-af7d-0942cf75be76
lrwxrwxrwx 1 ceph ceph 58 Jun 7 15:49 block.db -> /dev/disk/by-partuuid/e2fcac2c-d3c7-4672-84ec-20c0187d7d2a
-rw-r--r-- 1 ceph ceph 37 Jun 7 15:49 block.db_uuid
-rw-r--r-- 1 ceph ceph 37 Jun 7 15:49 block_uuid
-rw-r--r-- 1 ceph ceph 2 Jun 7 15:49 bluefs
-rw-r--r-- 1 ceph ceph 37 Jun 7 15:49 ceph_fsid
-rw-r--r-- 1 ceph ceph 37 Jun 7 15:49 fsid
-rw------- 1 ceph ceph 57 Jun 7 15:49 keyring
-rw-r--r-- 1 ceph ceph 8 Jun 7 15:49 kv_backend
-rw-r--r-- 1 ceph ceph 21 Jun 7 15:49 magic
-rw-r--r-- 1 ceph ceph 4 Jun 7 15:49 mkfs_done
-rw-r--r-- 1 ceph ceph 6 Jun 7 15:49 ready
-rw-r--r-- 1 ceph ceph 3 Aug 23 09:57 require_osd_release
-rw-r--r-- 1 ceph ceph 0 Aug 21 11:35 systemd
-rw-r--r-- 1 ceph ceph 10 Jun 7 15:49 type
-rw-r--r-- 1 ceph ceph 3 Jun 7 15:49 whoami
I'm concerned that I will lose the content on the other nodes, too.
In addition to this issue I have identified that no OSDs are displayed in WebUI -> Ceph -> OSD.
The screen is completely empty.
I assume that this issue could be related to ceph.conf; here's my current configuration file:
root@ld3955:/etc/ceph# more /etc/pve/ceph.conf
[global]
auth client required = cephx
auth cluster required = cephx
auth service required = cephx
cluster network = 192.168.1.0/27
debug ms = 0/0
fsid = 6b1b5117-6e08-4843-93d6-2da3cf8a6bae
mon allow pool delete = true
mon osd full ratio = .85
mon osd nearfull ratio = .75
osd crush update on start = false
osd journal size = 5120
osd pool default min size = 2
osd pool default size = 3
public network = 10.97.206.0/24
mon_host = 10.97.206.93,10.97.206.94,10.97.206.95
[client]
keyring = /etc/pve/priv/$cluster.$name.keyring
[osd]
osd journal size = 100
[mds.ld3955]
host = ld3955
mds standby for name = pve
keyring = /etc/pve/priv/ceph.mds.ld3955.keyring
[mds.ld3976]
host = ld3976
mds standby for name = pve
keyring = /etc/pve/priv/ceph.mds.ld3976.keyring
All services are running with exception of the OSDs where relevant /var/lib/ceph/osd/ceph-<id>/ are empty.
I'm re-creating the affected OSDs manually in order to fix this issue.
Can you please help me identify the root cause of these 2 issues?
THX