ceph version 18.2.2 (e9fe820e7fffd1b7cde143a9f77653b73fcec748) reef (stable)
Hello everyone, we need your help importing our BlueStore OSDs into a new cluster. After the import attempt, the OSDs do not start: they stay in a "down" state and are reported as filestore instead of bluestore.
screenshot: https://share.cleanshot.com/NWbS7bN3
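To double-check the backend locally, independently of what the GUI reports, the "type" file in each OSD's data directory can be read; this is only a sketch and assumes the default /var/lib/ceph/osd/ceph-<id> paths are populated on the host that carries the OSD (run it on pxm2 for osd.0/osd.1 and on pxm3 for osd.2/osd.3):
Bash:
# The "type" file records the object store backend the OSD was created with
# (expected to say "bluestore" here).
for id in 0 1; do
    echo -n "osd.$id: "
    cat /var/lib/ceph/osd/ceph-$id/type 2>/dev/null || echo "data dir not mounted"
done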
In the Proxmox interface, when I select an OSD and click the "Details" button, this error appears: "error with 'osd metadata': mon_cmd failed - (500)".
screenshot: https://share.cleanshot.com/7CxVbPRZ
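The same information can be queried on the command line, which may give a more specific error than the GUI's 500. As far as we understand, "ceph osd metadata" only has data for OSDs that have booted into the cluster at least once, so for these OSDs it will probably come back empty or error out, which might also explain the failing "Details" view:
Bash:
# Query the monitor directly for the metadata the GUI tries to display.
ceph osd metadata osd.0
# or dump the metadata of all OSDs at once:
ceph osd metadata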
We reinstalled Ceph on 3 nodes (pxm1, pxm2 and pxm3). The 4 OSDs are physically present on only 2 of them, pxm2 and pxm3; pxm1 only runs a mon and a mgr to complete the cluster.
New cluster information:
Bash:
root@pxm2:~# cat /etc/ceph/ceph.conf
[global]
auth_client_required = cephx
auth_cluster_required = cephx
auth_service_required = cephx
cluster_network = 192.168.0.2/24
fsid = f4466e33-b57d-4d68-9909-3468afd9e5c2
mon_allow_pool_delete = true
mon_host = 192.168.0.2 192.168.0.3 192.168.0.1
ms_bind_ipv4 = true
ms_bind_ipv6 = false
osd_pool_default_min_size = 2
osd_pool_default_size = 3
public_network = 192.168.0.0/24
[client]
# keyring = /etc/pve/priv/$cluster.$name.keyring
keyring = /etc/pve/priv/ceph.client.admin.keyring
#[client.crash]
# keyring = /etc/pve/ceph/$cluster.$name.keyring
[client.crash]
key = AQAl95NmlvL0HRAAovpivsfHqqokmO0vqIR5Lg==
[client.admin]
key = AQAk95NmSjMdORAAiAHkTSSMquKkBAGpALjwQA==
caps mds = "allow *"
caps mgr = "allow *"
caps mon = "allow *"
caps osd = "allow *"
[mon.pxm1]
public_addr = 192.168.0.1
[mon.pxm2]
public_addr = 192.168.0.2
[mon.pxm3]
public_addr = 192.168.0.3
Bash:
root@pxm3:~# ceph fsid
f4466e33-b57d-4d68-9909-3468afd9e5c2
Bash:
root@pxm2:~# ceph health
HEALTH_WARN mon pxm1 is low on available space; 4 osds down; 2 hosts (4 osds) down; 1 root (4 osds) down; 173 daemons have recently crashed
Bash:
root@pxm3:~# ceph -s
  cluster:
    id:     f4466e33-b57d-4d68-9909-3468afd9e5c2
    health: HEALTH_WARN
            mon pxm1 is low on available space
            4 osds down
            2 hosts (4 osds) down
            1 root (4 osds) down
            170 daemons have recently crashed

  services:
    mon: 3 daemons, quorum pxm2,pxm3,pxm1 (age 3h)
    mgr: pxm2(active, since 3h), standbys: pxm1, pxm3
    osd: 4 osds: 0 up, 4 in (since 2h)

  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   0 B used, 0 B / 0 B avail
    pgs:
Bash:
root@pxm2:~# ceph-crash
INFO:ceph-crash:pinging cluster to exercise our key
  cluster:
    id:     f4466e33-b57d-4d68-9909-3468afd9e5c2
    health: HEALTH_WARN
            mon pxm1 is low on available space
            4 osds down
            2 hosts (4 osds) down
            1 root (4 osds) down
            173 daemons have recently crashed

  services:
    mon: 3 daemons, quorum pxm2,pxm3,pxm1 (age 75s)
    mgr: pxm2(active, since 3h), standbys: pxm1, pxm3
    osd: 4 osds: 0 up, 4 in (since 2h)

  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   0 B used, 0 B / 0 B avail
    pgs:
INFO:ceph-crash:monitoring path /var/lib/ceph/crash, delay 600s
Furthermore, we saw that the OSDs' ceph_fsid (the "fsid" field below) still holds the old cluster's fsid even after the import (a small comparison sketch follows the cephadm output below):
Bash:
root@pxm2:~# cephadm ls
[
{
"style": "legacy",
"name": "osd.0",
"fsid": "5514a69a-46ba-4a44-bb56-8d3109c6c9e0",
"systemd_unit": "ceph-osd@0",
"enabled": true,
"state": "error",
"host_version": "18.2.2"
},
{
"style": "legacy",
"name": "osd.1",
"fsid": "5514a69a-46ba-4a44-bb56-8d3109c6c9e0",
"systemd_unit": "ceph-osd@1",
"enabled": true,
"state": "error",
"host_version": "18.2.2"
},
{
"style": "legacy",
"name": "mon.pxm2",
"fsid": "f4466e33-b57d-4d68-9909-3468afd9e5c2",
"systemd_unit": "ceph-mon@pxm2",
"enabled": true,
"state": "running",
"host_version": "18.2.2"
},
{
"style": "legacy",
"name": "mgr.pxm2",
"fsid": "f4466e33-b57d-4d68-9909-3468afd9e5c2",
"systemd_unit": "ceph-mgr@pxm2",
"enabled": true,
"state": "running",
"host_version": "18.2.2"
}
]
root@pxm3:~# cephadm ls
[
{
"style": "legacy",
"name": "osd.3",
"fsid": "5514a69a-46ba-4a44-bb56-8d3109c6c9e0",
"systemd_unit": "ceph-osd@3",
"enabled": true,
"state": "error",
"host_version": "18.2.2"
},
{
"style": "legacy",
"name": "osd.2",
"fsid": "5514a69a-46ba-4a44-bb56-8d3109c6c9e0",
"systemd_unit": "ceph-osd@2",
"enabled": true,
"state": "error",
"host_version": "18.2.2"
},
{
"style": "legacy",
"name": "mon.pxm3",
"fsid": "f4466e33-b57d-4d68-9909-3468afd9e5c2",
"systemd_unit": "ceph-mon@pxm3",
"enabled": true,
"state": "running",
"host_version": "18.2.2"
},
{
"style": "legacy",
"name": "mgr.pxm3",
"fsid": "f4466e33-b57d-4d68-9909-3468afd9e5c2",
"systemd_unit": "ceph-mgr@pxm3",
"enabled": true,
"state": "running",
"host_version": "18.2.2"
}
]
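A small sketch of the comparison we mean, assuming the default data dir layout (run on each host for the OSDs it carries):
Bash:
# Compare the fsid stored on each OSD with the fsid of the running cluster.
cluster_fsid=$(ceph fsid)
for id in 0 1; do            # use 2 and 3 on pxm3
    osd_fsid=$(cat /var/lib/ceph/osd/ceph-$id/ceph_fsid)
    echo "osd.$id: $osd_fsid  (cluster: $cluster_fsid)"
done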
To import the OSDs, we first increased the epoch of the new cluster by repeating the commands below until the cluster's osdmap epoch was greater than the OSDs' epoch (a note on reading the current epoch follows the command list):
Bash:
ceph osd set noin
ceph osd set noout
ceph osd set noup
ceph osd set nodown
ceph osd set norebalance
ceph osd set nobackfill
ceph osd unset noin
ceph osd unset noout
ceph osd unset noup
ceph osd unset nodown
ceph osd unset norebalance
ceph osd unset nobackfill
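For reference, one way to read the cluster's current osdmap epoch (each set/unset above creates a new osdmap, bumping it by one) is the first line of "ceph osd dump":
Bash:
# Current osdmap epoch of the new cluster.
ceph osd dump | head -1            # e.g. "epoch 42"
# JSON form (needs jq installed):
ceph osd dump -f json | jq .epoch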
OSD volumes:
Code:
pxm2:
-> OSD.0: /dev/ceph-1740d41a-2ae7-4c4d-820f-ec3702e3ba90/osd-block-39f9b32f-c6e7-4b3f-b7f0-9b11a5832aaa
-> OSD.1: /dev/ceph-ad425d70-4aa3-419a-997f-f3a4082c9904/osd-block-bb4df480-2b9b-4604-a44d-6151d5c0cb33
pxm3:
-> OSD.2: /dev/ceph-94682b88-d09c-4eab-9170-c6d31eac79e6/osd-block-3f6756d6-e64b-4c60-9ac2-305c0e71cc51
-> OSD.3: /dev/ceph-d5ffd027-8289-4a1c-9378-6687d9f950ad/osd-block-eece9fc9-44d6-460b-aced-572c79a98be8
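For completeness, the same volumes can also be listed via ceph-volume, which shows the LVM tags (OSD id, OSD fsid, cluster fsid, block device) that activation relies on; just a sketch, run per host:
Bash:
# List all ceph-volume managed LVs on this host together with their LVM tags.
ceph-volume lvm list
# or a single logical volume, e.g. osd.0 on pxm2:
ceph-volume lvm list /dev/ceph-1740d41a-2ae7-4c4d-820f-ec3702e3ba90/osd-block-39f9b32f-c6e7-4b3f-b7f0-9b11a5832aaa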
ceph-bluestore-tool show-label:
-> osd.0:
Bash:
root@pxm2:~# ceph-bluestore-tool show-label --path /var/lib/ceph/osd/ceph-0
inferring bluefs devices from bluestore path
{
"/var/lib/ceph/osd/ceph-0/block": {
"osd_uuid": "39f9b32f-c6e7-4b3f-b7f0-9b11a5832aaa",
"size": 2000397795328,
"btime": "2024-07-07T21:32:30.861509-0300",
"description": "main",
"bfm_blocks": "488378368",
"bfm_blocks_per_key": "128",
"bfm_bytes_per_block": "4096",
"bfm_size": "2000397795328",
"bluefs": "1",
"ceph_fsid": "5514a69a-46ba-4a44-bb56-8d3109c6c9e0",
"ceph_version_when_created": "ceph version 18.2.2 (e9fe820e7fffd1b7cde143a9f77653b73fcec748) reef (stable)",
"created_at": "2024-07-08T00:32:32.459102Z",
"kv_backend": "rocksdb",
"magic": "ceph osd volume v026",
"mkfs_done": "yes",
"osd_key": "AQCdM4tmHR5tLRAA5AikqvQyMqOoH5MnL8Qdtg==",
"ready": "ready",
"require_osd_release": "18",
"whoami": "0"
}
}
-> osd.1:
Bash:
root@pxm2:~# ceph-bluestore-tool show-label --path /var/lib/ceph/osd/ceph-1
inferring bluefs devices from bluestore path
{
"/var/lib/ceph/osd/ceph-1/block": {
"osd_uuid": "bb4df480-2b9b-4604-a44d-6151d5c0cb33",
"size": 2000397795328,
"btime": "2024-07-07T21:32:43.729638-0300",
"description": "main",
"bfm_blocks": "488378368",
"bfm_blocks_per_key": "128",
"bfm_bytes_per_block": "4096",
"bfm_size": "2000397795328",
"bluefs": "1",
"ceph_fsid": "5514a69a-46ba-4a44-bb56-8d3109c6c9e0",
"ceph_version_when_created": "ceph version 18.2.2 (e9fe820e7fffd1b7cde143a9f77653b73fcec748) reef (stable)",
"created_at": "2024-07-08T00:32:45.577456Z",
"kv_backend": "rocksdb",
"magic": "ceph osd volume v026",
"mkfs_done": "yes",
"osd_key": "AQCqM4tmTy87JxAAJVK1NokBDjdKSe+Z8OjwMA==",
"ready": "ready",
"require_osd_release": "18",
"whoami": "1"
}
}
-> osd.2
Bash:
root@pxm3:~# ceph-bluestore-tool show-label --path /var/lib/ceph/osd/ceph-2
inferring bluefs devices from bluestore path
{
"/var/lib/ceph/osd/ceph-2/block": {
"osd_uuid": "3f6756d6-e64b-4c60-9ac2-305c0e71cc51",
"size": 2000397795328,
"btime": "2024-07-07T21:33:07.812888-0300",
"description": "main",
"bfm_blocks": "488378368",
"bfm_blocks_per_key": "128",
"bfm_bytes_per_block": "4096",
"bfm_size": "2000397795328",
"bluefs": "1",
"ceph_fsid": "5514a69a-46ba-4a44-bb56-8d3109c6c9e0",
"ceph_version_when_created": "ceph version 18.2.2 (e9fe820e7fffd1b7cde143a9f77653b73fcec748) reef (stable)",
"created_at": "2024-07-08T00:33:09.404317Z",
"kv_backend": "rocksdb",
"magic": "ceph osd volume v026",
"mkfs_done": "yes",
"osd_key": "AQDCM4tmu++vLRAAuJOXjTuEHR9VsKz7ShVEPg==",
"ready": "ready",
"require_osd_release": "18",
"whoami": "2"
}
}
-> osd.3
Bash:
root@pxm3:~# ceph-bluestore-tool show-label --path /var/lib/ceph/osd/ceph-3
inferring bluefs devices from bluestore path
{
"/var/lib/ceph/osd/ceph-3/block": {
"osd_uuid": "eece9fc9-44d6-460b-aced-572c79a98be8",
"size": 2000397795328,
"btime": "2024-07-07T21:33:25.725294-0300",
"description": "main",
"bfm_blocks": "488378368",
"bfm_blocks_per_key": "128",
"bfm_bytes_per_block": "4096",
"bfm_size": "2000397795328",
"bluefs": "1",
"ceph_fsid": "5514a69a-46ba-4a44-bb56-8d3109c6c9e0",
"ceph_version_when_created": "ceph version 18.2.2 (e9fe820e7fffd1b7cde143a9f77653b73fcec748) reef (stable)",
"created_at": "2024-07-08T00:33:27.323085Z",
"kv_backend": "rocksdb",
"magic": "ceph osd volume v026",
"mkfs_done": "yes",
"osd_key": "AQDUM4tmagOEKBAAeAfZXcyU1naRkqIE5iVOfw==",
"ready": "ready",
"require_osd_release": "18",
"whoami": "3"
}
}
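The labels above look consistent to us. Since the OSDs are down anyway, a read-only consistency check could confirm the stores themselves are still readable (sketch for osd.0; not a fix, just another data point):
Bash:
# Read-only check of the BlueStore data; run while the OSD daemon is stopped.
ceph-bluestore-tool fsck --path /var/lib/ceph/osd/ceph-0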
Bash:
root@pxm2:~# cat /var/lib/ceph/osd/ceph-0/ceph_fsid
5514a69a-46ba-4a44-bb56-8d3109c6c9e0
root@pxm2:~# cat /var/lib/ceph/osd/ceph-1/ceph_fsid
5514a69a-46ba-4a44-bb56-8d3109c6c9e0
root@pxm3:~# cat /var/lib/ceph/osd/ceph-2/ceph_fsid
5514a69a-46ba-4a44-bb56-8d3109c6c9e0
root@pxm3:~# cat /var/lib/ceph/osd/ceph-3/ceph_fsid
5514a69a-46ba-4a44-bb56-8d3109c6c9e0
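For reference, the standard way LVM-based BlueStore OSDs are normally brought up is ceph-volume activation, which recreates the tmpfs data dir from the LV tags and starts the ceph-osd@<id> unit; we list it here only as a sketch of the expected path, not as a procedure that has worked for us:
Bash:
# Activate a single OSD by id and OSD fsid (values for osd.0 taken from the labels above) ...
ceph-volume lvm activate 0 39f9b32f-c6e7-4b3f-b7f0-9b11a5832aaa
# ... or activate everything ceph-volume can detect on this host.
ceph-volume lvm activate --all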
Code:
root@pxm2:~# ceph daemon osd.0 status
no valid command found; 10 closest matches:
0
1
2
abort
assert
bluefs debug_inject_read_zeros
bluefs files list
bluefs stats
bluestore allocator dump block
bluestore allocator fragmentation block
admin_socket: invalid command
root@pxm2:~# ceph daemon osd.1 status
no valid command found; 10 closest matches:
0
1
2
abort
assert
bluefs debug_inject_read_zeros
bluefs files list
bluefs stats
bluestore allocator dump block
bluestore allocator fragmentation block
admin_socket: invalid command
root@pxm3:~# ceph daemon osd.2 status
no valid command found; 10 closest matches:
0
1
2
abort
assert
bluefs debug_inject_read_zeros
bluefs files list
bluefs stats
bluestore allocator dump block
bluestore allocator fragmentation block
admin_socket: invalid command
root@pxm3:~# ceph daemon osd.3 status
no valid command found; 10 closest matches:
0
1
2
abort
assert
config diff
config diff get <var>
config get <var>
config help [<var>]
config set <var> <val>...
admin_socket: invalid command
Code:
root@pxm2:~# ceph osd info osd.0
osd.0 down in weight 1 up_from 0 up_thru 0 down_at 0 last_clean_interval [0,0) exists 39f9b32f-c6e7-4b3f-b7f0-9b11a5832aaa
root@pxm2:~# ceph osd info osd.1
osd.1 down in weight 1 up_from 0 up_thru 0 down_at 0 last_clean_interval [0,0) exists bb4df480-2b9b-4604-a44d-6151d5c0cb33
root@pxm3:~# ceph osd info osd.2
osd.2 down in weight 1 up_from 0 up_thru 0 down_at 0 last_clean_interval [0,0) exists 3f6756d6-e64b-4c60-9ac2-305c0e71cc51
root@pxm3:~# ceph osd info osd.3
osd.3 down in weight 1 up_from 0 up_thru 0 down_at 0 last_clean_interval [0,0) exists eece9fc9-44d6-460b-aced-572c79a98be8
Bash:
root@pxm3:~# ceph osd status
ID HOST USED AVAIL WR OPS WR DATA RD OPS RD DATA STATE
0 0 0 0 0 0 0 exists
1 0 0 0 0 0 0 exists
2 0 0 0 0 0 0 exists
3 0 0 0 0 0 0 exists
root@pxm3:~# ceph osd tree
ID  CLASS  WEIGHT   TYPE NAME       STATUS  REWEIGHT  PRI-AFF
-1         7.27759  root default
-2         3.63879      host pxm2
 0         1.81940          osd.0     down   1.00000  1.00000
 1         1.81940          osd.1     down   1.00000  1.00000
-5         3.63879      host pxm3
 3         1.81940          osd.3     down   1.00000  1.00000
 2    ssd  1.81940          osd.2     down   1.00000  1.00000
Bash:
root@pxm2:~# ceph osd df
ID CLASS WEIGHT REWEIGHT SIZE RAW USE DATA OMAP META AVAIL %USE VAR PGS STATUS
0 ssd 1.81940 1.00000 0 B 0 B 0 B 0 B 0 B 0 B 0 1.00 0 down
1 ssd 1.81940 1.00000 0 B 0 B 0 B 0 B 0 B 0 B 0 1.00 0 down
2 ssd 1.81940 1.00000 0 B 0 B 0 B 0 B 0 B 0 B 0 1.00 0 down
3 ssd 1.81940 1.00000 0 B 0 B 0 B 0 B 0 B 0 B 0 1.00 0 down
TOTAL 0 B 0 B 0 B 0 B 0 B 0 B 0
MIN/MAX VAR: 1.00/1.00 STDDEV: 0
NOTE: Due to the character limit, the OSD log follows in the next message.