Edit: FYI, I successfully replaced a different disk on a different node earlier today, and that one is still rebalancing.
I had an issue destroying the old OSD (osd.0), but thought that might be normal with a failed disk:
destroy OSD osd.0
Remove osd.0 from the CRUSH map
Remove the osd.0 authentication key.
Remove OSD osd.0
Use of uninitialized value in pattern match (m//) at /usr/share/perl5/PVE/API2/Ceph/OSD.pm line 1010.
TASK ERROR: invalid path: journal
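For reference, since the GUI destroy bailed out at that point, the manual equivalent of the destroy step would be roughly the following; this is just a sketch of what I may still need to run, not something I've done yet:

ceph osd crush remove osd.0    # drop osd.0 from the CRUSH map
ceph auth del osd.0            # delete its authentication key
ceph osd rm osd.0              # remove the OSD id from the cluster
ceph osd tree                  # osd.0 should no longer be listed if the cleanup worked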
The old drive was /dev/sda; the new drive shows up as /dev/sdk. Here is the output from creating the new OSD:
create OSD on /dev/sdk (bluestore)
wiping block device /dev/sdk
/dev/sdk: 8 bytes were erased at offset 0x00000200 (gpt): 45 46 49 20 50 41 52 54
/dev/sdk: 8 bytes were erased at offset 0xe8e0db5e00 (gpt): 45 46 49 20 50 41 52 54
/dev/sdk: 2 bytes were erased at offset 0x000001fe (PMBR): 55 aa
/dev/sdk: calling ioctl to re-read partition table: Success
200+0 records in
200+0 records out
209715200 bytes (210 MB, 200 MiB) copied, 2.80312 s, 74.8 MB/s
Running command: /usr/bin/ceph-authtool --gen-print-key
Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring -i - osd new de9e192e-a9df-408f-abd0-8cfab4b13dc4
Running command: vgcreate --force --yes ceph-3620e544-0fac-41f6-8b3b-a2b950131a5b /dev/sdk
stdout: Physical volume "/dev/sdk" successfully created.
stdout: Volume group "ceph-3620e544-0fac-41f6-8b3b-a2b950131a5b" successfully created
Running command: lvcreate --yes -l 238467 -n osd-block-de9e192e-a9df-408f-abd0-8cfab4b13dc4 ceph-3620e544-0fac-41f6-8b3b-a2b950131a5b
stdout: Logical volume "osd-block-de9e192e-a9df-408f-abd0-8cfab4b13dc4" created.
Running command: /usr/bin/ceph-authtool --gen-print-key
--> Was unable to complete a new OSD, will rollback changes
Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring osd purge-new osd.0 --yes-i-really-mean-it
stderr: 2024-09-25T13:21:15.918-0600 7f9a2a368700 -1 auth: unable to find a keyring on /etc/pve/priv/ceph.client.bootstrap-osd.keyring: (2) No such file or directory
2024-09-25T13:21:15.918-0600 7f9a2a368700 -1 AuthRegistry(0x7f9a24059780) no keyring found at /etc/pve/priv/ceph.client.bootstrap-osd.keyring, disabling cephx
stderr: purged osd.0
--> Zapping: /dev/ceph-3620e544-0fac-41f6-8b3b-a2b950131a5b/osd-block-de9e192e-a9df-408f-abd0-8cfab4b13dc4
Running command: /bin/dd if=/dev/zero of=/dev/ceph-3620e544-0fac-41f6-8b3b-a2b950131a5b/osd-block-de9e192e-a9df-408f-abd0-8cfab4b13dc4 bs=1M count=10 conv=fsync
stderr: 10+0 records in
10+0 records out
stderr: 10485760 bytes (10 MB, 10 MiB) copied, 0.107226 s, 97.8 MB/s
--> Only 1 LV left in VG, will proceed to destroy volume group ceph-3620e544-0fac-41f6-8b3b-a2b950131a5b
Running command: vgremove -v -f ceph-3620e544-0fac-41f6-8b3b-a2b950131a5b
stderr: Removing ceph--3620e544--0fac--41f6--8b3b--a2b950131a5b-osd--block--de9e192e--a9df--408f--abd0--8cfab4b13dc4 (253:6)
stderr: Archiving volume group "ceph-3620e544-0fac-41f6-8b3b-a2b950131a5b" metadata (seqno 5).
stderr: Releasing logical volume "osd-block-de9e192e-a9df-408f-abd0-8cfab4b13dc4"
stderr: Creating volume group backup "/etc/lvm/backup/ceph-3620e544-0fac-41f6-8b3b-a2b950131a5b" (seqno 6).
stdout: Logical volume "osd-block-de9e192e-a9df-408f-abd0-8cfab4b13dc4" successfully removed
stderr: Removing physical volume "/dev/sdk" from volume group "ceph-3620e544-0fac-41f6-8b3b-a2b950131a5b"
stdout: Volume group "ceph-3620e544-0fac-41f6-8b3b-a2b950131a5b" successfully removed
--> Zapping successful for OSD: 0
--> OSError: [Errno 5] Input/output error: '/var/lib/ceph/osd/ceph-0'
TASK ERROR: command 'ceph-volume lvm create --cluster-fsid a555c9f1-597a-449a-a871-122863806426 --data /dev/sdk' failed: exit code 1
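My working theory is that the I/O error comes from the old osd.0 mount point still referencing the dead /dev/sda, so ceph-volume fails as soon as it touches /var/lib/ceph/osd/ceph-0. A minimal check/cleanup sketch I'm considering (assuming nothing else is using that path):

mount | grep ceph-0                    # anything still mounted on the old OSD directory?
systemctl stop ceph-osd@0              # make sure the old OSD service isn't running
umount -l /var/lib/ceph/osd/ceph-0     # lazy-unmount the stale mount if one is there
rm -rf /var/lib/ceph/osd/ceph-0        # clear the leftover directory before retrying the create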
root@cluster01:~# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sdb 8:16 0 931.5G 0 disk
└─sdb1 8:17 0 931.5G 0 part /var/lib/ceph/osd/ceph-1
sdc 8:32 0 931.5G 0 disk
└─sdc1 8:33 0 931.5G 0 part /var/lib/ceph/osd/ceph-2
sdd 8:48 0 931.5G 0 disk
└─sdd1 8:49 0 931.5G 0 part /var/lib/ceph/osd/ceph-3
sde 8:64 0 111.8G 0 disk
├─sde1 8:65 0 1M 0 part
├─sde2 8:66 0 256M 0 part
└─sde3 8:67 0 111.5G 0 part
├─pve-swap 253:0 0 8G 0 lvm [SWAP]
├─pve-root 253:1 0 27.8G 0 lvm /
├─pve-data_tmeta 253:2 0 64M 0 lvm
│ └─pve-data-tpool 253:4 0 61.9G 0 lvm
│ └─pve-data 253:5 0 61.9G 1 lvm
└─pve-data_tdata 253:3 0 61.9G 0 lvm
└─pve-data-tpool 253:4 0 61.9G 0 lvm
└─pve-data 253:5 0 61.9G 1 lvm
sdf 8:80 0 931.5G 0 disk
└─sdf1 8:81 0 931.5G 0 part /var/lib/ceph/osd/ceph-4
sdg 8:96 0 931.5G 0 disk
└─sdg1 8:97 0 931.5G 0 part /var/lib/ceph/osd/ceph-5
sdh 8:112 0 931.5G 0 disk
└─sdh1 8:113 0 931.5G 0 part /var/lib/ceph/osd/ceph-6
sdi 8:128 0 931.5G 0 disk
└─sdi1 8:129 0 931.5G 0 part /var/lib/ceph/osd/ceph-7
sdj 8:144 0 111.8G 0 disk
├─sdj1 8:145 0 5G 0 part
├─sdj2 8:146 0 5G 0 part
├─sdj3 8:147 0 5G 0 part
├─sdj4 8:148 0 5G 0 part
├─sdj5 8:149 0 5G 0 part
├─sdj6 8:150 0 5G 0 part
├─sdj7 8:151 0 5G 0 part
└─sdj8 8:152 0 5G 0 part
sdk 8:160 0 931.5G 0 disk
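So /dev/sdk comes out of the rollback with no partitions or LVM metadata left on it. Before retrying the create I plan to confirm nothing stale is still registered (again, just a sketch):

ceph-volume lvm list               # should show no remaining entry for osd.0 or /dev/sdk
pvs && vgs                         # the rolled-back PV/VG should be gone
ls -la /var/lib/ceph/osd/ceph-0    # the path the OSError complained about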
root@cluster01:~# pveversion -v
proxmox-ve: 7.4-1 (running kernel: 5.15.149-1-pve)
pve-manager: 7.4-17 (running version: 7.4-17/513c62be)
pve-kernel-5.15: 7.4-12
pve-kernel-5.4: 6.4-18
pve-kernel-5.15.149-1-pve: 5.15.149-1
pve-kernel-5.15.131-1-pve: 5.15.131-2
pve-kernel-5.15.104-1-pve: 5.15.104-2
pve-kernel-5.15.60-2-pve: 5.15.60-2
pve-kernel-5.15.60-1-pve: 5.15.60-1
pve-kernel-5.15.35-2-pve: 5.15.35-5
pve-kernel-5.4.189-2-pve: 5.4.189-2
pve-kernel-4.4.128-1-pve: 4.4.128-111
pve-kernel-4.4.117-2-pve: 4.4.117-110
pve-kernel-4.4.117-1-pve: 4.4.117-109
pve-kernel-4.4.114-1-pve: 4.4.114-108
pve-kernel-4.4.98-6-pve: 4.4.98-107
pve-kernel-4.4.98-5-pve: 4.4.98-105
pve-kernel-4.4.98-4-pve: 4.4.98-104
pve-kernel-4.4.35-1-pve: 4.4.35-77
ceph: 15.2.17-pve1
ceph-fuse: 15.2.17-pve1
corosync: 3.1.7-pve1
criu: 3.15-1+pve-1
glusterfs-client: 9.2-1
ifupdown: 0.8.36+pve2
ksm-control-daemon: 1.4-1
libjs-extjs: 7.0.0-1
libknet1: 1.24-pve2
libproxmox-acme-perl: 1.4.4
libproxmox-backup-qemu0: 1.3.1-1
libproxmox-rs-perl: 0.2.1
libpve-access-control: 7.4.3
libpve-apiclient-perl: 3.2-2
libpve-common-perl: 7.4-2
libpve-guest-common-perl: 4.2-4
libpve-http-server-perl: 4.2-3
libpve-rs-perl: 0.7.7
libpve-storage-perl: 7.4-3
libqb0: 1.0.5-1
libspice-server1: 0.14.3-2.1
lvm2: 2.03.11-2.1
lxc-pve: 5.0.2-2
lxcfs: 5.0.3-pve1
novnc-pve: 1.4.0-1
proxmox-backup-client: 2.4.6-1
proxmox-backup-file-restore: 2.4.6-1
proxmox-kernel-helper: 7.4-1
proxmox-mail-forward: 0.1.1-1
proxmox-mini-journalreader: 1.3-1
proxmox-offline-mirror-helper: 0.5.2
proxmox-widget-toolkit: 3.7.3
pve-cluster: 7.3-3
pve-container: 4.4-6
pve-docs: 7.4-2
pve-edk2-firmware: 3.20230228-4~bpo11+3
pve-firewall: 4.3-5
pve-firmware: 3.6-6
pve-ha-manager: 3.6.1
pve-i18n: 2.12-1
pve-qemu-kvm: 7.2.10-1
pve-xtermjs: 4.16.0-2
qemu-server: 7.4-5
smartmontools: 7.2-pve3
spiceterm: 3.2-2
swtpm: 0.8.0~bpo11+3
vncterm: 1.7-1
zfsutils-linux: 2.1.15-pve1