[SOLVED] 'ceph pg 55.0 query' not working

Arron - Thank you, I was able to edit the pool in the UI. Proxmox has made it so much easier to edit a pool than I remember from the old days!
 
Hehe, yeah, slowly but steadily more and more is possible via the GUI. I do have quite a few things on my TODO list in that regard ;)
 
  • Like
Reactions: RobFantini
Hey there - I have exactly the same issue with .mgr, but despite destroying it and recreating it several times I still get the same single PG stuck in "unknown". This was after creating a new CRUSH rule and assigning it to my actual data pool, then trying to reduce the size of the pool from 3 to 2. When I tried to reduce the size, I started getting errors that "29425/17655 objects misplaced", so after much faffing about I deleted the data pool (still commissioning this cluster, so no data loss). I noticed that .mgr was the thing causing problems when I tried to create a new pool and was immediately hit with "15/9 objects misplaced".

My CRUSH map seems fine as far as I can tell, and regardless of which rule I apply to .mgr, its single PG remains stuck.

CRUSH map:
Code:
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54

# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class hdd
device 10 osd.10 class hdd
device 11 osd.11 class hdd
device 12 osd.12 class hdd
device 13 osd.13 class hdd
device 14 osd.14 class hdd
device 15 osd.15 class hdd
device 16 osd.16 class hdd
device 17 osd.17 class hdd
device 18 osd.18 class hdd
device 19 osd.19 class hdd
device 20 osd.20 class hdd
device 21 osd.21 class hdd
device 22 osd.22 class hdd
device 23 osd.23 class hdd
device 24 osd.24 class hdd
device 25 osd.25 class hdd
device 26 osd.26 class hdd
device 27 osd.27 class hdd
device 28 osd.28 class hdd
device 29 osd.29 class hdd
device 30 osd.30 class hdd
device 31 osd.31 class hdd

# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root

# buckets
root default {
    id -1        # do not change unnecessarily
    id -2 class hdd        # do not change unnecessarily
    # weight 0.00000
    alg straw2
    hash 0    # rjenkins1
}
host pves01 {
    id -3        # do not change unnecessarily
    id -4 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.0 weight 1.81940
    item osd.1 weight 1.81940
    item osd.2 weight 1.81940
    item osd.3 weight 1.81940
}
host pves02 {
    id -5        # do not change unnecessarily
    id -6 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.4 weight 1.81940
    item osd.5 weight 1.81940
    item osd.6 weight 1.81940
    item osd.7 weight 1.81940
}
host pves03 {
    id -7        # do not change unnecessarily
    id -8 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.8 weight 1.81940
    item osd.9 weight 1.81940
    item osd.10 weight 1.81940
    item osd.11 weight 1.81940
}
host pves04 {
    id -9        # do not change unnecessarily
    id -10 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.12 weight 1.81940
    item osd.13 weight 1.81940
    item osd.14 weight 1.81940
    item osd.15 weight 1.81940
}
host pves05 {
    id -11        # do not change unnecessarily
    id -12 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.16 weight 1.81940
    item osd.17 weight 1.81940
    item osd.18 weight 1.81940
    item osd.19 weight 1.81940
}
host pves06 {
    id -13        # do not change unnecessarily
    id -14 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.20 weight 1.81940
    item osd.21 weight 1.81940
    item osd.22 weight 1.81940
    item osd.23 weight 1.81940
}
host pves07 {
    id -15        # do not change unnecessarily
    id -16 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.24 weight 1.81940
    item osd.25 weight 1.81940
    item osd.26 weight 1.81940
    item osd.27 weight 1.81940
}
host pves08 {
    id -17        # do not change unnecessarily
    id -18 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.28 weight 1.81940
    item osd.29 weight 1.81940
    item osd.30 weight 1.81940
    item osd.31 weight 1.81940
}
row chasis1 {
    id -19        # do not change unnecessarily
    id -26 class hdd        # do not change unnecessarily
    # weight 14.55518
    alg straw2
    hash 0    # rjenkins1
    item pves01 weight 7.27759
    item pves02 weight 7.27759
}
row chasis2 {
    id -20        # do not change unnecessarily
    id -25 class hdd        # do not change unnecessarily
    # weight 14.55518
    alg straw2
    hash 0    # rjenkins1
    item pves03 weight 7.27759
    item pves04 weight 7.27759
}
row chasis3 {
    id -21        # do not change unnecessarily
    id -24 class hdd        # do not change unnecessarily
    # weight 14.55518
    alg straw2
    hash 0    # rjenkins1
    item pves05 weight 7.27759
    item pves06 weight 7.27759
}
row chasis4 {
    id -22        # do not change unnecessarily
    id -23 class hdd        # do not change unnecessarily
    # weight 14.55518
    alg straw2
    hash 0    # rjenkins1
    item pves07 weight 7.27759
    item pves08 weight 7.27759
}

# rules
rule host_rule {
    id 0
    type replicated
    step take default
    step chooseleaf firstn 0 type host
    step emit
}
rule chasis_rule {
    id 1
    type replicated
    step take default
    step chooseleaf firstn 0 type row
    step emit
}

# end crush map

And here is the result of "ceph pg ls":
Code:
PG   OBJECTS  DEGRADED  MISPLACED  UNFOUND  BYTES  OMAP_BYTES*  OMAP_KEYS*  LOG  LOG_DUPS  STATE    SINCE  VERSION  REPORTED  UP     ACTING  SCRUB_STAMP                      DEEP_SCRUB_STAMP                 LAST_SCRUB_DURATION  SCRUB_SCHEDULING
6.0        0         0          0        0      0            0           0    0         0  unknown     6m      0'0       0:0  []p-1   []p-1  2025-03-03T13:26:18.239533-0600  2025-03-03T13:26:18.239533-0600                    0  --

What is going on here?
 
Alright, figured it out after much rubber-ducky debugging with a co-worker:

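My CRUSH map was wrong - none of the walkthroughs I saw mentioned that the new buckets I'd created have to be added to the default root. In the map I posted above, the "root default" bucket is empty (weight 0.00000, no items), so both rules, which start with "step take default", had no OSDs to choose from. Simply running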
Bash:
root@pves01:~# ceph osd crush move chasis1 root=default
moved item id -19 name 'chasis1' to location {root=default} in crush map
root@pves01:~# ceph osd crush move chasis2 root=default
moved item id -20 name 'chasis2' to location {root=default} in crush map
root@pves01:~# ceph osd crush move chasis3 root=default
moved item id -21 name 'chasis3' to location {root=default} in crush map
root@pves01:~# ceph osd crush move chasis4 root=default
moved item id -22 name 'chasis4' to location {root=default} in crush map

immediately solved the issue. After running the first command, the stuck PG for .mgr went into an "undersized+peered" state, which upon adding the rest of the buckets into the default root immediately cleared up as it replicated. I created a new data pool and all seems to be working well now.

For others that may find this, here's the complete CRUSH map for reference - I had to look here to find an example CRUSH map, which helped me spot the issue:
Code:
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54

# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class hdd
device 10 osd.10 class hdd
device 11 osd.11 class hdd
device 12 osd.12 class hdd
device 13 osd.13 class hdd
device 14 osd.14 class hdd
device 15 osd.15 class hdd
device 16 osd.16 class hdd
device 17 osd.17 class hdd
device 18 osd.18 class hdd
device 19 osd.19 class hdd
device 20 osd.20 class hdd
device 21 osd.21 class hdd
device 22 osd.22 class hdd
device 23 osd.23 class hdd
device 24 osd.24 class hdd
device 25 osd.25 class hdd
device 26 osd.26 class hdd
device 27 osd.27 class hdd
device 28 osd.28 class hdd
device 29 osd.29 class hdd
device 30 osd.30 class hdd
device 31 osd.31 class hdd

# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root

# buckets
host pves01 {
    id -3        # do not change unnecessarily
    id -4 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.0 weight 1.81940
    item osd.1 weight 1.81940
    item osd.2 weight 1.81940
    item osd.3 weight 1.81940
}
host pves02 {
    id -5        # do not change unnecessarily
    id -6 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.4 weight 1.81940
    item osd.5 weight 1.81940
    item osd.6 weight 1.81940
    item osd.7 weight 1.81940
}
row chasis1 {
    id -19        # do not change unnecessarily
    id -26 class hdd        # do not change unnecessarily
    # weight 14.55518
    alg straw2
    hash 0    # rjenkins1
    item pves01 weight 7.27759
    item pves02 weight 7.27759
}
host pves03 {
    id -7        # do not change unnecessarily
    id -8 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.8 weight 1.81940
    item osd.9 weight 1.81940
    item osd.10 weight 1.81940
    item osd.11 weight 1.81940
}
host pves04 {
    id -9        # do not change unnecessarily
    id -10 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.12 weight 1.81940
    item osd.13 weight 1.81940
    item osd.14 weight 1.81940
    item osd.15 weight 1.81940
}
row chasis2 {
    id -20        # do not change unnecessarily
    id -25 class hdd        # do not change unnecessarily
    # weight 14.55518
    alg straw2
    hash 0    # rjenkins1
    item pves03 weight 7.27759
    item pves04 weight 7.27759
}
host pves05 {
    id -11        # do not change unnecessarily
    id -12 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.16 weight 1.81940
    item osd.17 weight 1.81940
    item osd.18 weight 1.81940
    item osd.19 weight 1.81940
}
host pves06 {
    id -13        # do not change unnecessarily
    id -14 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.20 weight 1.81940
    item osd.21 weight 1.81940
    item osd.22 weight 1.81940
    item osd.23 weight 1.81940
}
row chasis3 {
    id -21        # do not change unnecessarily
    id -24 class hdd        # do not change unnecessarily
    # weight 14.55518
    alg straw2
    hash 0    # rjenkins1
    item pves05 weight 7.27759
    item pves06 weight 7.27759
}
host pves07 {
    id -15        # do not change unnecessarily
    id -16 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.24 weight 1.81940
    item osd.25 weight 1.81940
    item osd.26 weight 1.81940
    item osd.27 weight 1.81940
}
host pves08 {
    id -17        # do not change unnecessarily
    id -18 class hdd        # do not change unnecessarily
    # weight 7.27759
    alg straw2
    hash 0    # rjenkins1
    item osd.28 weight 1.81940
    item osd.29 weight 1.81940
    item osd.30 weight 1.81940
    item osd.31 weight 1.81940
}
row chasis4 {
    id -22        # do not change unnecessarily
    id -23 class hdd        # do not change unnecessarily
    # weight 14.55518
    alg straw2
    hash 0    # rjenkins1
    item pves07 weight 7.27759
    item pves08 weight 7.27759
}
root default {
    id -1        # do not change unnecessarily
    id -2 class hdd        # do not change unnecessarily
    # weight 58.22070
    alg straw2
    hash 0    # rjenkins1
    item chasis1 weight 14.55518
    item chasis2 weight 14.55518
    item chasis3 weight 14.55518
    item chasis4 weight 14.55518
}

# rules
rule host_rule {
    id 0
    type replicated
    step take default
    step chooseleaf firstn 0 type host
    step emit
}
rule chasis_rule {
    id 1
    type replicated
    step take default
    step chooseleaf firstn 0 type row
    step emit
}

# end crush map