Ceph unstable behaviour causing VMs to hang

Please paste output into CODE tags (found in the editor under the three-dots menu).
 
Code:
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54

# devices
device 0 osd.0 class ssd
device 1 osd.1 class ssd
device 2 osd.2 class ssd
device 3 osd.3 class ssd
device 4 osd.4 class ssd
device 5 osd.5 class ssd
device 6 osd.6 class ssd
device 7 osd.7 class ssd
device 8 osd.8 class ssd
device 9 osd.9 class ssd
device 10 osd.10 class ssd
device 11 osd.11 class ssd
device 12 osd.12 class ssd
device 13 osd.13 class ssd
device 14 osd.14 class ssd
device 15 osd.15 class ssd
device 16 osd.16 class ssd
device 17 osd.17 class ssd
device 18 osd.18 class ssd
device 19 osd.19 class ssd
device 20 osd.20 class ssd
device 21 osd.21 class ssd
device 22 osd.22 class ssd
device 23 osd.23 class ssd
device 24 osd.24 class ssd
device 25 osd.25 class ssd
device 26 osd.26 class ssd
device 27 osd.27 class ssd
device 28 osd.28 class ssd
device 29 osd.29 class ssd
device 30 osd.30 class ssd
device 31 osd.31 class ssd
device 32 osd.32 class ssd
device 33 osd.33 class ssd
device 34 osd.34 class ssd
device 35 osd.35 class ssd
device 36 osd.36 class ssd
device 37 osd.37 class ssd
device 38 osd.38 class ssd
device 39 osd.39 class ssd
device 40 osd.40 class ssd
device 41 osd.41 class ssd
device 42 osd.42 class ssd
device 43 osd.43 class ssd
device 44 osd.44 class ssd
device 45 osd.45 class ssd
device 46 osd.46 class ssd
device 47 osd.47 class ssd

# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root

# buckets
host inc1pve25 {
    id -3        # do not change unnecessarily
    id -4 class ssd        # do not change unnecessarily
    # weight 6.986
    alg straw2
    hash 0    # rjenkins1
    item osd.0 weight 1.747
    item osd.1 weight 1.747
    item osd.2 weight 1.747
    item osd.3 weight 1.747
}
host inc1pve26 {
    id -5        # do not change unnecessarily
    id -6 class ssd        # do not change unnecessarily
    # weight 6.986
    alg straw2
    hash 0    # rjenkins1
    item osd.4 weight 1.747
    item osd.5 weight 1.747
    item osd.6 weight 1.747
    item osd.7 weight 1.747
}
host inc1pve27 {
    id -7        # do not change unnecessarily
    id -8 class ssd        # do not change unnecessarily
    # weight 6.986
    alg straw2
    hash 0    # rjenkins1
    item osd.8 weight 1.747
    item osd.9 weight 1.747
    item osd.10 weight 1.747
    item osd.11 weight 1.747
}
host inc1pve28 {
    id -9        # do not change unnecessarily
    id -10 class ssd        # do not change unnecessarily
    # weight 6.986
    alg straw2
    hash 0    # rjenkins1
    item osd.12 weight 1.747
    item osd.13 weight 1.747
    item osd.14 weight 1.747
    item osd.15 weight 1.747
}
host inc1pve29 {
    id -11        # do not change unnecessarily
    id -12 class ssd        # do not change unnecessarily
    # weight 6.986
    alg straw2
    hash 0    # rjenkins1
    item osd.16 weight 1.747
    item osd.17 weight 1.747
    item osd.18 weight 1.747
    item osd.19 weight 1.747
}
host inc1pve30 {
    id -13        # do not change unnecessarily
    id -14 class ssd        # do not change unnecessarily
    # weight 6.986
    alg straw2
    hash 0    # rjenkins1
    item osd.20 weight 1.747
    item osd.21 weight 1.747
    item osd.22 weight 1.747
    item osd.23 weight 1.747
}
host inc1pve31 {
    id -15        # do not change unnecessarily
    id -16 class ssd        # do not change unnecessarily
    # weight 6.986
    alg straw2
    hash 0    # rjenkins1
    item osd.24 weight 1.747
    item osd.25 weight 1.747
    item osd.26 weight 1.747
    item osd.27 weight 1.747
}
host inc1pve32 {
    id -17        # do not change unnecessarily
    id -18 class ssd        # do not change unnecessarily
    # weight 6.986
    alg straw2
    hash 0    # rjenkins1
    item osd.28 weight 1.747
    item osd.29 weight 1.747
    item osd.30 weight 1.747
    item osd.31 weight 1.747
}
host inc1pve33 {
    id -19        # do not change unnecessarily
    id -20 class ssd        # do not change unnecessarily
    # weight 6.986
    alg straw2
    hash 0    # rjenkins1
    item osd.32 weight 1.747
    item osd.33 weight 1.747
    item osd.34 weight 1.747
    item osd.35 weight 1.747
}
host inc1pve34 {
    id -21        # do not change unnecessarily
    id -22 class ssd        # do not change unnecessarily
    # weight 6.986
    alg straw2
    hash 0    # rjenkins1
    item osd.36 weight 1.747
    item osd.37 weight 1.747
    item osd.38 weight 1.747
    item osd.39 weight 1.747
}
host inc1pve35 {
    id -23        # do not change unnecessarily
    id -24 class ssd        # do not change unnecessarily
    # weight 6.986
    alg straw2
    hash 0    # rjenkins1
    item osd.40 weight 1.747
    item osd.41 weight 1.747
    item osd.42 weight 1.747
    item osd.43 weight 1.747
}
host inc1pve36 {
    id -25        # do not change unnecessarily
    id -26 class ssd        # do not change unnecessarily
    # weight 6.986
    alg straw2
    hash 0    # rjenkins1
    item osd.44 weight 1.747
    item osd.45 weight 1.747
    item osd.46 weight 1.747
    item osd.47 weight 1.747
}
root default {
    id -1        # do not change unnecessarily
    id -2 class ssd        # do not change unnecessarily
    # weight 83.837
    alg straw2
    hash 0    # rjenkins1
    item inc1pve25 weight 6.986
    item inc1pve26 weight 6.986
    item inc1pve27 weight 6.986
    item inc1pve28 weight 6.986
    item inc1pve29 weight 6.986
    item inc1pve30 weight 6.986
    item inc1pve31 weight 6.986
    item inc1pve32 weight 6.986
    item inc1pve33 weight 6.986
    item inc1pve34 weight 6.986
    item inc1pve35 weight 6.986
    item inc1pve36 weight 6.986
}

# rules
rule replicated_rule {
    id 0
    type replicated
    min_size 1
    max_size 10
    step take default
    step chooseleaf firstn 0 type host
    step emit
}
rule erasure-code {
    id 1
    type erasure
    min_size 3
    max_size 3
    step set_chooseleaf_tries 5
    step set_choose_tries 100
    step take default
    step chooseleaf indep 0 type host
    step emit
}

# choose_args
choose_args 18446744073709551615 {
  {
    bucket_id -1
    weight_set [
      [ 7.321 7.398 7.296 7.188 7.481 6.802 6.680 7.360 6.990 6.979 7.537 6.726 ]
    ]
  }
  {
    bucket_id -2
    weight_set [
      [ 7.321 7.398 7.296 7.188 7.481 6.802 6.680 7.360 6.990 6.979 7.537 6.726 ]
    ]
  }
  {
    bucket_id -3
    weight_set [
      [ 1.664 1.880 1.944 1.833 ]
    ]
  }
  {
    bucket_id -4
    weight_set [
      [ 1.664 1.880 1.944 1.833 ]
    ]
  }
  {
    bucket_id -5
    weight_set [
      [ 1.794 1.779 1.877 1.946 ]
    ]
  }
  {
    bucket_id -6
    weight_set [
      [ 1.794 1.779 1.877 1.946 ]
    ]
  }
  {
    bucket_id -7
    weight_set [
      [ 1.726 1.869 1.796 1.906 ]
    ]
  }
  {
    bucket_id -8
    weight_set [
      [ 1.726 1.869 1.796 1.906 ]
    ]
  }
  {
    bucket_id -9
    weight_set [
      [ 1.678 1.638 1.769 2.103 ]
    ]
  }
  {
    bucket_id -10
    weight_set [
      [ 1.678 1.638 1.769 2.103 ]
    ]
  }
  {
    bucket_id -11
    weight_set [
      [ 1.882 1.832 1.786 1.980 ]
    ]
  }
  {
    bucket_id -12
    weight_set [
      [ 1.882 1.832 1.786 1.980 ]
    ]
  }
  {
    bucket_id -13
    weight_set [
      [ 1.624 1.922 1.727 1.528 ]
    ]
  }
  {
    bucket_id -14
    weight_set [
      [ 1.624 1.922 1.727 1.528 ]
    ]
  }
  {
    bucket_id -15
    weight_set [
      [ 1.791 1.724 1.727 1.438 ]
    ]
  }
  {
    bucket_id -16
    weight_set [
      [ 1.791 1.724 1.727 1.438 ]
    ]
  }
  {
    bucket_id -17
    weight_set [
      [ 1.755 1.725 2.134 1.745 ]
    ]
  }
  {
    bucket_id -18
    weight_set [
      [ 1.755 1.725 2.134 1.745 ]
    ]
  }
  {
    bucket_id -19
    weight_set [
      [ 1.903 1.829 1.665 1.593 ]
    ]
  }
  {
    bucket_id -20
    weight_set [
      [ 1.903 1.829 1.665 1.593 ]
    ]
  }
  {
    bucket_id -21
    weight_set [
      [ 1.779 1.686 1.796 1.718 ]
    ]
  }
  {
    bucket_id -22
    weight_set [
      [ 1.779 1.686 1.796 1.718 ]
    ]
  }
  {
    bucket_id -23
    weight_set [
      [ 1.915 1.864 1.591 2.167 ]
    ]
  }
  {
    bucket_id -24
    weight_set [
      [ 1.915 1.864 1.591 2.167 ]
    ]
  }
  {
    bucket_id -25
    weight_set [
      [ 1.728 1.794 1.760 1.444 ]
    ]
  }
  {
    bucket_id -26
    weight_set [
      [ 1.728 1.794 1.760 1.444 ]
    ]
  }
}

# end crush map
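
For reference, a decompiled CRUSH map like the one above can be obtained with the standard tools; a minimal sketch, with placeholder file names:

Code:
# fetch the binary CRUSH map from the cluster
ceph osd getcrushmap -o crushmap.bin
# decompile it into the editable text form shown above
crushtool -d crushmap.bin -o crushmap.txt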
 
Code:
epoch 24875
fsid b020e833-3252-416a-b904-40bb4c97af5e
created 2020-05-20 08:24:39.089770
modified 2020-07-09 14:00:54.164665
flags sortbitwise,recovery_deletes,purged_snapdirs,pglog_hardlimit
crush_version 116
full_ratio 0.95
backfillfull_ratio 0.9
nearfull_ratio 0.85
require_min_compat_client jewel
min_compat_client jewel
require_osd_release nautilus
pool 11 'vm' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 2048 pgp_num 2048 last_change 20952 lfor 0/20640/20646 flags hashpspool,selfmanaged_snaps stripe_width 0 application rbd
    removed_snaps [1~5]
max_osd 48
osd.0 up   in  weight 1 up_from 24102 up_thru 24806 down_at 24101 last_clean_interval [23102,24101) [v2:172.19.2.32:6800/2266,v1:172.19.2.32:6801/2266] [v2:172.19.2.32:6802/3002266,v1:172.19.2.32:6830/3002266] exists,up 62660545-aa4d-493b-9b51-b8d4c0eea39d
osd.1 up   in  weight 1 up_from 24102 up_thru 24871 down_at 24101 last_clean_interval [23102,24101) [v2:172.19.2.32:6824/2264,v1:172.19.2.32:6825/2264] [v2:172.19.2.32:6831/3002264,v1:172.19.2.32:6832/3002264] exists,up 95f45bd1-2bfb-4f85-a80b-f49bad8a1926
osd.2 up   in  weight 1 up_from 24102 up_thru 24872 down_at 24101 last_clean_interval [23094,24101) [v2:172.19.2.32:6816/2263,v1:172.19.2.32:6817/2263] [v2:172.19.2.32:6803/3002263,v1:172.19.2.32:6804/3002263] exists,up 022a466d-e2c3-467d-b0f1-97884f31bc0e
osd.3 up   in  weight 1 up_from 24102 up_thru 24870 down_at 24101 last_clean_interval [23101,24101) [v2:172.19.2.32:6808/2265,v1:172.19.2.32:6809/2265] [v2:172.19.2.32:6805/3002265,v1:172.19.2.32:6806/3002265] exists,up ef382897-f193-46d9-9ccf-ed3ef238d059
osd.4 up   in  weight 1 up_from 24103 up_thru 24869 down_at 24101 last_clean_interval [23098,24102) [v2:172.19.2.33:6800/2230,v1:172.19.2.33:6801/2230] [v2:172.19.2.33:6810/2002230,v1:172.19.2.33:6811/2002230] exists,up 18bae712-f242-426c-a7aa-78e5fd20772b
osd.5 up   in  weight 1 up_from 24102 up_thru 24861 down_at 24101 last_clean_interval [23101,24101) [v2:172.19.2.33:6808/2222,v1:172.19.2.33:6809/2222] [v2:172.19.2.33:6824/3002222,v1:172.19.2.33:6826/3002222] exists,up 384a7e16-394e-4a5e-b8a0-72f518b2c1ad
osd.6 up   in  weight 1 up_from 24102 up_thru 24862 down_at 24101 last_clean_interval [23098,24101) [v2:172.19.2.33:6816/2232,v1:172.19.2.33:6818/2232] [v2:172.19.2.33:6825/2002232,v1:172.19.2.33:6827/2002232] exists,up 6d9df537-3323-44db-af75-0c0765f9bb9a
osd.7 up   in  weight 1 up_from 24102 up_thru 24868 down_at 24101 last_clean_interval [23099,24101) [v2:172.19.2.33:6817/2225,v1:172.19.2.33:6819/2225] [v2:172.19.2.33:6802/2002225,v1:172.19.2.33:6803/2002225] exists,up d5a3faf6-ff28-4c3d-bc78-10135b9b3b05
osd.8 up   in  weight 1 up_from 24094 up_thru 24874 down_at 24093 last_clean_interval [23102,24093) [v2:172.19.2.34:6802/2257,v1:172.19.2.34:6804/2257] [v2:172.19.2.34:6819/2002257,v1:172.19.2.34:6821/2002257] exists,up 31e512bd-e042-442a-b1cd-095a07554753
osd.9 up   in  weight 1 up_from 24094 up_thru 24863 down_at 24093 last_clean_interval [23102,24093) [v2:172.19.2.34:6824/2248,v1:172.19.2.34:6825/2248] [v2:172.19.2.34:6810/2002248,v1:172.19.2.34:6818/2002248] exists,up de7e3be3-1a21-47f3-be4b-d379080d5788
osd.10 up   in  weight 1 up_from 24094 up_thru 24774 down_at 24093 last_clean_interval [23102,24093) [v2:172.19.2.34:6816/2259,v1:172.19.2.34:6817/2259] [v2:172.19.2.34:6806/2002259,v1:172.19.2.34:6808/2002259] exists,up 4ed98d9d-2641-4e55-8841-235971b1c06c
 
Code:
osd.11 up   in  weight 1 up_from 24094 up_thru 24870 down_at 24093 last_clean_interval [23102,24093) [v2:172.19.2.34:6800/2258,v1:172.19.2.34:6801/2258] [v2:172.19.2.34:6809/2002258,v1:172.19.2.34:6813/2002258] exists,up 9833902e-5823-4f6a-8bcb-0248aebf8557
osd.12 up   in  weight 1 up_from 23999 up_thru 24843 down_at 23998 last_clean_interval [23102,23998) [v2:172.19.2.35:6809/2272,v1:172.19.2.35:6811/2272] [v2:172.19.2.35:6812/1002272,v1:172.19.2.35:6814/1002272] exists,up d5da7165-7ff0-4304-83d2-bcaa3b06f996
osd.13 up   in  weight 1 up_from 23995 up_thru 24862 down_at 23994 last_clean_interval [23102,23994) [v2:172.19.2.35:6808/2274,v1:172.19.2.35:6810/2274] [v2:172.19.2.35:6826/1002274,v1:172.19.2.35:6827/1002274] exists,up 19fa50a6-1579-45be-9be1-8154ef45d53b
osd.14 up   in  weight 1 up_from 23995 up_thru 24737 down_at 23994 last_clean_interval [23102,23994) [v2:172.19.2.35:6800/2273,v1:172.19.2.35:6801/2273] [v2:172.19.2.35:6834/1002273,v1:172.19.2.35:6835/1002273] exists,up da501b87-92e5-4631-a009-a0b8aa5aef64
osd.15 up   in  weight 1 up_from 23995 up_thru 24871 down_at 23994 last_clean_interval [23102,23994) [v2:172.19.2.35:6824/2270,v1:172.19.2.35:6825/2270] [v2:172.19.2.35:6802/1002270,v1:172.19.2.35:6803/1002270] exists,up 948b0f2b-6e6b-4293-bceb-74dd7de994db
osd.16 up   in  weight 1 up_from 23995 up_thru 24829 down_at 23994 last_clean_interval [23093,23994) [v2:172.19.2.36:6808/2201,v1:172.19.2.36:6810/2201] [v2:172.19.2.36:6813/1002201,v1:172.19.2.36:6815/1002201] exists,up c7cfd3a6-3047-4603-b8f6-d90d7a68d7a5
osd.17 up   in  weight 1 up_from 23995 up_thru 24854 down_at 23994 last_clean_interval [23101,23994) [v2:172.19.2.36:6800/2203,v1:172.19.2.36:6801/2203] [v2:172.19.2.36:6820/1002203,v1:172.19.2.36:6821/1002203] exists,up ef9fd31d-93eb-43bc-b54b-8b82088f84a4
osd.18 up   in  weight 1 up_from 24094 up_thru 24737 down_at 24093 last_clean_interval [23095,24093) [v2:172.19.2.36:6824/2204,v1:172.19.2.36:6825/2204] [v2:172.19.2.36:6826/2002204,v1:172.19.2.36:6827/2002204] exists,up d83bd27e-3cab-4b9c-9ee1-b48d7e6b8449
osd.19 up   in  weight 1 up_from 23995 up_thru 24855 down_at 23994 last_clean_interval [23102,23994) [v2:172.19.2.36:6809/2205,v1:172.19.2.36:6811/2205] [v2:172.19.2.36:6812/1002205,v1:172.19.2.36:6814/1002205] exists,up 49431642-b494-4cbd-8709-9d9835d8da76
osd.20 up   in  weight 1 up_from 24094 up_thru 24872 down_at 24093 last_clean_interval [23098,24093) [v2:172.19.2.37:6801/2241,v1:172.19.2.37:6803/2241] [v2:172.19.2.37:6804/2002241,v1:172.19.2.37:6806/2002241] exists,up 6ddca68b-4e1e-4095-9a49-6b078e8c8662
osd.21 down out weight 0 up_from 23101 up_thru 23905 down_at 23994 last_clean_interval [22415,22883) [v2:172.19.2.37:6816/2244,v1:172.19.2.37:6817/2244] [v2:172.19.2.37:6818/2244,v1:172.19.2.37:6819/2244] autoout,exists 54ad8209-a91b-4bf2-a9d6-4a7036941ab5
osd.22 up   in  weight 1 up_from 24095 up_thru 24845 down_at 24093 last_clean_interval [23101,24094) [v2:172.19.2.37:6824/2248,v1:172.19.2.37:6825/2248] [v2:172.19.2.37:6828/2002248,v1:172.19.2.37:6829/2002248] exists,up edc1c4ef-7642-448c-a6d8-9d38eedfbaaa
osd.23 up   in  weight 1 up_from 24094 up_thru 24872 down_at 24093 last_clean_interval [23101,24093) [v2:172.19.2.37:6800/2247,v1:172.19.2.37:6802/2247] [v2:172.19.2.37:6809/1002247,v1:172.19.2.37:6811/1002247] exists,up a51564e1-9554-4006-9cdf-e945d1286957
osd.24 up   in  weight 1 up_from 23997 up_thru 24854 down_at 23994 last_clean_interval [23102,23996) [v2:172.19.2.38:6802/2200,v1:172.19.2.38:6804/2200] [v2:172.19.2.38:6818/1002200,v1:172.19.2.38:6819/1002200] exists,up 541cf92e-1c5a-45a2-9965-c6b06e03fbe6
osd.25 up   in  weight 1 up_from 24094 up_thru 24765 down_at 24093 last_clean_interval [23102,24093) [v2:172.19.2.38:6816/2193,v1:172.19.2.38:6817/2193] [v2:172.19.2.38:6826/2002193,v1:172.19.2.38:6827/2002193] exists,up dd4ea85d-4476-4559-9107-aed3f856abdc
osd.26 up   in  weight 1 up_from 24098 up_thru 24792 down_at 24093 last_clean_interval [23102,24097) [v2:172.19.2.38:6824/2194,v1:172.19.2.38:6825/2194] [v2:172.19.2.38:6811/2002194,v1:172.19.2.38:6813/2002194] exists,up 2cfefdbf-b343-493a-a629-cc0a10bb9112
osd.27 up   in  weight 1 up_from 24094 up_thru 24730 down_at 24093 last_clean_interval [23102,24093) [v2:172.19.2.38:6800/2195,v1:172.19.2.38:6801/2195] [v2:172.19.2.38:6803/2002195,v1:172.19.2.38:6805/2002195] exists,up 17b8bb37-9fca-4db8-889e-a7b98b2feb65
osd.28 up   in  weight 1 up_from 23995 up_thru 24870 down_at 23994 last_clean_interval [23101,23994) [v2:172.19.2.39:6800/2179,v1:172.19.2.39:6801/2179] [v2:172.19.2.39:6812/1002179,v1:172.19.2.39:6813/1002179] exists,up 933be6cc-1b52-4498-af2a-3b1c4e18c827
osd.29 up   in  weight 1 up_from 24094 up_thru 24864 down_at 24093 last_clean_interval [23102,24093) [v2:172.19.2.39:6808/2180,v1:172.19.2.39:6809/2180] [v2:172.19.2.39:6818/2002180,v1:172.19.2.39:6819/2002180] exists,up 3d5c7d29-9d56-43ba-9879-728896dac408
osd.30 up   in  weight 1 up_from 23995 up_thru 24869 down_at 23994 last_clean_interval [23102,23994) [v2:172.19.2.39:6824/2177,v1:172.19.2.39:6825/2177] [v2:172.19.2.39:6810/1002177,v1:172.19.2.39:6811/1002177] exists,up 13eadfd9-b3c0-4d36-aad1-15db8767379c
osd.31 up   in  weight 1 up_from 23995 up_thru 24855 down_at 23994 last_clean_interval [23101,23994) [v2:172.19.2.39:6816/2181,v1:172.19.2.39:6817/2181] [v2:172.19.2.39:6804/1002181,v1:172.19.2.39:6805/1002181] exists,up c83d4def-153c-48de-8e82-ac795ef09e9e
osd.32 up   in  weight 1 up_from 24102 up_thru 24798 down_at 24101 last_clean_interval [23459,24101) [v2:172.19.2.40:6806/2203,v1:172.19.2.40:6808/2203] [v2:172.19.2.40:6805/2002203,v1:172.19.2.40:6810/2002203] exists,up 5eee08c9-f7b9-4db7-a2ef-4e6142c13818
osd.33 up   in  weight 1 up_from 24102 up_thru 24728 down_at 24101 last_clean_interval [23461,24101) [v2:172.19.2.40:6800/2196,v1:172.19.2.40:6801/2196] [v2:172.19.2.40:6803/2002196,v1:172.19.2.40:6809/2002196] exists,up 8e419a7f-3ca9-402e-a233-1d1e31c0f133
osd.34 up   in  weight 1 up_from 24102 up_thru 24798 down_at 24101 last_clean_interval [23461,24101) [v2:172.19.2.40:6824/2197,v1:172.19.2.40:6825/2197] [v2:172.19.2.40:6818/2002197,v1:172.19.2.40:6807/2002197] exists,up f7093193-7e61-4a68-9059-07dcd4726822
osd.35 up   in  weight 1 up_from 24103 up_thru 24869 down_at 24101 last_clean_interval [23461,24102) [v2:172.19.2.40:6816/2201,v1:172.19.2.40:6817/2201] [v2:172.19.2.40:6804/2002201,v1:172.19.2.40:6832/2002201] exists,up dcb8134e-ee62-41e6-8cba-536adf50ecc8
osd.36 up   in  weight 1 up_from 24102 up_thru 24704 down_at 24101 last_clean_interval [23462,24101) [v2:172.19.2.41:6802/2206,v1:172.19.2.41:6806/2206] [v2:172.19.2.41:6807/1002206,v1:172.19.2.41:6808/1002206] exists,up f57f6ad6-c27c-4486-a5dc-f6422a7127a6
osd.37 up   in  weight 1 up_from 24102 up_thru 24870 down_at 24101 last_clean_interval [23461,24101) [v2:172.19.2.41:6800/2207,v1:172.19.2.41:6804/2207] [v2:172.19.2.41:6810/2002207,v1:172.19.2.41:6812/2002207] exists,up 1b54500c-302e-47d0-a5ba-d5405f04c06d
osd.38 up   in  weight 1 up_from 24103 up_thru 24863 down_at 24101 last_clean_interval [23462,24101) [v2:172.19.2.41:6801/2205,v1:172.19.2.41:6803/2205] [v2:172.19.2.41:6827/2002205,v1:172.19.2.41:6833/2002205] exists,up 18970b3d-b619-43bf-af12-51aac1605b93
osd.39 up   in  weight 1 up_from 24102 up_thru 24873 down_at 24101 last_clean_interval [23462,24101) [v2:172.19.2.41:6805/2208,v1:172.19.2.41:6809/2208] [v2:172.19.2.41:6813/2002208,v1:172.19.2.41:6817/2002208] exists,up 9b938ea7-30f7-4399-8ffd-7f9edee0fe9a
osd.40 up   in  weight 1 up_from 24096 up_thru 24825 down_at 24093 last_clean_interval [23462,24095) [v2:172.19.2.42:6808/2186,v1:172.19.2.42:6809/2186] [v2:172.19.2.42:6810/2002186,v1:172.19.2.42:6811/2002186] exists,up 9d975f81-3c9c-4382-8cc1-7e3638039c29
osd.41 up   in  weight 1 up_from 24094 up_thru 24870 down_at 24093 last_clean_interval [23463,24093) [v2:172.19.2.42:6800/2182,v1:172.19.2.42:6801/2182] [v2:172.19.2.42:6818/2002182,v1:172.19.2.42:6819/2002182] exists,up 5c4ce6ab-a6a2-4c42-bf03-ddc5ac6bebad
osd.42 up   in  weight 1 up_from 24095 up_thru 24855 down_at 24093 last_clean_interval [23463,24094) [v2:172.19.2.42:6816/2177,v1:172.19.2.42:6817/2177] [v2:172.19.2.42:6830/2002177,v1:172.19.2.42:6831/2002177] exists,up b7b8d43e-811b-4bc0-8ffb-3012e17b7098
osd.43 up   in  weight 1 up_from 24094 up_thru 24856 down_at 24093 last_clean_interval [23464,24093) [v2:172.19.2.42:6824/2180,v1:172.19.2.42:6825/2180] [v2:172.19.2.42:6821/2002180,v1:172.19.2.42:6823/2002180] exists,up cb622639-a0b9-42c5-8f4d-62f88b6a1465
osd.44 up   in  weight 1 up_from 24094 up_thru 24807 down_at 24093 last_clean_interval [23461,24093) [v2:172.19.2.43:6824/2178,v1:172.19.2.43:6825/2178] [v2:172.19.2.43:6815/2002178,v1:172.19.2.43:6818/2002178] exists,up 01ceca8a-2d71-4eaf-ad1a-e660efb901f5
osd.45 up   in  weight 1 up_from 24094 up_thru 24870 down_at 24093 last_clean_interval [23461,24093) [v2:172.19.2.43:6803/2180,v1:172.19.2.43:6807/2180] [v2:172.19.2.43:6808/2002180,v1:172.19.2.43:6812/2002180] exists,up f49d3f66-e628-45c5-bed4-60bb32f8090f
osd.46 up   in  weight 1 up_from 24094 up_thru 24863 down_at 24093 last_clean_interval [23463,24093) [v2:172.19.2.43:6800/2176,v1:172.19.2.43:6801/2176] [v2:172.19.2.43:6830/1002176,v1:172.19.2.43:6831/1002176] exists,up 968a8f0a-0a7c-4640-9ba2-d8c9c24cae39
osd.47 up   in  weight 1 up_from 24098 up_thru 24793 down_at 24093 last_clean_interval [23463,24097) [v2:172.19.2.43:6802/2179,v1:172.19.2.43:6805/2179] [v2:172.19.2.43:6804/2002179,v1:172.19.2.43:6806/2002179] exists,up 20998632-c0cd-4816-845a-969b1f1dd4a5
 
I am not certain. Is the balancer or the autoscaler running?

It is off


Code:
root@inc1pve25:~# ceph osd pool autoscale-status
POOL  SIZE   TARGET SIZE  RATE  RAW CAPACITY  RATIO   TARGET RATIO  BIAS  PG_NUM  NEW PG_NUM  AUTOSCALE
vm    5708M               3.0   85847G        0.0002                1.0   2048    32          off
 
Yes, the autoscaler is off.
But it must have been on at some point, since those entries only appear after it has run.
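
For reference, a sketch of how the per-pool autoscaler mode can be checked and pinned off (assuming the pool name vm from the autoscale-status output above):

Code:
# show the current autoscaler mode for the pool
ceph osd pool get vm pg_autoscale_mode
# pin it off so it cannot change the pool's PG count again
ceph osd pool set vm pg_autoscale_mode off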

  • Is the ceph balancer status empty?
  • And what does ceph osd tree show?
 
Code:
ID  CLASS WEIGHT   TYPE NAME          STATUS REWEIGHT PRI-AFF
 -1       83.83667 root default                               
 -3        6.98639     host inc1pve25                         
  0   ssd  1.74660         osd.0          up  1.00000 1.00000
  1   ssd  1.74660         osd.1          up  1.00000 1.00000
  2   ssd  1.74660         osd.2          up  1.00000 1.00000
  3   ssd  1.74660         osd.3          up  1.00000 1.00000
 -5        6.98639     host inc1pve26                         
  4   ssd  1.74660         osd.4          up  1.00000 1.00000
  5   ssd  1.74660         osd.5          up  1.00000 1.00000
  6   ssd  1.74660         osd.6          up  1.00000 1.00000
  7   ssd  1.74660         osd.7          up  1.00000 1.00000
 -7        6.98639     host inc1pve27                         
  8   ssd  1.74660         osd.8          up  1.00000 1.00000
  9   ssd  1.74660         osd.9          up  1.00000 1.00000
 10   ssd  1.74660         osd.10         up  1.00000 1.00000
 11   ssd  1.74660         osd.11         up  1.00000 1.00000
 -9        6.98639     host inc1pve28                         
 12   ssd  1.74660         osd.12         up  1.00000 1.00000
 13   ssd  1.74660         osd.13         up  1.00000 1.00000
 14   ssd  1.74660         osd.14         up  1.00000 1.00000
 15   ssd  1.74660         osd.15         up  1.00000 1.00000
-11        6.98639     host inc1pve29                         
 16   ssd  1.74660         osd.16         up  1.00000 1.00000
 17   ssd  1.74660         osd.17         up  1.00000 1.00000
 18   ssd  1.74660         osd.18         up  1.00000 1.00000
 19   ssd  1.74660         osd.19         up  1.00000 1.00000
-13        6.98639     host inc1pve30                         
 20   ssd  1.74660         osd.20         up  1.00000 1.00000
 21   ssd  1.74660         osd.21         up  1.00000 1.00000
 22   ssd  1.74660         osd.22         up  1.00000 1.00000
 23   ssd  1.74660         osd.23         up  1.00000 1.00000
-15        6.98639     host inc1pve31                         
 24   ssd  1.74660         osd.24         up  1.00000 1.00000
 25   ssd  1.74660         osd.25         up  1.00000 1.00000
 26   ssd  1.74660         osd.26         up  1.00000 1.00000
 27   ssd  1.74660         osd.27         up  1.00000 1.00000
-17        6.98639     host inc1pve32                         
 28   ssd  1.74660         osd.28         up  1.00000 1.00000
 29   ssd  1.74660         osd.29         up  1.00000 1.00000
 30   ssd  1.74660         osd.30         up  1.00000 1.00000
 31   ssd  1.74660         osd.31         up  1.00000 1.00000
-19        6.98639     host inc1pve33                         
 32   ssd  1.74660         osd.32         up  1.00000 1.00000
 33   ssd  1.74660         osd.33         up  1.00000 1.00000
 34   ssd  1.74660         osd.34         up  1.00000 1.00000
 35   ssd  1.74660         osd.35         up  1.00000 1.00000
-21        6.98639     host inc1pve34                         
 36   ssd  1.74660         osd.36         up  1.00000 1.00000
 37   ssd  1.74660         osd.37         up  1.00000 1.00000
 38   ssd  1.74660         osd.38         up  1.00000 1.00000
 39   ssd  1.74660         osd.39         up  1.00000 1.00000
-23        6.98639     host inc1pve35                         
 40   ssd  1.74660         osd.40         up  1.00000 1.00000
 41   ssd  1.74660         osd.41         up  1.00000 1.00000
 42   ssd  1.74660         osd.42         up  1.00000 1.00000
 43   ssd  1.74660         osd.43         up  1.00000 1.00000
-25        6.98639     host inc1pve36                         
 44   ssd  1.74660         osd.44         up  1.00000 1.00000
 45   ssd  1.74660         osd.45         up  1.00000 1.00000
 46   ssd  1.74660         osd.46         up  1.00000 1.00000
 47   ssd  1.74660         osd.47         up  1.00000 1.00000
 
Code:
root@inc1pve25:~# ceph balancer status
{
    "last_optimize_duration": "0:00:01.215117",
    "plans": [],
    "mode": "crush-compat",
    "active": true,
    "optimize_result": "Unable to find further optimization, change balancer mode and retry might help",
    "last_optimize_started": "Fri Jul 10 08:21:55 2020"
}
 
"active": true,
The status shows that the balancer is active. There must have been a reason why the balancer was activated in the first place. See the link below on how to configure the balancer.
https://docs.ceph.com/docs/nautilus/rados/operations/balancer/
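
The optimize_result above even hints at this ("change balancer mode and retry might help"). As a sketch, switching to upmap mode would look as follows; whether you want to keep balancing at all is an assumption here, and note that upmap requires luminous or newer clients, while the osd dump above still shows require_min_compat_client jewel:

Code:
# upmap needs luminous+ clients; only run this if no jewel clients remain
ceph osd set-require-min-compat-client luminous
ceph balancer mode upmap
ceph balancer on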

My current assumption is that the reweights from the balancer may not allow the recovery to proceed. If you want to reset the weights, you will need to turn off the balancer and remove the weights from the crush map. Note that this will rebalance the whole cluster at once.
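
A minimal sketch of that reset, assuming the choose_args section in the CRUSH map above was created by the balancer's crush-compat mode:

Code:
# stop the balancer first so it does not re-create the weight-set
ceph balancer off
# remove the crush-compat weight-set (the choose_args section) in one step
ceph osd crush weight-set rm-compat

Alternatively, the map can be edited by hand: decompile it as shown earlier, delete the entire choose_args section from the text file, recompile with crushtool -c, and inject it with ceph osd setcrushmap -i. Either way, expect a large data movement once the weights are gone.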
 
Code:
root@inc1pve25:~# ceph balancer off
root@inc1pve25:~# ceph balancer status
{
    "last_optimize_duration": "0:00:01.142933",
    "plans": [],
    "mode": "crush-compat",
    "active": false,
    "optimize_result": "Unable to find further optimization, change balancer mode and retry might help",
    "last_optimize_started": "Fri Jul 10 08:44:24 2020"
}
 
Now, how do I remove the weights from the crush map?
 
