And now I am unable to get the ceph-mons up, possibly related to this network issue after the Proxmox 6.4 -> 7 upgrade, or to the fact that all the LVM IDs seem to have changed.
Any help is appreciated, as this upgrade has been a truly horrible experience so far and I have no access to my data.
The monitor logs are huge after setting debugging to 20 on both mon and paxos, but basically the monitors never reach quorum.
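For reference, the debug levels were raised with ceph.conf overrides along these lines (note that in the config pasted further below debug_paxos has since been turned back down to 0/0):
[global]
debug_mon = 20
debug_paxos = 20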
Tested so far:
* telnet into both the messenger v1 and messenger v2 ports (commands shown after this list)
* Modified ceph.conf so the cluster only speaks messenger v1 or only messenger v2 (see the mon_host example after the config below)
* Reset all but one healthy mon, hoping that the healthy mon would pass its epoch on to the others; the other mons say they are synchronizing and then go back into electing
* Removed all monitors except the one healthy one so it could reach quorum on its own, which worked (it became leader), but the other monitors then simply refuse to sync
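The connectivity and per-daemon checks were roughly the following (mon names pve21/pve22/pve23 are assumed from the hostnames; 6789 is the default msgr v1 port, 3300 the msgr v2 port):
telnet 192.168.1.21 6789
telnet 192.168.1.21 3300
ceph daemon mon.pve21 mon_status   # per-daemon state via the admin socket, works even without quorum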
One observation is that the epochs all differ from each other; I hope this is not a split brain I am looking at?
As the logs are massive I do not even know where to start copying and pasting, so I will paste the most interesting portions below.
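This is roughly how I compared the monmap epochs on each node (paths and the mon ID are examples; the mon has to be stopped before extracting its map):
systemctl stop ceph-mon@pve21
ceph-mon -i pve21 --extract-monmap /tmp/monmap-pve21
monmaptool --print /tmp/monmap-pve21
systemctl start ceph-mon@pve21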
[global]
auth client required = none
auth cluster required = none
auth service required = none
#bluestore_block_db_size = 13106127360
#bluestore_block_wal_size = 13106127360
cluster_network = 172.16.1.0/16
debug_asok = 0/0
debug_auth = 0/0
debug_buffer = 0/0
debug_client = 0/0
debug_context = 0/0
debug_crush = 0/0
debug_filer = 0/0
debug_filestore = 0/0
debug_finisher = 0/0
debug_heartbeatmap = 0/0
debug_journal = 0/0
debug_journaler = 0/0
debug_lockdep = 0/0
debug_mds = 0/0
debug_mds_balancer = 0/0
debug_mds_locker = 0/0
debug_mds_log = 0/0
debug_mds_log_expire = 0/0
debug_mds_migrator = 0/0
debug_mon = 20
debug_monc = 0/0
debug_ms = 0/0
debug_objclass = 0/0
debug_objectcacher = 0/0
debug_objecter = 0/0
debug_optracker = 0/0
debug_osd = 1/1
debug_paxos = 0/0
debug_perfcounter = 0/0
debug_rados = 0/0
debug_rbd = 0/0
debug_rgw = 0/0
debug_throttle = 0/0
debug_timer = 0/0
debug_tp = 0/0
fsid = e44fbe1c-b1c7-481d-bd25-dc595eae2d13
mon_allow_pool_delete = true
mon_host = 192.168.1.21, 192.168.1.22, 192.168.1.23
mon_max_pg_per_osd = 500
mon_osd_allow_primary_affinity = true
osd_journal_size = 28120
osd_max_backfills = 5
osd_max_pg_per_osd_hard_ratio = 3
osd_pool_default_min_size = 2
osd_pool_default_size = 3
osd_recovery_max_active = 6
osd_recovery_op_priority = 3
osd_scrub_auto_repair = true
osd_scrub_begin_hour = 1
osd_scrub_end_hour = 8
osd_scrub_sleep = 0.1
public_network = 192.168.1.0/24
rbd_cache = true
bluestore_default_buffered_write = true # BlueStore has the ability to perform buffered writes. Buffered writes enable populating the read cache during the write process. This setting, in effect, changes the BlueStore cache into a write-through cache.
# It is advised that spinning media continue to use 64 kB while SSD/NVMe are likely to benefit from setting to 4 kB.
min_alloc_size_ssd=4096
min_alloc_size_hdd=65536
# https://yourcmc.ru/wiki/Ceph_performance
bluefs_preextend_wal_files = true
cephx_require_signatures = true
cephx_cluster_require_signatures = true
cephx_sign_messages = true
objecter_inflight_ops = 5120 # 24576 seems to be gold
objecter_inflight_op_bytes = 524288000 # (512 * 1024 000) on 512 PGs
[client]
client_reconnect_stale = true
keyring = /etc/pve/priv/$cluster.$name.keyring
[mds]
keyring = /var/lib/ceph/mds/ceph-$id/keyring
mds_data = /var/lib/ceph/mds/ceph-$id
[mon]
mon_compact_on_start = true
mon_compact_on_trim = true
[osd]
filestore_xattr_use_omap = true
keyring = /var/lib/ceph/osd/ceph-$id/keyring
osd_crush_update_on_start = true
[mds.pve23]
host = 192.168.1.23
[mds.pve21]
host = 192.168.1.21
[mds.pve22]
host = 192.168.1.22
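For the v1-only / v2-only test mentioned above, the mon_host line was changed from the bare IPs shown in the config to explicit addresses along these lines (the full dual-protocol form is shown; for the v1-only or v2-only runs only the corresponding entry was kept for each host):
mon_host = [v2:192.168.1.21:3300,v1:192.168.1.21:6789], [v2:192.168.1.22:3300,v1:192.168.1.22:6789], [v2:192.168.1.23:3300,v1:192.168.1.23:6789]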