Ceph OSD crash

szucs10

Hi!

Has anyone encountered a similar problem?
After upgrading to ceph-14.2.11, OSDs crash randomly; the problem has occurred twice:

Code:
ceph crash info 2020-11-03_04:50:37.808243Z_e8e9fd54-27a2-4039-82ff-e13d3e7ca40b
{
"os_version_id": "10",
"assert_condition": "is_valid_io(off, len)",
"utsname_release": "5.4.65-1-pve",
"os_name": "Debian GNU/Linux 10 (buster)",
"entity_name": "osd.13",
"assert_file": "/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc",
"timestamp": "2020-11-03 04:50:37.808243Z",
"process_name": "ceph-osd",
"utsname_machine": "x86_64",
"assert_line": 864,
"utsname_sysname": "Linux",
"os_version": "10 (buster)",
"os_id": "10",
"assert_thread_name": "tp_osd_tp",
"utsname_version": "#1 SMP PVE 5.4.65-1 (Mon, 21 Sep 2020 15:40:22 +0200)",
"backtrace": [
"(()+0x12730) [0x7fa137293730]",
"(gsignal()+0x10b) [0x7fa136d767bb]",
"(abort()+0x121) [0x7fa136d61535]",
"(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a3) [0x557b97d2d419]",
"(()+0x5115a0) [0x557b97d2d5a0]",
"(KernelDevice::aio_write(unsigned long, ceph::buffer::v14_2_0::list&, IOContext*, bool, int)+0x90) [0x557b983a1570]",
"(BlueStore::_do_alloc_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>, boost::intrusive_ptr<BlueStore::Onode>, BlueStore::WriteContext*)+0x2237) [0x557b98281247]",
"(BlueStore::_do_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>, unsigned long, unsigned long, ceph::buffer::v14_2_0::list&, unsigned int)+0x318) [0x557b982a9ef8]",
"(BlueStore::_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>&, unsigned long, unsigned long, ceph::buffer::v14_2_0::list&, unsigned int)+0xda) [0x557b982aadfa]",
"(BlueStore::_txc_add_transaction(BlueStore::TransContext*, ObjectStore::Transaction*)+0x1671) [0x557b982ae481]",
"(BlueStore::queue_transactions(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, std::vector<ObjectStore::Transaction, std::allocator<ObjectStore::Transaction> >&, boost::intrusive_ptr<TrackedOp>, ThreadPool::TPHandle*)+0x3c8) [0x557b982afeb8]",
"(non-virtual thunk to PrimaryLogPG::queue_transactions(std::vector<ObjectStore::Transaction, std::allocator<ObjectStore::Transaction> >&, boost::intrusive_ptr<OpRequest>)+0x54) [0x557b9801d8b4]",
"(ReplicatedBackend::submit_transaction(hobject_t const&, object_stat_sum_t const&, eversion_t const&, std::unique_ptr<PGTransaction, std::default_delete<PGTransaction> >&&, eversion_t const&, eversion_t const&, std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> > const&, boost::optional<pg_hit_set_history_t>&, Context*, unsigned long, osd_reqid_t, boost::intrusive_ptr<OpRequest>)+0x644) [0x557b981133f4]",
"(PrimaryLogPG::issue_repop(PrimaryLogPG::RepGather*, PrimaryLogPG::OpContext*)+0x102a) [0x557b97f7e0da]",
"(PrimaryLogPG::execute_ctx(PrimaryLogPG::OpContext*)+0x110c) [0x557b97fdf26c]",
"(PrimaryLogPG::do_op(boost::intrusive_ptr<OpRequest>&)+0x3101) [0x557b97fe2ba1]",
"(PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0xd77) [0x557b97fe4fa7]",
"(OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x392) [0x557b97e10f02]",
"(PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x62) [0x557b980b4e92]",
"(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0x7d7) [0x557b97e2cba7]",
"(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5b4) [0x557b983f90c4]",
"(ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0x557b983fbad0]",
"(()+0x7fa3) [0x7fa137288fa3]",
"(clone()+0x3f) [0x7fa136e384cf]"
],
"utsname_hostname": "xxxxxxx",
"assert_msg": "/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc: In function 'virtual int KernelDevice::aio_write(uint64_t, ceph::bufferlist&, IOContext*, bool, int)' thread 7fa109af2700 time 2020-11-03 05:50:37.797725\n/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc: 864: FAILED ceph_assert(is_valid_io(off, len))\n",
"crash_id": "2020-11-03_04:50:37.808243Z_e8e9fd54-27a2-4039-82ff-e13d3e7ca40b",
"assert_func": "virtual int KernelDevice::aio_write(uint64_t, ceph::bufferlist&, IOContext*, bool, int)",
"ceph_version": "14.2.11"
}
 
"assert_msg": "/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc: In function 'virtual int KernelDevice::aio_write(uint64_t, ceph::bufferlist&, IOContext*, bool, int)' thread 7fa109af2700 time 2020-11-03 05:50:37.797725\n/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc: 864: FAILED ceph_assert(is_valid_io(off, len))\n",
"crash_id": "2020-11-03_04:50:37.808243Z_e8e9fd54-27a2-4039-82ff-e13d3e7ca40b",
"assert_func": "virtual int KernelDevice::aio_write(uint64_t, ceph::bufferlist&, IOContext*, bool, int)",
"ceph_version": "14.2.11"
I have not found a recent tracker issue for that assert. Maybe you can run a fsck on the stopped OSD with ceph-bluestore-tool; it might indicate whether the drive has issues. If the crash is persistent, then a report to upstream Ceph might be a good idea.
https://tracker.ceph.com/
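For reference, the check could look roughly like this; the OSD id (13, taken from the crash report above) and the default data path are assumptions for your setup:

Code:
# stop the affected OSD first, then run a deep fsck on its BlueStore data
systemctl stop ceph-osd@13
ceph-bluestore-tool fsck --deep true --path /var/lib/ceph/osd/ceph-13
# start the OSD again once the check has finished
systemctl start ceph-osd@13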
 
Hello,

I guess I have the same issue here: an OSD crash with no obvious hardware issues:

Code:
root@X# ceph crash info 2020-11-18_02:24:35.429967Z_800333e3-630a-406b-9a0e-c7c345336087
{
    "os_version_id": "10",
    "utsname_machine": "x86_64",
    "entity_name": "osd.29",
    "backtrace": [
        "(()+0x12730) [0x7fb4df645730]",
        "(gsignal()+0x10b) [0x7fb4df1287bb]",
        "(abort()+0x121) [0x7fb4df113535]",
        "(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a3) [0x55def3ba0419]",
        "(()+0x5115a0) [0x55def3ba05a0]",
        "(KernelDevice::aio_write(unsigned long, ceph::buffer::v14_2_0::list&, IOContext*, bool, int)+0x90) [0x55def4214570]",
        "(BlueStore::_do_alloc_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>, boost::intrusive_ptr<BlueStore::Onode>, BlueStore::WriteContext*)+0x2237) [0x55def40f4247]",
        "(BlueStore::_do_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>, unsigned long, unsigned long, ceph::buffer::v14_2_0::list&, unsigned int)+0x318) [0x55def411cef8]",
        "(BlueStore::_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>&, unsigned long, unsigned long, ceph::buffer::v14_2_0::list&, unsigned int)+0xda) [0x55def411ddfa]",
        "(BlueStore::_txc_add_transaction(BlueStore::TransContext*, ObjectStore::Transaction*)+0x1671) [0x55def4121481]",
        "(BlueStore::queue_transactions(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, std::vector<ObjectStore::Transaction, std::allocator<ObjectStore::Transaction> >&, boost::intrusive_ptr<TrackedOp>, ThreadPool::TPHandle*)+0x3c8) [0x55def4122eb8]",
        "(non-virtual thunk to PrimaryLogPG::queue_transactions(std::vector<ObjectStore::Transaction, std::allocator<ObjectStore::Transaction> >&, boost::intrusive_ptr<OpRequest>)+0x54) [0x55def3e908b4]",
        "(ReplicatedBackend::do_repop(boost::intrusive_ptr<OpRequest>)+0xdf8) [0x55def3f89978]",
        "(ReplicatedBackend::_handle_message(boost::intrusive_ptr<OpRequest>)+0x267) [0x55def3f97ab7]",
        "(PGBackend::handle_message(boost::intrusive_ptr<OpRequest>)+0x57) [0x55def3ea8e17]",
        "(PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0x61f) [0x55def3e5784f]",
        "(OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x392) [0x55def3c83f02]",
        "(PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x62) [0x55def3f27e92]",
        "(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0x7d7) [0x55def3c9fba7]",
        "(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5b4) [0x55def426c0c4]",
        "(ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0x55def426ead0]",
        "(()+0x7fa3) [0x7fb4df63afa3]",
        "(clone()+0x3f) [0x7fb4df1ea4cf]"
    ],
    "assert_line": 864,
    "utsname_release": "5.4.65-1-pve",
    "assert_file": "/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc",
    "utsname_sysname": "Linux",
    "os_version": "10 (buster)",
    "os_id": "10",
    "assert_thread_name": "tp_osd_tp",
    "assert_msg": "/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc: In function 'virtual int KernelDevice::aio_write(uint64_t, ceph::bufferlist&, IOContext*, bool, int)' thread 7fb4c06c2700 time 2020-11-18 03:24:35.419736\n/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc: 864: FAILED ceph_assert(is_valid_io(off, len))\n",
    "assert_func": "virtual int KernelDevice::aio_write(uint64_t, ceph::bufferlist&, IOContext*, bool, int)",
    "ceph_version": "14.2.11",
    "os_name": "Debian GNU/Linux 10 (buster)",
    "timestamp": "2020-11-18 02:24:35.429967Z",
    "process_name": "ceph-osd",
    "archived": "2020-11-18 10:14:48.914391",
    "utsname_hostname": "X",
    "crash_id": "2020-11-18_02:24:35.429967Z_800333e3-630a-406b-9a0e-c7c345336087",
    "assert_condition": "is_valid_io(off, len)",
    "utsname_version": "#1 SMP PVE 5.4.65-1 (Mon, 21 Sep 2020 15:40:22 +0200)"
}
 
There is progress on this:

After some analysis IMO the root cause is highly likely the same as for https://tracker.ceph.com/issues/47751
Under some circumstances the Hybrid allocator might claim an out-of-bound extent from a fallback bitmap allocator, which subsequently results in using this extent for a disk write op. Hence the assertion in is_valid_io().
Using bitmap or avl allocators is a recommended workaround for now. Patches to fix the allocator are on their way.

https://tracker.ceph.com/issues/48276#note-32

The PR isn't merged upstream yet, so I guess we will see this (important) fix only in 14.2.16 or later.
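In the meantime, the workaround from the tracker can be applied by switching the allocator. A rough sketch (bitmap is just one of the two suggested options, osd.13 is an example id; restart the OSDs one at a time so the cluster stays healthy):

Code:
# set the BlueStore allocator for all OSDs (bitmap or avl); takes effect after a restart
ceph config set osd bluestore_allocator bitmap
systemctl restart ceph-osd@13
# verify what a running OSD actually uses via its admin socket
ceph daemon osd.13 config get bluestore_allocator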
 
The PR isn't merged upstream yet, so I guess we will see this (important) fix only in 14.2.16 or later.
Ceph 14.2.16 has been released, but the patch has not been merged into it yet.