Hi!
Has anyone encountered a similar problem?
After upgrading to ceph-14.2.11, an OSD has been crashing randomly; the problem has occurred twice so far:
ceph crash info 2020-11-03_04:50:37.808243Z_e8e9fd54-27a2-4039-82ff-e13d3e7ca40b
{
"os_version_id": "10",
"assert_condition": "is_valid_io(off, len)",
"utsname_release": "5.4.65-1-pve",
"os_name": "Debian GNU/Linux 10 (buster)",
"entity_name": "osd.13",
"assert_file": "/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc",
"timestamp": "2020-11-03 04:50:37.808243Z",
"process_name": "ceph-osd",
"utsname_machine": "x86_64",
"assert_line": 864,
"utsname_sysname": "Linux",
"os_version": "10 (buster)",
"os_id": "10",
"assert_thread_name": "tp_osd_tp",
"utsname_version": "#1 SMP PVE 5.4.65-1 (Mon, 21 Sep 2020 15:40:22 +0200)",
"backtrace": [
"(()+0x12730) [0x7fa137293730]",
"(gsignal()+0x10b) [0x7fa136d767bb]",
"(abort()+0x121) [0x7fa136d61535]",
"(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a3) [0x557b97d2d419]",
"(()+0x5115a0) [0x557b97d2d5a0]",
"(KernelDevice::aio_write(unsigned long, ceph::buffer::v14_2_0::list&, IOContext*, bool, int)+0x90) [0x557b983a1570]",
"(BlueStore::_do_alloc_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>, boost::intrusive_ptr<BlueStore::Onode>, BlueStore::WriteContext*)+0x2237) [0x557b98281247]",
"(BlueStore::_do_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>, unsigned long, unsigned long, ceph::buffer::v14_2_0::list&, unsigned int)+0x318) [0x557b982a9ef8]",
"(BlueStore::_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>&, unsigned long, unsigned long, ceph::buffer::v14_2_0::list&, unsigned int)+0xda) [0x557b982aadfa]",
"(BlueStore::_txc_add_transaction(BlueStore::TransContext*, ObjectStore::Transaction*)+0x1671) [0x557b982ae481]",
"(BlueStore::queue_transactions(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, std::vector<ObjectStore::Transaction, std::allocator<ObjectStore::Transaction> >&, boost::intrusive_ptr<TrackedOp>, ThreadPool::TPHandle*)+0x3c8) [0x557b982afeb8]",
"(non-virtual thunk to PrimaryLogPG::queue_transactions(std::vector<ObjectStore::Transaction, std::allocator<ObjectStore::Transaction> >&, boost::intrusive_ptr<OpRequest>)+0x54) [0x557b9801d8b4]",
"(ReplicatedBackend::submit_transaction(hobject_t const&, object_stat_sum_t const&, eversion_t const&, std::unique_ptr<PGTransaction, std::default_delete<PGTransaction> >&&, eversion_t const&, eversion_t const&, std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> > const&, boost::optional<pg_hit_set_history_t>&, Context*, unsigned long, osd_reqid_t, boost::intrusive_ptr<OpRequest>)+0x644) [0x557b981133f4]",
"(PrimaryLogPG::issue_repop(PrimaryLogPG::RepGather*, PrimaryLogPG::OpContext*)+0x102a) [0x557b97f7e0da]",
"(PrimaryLogPG::execute_ctx(PrimaryLogPG::OpContext*)+0x110c) [0x557b97fdf26c]",
"(PrimaryLogPG::do_op(boost::intrusive_ptr<OpRequest>&)+0x3101) [0x557b97fe2ba1]",
"(PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0xd77) [0x557b97fe4fa7]",
"(OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x392) [0x557b97e10f02]",
"(PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x62) [0x557b980b4e92]",
"(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0x7d7) [0x557b97e2cba7]",
"(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5b4) [0x557b983f90c4]",
"(ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0x557b983fbad0]",
"(()+0x7fa3) [0x7fa137288fa3]",
"(clone()+0x3f) [0x7fa136e384cf]"
],
"utsname_hostname": "xxxxxxx",
"assert_msg": "/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc: In function 'virtual int KernelDevice::aio_write(uint64_t, ceph::bufferlist&, IOContext*, bool, int)' thread 7fa109af2700 time 2020-11-03 05:50:37.797725\n/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc: 864: FAILED ceph_assert(is_valid_io(off, len))\n",
"crash_id": "2020-11-03_04:50:37.808243Z_e8e9fd54-27a2-4039-82ff-e13d3e7ca40b",
"assert_func": "virtual int KernelDevice::aio_write(uint64_t, ceph::bufferlist&, IOContext*, bool, int)",
"ceph_version": "14.2.11"
}
Has anyone encountered a similar problem?
After upgrading to ceph-14.2.11, an OSD has been crashing randomly; the problem has occurred twice so far:
ceph crash info 2020-11-03_04:50:37.808243Z_e8e9fd54-27a2-4039-82ff-e13d3e7ca40b
{
"os_version_id": "10",
"assert_condition": "is_valid_io(off, len)",
"utsname_release": "5.4.65-1-pve",
"os_name": "Debian GNU/Linux 10 (buster)",
"entity_name": "osd.13",
"assert_file": "/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc",
"timestamp": "2020-11-03 04:50:37.808243Z",
"process_name": "ceph-osd",
"utsname_machine": "x86_64",
"assert_line": 864,
"utsname_sysname": "Linux",
"os_version": "10 (buster)",
"os_id": "10",
"assert_thread_name": "tp_osd_tp",
"utsname_version": "#1 SMP PVE 5.4.65-1 (Mon, 21 Sep 2020 15:40:22 +0200)",
"backtrace": [
"(()+0x12730) [0x7fa137293730]",
"(gsignal()+0x10b) [0x7fa136d767bb]",
"(abort()+0x121) [0x7fa136d61535]",
"(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a3) [0x557b97d2d419]",
"(()+0x5115a0) [0x557b97d2d5a0]",
"(KernelDevice::aio_write(unsigned long, ceph::buffer::v14_2_0::list&, IOContext*, bool, int)+0x90) [0x557b983a1570]",
"(BlueStore::_do_alloc_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>, boost::intrusive_ptr<BlueStore::Onode>, BlueStore::WriteContext*)+0x2237) [0x557b98281247]",
"(BlueStore::_do_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>, unsigned long, unsigned long, ceph::buffer::v14_2_0::list&, unsigned int)+0x318) [0x557b982a9ef8]",
"(BlueStore::_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>&, unsigned long, unsigned long, ceph::buffer::v14_2_0::list&, unsigned int)+0xda) [0x557b982aadfa]",
"(BlueStore::_txc_add_transaction(BlueStore::TransContext*, ObjectStore::Transaction*)+0x1671) [0x557b982ae481]",
"(BlueStore::queue_transactions(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, std::vector<ObjectStore::Transaction, std::allocator<ObjectStore::Transaction> >&, boost::intrusive_ptr<TrackedOp>, ThreadPool::TPHandle*)+0x3c8) [0x557b982afeb8]",
"(non-virtual thunk to PrimaryLogPG::queue_transactions(std::vector<ObjectStore::Transaction, std::allocator<ObjectStore::Transaction> >&, boost::intrusive_ptr<OpRequest>)+0x54) [0x557b9801d8b4]",
"(ReplicatedBackend::submit_transaction(hobject_t const&, object_stat_sum_t const&, eversion_t const&, std::unique_ptr<PGTransaction, std::default_delete<PGTransaction> >&&, eversion_t const&, eversion_t const&, std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> > const&, boost::optional<pg_hit_set_history_t>&, Context*, unsigned long, osd_reqid_t, boost::intrusive_ptr<OpRequest>)+0x644) [0x557b981133f4]",
"(PrimaryLogPG::issue_repop(PrimaryLogPG::RepGather*, PrimaryLogPG::OpContext*)+0x102a) [0x557b97f7e0da]",
"(PrimaryLogPG::execute_ctx(PrimaryLogPG::OpContext*)+0x110c) [0x557b97fdf26c]",
"(PrimaryLogPG::do_op(boost::intrusive_ptr<OpRequest>&)+0x3101) [0x557b97fe2ba1]",
"(PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0xd77) [0x557b97fe4fa7]",
"(OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x392) [0x557b97e10f02]",
"(PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x62) [0x557b980b4e92]",
"(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0x7d7) [0x557b97e2cba7]",
"(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5b4) [0x557b983f90c4]",
"(ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0x557b983fbad0]",
"(()+0x7fa3) [0x7fa137288fa3]",
"(clone()+0x3f) [0x7fa136e384cf]"
],
"utsname_hostname": "xxxxxxx",
"assert_msg": "/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc: In function 'virtual int KernelDevice::aio_write(uint64_t, ceph::bufferlist&, IOContext*, bool, int)' thread 7fa109af2700 time 2020-11-03 05:50:37.797725\n/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc: 864: FAILED ceph_assert(is_valid_io(off, len))\n",
"crash_id": "2020-11-03_04:50:37.808243Z_e8e9fd54-27a2-4039-82ff-e13d3e7ca40b",
"assert_func": "virtual int KernelDevice::aio_write(uint64_t, ceph::bufferlist&, IOContext*, bool, int)",
"ceph_version": "14.2.11"
}