From be0ba676237bfd4c2c415562b774ab0e807428b7 Mon Sep 17 00:00:00 2001
From: Radoslaw Zarzynski
Date: Tue, 30 Nov 2021 19:30:32 +0000
Subject: [PATCH] crimson/osd: fix sequencing issues in
 ClientRequest::process_op.

The following crash has been observed in one of the runs at Sepia:

```
ceph-osd: /home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/17.0.0-8898-ge57ad63c/rpm/el8/BUILD/ceph-17.0.0-8898-ge57ad63c/src/crimson/osd/osd_operation_sequencer.h:123: void crimson::osd::OpSequencer::finish_op_in_order(crimson::osd::ClientRequest&): Assertion `op.get_id() > last_completed_id' failed.
Aborting on shard 0.
Backtrace:
Reactor stalled for 1807 ms on shard 0. Backtrace: 0xb14ab 0x46e57428 0x46bc450d 0x46be03bd 0x46be0782 0x46be0946 0x46be0bf6 0x12b1f 0x137341 0x3fdd6a92 0x3fddccdb 0x3fdde1ee 0x3fdde8b3 0x3fdd3f2b 0x3fdd4442 0x3fdd4c3a 0x12b1f 0x3737e 0x21db4 0x21c88 0x2fa75 0x3b769527 0x3b8418af 0x3b8423cb 0x3b842ce0 0x3b84383d 0x3a116220 0x3a143f31 0x3a144bcd 0x46b96271 0x46bde51a 0x46d6891b 0x46d6a8f0 0x4681a7d2 0x4681f03b 0x39fd50f2 0x23492 0x39b7a7dd
 0# gsignal in /lib64/libc.so.6
 1# abort in /lib64/libc.so.6
 2# 0x00007FB9FB946C89 in /lib64/libc.so.6
 3# 0x00007FB9FB954A76 in /lib64/libc.so.6
 4# 0x00005595E98E6528 in ceph-osd
 5# 0x00005595E99BE8B0 in ceph-osd
 6# 0x00005595E99BF3CC in ceph-osd
 7# 0x00005595E99BFCE1 in ceph-osd
 8# 0x00005595E99C083E in ceph-osd
 9# 0x00005595E8293221 in ceph-osd
10# 0x00005595E82C0F32 in ceph-osd
11# 0x00005595E82C1BCE in ceph-osd
12# 0x00005595F4D13272 in ceph-osd
13# 0x00005595F4D5B51B in ceph-osd
14# 0x00005595F4EE591C in ceph-osd
15# 0x00005595F4EE78F1 in ceph-osd
16# 0x00005595F49977D3 in ceph-osd
17# 0x00005595F499C03C in ceph-osd
18# main in ceph-osd
19# __libc_start_main in /lib64/libc.so.6
20# _start in ceph-osd
```

The sequence of events provides at least two clues:

- op no. 32 finished before op no. 29, which was waiting for
  `ObjectContext`,
- op no. 29 was a short-lived one -- it wasn't even waiting on `obc`.

```
rzarzynski@teuthology:/home/teuthworker/archive/rzarzynski-2021-11-22_22:01:32-rados-master-distro-basic-smithi$ less ./6520106/remote/smithi115/log/ceph-osd.3.log.gz
...
DEBUG 2021-11-22 22:32:24,531 [shard 0] osd - client_request(id=29, detail=m=[osd_op(client.4371.0:36 4.d 4.f0fb5e1d (undecoded) ondisk+retry+read+rwordered+known_if_redirected+supports_pool_eio e23) v8]): start
DEBUG 2021-11-22 22:32:24,531 [shard 0] osd - client_request(id=29, detail=m=[osd_op(client.4371.0:36 4.d 4.f0fb5e1d (undecoded) ondisk+retry+read+rwordered+known_if_redirected+supports_pool_eio e23) v8]): in repeat
...
DEBUG 2021-11-22 22:32:24,546 [shard 0] osd - client_request(id=29, detail=m=[osd_op(client.4371.0:36 4.d 4.f0fb5e1d (undecoded) ondisk+retry+read+rwordered+known_if_redirected+supports_pool_eio e23) v8]) same_interval_since: 21
DEBUG 2021-11-22 22:32:24,546 [shard 0] osd - OpSequencer::start_op: op=29, last_started=27, last_unblocked=27, last_completed=27
...
DEBUG 2021-11-22 22:32:24,621 [shard 0] osd - client_request(id=32, detail=m=[osd_op(client.4371.0:49 4.d 4.81addbad (undecoded) ondisk+retry+write+known_if_redirected+supports_pool_eio e23) v8]): start
DEBUG 2021-11-22 22:32:24,621 [shard 0] osd - client_request(id=32, detail=m=[osd_op(client.4371.0:49 4.d 4.81addbad (undecoded) ondisk+retry+write+known_if_redirected+supports_pool_eio e23) v8]): in repeat
...
DEBUG 2021-11-22 22:32:24,626 [shard 0] osd - client_request(id=32, detail=m=[osd_op(client.4371.0:49 4.d 4.81addbad (undecoded) ondisk+retry+write+known_if_redirected+supports_pool_eio e23) v8]) same_interval_since: 21
DEBUG 2021-11-22 22:32:24,626 [shard 0] osd - OpSequencer::start_op: op=32, last_started=29, last_unblocked=29, last_completed=27
DEBUG 2021-11-22 22:32:24,669 [shard 0] osd - OpSequencer::finish_op_in_order: op=32, last_started=32, last_unblocked=32, last_completed=27
...
DEBUG 2021-11-22 22:32:24,671 [shard 0] osd - client_request(id=32, detail=m=[osd_op(client.4371.0:49 4.d 4:b5dbb581:::smithi11538976-13:head {write 601684~619341 in=619341b, stat} snapc 0={} RETRY=1 ondisk+retry+write+known_if_redirected+supports_pool_eio e23) v8]): destroying
...
DEBUG 2021-11-22 22:32:24,722 [shard 0] osd - client_request(id=29, detail=m=[osd_op(client.4371.0:36 4.d 4:b87adf0f:::smithi11538976-9:head {read 0~1} snapc 0={} RETRY=1 ondisk+retry+read+rwordered+known_if_redirected+supports_pool_eio e23) v8]): got obc lock
...
INFO 2021-11-22 22:32:24,723 [shard 0] osd - client_request(id=29, detail=m=[osd_op(client.4371.0:36 4.d 4:b87adf0f:::smithi11538976-9:head {read 0~1} snapc 0={} RETRY=1 ondisk+retry+read+rwordered+known_if_redirected+supports_pool_eio e23) v8]) obc.get()=0x6190000d5780
...
DEBUG 2021-11-22 22:32:24,753 [shard 0] osd - OpSequencer::finish_op_in_order: op=29, last_started=32, last_unblocked=32, last_completed=32
ceph-osd: /home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/17.0.0-8898-ge57ad63c/rpm/el8/BUILD/ceph-17.0.0-8898-ge57ad63c/src/crimson/osd/osd_operation_sequencer.h:123: void crimson::osd::OpSequencer::finish_op_in_order(crimson::osd::ClientRequest&): Assertion `op.get_id() > last_completed_id' failed.
Aborting on shard 0.
```

This could be explained by a scenario in which:

- op no. 29 skipped stages of the execution pipeline, while
- it wrongly informed `OpSequencer` that its execution was in-order.

Static analysis shows there are multiple problems of this kind in
`ClientRequest::process_op()` and its callees, the most recently merged
one being the path for `PG::already_complete()`.

Signed-off-by: Radoslaw Zarzynski
---
 .../osd/osd_operations/client_request.cc | 60 ++++++++++++-------
 .../osd/osd_operations/client_request.h  | 10 +++-
 2 files changed, 47 insertions(+), 23 deletions(-)

diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc
index 3a5111ce0d4..66cd414ffc8 100644
--- a/src/crimson/osd/osd_operations/client_request.cc
+++ b/src/crimson/osd/osd_operations/client_request.cc
@@ -121,10 +121,13 @@ seastar::future<> ClientRequest::start()
           sequencer.finish_op_out_of_order(*this, osd.get_shard_services().registry);
         });
       } else {
-        return process_op(pgref).then_interruptible([this] {
-          // NOTE: this assumes process_op() handles everything
-          // in-order which I'm not sure about.
-          sequencer.finish_op_in_order(*this);
+        return process_op(pgref).then_interruptible([this] (const seq_mode_t mode) {
+          if (mode == seq_mode_t::IN_ORDER) {
+            sequencer.finish_op_in_order(*this);
+          } else {
+            assert(mode == seq_mode_t::OUT_OF_ORDER);
+            sequencer.finish_op_out_of_order(*this, osd.get_shard_services().registry);
+          }
         });
       }
     });
@@ -154,7 +157,7 @@ ClientRequest::process_pg_op(
     });
 }
 
-ClientRequest::interruptible_future<>
+ClientRequest::interruptible_future<ClientRequest::seq_mode_t>
 ClientRequest::process_op(Ref<PG> &pg)
 {
   return with_blocking_future_interruptible<IOInterruptCondition>(
@@ -165,52 +168,65 @@ ClientRequest::process_op(Ref<PG> &pg)
   }).then_interruptible([this, pg]() mutable {
     return pg->already_complete(m->get_reqid()).then_unpack_interruptible(
       [this, pg](bool completed, int ret) mutable
-      -> PG::load_obc_iertr::future<> {
+      -> PG::load_obc_iertr::future<seq_mode_t> {
       if (completed) {
         auto reply = crimson::make_message<MOSDOpReply>(
           m.get(), ret, pg->get_osdmap_epoch(),
           CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK, false);
-        return conn->send(std::move(reply));
+        return conn->send(std::move(reply)).then([] {
+          return seastar::make_ready_future<seq_mode_t>(seq_mode_t::OUT_OF_ORDER);
+        });
       } else {
         return with_blocking_future_interruptible<IOInterruptCondition>(
           handle.enter(pp(*pg).get_obc)).then_interruptible(
-          [this, pg]() mutable -> PG::load_obc_iertr::future<> {
+          [this, pg]() mutable -> PG::load_obc_iertr::future<seq_mode_t> {
            logger().debug("{}: got obc lock", *this);
            op_info.set_from_op(&*m, *pg->get_osdmap());
-           return pg->with_locked_obc(m->get_hobj(), op_info,
-             [this, pg](auto obc) mutable {
-               return with_blocking_future_interruptible<IOInterruptCondition>(
-                 handle.enter(pp(*pg).process)
-               ).then_interruptible([this, pg, obc]() mutable {
-                 return do_process(pg, obc);
+           // XXX: `do_with()` is just a workaround for `with_obc_func_t` imposing
+           // `future<void>`.
+           return seastar::do_with(seq_mode_t{}, [this, &pg] (seq_mode_t& mode) {
+             return pg->with_locked_obc(m->get_hobj(), op_info,
+               [this, pg, &mode](auto obc) mutable {
+                 return with_blocking_future_interruptible<IOInterruptCondition>(
+                   handle.enter(pp(*pg).process)
+                 ).then_interruptible([this, pg, obc, &mode]() mutable {
+                   return do_process(pg, obc).then_interruptible([&mode] (seq_mode_t _mode) {
+                     mode = _mode;
+                     return seastar::now();
+                   });
+                 });
+               }).safe_then_interruptible([&mode] {
+                 return PG::load_obc_iertr::make_ready_future<seq_mode_t>(mode);
                });
              });
           });
      }
    });
-  }).safe_then_interruptible([pg=std::move(pg)] {
-    return seastar::now();
+  }).safe_then_interruptible([pg=std::move(pg)] (const seq_mode_t mode) {
+    return seastar::make_ready_future<seq_mode_t>(mode);
   }, PG::load_obc_ertr::all_same_way([](auto &code) {
     logger().error("ClientRequest saw error code {}", code);
-    return seastar::now();
+    return seastar::make_ready_future<seq_mode_t>(seq_mode_t::OUT_OF_ORDER);
   }));
 }
 
-ClientRequest::interruptible_future<>
+ClientRequest::interruptible_future<ClientRequest::seq_mode_t>
 ClientRequest::do_process(Ref<PG>& pg, crimson::osd::ObjectContextRef obc)
 {
   if (!pg->is_primary()) {
     // primary can handle both normal ops and balanced reads
     if (is_misdirected(*pg)) {
       logger().trace("do_process: dropping misdirected op");
-      return seastar::now();
+      return seastar::make_ready_future<seq_mode_t>(seq_mode_t::OUT_OF_ORDER);
     } else if (const hobject_t& hoid = m->get_hobj();
                !pg->get_peering_state().can_serve_replica_read(hoid)) {
       auto reply = crimson::make_message<MOSDOpReply>(
         m.get(), -EAGAIN, pg->get_osdmap_epoch(),
         m->get_flags() & (CEPH_OSD_FLAG_ACK|CEPH_OSD_FLAG_ONDISK),
         !m->has_flag(CEPH_OSD_FLAG_RETURNVEC));
-      return conn->send(std::move(reply));
+      return conn->send(std::move(reply)).then([] {
+        return seastar::make_ready_future<seq_mode_t>(seq_mode_t::OUT_OF_ORDER);
+      });
     }
   }
   return pg->do_osd_ops(m, obc, op_info).safe_then_unpack_interruptible(
@@ -226,7 +242,9 @@ ClientRequest::do_process(Ref<PG>& pg, crimson::osd::ObjectContextRef obc)
     return with_blocking_future_interruptible<IOInterruptCondition>(
         handle.enter(pp(*pg).send_reply)).then_interruptible(
           [this, reply=std::move(reply)]() mutable{
-            return conn->send(std::move(reply));
+            return conn->send(std::move(reply)).then([] {
+              return seastar::make_ready_future<seq_mode_t>(seq_mode_t::IN_ORDER);
+            });
           });
   }, crimson::ct_error::eagain::handle([this, pg]() mutable {
     return process_op(pg);
diff --git a/src/crimson/osd/osd_operations/client_request.h b/src/crimson/osd/osd_operations/client_request.h
index 9563c102260..d695e2c3ee9 100644
--- a/src/crimson/osd/osd_operations/client_request.h
+++ b/src/crimson/osd/osd_operations/client_request.h
@@ -62,14 +62,20 @@ public:
 
 private:
   template <typename FuncT>
   interruptible_future<> with_sequencer(FuncT&& func);
-  interruptible_future<> do_process(
+
+  enum class seq_mode_t {
+    IN_ORDER,
+    OUT_OF_ORDER
+  };
+
+  interruptible_future<seq_mode_t> do_process(
     Ref<PG>& pg,
     crimson::osd::ObjectContextRef obc);
   ::crimson::interruptible::interruptible_future<
     ::crimson::osd::IOInterruptCondition> process_pg_op(
     Ref<PG> &pg);
   ::crimson::interruptible::interruptible_future<
-    ::crimson::osd::IOInterruptCondition> process_op(
+    ::crimson::osd::IOInterruptCondition, seq_mode_t> process_op(
     Ref<PG> &pg);
   bool is_pg_op() const;
-- 
2.39.5
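
A note for readers of this patch (not part of the commit itself): the invariant that the new `seq_mode_t` return value protects can be modelled with a small, self-contained sketch. `ToySequencer`, the hard-coded op ids, and `main()` below are hypothetical illustrations only -- they are not the real `crimson::osd::OpSequencer` interface -- but they show why an op that short-circuits the pipeline (e.g. via the `PG::already_complete()` path) must be reported as out-of-order rather than in-order.

```
#include <cassert>
#include <cstdint>

// Hypothetical, simplified model of the ordering invariant; not the real
// crimson::osd::OpSequencer interface.
enum class seq_mode_t { IN_ORDER, OUT_OF_ORDER };

struct ToySequencer {
  uint64_t last_completed_id = 27;  // value seen in the log right before the crash

  // Ops that went through the whole pipeline must complete in ascending id order.
  void finish_op_in_order(uint64_t id) {
    assert(id > last_completed_id);  // the assertion from osd_operation_sequencer.h:123
    last_completed_id = id;
  }

  // Ops that bailed out early (already completed, misdirected, error paths)
  // must not advance last_completed_id.
  void finish_op_out_of_order(uint64_t /*id*/) {}

  void finish(uint64_t id, seq_mode_t mode) {
    if (mode == seq_mode_t::IN_ORDER) {
      finish_op_in_order(id);
    } else {
      finish_op_out_of_order(id);
    }
  }
};

int main() {
  ToySequencer seq;
  // Op 32 takes a short-circuit path and finishes before op 29, which is
  // still waiting for its ObjectContext.
  seq.finish(32, seq_mode_t::OUT_OF_ORDER);  // leaves last_completed_id at 27
  seq.finish(29, seq_mode_t::IN_ORDER);      // fine: 29 > 27
  // Had op 32 been reported as IN_ORDER, last_completed_id would already be
  // 32 here and the call above would trip the assertion -- the crash seen
  // in the log.
  return 0;
}
```

Under that model, the fix amounts to making every early-return branch in `process_op()`/`do_process()` resolve to `seq_mode_t::OUT_OF_ORDER`, while only the full reply path in `do_process()` reports `seq_mode_t::IN_ORDER`.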