From 26e85455a00ab2c38da6535c363d0e6653603121 Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Mon, 7 Jun 2021 13:03:51 +0000 Subject: [PATCH] crimson/osd: fix assertion failure in OpSequencer on replay. Commit b5efdc6f1c9563357d7dfd33a8f379053592a215 has unified the interruption handling among `InternalClientRequest` and `ClientRequest`. Unfortunately, a call to `maybe_reset()` of `OpSequencer` has been overlooked and dropped leading to the `assert(prev_op > last_unblocked)` assertion failure in `start_op()`. This was the root cause of the following problem at Sepia: ``` rzarzynski@teuthology:/home/teuthworker/archive/rzarzynski-2021-05-26_12:20:26-rados-master-distro-basic-smithi/6136929$ less ./remote/smithi194/log/ceph-osd.6.log.gz ... DEBUG 2021-05-26 20:24:53,988 [shard 0] ms - [osd.6(client) v2:172.21.15.194:6804/34047 >> client.4453 172.21.15.67:0/3814935464@37042] <== #1 === osd_op(client.4453.0:5 12.6 12.7fc1f406 (undecoded) ondisk+write+known_if_redirected e52) v8 (42) DEBUG 2021-05-26 20:24:53,988 [shard 0] osd - client_request(id=4, detail=osd_op(client.4453.0:5 12.6 12.7fc1f406 (undecoded) ondisk+write+known_if_redirected e52) v8): start DEBUG 2021-05-26 20:24:53,988 [shard 0] osd - client_request(id=4, detail=osd_op(client.4453.0:5 12.6 12.7fc1f406 (undecoded) ondisk+write+known_if_redirected e52) v8): in repeat DEBUG 2021-05-26 20:24:53,988 [shard 0] osd - client_request(id=4, detail=osd_op(client.4453.0:5 12.6 12.7fc1f406 (undecoded) ondisk+write+known_if_redirected e52) v8) same_interval_since: 19 DEBUG 2021-05-26 20:24:53,988 [shard 0] osd - do_recover_missing check for recovery, 12:602f83fe:::foo:head DEBUG 2021-05-26 20:24:53,988 [shard 0] osd - client_request(id=4, detail=osd_op(client.4453.0:5 12.6 12:602f83fe:::foo:head {write 0~128 in=128b} snapc 0={} ondisk+write+known_if_redirected e52) v8): got obc lock ... 
DEBUG 2021-05-26 20:25:21,810 [shard 0] osd - client_request(id=4, detail=osd_op(client.4453.0:5 12.6 12:602f83fe:::foo:head {write 0~128 in=128b} snapc 0={} ondisk+write+known_if_redirected e52) v8): in repeat ... DEBUG 2021-05-26 20:25:21,809 [shard 0] osd - should_abort_request operation restart, acting set changed ... DEBUG 2021-05-26 20:25:21,813 [shard 0] osd - client_request(id=4, detail=osd_op(client.4453.0:5 12.6 12:602f83fe:::foo:head {write 0~128 in=128b} snapc 0={} ondisk+write+known_if_redirected e52) v8) same_interval_since: 55 ... DEBUG 2021-05-26 20:25:21,813 [shard 0] osd - client_request(id=4, detail=osd_op(client.4453.0:5 12.6 12:602f83fe:::foo:head {write 0~128 in=128b} snapc 0={} ondisk+write+known_if_redirected e52) v8) same_interval_since: 55 ceph-osd: /home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/17.0.0-4622-gaa1dc559/rpm/el8/BUILD/ceph-17.0.0-4622-gaa1dc559/src/crimson/osd/osd_operation_sequencer.h:52: seastar::futurize_t crimson::osd::OpSequencer::start_op(HandleT&, uint64_t, uint64_t, FuncT&&) [with HandleT = crimson::PipelineHandle; FuncT = crimson::interruptible::interruptor::wrap_function(Func&&) [with Func = crimson::osd::ClientRequest::start():: mutable::)> mutable:: mutable::; InterruptCond = crimson::osd::IOInterruptCondition]::; Result = crimson::interruptible::interruptible_future_detail >; seastar::futurize_t = crimson::interruptible::interruptible_future_detail >; uint64_t = long unsigned int]: Assertion `prev_op > last_unblocked' failed. Aborting on shard 0. 
Backtrace: 0# 0x000055F440239C1F in ceph-osd 1# FatalSignal::signaled(int, siginfo_t const*) in ceph-osd 2# FatalSignal::install_oneshot_signal_handler<6>()::{lambda(int, siginfo_t*, void*)#1}::_FUN(int, siginfo_t*, void*) in ceph-osd 3# 0x00007F4788336B20 in /lib64/libpthread.so.0 4# gsignal in /lib64/libc.so.6 5# abort in /lib64/libc.so.6 6# 0x00007F4786931B09 in /lib64/libc.so.6 7# 0x00007F478693FDE6 in /lib64/libc.so.6 8# 0x000055F43BA3DD17 in ceph-osd 9# 0x000055F43BA419A8 in ceph-osd 10# seastar::continuation >, seastar::noncopyable_function > (boost::intrusive_ptr&&)>, seastar::future >::then_impl_nrvo > (boost::intrusive_ptr&&)>, seastar::future > >(seastar::noncopyable_function > (boost::intrusive_ptr&&)>&&)::{lambda(seastar::internal::promise_base_with_type >&&, seastar::noncopyable_function > (boost::intrusive_ptr&&)>&, seastar::future_state >&&)#1}, boost::intrusive_ptr >::run_and_dispose() in ceph-osd 11# 0x000055F445C5C70F in ceph-osd ``` Signed-off-by: Radoslaw Zarzynski --- src/crimson/osd/osd_operations/client_request.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc index 75f70985af1..00f7133ab4b 100644 --- a/src/crimson/osd/osd_operations/client_request.cc +++ b/src/crimson/osd/osd_operations/client_request.cc @@ -123,10 +123,11 @@ seastar::future<> ClientRequest::start() sequencer.finish_op(get_id()); return seastar::stop_iteration::yes; }); - }, [pgref](std::exception_ptr eptr) { + }, [this, pgref](std::exception_ptr eptr) { if (should_abort_request(std::move(eptr))) { return seastar::stop_iteration::yes; } else { + sequencer.maybe_reset(get_id()); return seastar::stop_iteration::no; } }, pgref); -- 2.39.5