From ddedc2e1820897be052828d5933a51daf4d29cf8 Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Wed, 21 Apr 2021 15:34:30 +0000 Subject: [PATCH] crimson/common: fix forwarding in non_futurized_call_with_interruption(). This commit fixes a memory corruption problem found on the intersection of `seastar::thread` and `interruptor` by a teuthology job [1]. It turns out to be caused by a use-after-free which happens due to an issue with type deduction in `non_futurized_call_with_interruption`. However, it might be beneficial to audit other members of the family too. If `Result` is deduced to be a non-reference type (i.e. the `Func` returns e.g. `ceph::bufferlist`), `std::forward` forwards it as a r-value reference (`Result&&`). `decltype(auto)` makes this reference the return type of the entire function which isn't what we want. Debugging ========= Instrumentation --------------- ```diff --- a/src/crimson/osd/ops_executer.cc +++ b/src/crimson/osd/ops_executer.cc @@ -28,6 +28,21 @@ namespace { namespace crimson::osd { +struct scope_dbgr { + scope_dbgr() { + logger().debug("{}:{} on {}", __func__, __LINE__, (void*)this); + } + scope_dbgr(const scope_dbgr& src) { + logger().debug("{}:{} copy on {}; src {}", __func__, __LINE__, (void*)this, (void*)&src); + } + scope_dbgr(scope_dbgr&& src) noexcept { + logger().debug("{}:{} move on {}; src {}", __func__, __LINE__, (void*)this, (void*)&src); + } + ~scope_dbgr() { + logger().debug("{}:{} on {}", __func__, __LINE__, (void*)this); + } +}; + OpsExecuter::call_ierrorator::future<> OpsExecuter::do_op_call(OSDOp& osd_op) { std::string cname, mname; @@ -84,14 +99,16 @@ OpsExecuter::call_ierrorator::future<> OpsExecuter::do_op_call(OSDOp& osd_op) ceph::bufferlist outdata; auto cls_context = reinterpret_cast(this); const auto ret = method->exec(cls_context, indata, outdata); - return std::make_pair(ret, std::move(outdata)); + logger().debug("do_ops_call: outdata just after exec: {}", outdata); + return std::make_tuple(ret, std::move(outdata), scope_dbgr{}); } ).then_interruptible( [this, prev_rd, prev_wr, &osd_op, flags] (auto outcome) -> call_errorator::future<> { - auto& [ret, outdata] = outcome; + auto& [ret, outdata, dbgr] = outcome; osd_op.rval = ret; - + logger().debug("do_ops_call: outdata just after then_interruptible: {}", + outdata); logger().debug("do_op_call: method returned ret={}, outdata.length()={}" " while num_read={}, num_write={}", ret, outdata.length(), num_read, num_write); ``` After the fix ------------- ``` DEBUG 2021-04-21 15:32:30,012 [shard 0] bluestore - get_attr DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - do_ops_call: outdata just after exec: buffer::list(len=18, buffer::ptr(0~18 0x6000000ef000 in raw 0x6000000ef000 len 4000 nref 1) ) DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:33 on 0x60000085feb3 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x60000085ff00; src 0x60000085feb3 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x60000085feb3 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x60000085fed0; src 0x60000085ff00 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x60000085ff00 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x60000085ff40; src 0x60000085fed0 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x60000085fed0 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x60000015a740; src 0x60000085ff40 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x60000085ff40 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x7ffc4d018df0; src 0x60000015a740 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x60000015a740 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x6000005196d8; src 0x7ffc4d018df0 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x7ffc4d018df0 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x7ffc4d018df0; src 0x6000005196d8 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x6000005196d8 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x7ffc4d018db0; src 0x7ffc4d018df0 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x7ffc4d018df0 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x600000807b58; src 0x7ffc4d018db0 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x7ffc4d018db0 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x7ffc4d018d30; src 0x600000807b58 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - do_ops_call: outdata just after then_interruptible: buffer::list(len=18, buffer::ptr(0~18 0x6000000ef000 in raw 0x6000000ef000 len 4000 nref 1) ) DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - do_op_call: method returned ret=0, outdata.length()=18 while num_read=1, num_write=0 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x7ffc4d018d30 DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x600000807b58 ``` Before ------ ``` DEBUG 2021-04-21 16:06:24,354 [shard 0] bluestore - get_attr DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - do_ops_call: outdata just after exec: buffer::list(len=18, buffer::ptr(0~18 0x6000000f7000 in raw 0x6000000f7000 len 4000 nref 1) ) DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:33 on 0x60000085fe84 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x60000085fed0; src 0x60000085fe84 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x60000085fe84 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x60000085fed0 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x60000085fea0; src 0x60000085fed0 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x60000085ff40; src 0x60000085fea0 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x60000085fea0 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x6000008360b0; src 0x60000085ff40 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x60000085ff40 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x7ffe48b3ee00; src 0x6000008360b0 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x6000008360b0 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x600000801a38; src 0x7ffe48b3ee00 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x7ffe48b3ee00 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x7ffe48b3ee00; src 0x600000801a38 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x600000801a38 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x7ffe48b3edc0; src 0x7ffe48b3ee00 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x7ffe48b3ee00 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x600000807bf8; src 0x7ffe48b3edc0 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x7ffe48b3edc0 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x7ffe48b3ed40; src 0x600000807bf8 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - do_ops_call: outdata just after then_interruptible: buffer::list(len=18, buffer:ptr(0~18 no raw) ) DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - do_op_call: method returned ret=0, outdata.length()=18 while num_read=1, num_write=0 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x7ffe48b3ed40 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x600000807bf8 DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - do_osd_ops: osd_op(client.4131.0:4 1.e 1:70fc7f5f:test-rados-api-o06-82166-2::foo:head {call lock.get_info in=31b out=18b} snapc 0={} ondisk+read+known_if_redirected e6) v8 - object 1:70fc7f5f:test-rados-api-o06-82166-2::foo:head all operations successful Reactor stalled for 521 ms on shard 0. Backtrace: 0x00000000017101b8 0x00000000016d7960 0x00000000016d7edc 0x00000000016d810f /lib64/libpthread.so.0+0x0000000000012b2f 0x00000000016b563e 0x00000000016b801c 0x00000000016b8504 0x0000000000d68120 ``` [1]: http://pulpito.front.sepia.ceph.com/rzarzynski-2021-04-19_12:11:51-rados-master-distro-basic-smithi/6058778/ Signed-off-by: Radoslaw Zarzynski --- src/crimson/common/interruptible_future.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crimson/common/interruptible_future.h b/src/crimson/common/interruptible_future.h index 0d66ca843c4a..d9a3a7360ab8 100644 --- a/src/crimson/common/interruptible_future.h +++ b/src/crimson/common/interruptible_future.h @@ -195,7 +195,7 @@ auto call_with_interruption( template > -decltype(auto) non_futurized_call_with_interruption( +Result non_futurized_call_with_interruption( InterruptCondRef interrupt_condition, Func&& func, T&&... args) { -- 2.47.3