]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/common: fix forwarding in non_futurized_call_with_interruption(). 40968/head
authorRadoslaw Zarzynski <rzarzyns@redhat.com>
Wed, 21 Apr 2021 15:34:30 +0000 (15:34 +0000)
committerRadoslaw Zarzynski <rzarzyns@redhat.com>
Wed, 21 Apr 2021 16:53:25 +0000 (16:53 +0000)
This commit fixes a memory corruption problem found on the intersection
of `seastar::thread` and `interruptor` by a teuthology job [1].
It turns out to be caused by a use-after-free which happens due to
an issue with type deduction in `non_futurized_call_with_interruption`.
However, it might be beneficial to audit other members of the family too.

If `Result` is deduced to be a non-reference type (i.e. the `Func`
returns e.g. `ceph::bufferlist`), `std::forward<Result>` forwards
it as a r-value reference (`Result&&`). `decltype(auto)` makes this
reference the return type of the entire function which isn't what
we want.

Debugging
=========

Instrumentation
---------------
```diff
--- a/src/crimson/osd/ops_executer.cc
+++ b/src/crimson/osd/ops_executer.cc
@@ -28,6 +28,21 @@ namespace {

 namespace crimson::osd {

+struct scope_dbgr {
+  scope_dbgr() {
+    logger().debug("{}:{} on {}", __func__, __LINE__, (void*)this);
+  }
+  scope_dbgr(const scope_dbgr& src) {
+    logger().debug("{}:{} copy on {}; src {}", __func__, __LINE__, (void*)this, (void*)&src);
+  }
+  scope_dbgr(scope_dbgr&& src) noexcept {
+    logger().debug("{}:{} move on {}; src {}", __func__, __LINE__, (void*)this, (void*)&src);
+  }
+  ~scope_dbgr() {
+    logger().debug("{}:{} on {}", __func__, __LINE__, (void*)this);
+  }
+};
+
 OpsExecuter::call_ierrorator::future<> OpsExecuter::do_op_call(OSDOp& osd_op)
 {
   std::string cname, mname;
@@ -84,14 +99,16 @@ OpsExecuter::call_ierrorator::future<> OpsExecuter::do_op_call(OSDOp& osd_op)
       ceph::bufferlist outdata;
       auto cls_context = reinterpret_cast<cls_method_context_t>(this);
       const auto ret = method->exec(cls_context, indata, outdata);
-      return std::make_pair(ret, std::move(outdata));
+      logger().debug("do_ops_call: outdata just after exec: {}", outdata);
+      return std::make_tuple(ret, std::move(outdata), scope_dbgr{});
     }
   ).then_interruptible(
     [this, prev_rd, prev_wr, &osd_op, flags]
     (auto outcome) -> call_errorator::future<> {
-      auto& [ret, outdata] = outcome;
+      auto& [ret, outdata, dbgr] = outcome;
       osd_op.rval = ret;
-
+      logger().debug("do_ops_call: outdata just after then_interruptible: {}",
+                     outdata);
       logger().debug("do_op_call: method returned ret={}, outdata.length()={}"
                      " while num_read={}, num_write={}",
                      ret, outdata.length(), num_read, num_write);
```

After the fix
-------------
```
DEBUG 2021-04-21 15:32:30,012 [shard 0] bluestore - get_attr
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - do_ops_call: outdata just after exec: buffer::list(len=18,
        buffer::ptr(0~18 0x6000000ef000 in raw 0x6000000ef000 len 4000 nref 1)
)
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:33 on 0x60000085feb3
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x60000085ff00; src 0x60000085feb3
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x60000085feb3
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x60000085fed0; src 0x60000085ff00
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x60000085ff00
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x60000085ff40; src 0x60000085fed0
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x60000085fed0
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x60000015a740; src 0x60000085ff40
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x60000085ff40
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x7ffc4d018df0; src 0x60000015a740
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x60000015a740
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x6000005196d8; src 0x7ffc4d018df0
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x7ffc4d018df0
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x7ffc4d018df0; src 0x6000005196d8
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x6000005196d8
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x7ffc4d018db0; src 0x7ffc4d018df0
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x7ffc4d018df0
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x600000807b58; src 0x7ffc4d018db0
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x7ffc4d018db0
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - scope_dbgr:39 move on 0x7ffc4d018d30; src 0x600000807b58
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - do_ops_call: outdata just after then_interruptible: buffer::list(len=18,
        buffer::ptr(0~18 0x6000000ef000 in raw 0x6000000ef000 len 4000 nref 1)
)
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - do_op_call: method returned ret=0, outdata.length()=18 while num_read=1, num_write=0
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x7ffc4d018d30
DEBUG 2021-04-21 15:32:30,012 [shard 0] osd - ~scope_dbgr:42 on 0x600000807b58
```

Before
------
```
DEBUG 2021-04-21 16:06:24,354 [shard 0] bluestore - get_attr
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - do_ops_call: outdata just after exec: buffer::list(len=18,
        buffer::ptr(0~18 0x6000000f7000 in raw 0x6000000f7000 len 4000 nref 1)
)
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:33 on 0x60000085fe84
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x60000085fed0; src 0x60000085fe84
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x60000085fe84
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x60000085fed0
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x60000085fea0; src 0x60000085fed0
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x60000085ff40; src 0x60000085fea0
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x60000085fea0
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x6000008360b0; src 0x60000085ff40
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x60000085ff40
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x7ffe48b3ee00; src 0x6000008360b0
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x6000008360b0
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x600000801a38; src 0x7ffe48b3ee00
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x7ffe48b3ee00
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x7ffe48b3ee00; src 0x600000801a38
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x600000801a38
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x7ffe48b3edc0; src 0x7ffe48b3ee00
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x7ffe48b3ee00
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x600000807bf8; src 0x7ffe48b3edc0
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x7ffe48b3edc0
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - scope_dbgr:39 move on 0x7ffe48b3ed40; src 0x600000807bf8
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - do_ops_call: outdata just after then_interruptible: buffer::list(len=18,
        buffer:ptr(0~18 no raw)
)
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - do_op_call: method returned ret=0, outdata.length()=18 while num_read=1, num_write=0
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x7ffe48b3ed40
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - ~scope_dbgr:42 on 0x600000807bf8
DEBUG 2021-04-21 16:06:24,354 [shard 0] osd - do_osd_ops: osd_op(client.4131.0:4 1.e 1:70fc7f5f:test-rados-api-o06-82166-2::foo:head {call lock.get_info in=31b out=18b}
 snapc 0={} ondisk+read+known_if_redirected e6) v8 - object 1:70fc7f5f:test-rados-api-o06-82166-2::foo:head all operations successful
Reactor stalled for 521 ms on shard 0.
Backtrace:
  0x00000000017101b8
  0x00000000016d7960
  0x00000000016d7edc
  0x00000000016d810f
  /lib64/libpthread.so.0+0x0000000000012b2f
  0x00000000016b563e
  0x00000000016b801c
  0x00000000016b8504
  0x0000000000d68120
```

[1]: http://pulpito.front.sepia.ceph.com/rzarzynski-2021-04-19_12:11:51-rados-master-distro-basic-smithi/6058778/

Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
src/crimson/common/interruptible_future.h

index 0d66ca843c4a16bad402a50f37f07bec84e232bc..d9a3a7360ab823cc0f97c8eceb760a417142b920 100644 (file)
@@ -195,7 +195,7 @@ auto call_with_interruption(
 
 template <typename InterruptCond, typename Func, typename... T,
          typename Result = std::invoke_result_t<Func, T...>>
-decltype(auto) non_futurized_call_with_interruption(
+Result non_futurized_call_with_interruption(
   InterruptCondRef<InterruptCond> interrupt_condition,
   Func&& func, T&&... args)
 {