]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd: extend lifetime of OpsExecuter to match all_completed. 41536/head
authorRadoslaw Zarzynski <rzarzyns@redhat.com>
Tue, 25 May 2021 14:17:41 +0000 (14:17 +0000)
committerRadoslaw Zarzynski <rzarzyns@redhat.com>
Tue, 25 May 2021 14:47:33 +0000 (14:47 +0000)
f7181ab2f65803ecd8204f8f4f5aad4713b747f3 has optimized the client
parallelism. To achieve that `PG::do_osd_ops()` were converted to
return basically future pair of futures. Unfortunately, the life-
time management of `OpsExecuter` was kept intact. In the result,
the object was valid only till fullfying the outer future while,
due to the `rollbacker` instances, it should be available till
`all_completed` becomes available.

This issue can explain the following problem has been observed
in a Teuthology job [1].

```
DEBUG 2021-05-20 08:03:22,617 [shard 0] osd - do_op_call: method returned ret=-17, outdata.length()=0 while num_read=1, num_write=0
DEBUG 2021-05-20 08:03:22,617 [shard 0] osd - rollback_obc_if_modified: object 19:e17d4708:test-rados-api-smithi095-38404-2::foo:head got erro
r generic:17, need_rollback=false
=================================================================
==33626==ERROR: AddressSanitizer: heap-use-after-free on address 0x60d0000b9320 at pc 0x560f486b8222 bp 0x7fffc467a1e0 sp 0x7fffc467a1d0
READ of size 4 at 0x60d0000b9320 thread T0
    #0 0x560f486b8221  (/usr/bin/ceph-osd+0x2c610221)
    #1 0x560f4880c6b1 in seastar::continuation<seastar::internal::promise_base_with_type<boost::intrusive_ptr<MOSDOpReply> >, seastar::noncopy
able_function<crimson::interruptible::interruptible_future_detail<crimson::osd::IOInterruptCondition, crimson::errorator<crimson::unthrowable_
wrapper<std::error_code const&, crimson::ec<(std::errc)11> > >::_future<crimson::errorated_future_marker<boost::intrusive_ptr<MOSDOpReply> > >
 > ()>, seastar::future<void>::then_impl_nrvo<seastar::noncopyable_function<crimson::interruptible::interruptible_future_detail<crimson::osd::
IOInterruptCondition, crimson::errorator<crimson::unthrowable_wrapper<std::error_code const&, crimson::ec<(std::errc)11> > >::_future<crimson:
:errorated_future_marker<boost::intrusive_ptr<MOSDOpReply> > > > ()>, crimson::interruptible::interruptible_future_detail<crimson::osd::IOInte
rruptCondition, crimson::errorator<crimson::unthrowable_wrapper<std::error_code const&, crimson::ec<(std::errc)11> > >::_future<crimson::error
ated_future_marker<boost::intrusive_ptr<MOSDOpReply> > > > >(seastar::noncopyable_function<crimson::interruptible::interruptible_future_detail
<crimson::osd::IOInterruptCondition, crimson::errorator<crimson::unthrowable_wrapper<std::error_code const&, crimson::ec<(std::errc)11> > >::_
future<crimson::errorated_future_marker<boost::intrusive_ptr<MOSDOpReply> > > > ()>&&)::{lambda(seastar::internal::promise_base_with_type<boos
t::intrusive_ptr<MOSDOpReply> >&&, seastar::noncopyable_function<crimson::interruptible::interruptible_future_detail<crimson::osd::IOInterruptCondition, crimson::errorator<crimson::unthrowable_wrapper<std::error_code const&, crimson::ec<(std::errc)11> > >::_future<crimson::errorated_future_marker<boost::intrusive_ptr<MOSDOpReply> > > > ()>&, seastar::future_state<seastar::internal::monostate>&&)#1}, void>::run_and_dispose() (/usr/bin/ceph-osd+0x2c7646b1)
    #2 0x560f5352c3ae  (/usr/bin/ceph-osd+0x374843ae)
    #3 0x560f535318ef  (/usr/bin/ceph-osd+0x374898ef)
    #4 0x560f536e395a  (/usr/bin/ceph-osd+0x3763b95a)
    #5 0x560f532413d9  (/usr/bin/ceph-osd+0x371993d9)
    #6 0x560f476af95a in main (/usr/bin/ceph-osd+0x2b60795a)
    #7 0x7f7aa0af97b2 in __libc_start_main (/lib64/libc.so.6+0x237b2)
    #8 0x560f477d2e8d in _start (/usr/bin/ceph-osd+0x2b72ae8d)

```

[1]: http://pulpito.front.sepia.ceph.com/rzarzynski-2021-05-20_07:28:16-rados-master-distro-basic-smithi/6124735/

The commit deals with the problem by repacking the outer future.
An alternative could be in switching from `std::unique_ptr` to
`seastar::shared_ptr` for managing `OpsExecuter`.

Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
src/crimson/osd/pg.cc

index 04f33c53b9b28005610aca5cc85ad43df074feab..14656104601968d0a77d20d3f55d35d94fab0024 100644 (file)
@@ -824,7 +824,12 @@ PG::do_osd_ops(
         peering_state.get_info().last_user_version);
       return do_osd_ops_iertr::make_ready_future<Ref<MOSDOpReply>>(std::move(reply));
     }
-  ).finally([ox_deleter=std::move(ox)] {});
+  ).safe_then_unpack_interruptible(
+    [ox_deleter=std::move(ox)](auto submitted, auto all_completed) mutable {
+    return do_osd_ops_iertr::make_ready_future<pg_rep_op_fut_t<Ref<MOSDOpReply>>>(
+      std::move(submitted),
+      all_completed.finally([ox_deleter=std::move(ox_deleter)] {}));
+  });
 }
 
 PG::do_osd_ops_iertr::future<PG::pg_rep_op_fut_t<>>
@@ -844,7 +849,12 @@ PG::do_osd_ops(
     std::as_const(op_info),
     std::move(success_func),
     std::move(failure_func)
-  ).finally([ox_deleter=std::move(ox)] {});
+  ).safe_then_unpack_interruptible(
+    [ox_deleter=std::move(ox)](auto submitted, auto all_completed) mutable {
+    return do_osd_ops_iertr::make_ready_future<pg_rep_op_fut_t<>>(
+      std::move(submitted),
+      all_completed.finally([ox_deleter=std::move(ox_deleter)] {}));
+  });
 }
 
 PG::interruptible_future<Ref<MOSDOpReply>> PG::do_pg_ops(Ref<MOSDOp> m)