From acd5823a7fe5154cac6ba6c78f47f032bfb8179e Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Tue, 25 May 2021 14:17:41 +0000 Subject: [PATCH] crimson/osd: extend lifetime of OpsExecuter to match all_completed. f7181ab2f65803ecd8204f8f4f5aad4713b747f3 has optimized the client parallelism. To achieve that `PG::do_osd_ops()` were converted to return basically future pair of futures. Unfortunately, the life- time management of `OpsExecuter` was kept intact. In the result, the object was valid only till fullfying the outer future while, due to the `rollbacker` instances, it should be available till `all_completed` becomes available. This issue can explain the following problem has been observed in a Teuthology job [1]. ``` DEBUG 2021-05-20 08:03:22,617 [shard 0] osd - do_op_call: method returned ret=-17, outdata.length()=0 while num_read=1, num_write=0 DEBUG 2021-05-20 08:03:22,617 [shard 0] osd - rollback_obc_if_modified: object 19:e17d4708:test-rados-api-smithi095-38404-2::foo:head got erro r generic:17, need_rollback=false ================================================================= ==33626==ERROR: AddressSanitizer: heap-use-after-free on address 0x60d0000b9320 at pc 0x560f486b8222 bp 0x7fffc467a1e0 sp 0x7fffc467a1d0 READ of size 4 at 0x60d0000b9320 thread T0 #0 0x560f486b8221 (/usr/bin/ceph-osd+0x2c610221) #1 0x560f4880c6b1 in seastar::continuation >, seastar::noncopy able_function > >::_future > > > ()>, seastar::future::then_impl_nrvo > >::_future > > > ()>, crimson::interruptible::interruptible_future_detail > >::_future > > > >(seastar::noncopyable_function > >::_ future > > > ()>&&)::{lambda(seastar::internal::promise_base_with_type >&&, seastar::noncopyable_function > >::_future > > > ()>&, seastar::future_state&&)#1}, void>::run_and_dispose() (/usr/bin/ceph-osd+0x2c7646b1) #2 0x560f5352c3ae (/usr/bin/ceph-osd+0x374843ae) #3 0x560f535318ef (/usr/bin/ceph-osd+0x374898ef) #4 0x560f536e395a (/usr/bin/ceph-osd+0x3763b95a) #5 0x560f532413d9 (/usr/bin/ceph-osd+0x371993d9) #6 0x560f476af95a in main (/usr/bin/ceph-osd+0x2b60795a) #7 0x7f7aa0af97b2 in __libc_start_main (/lib64/libc.so.6+0x237b2) #8 0x560f477d2e8d in _start (/usr/bin/ceph-osd+0x2b72ae8d) ``` [1]: http://pulpito.front.sepia.ceph.com/rzarzynski-2021-05-20_07:28:16-rados-master-distro-basic-smithi/6124735/ The commit deals with the problem by repacking the outer future. An alternative could be in switching from `std::unique_ptr` to `seastar::shared_ptr` for managing `OpsExecuter`. Signed-off-by: Radoslaw Zarzynski --- src/crimson/osd/pg.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 04f33c53b9b..14656104601 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -824,7 +824,12 @@ PG::do_osd_ops( peering_state.get_info().last_user_version); return do_osd_ops_iertr::make_ready_future>(std::move(reply)); } - ).finally([ox_deleter=std::move(ox)] {}); + ).safe_then_unpack_interruptible( + [ox_deleter=std::move(ox)](auto submitted, auto all_completed) mutable { + return do_osd_ops_iertr::make_ready_future>>( + std::move(submitted), + all_completed.finally([ox_deleter=std::move(ox_deleter)] {})); + }); } PG::do_osd_ops_iertr::future> @@ -844,7 +849,12 @@ PG::do_osd_ops( std::as_const(op_info), std::move(success_func), std::move(failure_func) - ).finally([ox_deleter=std::move(ox)] {}); + ).safe_then_unpack_interruptible( + [ox_deleter=std::move(ox)](auto submitted, auto all_completed) mutable { + return do_osd_ops_iertr::make_ready_future>( + std::move(submitted), + all_completed.finally([ox_deleter=std::move(ox_deleter)] {})); + }); } PG::interruptible_future> PG::do_pg_ops(Ref m) -- 2.39.5