From: Jason Dillaman Date: Fri, 17 Apr 2020 15:17:05 +0000 (-0400) Subject: rbd-mirror: track in-flight start/stop/restart in instance replayer X-Git-Tag: v14.2.22~1^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e4d083d98ac56eee6184415ecca0f81c7f112910;p=ceph.git rbd-mirror: track in-flight start/stop/restart in instance replayer The shut down waits for in-flight ops to complete but the start/stop/restart operations were previously not tracked. This could cause a potential race and crash between an image replayer operation and the instance replayer shutting down. Fixes: https://tracker.ceph.com/issues/45072 Signed-off-by: Jason Dillaman (cherry picked from commit 31140a940ea1909c4b5d68ef4593cb582a527354) Conflicts: src/tools/rbd_mirror/InstanceReplayer.cc: Mutex::Locker vs std::lock_guard, m_local_rados->cct() vs m_local_io_ctx.cct(), no stop(Context *on_finish) function. --- diff --git a/src/test/rbd_mirror/test_mock_InstanceReplayer.cc b/src/test/rbd_mirror/test_mock_InstanceReplayer.cc index 79dce7ac296..5527dd42f5f 100644 --- a/src/test/rbd_mirror/test_mock_InstanceReplayer.cc +++ b/src/test/rbd_mirror/test_mock_InstanceReplayer.cc @@ -87,7 +87,7 @@ struct ImageReplayer { MOCK_METHOD0(destroy, void()); MOCK_METHOD2(start, void(Context *, bool)); MOCK_METHOD2(stop, void(Context *, bool)); - MOCK_METHOD0(restart, void()); + MOCK_METHOD1(restart, void(Context*)); MOCK_METHOD0(flush, void()); MOCK_METHOD2(print_status, void(Formatter *, stringstream *)); MOCK_METHOD2(add_peer, void(const std::string &, librados::IoCtx &)); @@ -192,7 +192,8 @@ TEST_F(TestMockInstanceReplayer, AcquireReleaseImage) { EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true)); EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false)); EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false)); - EXPECT_CALL(mock_image_replayer, start(nullptr, false)); + EXPECT_CALL(mock_image_replayer, start(_, false)) + .WillOnce(CompleteContext(0)); expect_work_queue(mock_threads); instance_replayer.acquire_image(&mock_instance_watcher, global_image_id, @@ -261,7 +262,8 @@ TEST_F(TestMockInstanceReplayer, RemoveFinishedImage) { EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true)); EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false)); EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false)); - EXPECT_CALL(mock_image_replayer, start(nullptr, false)); + EXPECT_CALL(mock_image_replayer, start(_, false)) + .WillOnce(CompleteContext(0)); expect_work_queue(mock_threads); instance_replayer.acquire_image(&mock_instance_watcher, global_image_id, @@ -332,7 +334,8 @@ TEST_F(TestMockInstanceReplayer, Reacquire) { EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true)); EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false)); EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false)); - EXPECT_CALL(mock_image_replayer, start(nullptr, false)); + EXPECT_CALL(mock_image_replayer, start(_, false)) + .WillOnce(CompleteContext(0)); expect_work_queue(mock_threads); C_SaferCond on_acquire1; @@ -342,7 +345,8 @@ TEST_F(TestMockInstanceReplayer, Reacquire) { // Re-acquire EXPECT_CALL(mock_image_replayer, set_finished(false)); - EXPECT_CALL(mock_image_replayer, restart()); + EXPECT_CALL(mock_image_replayer, restart(_)) + .WillOnce(CompleteContext(0)); expect_work_queue(mock_threads); C_SaferCond on_acquire2; diff --git a/src/tools/rbd_mirror/InstanceReplayer.cc b/src/tools/rbd_mirror/InstanceReplayer.cc index c0086a48f5b..b0e4463dac6 100644 --- a/src/tools/rbd_mirror/InstanceReplayer.cc +++ b/src/tools/rbd_mirror/InstanceReplayer.cc @@ -168,7 +168,7 @@ void InstanceReplayer::acquire_image(InstanceWatcher *instance_watcher, // detect if the image has been deleted while the leader was offline auto& image_replayer = it->second; image_replayer->set_finished(false); - image_replayer->restart(); + image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr)); } m_threads->work_queue->queue(on_finish, 0); @@ -217,7 +217,7 @@ void InstanceReplayer::remove_peer_image(const std::string &global_image_id, // it will eventually detect that the peer image is missing and // determine if a delete propagation is required. auto image_replayer = it->second; - image_replayer->restart(); + image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr)); } m_threads->work_queue->queue(on_finish, 0); } @@ -249,10 +249,15 @@ void InstanceReplayer::start() m_manual_stop = false; + auto cct = static_cast(m_local_rados->cct()); + auto gather_ctx = new C_Gather( + cct, new C_TrackedOp(m_async_op_tracker, nullptr)); for (auto &kv : m_image_replayers) { auto &image_replayer = kv.second; - image_replayer->start(nullptr, true); + image_replayer->start(gather_ctx->new_sub(), true); } + + gather_ctx->activate(); } template @@ -260,14 +265,21 @@ void InstanceReplayer::stop() { dout(10) << dendl; - Mutex::Locker locker(m_lock); + auto cct = static_cast(m_local_rados->cct()); + auto gather_ctx = new C_Gather( + cct, new C_TrackedOp(m_async_op_tracker, nullptr)); + { + Mutex::Locker locker(m_lock); - m_manual_stop = true; + m_manual_stop = true; - for (auto &kv : m_image_replayers) { - auto &image_replayer = kv.second; - image_replayer->stop(nullptr, true); + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->stop(gather_ctx->new_sub(), true); + } } + + gather_ctx->activate(); } template @@ -281,7 +293,7 @@ void InstanceReplayer::restart() for (auto &kv : m_image_replayers) { auto &image_replayer = kv.second; - image_replayer->restart(); + image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr)); } } @@ -323,7 +335,7 @@ void InstanceReplayer::start_image_replayer( } dout(10) << "global_image_id=" << global_image_id << dendl; - image_replayer->start(nullptr, false); + image_replayer->start(new C_TrackedOp(m_async_op_tracker, nullptr), false); } template