]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rbd-mirror: track in-flight start/stop/restart in instance replayer
authorJason Dillaman <dillaman@redhat.com>
Fri, 17 Apr 2020 15:17:05 +0000 (11:17 -0400)
committerMykola Golub <mgolub@suse.com>
Wed, 9 Jun 2021 18:24:17 +0000 (21:24 +0300)
The shut down waits for in-flight ops to complete but the
start/stop/restart operations were previously not tracked. This
could cause a potential race and crash between an image replayer
operation and the instance replayer shutting down.

Fixes: https://tracker.ceph.com/issues/45072
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
(cherry picked from commit 31140a940ea1909c4b5d68ef4593cb582a527354)

Conflicts:
src/tools/rbd_mirror/InstanceReplayer.cc:
                Mutex::Locker vs std::lock_guard,
                m_local_rados->cct() vs m_local_io_ctx.cct(),
                no stop(Context *on_finish) function.

src/test/rbd_mirror/test_mock_InstanceReplayer.cc
src/tools/rbd_mirror/InstanceReplayer.cc

index 79dce7ac29674b8549d73138f588260dc879f758..5527dd42f5f43762f08e7bc1182d04eff99c734c 100644 (file)
@@ -87,7 +87,7 @@ struct ImageReplayer<librbd::MockTestImageCtx> {
   MOCK_METHOD0(destroy, void());
   MOCK_METHOD2(start, void(Context *, bool));
   MOCK_METHOD2(stop, void(Context *, bool));
-  MOCK_METHOD0(restart, void());
+  MOCK_METHOD1(restart, void(Context*));
   MOCK_METHOD0(flush, void());
   MOCK_METHOD2(print_status, void(Formatter *, stringstream *));
   MOCK_METHOD2(add_peer, void(const std::string &, librados::IoCtx &));
@@ -192,7 +192,8 @@ TEST_F(TestMockInstanceReplayer, AcquireReleaseImage) {
   EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true));
   EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false));
   EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false));
-  EXPECT_CALL(mock_image_replayer, start(nullptr, false));
+  EXPECT_CALL(mock_image_replayer, start(_, false))
+    .WillOnce(CompleteContext(0));
   expect_work_queue(mock_threads);
 
   instance_replayer.acquire_image(&mock_instance_watcher, global_image_id,
@@ -261,7 +262,8 @@ TEST_F(TestMockInstanceReplayer, RemoveFinishedImage) {
   EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true));
   EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false));
   EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false));
-  EXPECT_CALL(mock_image_replayer, start(nullptr, false));
+  EXPECT_CALL(mock_image_replayer, start(_, false))
+    .WillOnce(CompleteContext(0));
   expect_work_queue(mock_threads);
 
   instance_replayer.acquire_image(&mock_instance_watcher, global_image_id,
@@ -332,7 +334,8 @@ TEST_F(TestMockInstanceReplayer, Reacquire) {
   EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true));
   EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false));
   EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false));
-  EXPECT_CALL(mock_image_replayer, start(nullptr, false));
+  EXPECT_CALL(mock_image_replayer, start(_, false))
+    .WillOnce(CompleteContext(0));
   expect_work_queue(mock_threads);
 
   C_SaferCond on_acquire1;
@@ -342,7 +345,8 @@ TEST_F(TestMockInstanceReplayer, Reacquire) {
 
   // Re-acquire
   EXPECT_CALL(mock_image_replayer, set_finished(false));
-  EXPECT_CALL(mock_image_replayer, restart());
+  EXPECT_CALL(mock_image_replayer, restart(_))
+    .WillOnce(CompleteContext(0));
   expect_work_queue(mock_threads);
 
   C_SaferCond on_acquire2;
index c0086a48f5b7dc122d99c716aeec4707b9af5493..b0e4463dac6734498ff338a2cd2f2b7fff01562b 100644 (file)
@@ -168,7 +168,7 @@ void InstanceReplayer<I>::acquire_image(InstanceWatcher<I> *instance_watcher,
     // detect if the image has been deleted while the leader was offline
     auto& image_replayer = it->second;
     image_replayer->set_finished(false);
-    image_replayer->restart();
+    image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr));
   }
 
   m_threads->work_queue->queue(on_finish, 0);
@@ -217,7 +217,7 @@ void InstanceReplayer<I>::remove_peer_image(const std::string &global_image_id,
     // it will eventually detect that the peer image is missing and
     // determine if a delete propagation is required.
     auto image_replayer = it->second;
-    image_replayer->restart();
+    image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr));
   }
   m_threads->work_queue->queue(on_finish, 0);
 }
@@ -249,10 +249,15 @@ void InstanceReplayer<I>::start()
 
   m_manual_stop = false;
 
+  auto cct = static_cast<CephContext *>(m_local_rados->cct());
+  auto gather_ctx = new C_Gather(
+    cct, new C_TrackedOp(m_async_op_tracker, nullptr));
   for (auto &kv : m_image_replayers) {
     auto &image_replayer = kv.second;
-    image_replayer->start(nullptr, true);
+    image_replayer->start(gather_ctx->new_sub(), true);
   }
+
+  gather_ctx->activate();
 }
 
 template <typename I>
@@ -260,14 +265,21 @@ void InstanceReplayer<I>::stop()
 {
   dout(10) << dendl;
 
-  Mutex::Locker locker(m_lock);
+  auto cct = static_cast<CephContext *>(m_local_rados->cct());
+  auto gather_ctx = new C_Gather(
+    cct, new C_TrackedOp(m_async_op_tracker, nullptr));
+  {
+    Mutex::Locker locker(m_lock);
 
-  m_manual_stop = true;
+    m_manual_stop = true;
 
-  for (auto &kv : m_image_replayers) {
-    auto &image_replayer = kv.second;
-    image_replayer->stop(nullptr, true);
+    for (auto &kv : m_image_replayers) {
+      auto &image_replayer = kv.second;
+      image_replayer->stop(gather_ctx->new_sub(), true);
+    }
   }
+
+  gather_ctx->activate();
 }
 
 template <typename I>
@@ -281,7 +293,7 @@ void InstanceReplayer<I>::restart()
 
   for (auto &kv : m_image_replayers) {
     auto &image_replayer = kv.second;
-    image_replayer->restart();
+    image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr));
   }
 }
 
@@ -323,7 +335,7 @@ void InstanceReplayer<I>::start_image_replayer(
   }
 
   dout(10) << "global_image_id=" << global_image_id << dendl;
-  image_replayer->start(nullptr, false);
+  image_replayer->start(new C_TrackedOp(m_async_op_tracker, nullptr), false);
 }
 
 template <typename I>