]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rbd-mirror: don't hold (stale) copy of local image journal pointer 35430/head
authorJason Dillaman <dillaman@redhat.com>
Mon, 1 Jun 2020 18:09:34 +0000 (14:09 -0400)
committerNathan Cutler <ncutler@suse.com>
Sat, 6 Jun 2020 07:50:46 +0000 (09:50 +0200)
The exclusive-lock manages its life cycle and can close the journal
at any point. This can result in rbd-mirror deferencing a freed pointer
or a journal state machine that is in an unexpected state.

Fixes: https://tracker.ceph.com/issues/45803
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
(cherry picked from commit 295483fbdc377d633addf5ec8ae2600a741db78c)

src/test/rbd_mirror/image_replayer/journal/test_mock_Replayer.cc
src/tools/rbd_mirror/image_replayer/journal/Replayer.cc
src/tools/rbd_mirror/image_replayer/journal/Replayer.h

index 7e233b8d6f7334ef63b18d64a5c7fc730b1b812f..9c5db0cf8394c5a08102d60b597cd7a6df5017c9 100644 (file)
@@ -464,6 +464,15 @@ public:
     return 0;
   }
 
+  void expect_local_journal_add_listener(
+      librbd::MockTestJournal& mock_local_journal,
+      librbd::journal::Listener** local_journal_listener) {
+    EXPECT_CALL(mock_local_journal, add_listener(_))
+      .WillOnce(SaveArg<0>(local_journal_listener));
+    expect_is_tag_owner(mock_local_journal, false);
+    expect_is_resync_requested(mock_local_journal, 0, false);
+  }
+
   int init_entry_replayer(MockReplayer& mock_replayer,
                           MockThreads& mock_threads,
                           MockReplayerListener& mock_replayer_listener,
@@ -480,10 +489,8 @@ public:
                              {librbd::journal::MirrorPeerClientMeta{}}, 0);
     expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                  0);
-    EXPECT_CALL(mock_local_journal, add_listener(_))
-      .WillOnce(SaveArg<0>(local_journal_listener));
-    expect_is_tag_owner(mock_local_journal, false);
-    expect_is_resync_requested(mock_local_journal, 0, false);
+    expect_local_journal_add_listener(mock_local_journal,
+                                      local_journal_listener);
     EXPECT_CALL(mock_remote_journaler, start_live_replay(_, _))
       .WillOnce(SaveArg<0>(remote_replay_handler));
     expect_notification(mock_threads, mock_replayer_listener);
@@ -562,15 +569,13 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitShutDown) {
                                         mock_local_journal_replay));
 }
 
-TEST_F(TestMockImageReplayerJournalReplayer, InitNoLocalJournal) {
+TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerError) {
   librbd::MockTestJournal mock_local_journal;
   librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx,
                                                 mock_local_journal};
   ::journal::MockJournaler mock_remote_journaler;
   MockReplayerListener mock_replayer_listener;
   MockThreads mock_threads{m_threads};
-
-  mock_local_image_ctx.journal = nullptr;
   MockStateBuilder mock_state_builder(mock_local_image_ctx,
                                       mock_remote_journaler,
                                       {});
@@ -582,6 +587,7 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitNoLocalJournal) {
 
   InSequence seq;
 
+  expect_init(mock_remote_journaler, -EINVAL);
   MockCloseImageRequest mock_close_image_request;
   expect_send(mock_close_image_request, 0);
 
@@ -590,7 +596,7 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitNoLocalJournal) {
   ASSERT_EQ(-EINVAL, init_ctx.wait());
 }
 
-TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerError) {
+TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerGetClientError) {
   librbd::MockTestJournal mock_local_journal;
   librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx,
                                                 mock_local_journal};
@@ -608,22 +614,28 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerError) {
 
   InSequence seq;
 
-  expect_init(mock_remote_journaler, -EINVAL);
+  expect_init(mock_remote_journaler, 0);
+  EXPECT_CALL(mock_remote_journaler, add_listener(_));
+  expect_get_cached_client(mock_remote_journaler, "local mirror uuid", {},
+                           {librbd::journal::MirrorPeerClientMeta{}}, -EINVAL);
   MockCloseImageRequest mock_close_image_request;
   expect_send(mock_close_image_request, 0);
+  EXPECT_CALL(mock_remote_journaler, remove_listener(_));
 
   C_SaferCond init_ctx;
   mock_replayer.init(&init_ctx);
   ASSERT_EQ(-EINVAL, init_ctx.wait());
 }
 
-TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerGetClientError) {
+TEST_F(TestMockImageReplayerJournalReplayer, InitNoLocalJournal) {
   librbd::MockTestJournal mock_local_journal;
   librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx,
                                                 mock_local_journal};
   ::journal::MockJournaler mock_remote_journaler;
   MockReplayerListener mock_replayer_listener;
   MockThreads mock_threads{m_threads};
+
+  mock_local_image_ctx.journal = nullptr;
   MockStateBuilder mock_state_builder(mock_local_image_ctx,
                                       mock_remote_journaler,
                                       {});
@@ -634,11 +646,11 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerGetClientError)
   expect_work_queue_repeatedly(mock_threads);
 
   InSequence seq;
-
   expect_init(mock_remote_journaler, 0);
   EXPECT_CALL(mock_remote_journaler, add_listener(_));
   expect_get_cached_client(mock_remote_journaler, "local mirror uuid", {},
-                           {librbd::journal::MirrorPeerClientMeta{}}, -EINVAL);
+                           {librbd::journal::MirrorPeerClientMeta{}}, 0);
+
   MockCloseImageRequest mock_close_image_request;
   expect_send(mock_close_image_request, 0);
   EXPECT_CALL(mock_remote_journaler, remove_listener(_));
@@ -1078,9 +1090,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, Replay) {
 
   // replay_flush
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, 0);
 
@@ -1154,9 +1169,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, DecodeError) {
 
   // replay_flush
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, 0);
 
@@ -1224,9 +1242,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, DelayedReplay) {
 
   // replay_flush
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, 0);
 
@@ -1648,6 +1669,7 @@ TEST_F(TestMockImageReplayerJournalReplayer, ReplayFlushShutDownError) {
 
   expect_try_pop_front(mock_remote_journaler, 1, true);
   expect_shut_down(mock_local_journal_replay, false, -EINVAL);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_notification(mock_threads, mock_replayer_listener);
   remote_replay_handler->handle_entries_available();
@@ -1655,7 +1677,6 @@ TEST_F(TestMockImageReplayerJournalReplayer, ReplayFlushShutDownError) {
   wait_for_notification();
   ASSERT_EQ(-EINVAL, mock_replayer.get_error_code());
 
-  EXPECT_CALL(mock_local_journal, remove_listener(_));
   MockCloseImageRequest mock_close_image_request;
   expect_send(mock_close_image_request, 0);
   expect_stop_replay(mock_remote_journaler, 0);
@@ -1702,6 +1723,7 @@ TEST_F(TestMockImageReplayerJournalReplayer, ReplayFlushStartError) {
 
   expect_try_pop_front(mock_remote_journaler, 1, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, nullptr, -EINVAL);
   expect_notification(mock_threads, mock_replayer_listener);
@@ -1710,7 +1732,6 @@ TEST_F(TestMockImageReplayerJournalReplayer, ReplayFlushStartError) {
   wait_for_notification();
   ASSERT_EQ(-EINVAL, mock_replayer.get_error_code());
 
-  EXPECT_CALL(mock_local_journal, remove_listener(_));
   MockCloseImageRequest mock_close_image_request;
   expect_send(mock_close_image_request, 0);
   expect_stop_replay(mock_remote_journaler, 0);
@@ -1761,9 +1782,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, GetTagError) {
                             true, 0, 0})};
   expect_try_pop_front(mock_remote_journaler, tag.tid, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, -EINVAL);
   expect_notification(mock_threads, mock_replayer_listener);
   remote_replay_handler->handle_entries_available();
@@ -1821,9 +1845,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, AllocateTagDemotion) {
 
   expect_try_pop_front(mock_remote_journaler, tag.tid, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_get_tag_data(mock_local_journal, {});
   expect_allocate_tag(mock_local_journal, 0);
@@ -1883,9 +1910,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, AllocateTagError) {
 
   expect_try_pop_front(mock_remote_journaler, tag.tid, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, -EINVAL);
   expect_notification(mock_threads, mock_replayer_listener);
@@ -1943,9 +1973,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, PreprocessError) {
 
   expect_try_pop_front(mock_remote_journaler, tag.tid, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, 0);
   EXPECT_CALL(mock_replay_entry, get_data());
@@ -2008,9 +2041,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, ProcessError) {
 
   expect_try_pop_front(mock_remote_journaler, tag.tid, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, 0);
   EXPECT_CALL(mock_replay_entry, get_data());
@@ -2081,9 +2117,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, ImageNameUpdated) {
 
   expect_try_pop_front(mock_remote_journaler, tag.tid, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, 0);
   EXPECT_CALL(mock_local_journal_replay, decode(_, _)).WillOnce(Return(0));
index f1552ac68ca591743fdb2ec9d406875e6428a9ef..af6802194424e9025a0b8a09c49b2c5ec4bf2a6c 100644 (file)
@@ -179,28 +179,16 @@ template <typename I>
 void Replayer<I>::init(Context* on_finish) {
   dout(10) << dendl;
 
-  ceph_assert(m_local_journal == nullptr);
   {
     auto local_image_ctx = m_state_builder->local_image_ctx;
     std::shared_lock image_locker{local_image_ctx->image_lock};
     m_image_spec = util::compute_image_spec(local_image_ctx->md_ctx,
                                             local_image_ctx->name);
-    m_local_journal = local_image_ctx->journal;
   }
 
   ceph_assert(m_on_init_shutdown == nullptr);
   m_on_init_shutdown = on_finish;
 
-  if (m_local_journal == nullptr) {
-    std::unique_lock locker{m_lock};
-    m_state = STATE_COMPLETE;
-    m_state_builder->remote_journaler = nullptr;
-
-    handle_replay_complete(locker, -EINVAL, "error accessing local journal");
-    close_local_image();
-    return;
-  }
-
   init_remote_journaler();
 }
 
@@ -302,13 +290,28 @@ void Replayer<I>::handle_init_remote_journaler(int r) {
     return;
   }
 
-  start_external_replay();
+  start_external_replay(locker);
 }
 
 template <typename I>
-void Replayer<I>::start_external_replay() {
+void Replayer<I>::start_external_replay(std::unique_lock<ceph::mutex>& locker) {
   dout(10) << dendl;
 
+  auto local_image_ctx = m_state_builder->local_image_ctx;
+  std::shared_lock local_image_locker{local_image_ctx->image_lock};
+
+  ceph_assert(m_local_journal == nullptr);
+  m_local_journal = local_image_ctx->journal;
+  if (m_local_journal == nullptr) {
+    local_image_locker.unlock();
+
+    derr << "local image journal closed" << dendl;
+    handle_replay_complete(locker, -EINVAL, "error accessing local journal");
+    close_local_image();
+    return;
+  }
+
+  // safe to hold pointer to journal after external playback starts
   Context *start_ctx = create_context_callback<
     Replayer, &Replayer<I>::handle_start_external_replay>(this);
   m_local_journal->start_external_replay(&m_local_journal_replay, start_ctx);
@@ -336,7 +339,36 @@ void Replayer<I>::handle_start_external_replay(int r) {
 
   m_state = STATE_REPLAYING;
 
+  // check for resync/promotion state after adding listener
+  if (!add_local_journal_listener(locker)) {
+    return;
+  }
+
+  // start remote journal replay
+  m_event_preprocessor = EventPreprocessor<I>::create(
+    *m_state_builder->local_image_ctx, *m_state_builder->remote_journaler,
+    m_local_mirror_uuid, &m_state_builder->remote_client_meta,
+    m_threads->work_queue);
+  m_replay_status_formatter = ReplayStatusFormatter<I>::create(
+    m_state_builder->remote_journaler, m_local_mirror_uuid);
+
+  auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
+  double poll_seconds = cct->_conf.get_val<double>(
+    "rbd_mirror_journal_poll_age");
+  m_remote_replay_handler = new RemoteReplayHandler(this);
+  m_state_builder->remote_journaler->start_live_replay(m_remote_replay_handler,
+                                                       poll_seconds);
+
+  notify_status_updated();
+}
+
+template <typename I>
+bool Replayer<I>::add_local_journal_listener(
+    std::unique_lock<ceph::mutex>& locker) {
+  dout(10) << dendl;
+
   // listen for promotion and resync requests against local journal
+  ceph_assert(m_local_journal_listener == nullptr);
   m_local_journal_listener = new LocalJournalListener(this);
   m_local_journal->add_listener(m_local_journal_listener);
 
@@ -345,38 +377,23 @@ void Replayer<I>::handle_start_external_replay(int r) {
   if (m_local_journal->is_tag_owner()) {
     dout(10) << "local image force-promoted" << dendl;
     handle_replay_complete(locker, 0, "force promoted");
-    return;
+    return false;
   }
 
   bool resync_requested = false;
-  r = m_local_journal->is_resync_requested(&resync_requested);
+  int r = m_local_journal->is_resync_requested(&resync_requested);
   if (r < 0) {
     dout(10) << "failed to determine resync state: " << cpp_strerror(r)
              << dendl;
     handle_replay_complete(locker, r, "error parsing resync state");
-    return;
+    return false;
   } else if (resync_requested) {
     dout(10) << "local image resync requested" << dendl;
     handle_replay_complete(locker, 0, "resync requested");
-    return;
+    return false;
   }
 
-  // start remote journal replay
-  m_event_preprocessor = EventPreprocessor<I>::create(
-    *m_state_builder->local_image_ctx, *m_state_builder->remote_journaler,
-    m_local_mirror_uuid, &m_state_builder->remote_client_meta,
-    m_threads->work_queue);
-  m_replay_status_formatter = ReplayStatusFormatter<I>::create(
-    m_state_builder->remote_journaler, m_local_mirror_uuid);
-
-  auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
-  double poll_seconds = cct->_conf.get_val<double>(
-    "rbd_mirror_journal_poll_age");
-  m_remote_replay_handler = new RemoteReplayHandler(this);
-  m_state_builder->remote_journaler->start_live_replay(m_remote_replay_handler,
-                                                       poll_seconds);
-
-  notify_status_updated();
+  return true;
 }
 
 template <typename I>
@@ -810,19 +827,41 @@ void Replayer<I>::replay_flush() {
 
 template <typename I>
 void Replayer<I>::handle_replay_flush_shut_down(int r) {
-  {
-    std::unique_lock locker{m_lock};
-    ceph_assert(m_local_journal != nullptr);
-    m_local_journal->stop_external_replay();
-    m_local_journal_replay = nullptr;
-  }
-
+  std::unique_lock locker{m_lock};
   dout(10) << "r=" << r << dendl;
+
+  ceph_assert(m_local_journal != nullptr);
+  ceph_assert(m_local_journal_listener != nullptr);
+
+  // blocks if listener notification is in-progress
+  m_local_journal->remove_listener(m_local_journal_listener);
+  delete m_local_journal_listener;
+  m_local_journal_listener = nullptr;
+
+  m_local_journal->stop_external_replay();
+  m_local_journal_replay = nullptr;
+  m_local_journal.reset();
+
   if (r < 0) {
+    locker.unlock();
+
     handle_replay_flush(r);
     return;
   }
 
+  // journal might have been closed now that we stopped external replay
+  auto local_image_ctx = m_state_builder->local_image_ctx;
+  std::shared_lock local_image_locker{local_image_ctx->image_lock};
+  m_local_journal = local_image_ctx->journal;
+  if (m_local_journal == nullptr) {
+    local_image_locker.unlock();
+    locker.unlock();
+
+    derr << "local image journal closed" << dendl;
+    handle_replay_flush(-EINVAL);
+    return;
+  }
+
   auto ctx = create_context_callback<
     Replayer<I>, &Replayer<I>::handle_replay_flush>(this);
   m_local_journal->start_external_replay(&m_local_journal_replay, ctx);
@@ -830,16 +869,24 @@ void Replayer<I>::handle_replay_flush_shut_down(int r) {
 
 template <typename I>
 void Replayer<I>::handle_replay_flush(int r) {
+  std::unique_lock locker{m_lock};
   dout(10) << "r=" << r << dendl;
   if (r < 0) {
     derr << "replay flush encountered an error: " << cpp_strerror(r) << dendl;
-    handle_replay_complete(r, "replay flush encountered an error");
+    handle_replay_complete(locker, r, "replay flush encountered an error");
+    m_event_replay_tracker.finish_op();
+    return;
+  } else if (is_replay_complete(locker)) {
     m_event_replay_tracker.finish_op();
     return;
-  } else if (is_replay_complete()) {
+  }
+
+  // check for resync/promotion state after adding listener
+  if (!add_local_journal_listener(locker)) {
     m_event_replay_tracker.finish_op();
     return;
   }
+  locker.unlock();
 
   get_remote_tag();
 }
index f5d59a07f8a4733d1f9e864dab4be2e815aea1fe..ec6e44f1ae6495d92208713756c22c4964a1f2aa 100644 (file)
@@ -235,9 +235,11 @@ private:
   void init_remote_journaler();
   void handle_init_remote_journaler(int r);
 
-  void start_external_replay();
+  void start_external_replay(std::unique_lock<ceph::mutex>& locker);
   void handle_start_external_replay(int r);
 
+  bool add_local_journal_listener(std::unique_lock<ceph::mutex>& locker);
+
   bool notify_init_complete(std::unique_lock<ceph::mutex>& locker);
 
   void shut_down_local_journal_replay();