]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rbd-mirror: don't hold (stale) copy of local image journal pointer 35339/head
authorJason Dillaman <dillaman@redhat.com>
Mon, 1 Jun 2020 18:09:34 +0000 (14:09 -0400)
committerJason Dillaman <dillaman@redhat.com>
Tue, 2 Jun 2020 14:55:51 +0000 (10:55 -0400)
The exclusive-lock manages its life cycle and can close the journal
at any point. This can result in rbd-mirror deferencing a freed pointer
or a journal state machine that is in an unexpected state.

Fixes: https://tracker.ceph.com/issues/45803
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/test/rbd_mirror/image_replayer/journal/test_mock_Replayer.cc
src/tools/rbd_mirror/image_replayer/journal/Replayer.cc
src/tools/rbd_mirror/image_replayer/journal/Replayer.h

index fdde6dfa6d2a3d66acc04e0ad6c917d32df2157a..211216eeb941952e6659cce4f4d50917cb715eb5 100644 (file)
@@ -464,6 +464,15 @@ public:
     return 0;
   }
 
+  void expect_local_journal_add_listener(
+      librbd::MockTestJournal& mock_local_journal,
+      librbd::journal::Listener** local_journal_listener) {
+    EXPECT_CALL(mock_local_journal, add_listener(_))
+      .WillOnce(SaveArg<0>(local_journal_listener));
+    expect_is_tag_owner(mock_local_journal, false);
+    expect_is_resync_requested(mock_local_journal, 0, false);
+  }
+
   int init_entry_replayer(MockReplayer& mock_replayer,
                           MockThreads& mock_threads,
                           MockReplayerListener& mock_replayer_listener,
@@ -480,10 +489,8 @@ public:
                              {librbd::journal::MirrorPeerClientMeta{}}, 0);
     expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                  0);
-    EXPECT_CALL(mock_local_journal, add_listener(_))
-      .WillOnce(SaveArg<0>(local_journal_listener));
-    expect_is_tag_owner(mock_local_journal, false);
-    expect_is_resync_requested(mock_local_journal, 0, false);
+    expect_local_journal_add_listener(mock_local_journal,
+                                      local_journal_listener);
     EXPECT_CALL(mock_remote_journaler, start_live_replay(_, _))
       .WillOnce(SaveArg<0>(remote_replay_handler));
     expect_notification(mock_threads, mock_replayer_listener);
@@ -562,15 +569,13 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitShutDown) {
                                         mock_local_journal_replay));
 }
 
-TEST_F(TestMockImageReplayerJournalReplayer, InitNoLocalJournal) {
+TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerError) {
   librbd::MockTestJournal mock_local_journal;
   librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx,
                                                 mock_local_journal};
   ::journal::MockJournaler mock_remote_journaler;
   MockReplayerListener mock_replayer_listener;
   MockThreads mock_threads{m_threads};
-
-  mock_local_image_ctx.journal = nullptr;
   MockStateBuilder mock_state_builder(mock_local_image_ctx,
                                       mock_remote_journaler,
                                       {});
@@ -582,6 +587,7 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitNoLocalJournal) {
 
   InSequence seq;
 
+  expect_init(mock_remote_journaler, -EINVAL);
   MockCloseImageRequest mock_close_image_request;
   expect_send(mock_close_image_request, 0);
 
@@ -590,7 +596,7 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitNoLocalJournal) {
   ASSERT_EQ(-EINVAL, init_ctx.wait());
 }
 
-TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerError) {
+TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerGetClientError) {
   librbd::MockTestJournal mock_local_journal;
   librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx,
                                                 mock_local_journal};
@@ -608,22 +614,28 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerError) {
 
   InSequence seq;
 
-  expect_init(mock_remote_journaler, -EINVAL);
+  expect_init(mock_remote_journaler, 0);
+  EXPECT_CALL(mock_remote_journaler, add_listener(_));
+  expect_get_cached_client(mock_remote_journaler, "local mirror uuid", {},
+                           {librbd::journal::MirrorPeerClientMeta{}}, -EINVAL);
   MockCloseImageRequest mock_close_image_request;
   expect_send(mock_close_image_request, 0);
+  EXPECT_CALL(mock_remote_journaler, remove_listener(_));
 
   C_SaferCond init_ctx;
   mock_replayer.init(&init_ctx);
   ASSERT_EQ(-EINVAL, init_ctx.wait());
 }
 
-TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerGetClientError) {
+TEST_F(TestMockImageReplayerJournalReplayer, InitNoLocalJournal) {
   librbd::MockTestJournal mock_local_journal;
   librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx,
                                                 mock_local_journal};
   ::journal::MockJournaler mock_remote_journaler;
   MockReplayerListener mock_replayer_listener;
   MockThreads mock_threads{m_threads};
+
+  mock_local_image_ctx.journal = nullptr;
   MockStateBuilder mock_state_builder(mock_local_image_ctx,
                                       mock_remote_journaler,
                                       {});
@@ -634,11 +646,11 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerGetClientError)
   expect_work_queue_repeatedly(mock_threads);
 
   InSequence seq;
-
   expect_init(mock_remote_journaler, 0);
   EXPECT_CALL(mock_remote_journaler, add_listener(_));
   expect_get_cached_client(mock_remote_journaler, "local mirror uuid", {},
-                           {librbd::journal::MirrorPeerClientMeta{}}, -EINVAL);
+                           {librbd::journal::MirrorPeerClientMeta{}}, 0);
+
   MockCloseImageRequest mock_close_image_request;
   expect_send(mock_close_image_request, 0);
   EXPECT_CALL(mock_remote_journaler, remove_listener(_));
@@ -1078,9 +1090,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, Replay) {
 
   // replay_flush
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, 0);
 
@@ -1156,9 +1171,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, DecodeError) {
 
   // replay_flush
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, 0);
 
@@ -1226,9 +1244,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, DelayedReplay) {
 
   // replay_flush
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, 0);
 
@@ -1651,6 +1672,7 @@ TEST_F(TestMockImageReplayerJournalReplayer, ReplayFlushShutDownError) {
 
   expect_try_pop_front(mock_remote_journaler, 1, true);
   expect_shut_down(mock_local_journal_replay, false, -EINVAL);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_notification(mock_threads, mock_replayer_listener);
   remote_replay_handler->handle_entries_available();
@@ -1658,7 +1680,6 @@ TEST_F(TestMockImageReplayerJournalReplayer, ReplayFlushShutDownError) {
   wait_for_notification();
   ASSERT_EQ(-EINVAL, mock_replayer.get_error_code());
 
-  EXPECT_CALL(mock_local_journal, remove_listener(_));
   MockCloseImageRequest mock_close_image_request;
   expect_send(mock_close_image_request, 0);
   expect_stop_replay(mock_remote_journaler, 0);
@@ -1705,6 +1726,7 @@ TEST_F(TestMockImageReplayerJournalReplayer, ReplayFlushStartError) {
 
   expect_try_pop_front(mock_remote_journaler, 1, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, nullptr, -EINVAL);
   expect_notification(mock_threads, mock_replayer_listener);
@@ -1713,7 +1735,6 @@ TEST_F(TestMockImageReplayerJournalReplayer, ReplayFlushStartError) {
   wait_for_notification();
   ASSERT_EQ(-EINVAL, mock_replayer.get_error_code());
 
-  EXPECT_CALL(mock_local_journal, remove_listener(_));
   MockCloseImageRequest mock_close_image_request;
   expect_send(mock_close_image_request, 0);
   expect_stop_replay(mock_remote_journaler, 0);
@@ -1764,9 +1785,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, GetTagError) {
                             true, 0, 0})};
   expect_try_pop_front(mock_remote_journaler, tag.tid, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, -EINVAL);
   expect_notification(mock_threads, mock_replayer_listener);
   remote_replay_handler->handle_entries_available();
@@ -1824,9 +1848,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, AllocateTagDemotion) {
 
   expect_try_pop_front(mock_remote_journaler, tag.tid, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_get_tag_data(mock_local_journal, {});
   expect_allocate_tag(mock_local_journal, 0);
@@ -1887,9 +1914,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, AllocateTagError) {
 
   expect_try_pop_front(mock_remote_journaler, tag.tid, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, -EINVAL);
   expect_notification(mock_threads, mock_replayer_listener);
@@ -1947,9 +1977,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, PreprocessError) {
 
   expect_try_pop_front(mock_remote_journaler, tag.tid, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, 0);
   EXPECT_CALL(mock_replay_entry, get_data());
@@ -2012,9 +2045,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, ProcessError) {
 
   expect_try_pop_front(mock_remote_journaler, tag.tid, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, 0);
   EXPECT_CALL(mock_replay_entry, get_data());
@@ -2086,9 +2122,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, ImageNameUpdated) {
 
   expect_try_pop_front(mock_remote_journaler, tag.tid, true);
   expect_shut_down(mock_local_journal_replay, false, 0);
+  EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
   expect_start_external_replay(mock_local_journal, &mock_local_journal_replay,
                                0);
+  expect_local_journal_add_listener(mock_local_journal,
+                                    &local_journal_listener);
   expect_get_tag(mock_remote_journaler, tag, 0);
   expect_allocate_tag(mock_local_journal, 0);
   EXPECT_CALL(mock_local_journal_replay, decode(_, _)).WillOnce(Return(0));
index 487ed1f90da4261deb7dc7757f5ff0e94d5bc1d6..616ac4d1dcbc2c3a0772cddaa121ef263ef970eb 100644 (file)
@@ -179,28 +179,16 @@ template <typename I>
 void Replayer<I>::init(Context* on_finish) {
   dout(10) << dendl;
 
-  ceph_assert(m_local_journal == nullptr);
   {
     auto local_image_ctx = m_state_builder->local_image_ctx;
     std::shared_lock image_locker{local_image_ctx->image_lock};
     m_image_spec = util::compute_image_spec(local_image_ctx->md_ctx,
                                             local_image_ctx->name);
-    m_local_journal = local_image_ctx->journal;
   }
 
   ceph_assert(m_on_init_shutdown == nullptr);
   m_on_init_shutdown = on_finish;
 
-  if (m_local_journal == nullptr) {
-    std::unique_lock locker{m_lock};
-    m_state = STATE_COMPLETE;
-    m_state_builder->remote_journaler = nullptr;
-
-    handle_replay_complete(locker, -EINVAL, "error accessing local journal");
-    close_local_image();
-    return;
-  }
-
   init_remote_journaler();
 }
 
@@ -302,13 +290,28 @@ void Replayer<I>::handle_init_remote_journaler(int r) {
     return;
   }
 
-  start_external_replay();
+  start_external_replay(locker);
 }
 
 template <typename I>
-void Replayer<I>::start_external_replay() {
+void Replayer<I>::start_external_replay(std::unique_lock<ceph::mutex>& locker) {
   dout(10) << dendl;
 
+  auto local_image_ctx = m_state_builder->local_image_ctx;
+  std::shared_lock local_image_locker{local_image_ctx->image_lock};
+
+  ceph_assert(m_local_journal == nullptr);
+  m_local_journal = local_image_ctx->journal;
+  if (m_local_journal == nullptr) {
+    local_image_locker.unlock();
+
+    derr << "local image journal closed" << dendl;
+    handle_replay_complete(locker, -EINVAL, "error accessing local journal");
+    close_local_image();
+    return;
+  }
+
+  // safe to hold pointer to journal after external playback starts
   Context *start_ctx = create_context_callback<
     Replayer, &Replayer<I>::handle_start_external_replay>(this);
   m_local_journal->start_external_replay(&m_local_journal_replay, start_ctx);
@@ -336,7 +339,36 @@ void Replayer<I>::handle_start_external_replay(int r) {
 
   m_state = STATE_REPLAYING;
 
+  // check for resync/promotion state after adding listener
+  if (!add_local_journal_listener(locker)) {
+    return;
+  }
+
+  // start remote journal replay
+  m_event_preprocessor = EventPreprocessor<I>::create(
+    *m_state_builder->local_image_ctx, *m_state_builder->remote_journaler,
+    m_local_mirror_uuid, &m_state_builder->remote_client_meta,
+    m_threads->work_queue);
+  m_replay_status_formatter = ReplayStatusFormatter<I>::create(
+    m_state_builder->remote_journaler, m_local_mirror_uuid);
+
+  auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
+  double poll_seconds = cct->_conf.get_val<double>(
+    "rbd_mirror_journal_poll_age");
+  m_remote_replay_handler = new RemoteReplayHandler(this);
+  m_state_builder->remote_journaler->start_live_replay(m_remote_replay_handler,
+                                                       poll_seconds);
+
+  notify_status_updated();
+}
+
+template <typename I>
+bool Replayer<I>::add_local_journal_listener(
+    std::unique_lock<ceph::mutex>& locker) {
+  dout(10) << dendl;
+
   // listen for promotion and resync requests against local journal
+  ceph_assert(m_local_journal_listener == nullptr);
   m_local_journal_listener = new LocalJournalListener(this);
   m_local_journal->add_listener(m_local_journal_listener);
 
@@ -345,38 +377,23 @@ void Replayer<I>::handle_start_external_replay(int r) {
   if (m_local_journal->is_tag_owner()) {
     dout(10) << "local image force-promoted" << dendl;
     handle_replay_complete(locker, 0, "force promoted");
-    return;
+    return false;
   }
 
   bool resync_requested = false;
-  r = m_local_journal->is_resync_requested(&resync_requested);
+  int r = m_local_journal->is_resync_requested(&resync_requested);
   if (r < 0) {
     dout(10) << "failed to determine resync state: " << cpp_strerror(r)
              << dendl;
     handle_replay_complete(locker, r, "error parsing resync state");
-    return;
+    return false;
   } else if (resync_requested) {
     dout(10) << "local image resync requested" << dendl;
     handle_replay_complete(locker, 0, "resync requested");
-    return;
+    return false;
   }
 
-  // start remote journal replay
-  m_event_preprocessor = EventPreprocessor<I>::create(
-    *m_state_builder->local_image_ctx, *m_state_builder->remote_journaler,
-    m_local_mirror_uuid, &m_state_builder->remote_client_meta,
-    m_threads->work_queue);
-  m_replay_status_formatter = ReplayStatusFormatter<I>::create(
-    m_state_builder->remote_journaler, m_local_mirror_uuid);
-
-  auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
-  double poll_seconds = cct->_conf.get_val<double>(
-    "rbd_mirror_journal_poll_age");
-  m_remote_replay_handler = new RemoteReplayHandler(this);
-  m_state_builder->remote_journaler->start_live_replay(m_remote_replay_handler,
-                                                       poll_seconds);
-
-  notify_status_updated();
+  return true;
 }
 
 template <typename I>
@@ -810,19 +827,41 @@ void Replayer<I>::replay_flush() {
 
 template <typename I>
 void Replayer<I>::handle_replay_flush_shut_down(int r) {
-  {
-    std::unique_lock locker{m_lock};
-    ceph_assert(m_local_journal != nullptr);
-    m_local_journal->stop_external_replay();
-    m_local_journal_replay = nullptr;
-  }
-
+  std::unique_lock locker{m_lock};
   dout(10) << "r=" << r << dendl;
+
+  ceph_assert(m_local_journal != nullptr);
+  ceph_assert(m_local_journal_listener != nullptr);
+
+  // blocks if listener notification is in-progress
+  m_local_journal->remove_listener(m_local_journal_listener);
+  delete m_local_journal_listener;
+  m_local_journal_listener = nullptr;
+
+  m_local_journal->stop_external_replay();
+  m_local_journal_replay = nullptr;
+  m_local_journal.reset();
+
   if (r < 0) {
+    locker.unlock();
+
     handle_replay_flush(r);
     return;
   }
 
+  // journal might have been closed now that we stopped external replay
+  auto local_image_ctx = m_state_builder->local_image_ctx;
+  std::shared_lock local_image_locker{local_image_ctx->image_lock};
+  m_local_journal = local_image_ctx->journal;
+  if (m_local_journal == nullptr) {
+    local_image_locker.unlock();
+    locker.unlock();
+
+    derr << "local image journal closed" << dendl;
+    handle_replay_flush(-EINVAL);
+    return;
+  }
+
   auto ctx = create_context_callback<
     Replayer<I>, &Replayer<I>::handle_replay_flush>(this);
   m_local_journal->start_external_replay(&m_local_journal_replay, ctx);
@@ -830,16 +869,24 @@ void Replayer<I>::handle_replay_flush_shut_down(int r) {
 
 template <typename I>
 void Replayer<I>::handle_replay_flush(int r) {
+  std::unique_lock locker{m_lock};
   dout(10) << "r=" << r << dendl;
   if (r < 0) {
     derr << "replay flush encountered an error: " << cpp_strerror(r) << dendl;
-    handle_replay_complete(r, "replay flush encountered an error");
+    handle_replay_complete(locker, r, "replay flush encountered an error");
+    m_event_replay_tracker.finish_op();
+    return;
+  } else if (is_replay_complete(locker)) {
     m_event_replay_tracker.finish_op();
     return;
-  } else if (is_replay_complete()) {
+  }
+
+  // check for resync/promotion state after adding listener
+  if (!add_local_journal_listener(locker)) {
     m_event_replay_tracker.finish_op();
     return;
   }
+  locker.unlock();
 
   get_remote_tag();
 }
index 35569502945f76709c3cc641cb2508c5c42a0863..f650b48b0ab131ddc5d7ff1a55108cfbffeaf56e 100644 (file)
@@ -235,9 +235,11 @@ private:
   void init_remote_journaler();
   void handle_init_remote_journaler(int r);
 
-  void start_external_replay();
+  void start_external_replay(std::unique_lock<ceph::mutex>& locker);
   void handle_start_external_replay(int r);
 
+  bool add_local_journal_listener(std::unique_lock<ceph::mutex>& locker);
+
   bool notify_init_complete(std::unique_lock<ceph::mutex>& locker);
 
   void shut_down_local_journal_replay();