From e2012764dce4bd05d9198cc0af422b54cac208c4 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Mon, 1 Jun 2020 14:09:34 -0400 Subject: [PATCH] rbd-mirror: don't hold (stale) copy of local image journal pointer The exclusive-lock manages its life cycle and can close the journal at any point. This can result in rbd-mirror deferencing a freed pointer or a journal state machine that is in an unexpected state. Fixes: https://tracker.ceph.com/issues/45803 Signed-off-by: Jason Dillaman (cherry picked from commit 295483fbdc377d633addf5ec8ae2600a741db78c) --- .../journal/test_mock_Replayer.cc | 67 +++++++-- .../image_replayer/journal/Replayer.cc | 133 ++++++++++++------ .../image_replayer/journal/Replayer.h | 4 +- 3 files changed, 146 insertions(+), 58 deletions(-) diff --git a/src/test/rbd_mirror/image_replayer/journal/test_mock_Replayer.cc b/src/test/rbd_mirror/image_replayer/journal/test_mock_Replayer.cc index 7e233b8d6f733..9c5db0cf8394c 100644 --- a/src/test/rbd_mirror/image_replayer/journal/test_mock_Replayer.cc +++ b/src/test/rbd_mirror/image_replayer/journal/test_mock_Replayer.cc @@ -464,6 +464,15 @@ public: return 0; } + void expect_local_journal_add_listener( + librbd::MockTestJournal& mock_local_journal, + librbd::journal::Listener** local_journal_listener) { + EXPECT_CALL(mock_local_journal, add_listener(_)) + .WillOnce(SaveArg<0>(local_journal_listener)); + expect_is_tag_owner(mock_local_journal, false); + expect_is_resync_requested(mock_local_journal, 0, false); + } + int init_entry_replayer(MockReplayer& mock_replayer, MockThreads& mock_threads, MockReplayerListener& mock_replayer_listener, @@ -480,10 +489,8 @@ public: {librbd::journal::MirrorPeerClientMeta{}}, 0); expect_start_external_replay(mock_local_journal, &mock_local_journal_replay, 0); - EXPECT_CALL(mock_local_journal, add_listener(_)) - .WillOnce(SaveArg<0>(local_journal_listener)); - expect_is_tag_owner(mock_local_journal, false); - expect_is_resync_requested(mock_local_journal, 0, false); + expect_local_journal_add_listener(mock_local_journal, + local_journal_listener); EXPECT_CALL(mock_remote_journaler, start_live_replay(_, _)) .WillOnce(SaveArg<0>(remote_replay_handler)); expect_notification(mock_threads, mock_replayer_listener); @@ -562,15 +569,13 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitShutDown) { mock_local_journal_replay)); } -TEST_F(TestMockImageReplayerJournalReplayer, InitNoLocalJournal) { +TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerError) { librbd::MockTestJournal mock_local_journal; librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx, mock_local_journal}; ::journal::MockJournaler mock_remote_journaler; MockReplayerListener mock_replayer_listener; MockThreads mock_threads{m_threads}; - - mock_local_image_ctx.journal = nullptr; MockStateBuilder mock_state_builder(mock_local_image_ctx, mock_remote_journaler, {}); @@ -582,6 +587,7 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitNoLocalJournal) { InSequence seq; + expect_init(mock_remote_journaler, -EINVAL); MockCloseImageRequest mock_close_image_request; expect_send(mock_close_image_request, 0); @@ -590,7 +596,7 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitNoLocalJournal) { ASSERT_EQ(-EINVAL, init_ctx.wait()); } -TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerError) { +TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerGetClientError) { librbd::MockTestJournal mock_local_journal; librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx, mock_local_journal}; @@ -608,22 +614,28 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerError) { InSequence seq; - expect_init(mock_remote_journaler, -EINVAL); + expect_init(mock_remote_journaler, 0); + EXPECT_CALL(mock_remote_journaler, add_listener(_)); + expect_get_cached_client(mock_remote_journaler, "local mirror uuid", {}, + {librbd::journal::MirrorPeerClientMeta{}}, -EINVAL); MockCloseImageRequest mock_close_image_request; expect_send(mock_close_image_request, 0); + EXPECT_CALL(mock_remote_journaler, remove_listener(_)); C_SaferCond init_ctx; mock_replayer.init(&init_ctx); ASSERT_EQ(-EINVAL, init_ctx.wait()); } -TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerGetClientError) { +TEST_F(TestMockImageReplayerJournalReplayer, InitNoLocalJournal) { librbd::MockTestJournal mock_local_journal; librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx, mock_local_journal}; ::journal::MockJournaler mock_remote_journaler; MockReplayerListener mock_replayer_listener; MockThreads mock_threads{m_threads}; + + mock_local_image_ctx.journal = nullptr; MockStateBuilder mock_state_builder(mock_local_image_ctx, mock_remote_journaler, {}); @@ -634,11 +646,11 @@ TEST_F(TestMockImageReplayerJournalReplayer, InitRemoteJournalerGetClientError) expect_work_queue_repeatedly(mock_threads); InSequence seq; - expect_init(mock_remote_journaler, 0); EXPECT_CALL(mock_remote_journaler, add_listener(_)); expect_get_cached_client(mock_remote_journaler, "local mirror uuid", {}, - {librbd::journal::MirrorPeerClientMeta{}}, -EINVAL); + {librbd::journal::MirrorPeerClientMeta{}}, 0); + MockCloseImageRequest mock_close_image_request; expect_send(mock_close_image_request, 0); EXPECT_CALL(mock_remote_journaler, remove_listener(_)); @@ -1078,9 +1090,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, Replay) { // replay_flush expect_shut_down(mock_local_journal_replay, false, 0); + EXPECT_CALL(mock_local_journal, remove_listener(_)); EXPECT_CALL(mock_local_journal, stop_external_replay()); expect_start_external_replay(mock_local_journal, &mock_local_journal_replay, 0); + expect_local_journal_add_listener(mock_local_journal, + &local_journal_listener); expect_get_tag(mock_remote_journaler, tag, 0); expect_allocate_tag(mock_local_journal, 0); @@ -1154,9 +1169,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, DecodeError) { // replay_flush expect_shut_down(mock_local_journal_replay, false, 0); + EXPECT_CALL(mock_local_journal, remove_listener(_)); EXPECT_CALL(mock_local_journal, stop_external_replay()); expect_start_external_replay(mock_local_journal, &mock_local_journal_replay, 0); + expect_local_journal_add_listener(mock_local_journal, + &local_journal_listener); expect_get_tag(mock_remote_journaler, tag, 0); expect_allocate_tag(mock_local_journal, 0); @@ -1224,9 +1242,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, DelayedReplay) { // replay_flush expect_shut_down(mock_local_journal_replay, false, 0); + EXPECT_CALL(mock_local_journal, remove_listener(_)); EXPECT_CALL(mock_local_journal, stop_external_replay()); expect_start_external_replay(mock_local_journal, &mock_local_journal_replay, 0); + expect_local_journal_add_listener(mock_local_journal, + &local_journal_listener); expect_get_tag(mock_remote_journaler, tag, 0); expect_allocate_tag(mock_local_journal, 0); @@ -1648,6 +1669,7 @@ TEST_F(TestMockImageReplayerJournalReplayer, ReplayFlushShutDownError) { expect_try_pop_front(mock_remote_journaler, 1, true); expect_shut_down(mock_local_journal_replay, false, -EINVAL); + EXPECT_CALL(mock_local_journal, remove_listener(_)); EXPECT_CALL(mock_local_journal, stop_external_replay()); expect_notification(mock_threads, mock_replayer_listener); remote_replay_handler->handle_entries_available(); @@ -1655,7 +1677,6 @@ TEST_F(TestMockImageReplayerJournalReplayer, ReplayFlushShutDownError) { wait_for_notification(); ASSERT_EQ(-EINVAL, mock_replayer.get_error_code()); - EXPECT_CALL(mock_local_journal, remove_listener(_)); MockCloseImageRequest mock_close_image_request; expect_send(mock_close_image_request, 0); expect_stop_replay(mock_remote_journaler, 0); @@ -1702,6 +1723,7 @@ TEST_F(TestMockImageReplayerJournalReplayer, ReplayFlushStartError) { expect_try_pop_front(mock_remote_journaler, 1, true); expect_shut_down(mock_local_journal_replay, false, 0); + EXPECT_CALL(mock_local_journal, remove_listener(_)); EXPECT_CALL(mock_local_journal, stop_external_replay()); expect_start_external_replay(mock_local_journal, nullptr, -EINVAL); expect_notification(mock_threads, mock_replayer_listener); @@ -1710,7 +1732,6 @@ TEST_F(TestMockImageReplayerJournalReplayer, ReplayFlushStartError) { wait_for_notification(); ASSERT_EQ(-EINVAL, mock_replayer.get_error_code()); - EXPECT_CALL(mock_local_journal, remove_listener(_)); MockCloseImageRequest mock_close_image_request; expect_send(mock_close_image_request, 0); expect_stop_replay(mock_remote_journaler, 0); @@ -1761,9 +1782,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, GetTagError) { true, 0, 0})}; expect_try_pop_front(mock_remote_journaler, tag.tid, true); expect_shut_down(mock_local_journal_replay, false, 0); + EXPECT_CALL(mock_local_journal, remove_listener(_)); EXPECT_CALL(mock_local_journal, stop_external_replay()); expect_start_external_replay(mock_local_journal, &mock_local_journal_replay, 0); + expect_local_journal_add_listener(mock_local_journal, + &local_journal_listener); expect_get_tag(mock_remote_journaler, tag, -EINVAL); expect_notification(mock_threads, mock_replayer_listener); remote_replay_handler->handle_entries_available(); @@ -1821,9 +1845,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, AllocateTagDemotion) { expect_try_pop_front(mock_remote_journaler, tag.tid, true); expect_shut_down(mock_local_journal_replay, false, 0); + EXPECT_CALL(mock_local_journal, remove_listener(_)); EXPECT_CALL(mock_local_journal, stop_external_replay()); expect_start_external_replay(mock_local_journal, &mock_local_journal_replay, 0); + expect_local_journal_add_listener(mock_local_journal, + &local_journal_listener); expect_get_tag(mock_remote_journaler, tag, 0); expect_get_tag_data(mock_local_journal, {}); expect_allocate_tag(mock_local_journal, 0); @@ -1883,9 +1910,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, AllocateTagError) { expect_try_pop_front(mock_remote_journaler, tag.tid, true); expect_shut_down(mock_local_journal_replay, false, 0); + EXPECT_CALL(mock_local_journal, remove_listener(_)); EXPECT_CALL(mock_local_journal, stop_external_replay()); expect_start_external_replay(mock_local_journal, &mock_local_journal_replay, 0); + expect_local_journal_add_listener(mock_local_journal, + &local_journal_listener); expect_get_tag(mock_remote_journaler, tag, 0); expect_allocate_tag(mock_local_journal, -EINVAL); expect_notification(mock_threads, mock_replayer_listener); @@ -1943,9 +1973,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, PreprocessError) { expect_try_pop_front(mock_remote_journaler, tag.tid, true); expect_shut_down(mock_local_journal_replay, false, 0); + EXPECT_CALL(mock_local_journal, remove_listener(_)); EXPECT_CALL(mock_local_journal, stop_external_replay()); expect_start_external_replay(mock_local_journal, &mock_local_journal_replay, 0); + expect_local_journal_add_listener(mock_local_journal, + &local_journal_listener); expect_get_tag(mock_remote_journaler, tag, 0); expect_allocate_tag(mock_local_journal, 0); EXPECT_CALL(mock_replay_entry, get_data()); @@ -2008,9 +2041,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, ProcessError) { expect_try_pop_front(mock_remote_journaler, tag.tid, true); expect_shut_down(mock_local_journal_replay, false, 0); + EXPECT_CALL(mock_local_journal, remove_listener(_)); EXPECT_CALL(mock_local_journal, stop_external_replay()); expect_start_external_replay(mock_local_journal, &mock_local_journal_replay, 0); + expect_local_journal_add_listener(mock_local_journal, + &local_journal_listener); expect_get_tag(mock_remote_journaler, tag, 0); expect_allocate_tag(mock_local_journal, 0); EXPECT_CALL(mock_replay_entry, get_data()); @@ -2081,9 +2117,12 @@ TEST_F(TestMockImageReplayerJournalReplayer, ImageNameUpdated) { expect_try_pop_front(mock_remote_journaler, tag.tid, true); expect_shut_down(mock_local_journal_replay, false, 0); + EXPECT_CALL(mock_local_journal, remove_listener(_)); EXPECT_CALL(mock_local_journal, stop_external_replay()); expect_start_external_replay(mock_local_journal, &mock_local_journal_replay, 0); + expect_local_journal_add_listener(mock_local_journal, + &local_journal_listener); expect_get_tag(mock_remote_journaler, tag, 0); expect_allocate_tag(mock_local_journal, 0); EXPECT_CALL(mock_local_journal_replay, decode(_, _)).WillOnce(Return(0)); diff --git a/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc index f1552ac68ca59..af6802194424e 100644 --- a/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc +++ b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc @@ -179,28 +179,16 @@ template void Replayer::init(Context* on_finish) { dout(10) << dendl; - ceph_assert(m_local_journal == nullptr); { auto local_image_ctx = m_state_builder->local_image_ctx; std::shared_lock image_locker{local_image_ctx->image_lock}; m_image_spec = util::compute_image_spec(local_image_ctx->md_ctx, local_image_ctx->name); - m_local_journal = local_image_ctx->journal; } ceph_assert(m_on_init_shutdown == nullptr); m_on_init_shutdown = on_finish; - if (m_local_journal == nullptr) { - std::unique_lock locker{m_lock}; - m_state = STATE_COMPLETE; - m_state_builder->remote_journaler = nullptr; - - handle_replay_complete(locker, -EINVAL, "error accessing local journal"); - close_local_image(); - return; - } - init_remote_journaler(); } @@ -302,13 +290,28 @@ void Replayer::handle_init_remote_journaler(int r) { return; } - start_external_replay(); + start_external_replay(locker); } template -void Replayer::start_external_replay() { +void Replayer::start_external_replay(std::unique_lock& locker) { dout(10) << dendl; + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock local_image_locker{local_image_ctx->image_lock}; + + ceph_assert(m_local_journal == nullptr); + m_local_journal = local_image_ctx->journal; + if (m_local_journal == nullptr) { + local_image_locker.unlock(); + + derr << "local image journal closed" << dendl; + handle_replay_complete(locker, -EINVAL, "error accessing local journal"); + close_local_image(); + return; + } + + // safe to hold pointer to journal after external playback starts Context *start_ctx = create_context_callback< Replayer, &Replayer::handle_start_external_replay>(this); m_local_journal->start_external_replay(&m_local_journal_replay, start_ctx); @@ -336,7 +339,36 @@ void Replayer::handle_start_external_replay(int r) { m_state = STATE_REPLAYING; + // check for resync/promotion state after adding listener + if (!add_local_journal_listener(locker)) { + return; + } + + // start remote journal replay + m_event_preprocessor = EventPreprocessor::create( + *m_state_builder->local_image_ctx, *m_state_builder->remote_journaler, + m_local_mirror_uuid, &m_state_builder->remote_client_meta, + m_threads->work_queue); + m_replay_status_formatter = ReplayStatusFormatter::create( + m_state_builder->remote_journaler, m_local_mirror_uuid); + + auto cct = static_cast(m_state_builder->local_image_ctx->cct); + double poll_seconds = cct->_conf.get_val( + "rbd_mirror_journal_poll_age"); + m_remote_replay_handler = new RemoteReplayHandler(this); + m_state_builder->remote_journaler->start_live_replay(m_remote_replay_handler, + poll_seconds); + + notify_status_updated(); +} + +template +bool Replayer::add_local_journal_listener( + std::unique_lock& locker) { + dout(10) << dendl; + // listen for promotion and resync requests against local journal + ceph_assert(m_local_journal_listener == nullptr); m_local_journal_listener = new LocalJournalListener(this); m_local_journal->add_listener(m_local_journal_listener); @@ -345,38 +377,23 @@ void Replayer::handle_start_external_replay(int r) { if (m_local_journal->is_tag_owner()) { dout(10) << "local image force-promoted" << dendl; handle_replay_complete(locker, 0, "force promoted"); - return; + return false; } bool resync_requested = false; - r = m_local_journal->is_resync_requested(&resync_requested); + int r = m_local_journal->is_resync_requested(&resync_requested); if (r < 0) { dout(10) << "failed to determine resync state: " << cpp_strerror(r) << dendl; handle_replay_complete(locker, r, "error parsing resync state"); - return; + return false; } else if (resync_requested) { dout(10) << "local image resync requested" << dendl; handle_replay_complete(locker, 0, "resync requested"); - return; + return false; } - // start remote journal replay - m_event_preprocessor = EventPreprocessor::create( - *m_state_builder->local_image_ctx, *m_state_builder->remote_journaler, - m_local_mirror_uuid, &m_state_builder->remote_client_meta, - m_threads->work_queue); - m_replay_status_formatter = ReplayStatusFormatter::create( - m_state_builder->remote_journaler, m_local_mirror_uuid); - - auto cct = static_cast(m_state_builder->local_image_ctx->cct); - double poll_seconds = cct->_conf.get_val( - "rbd_mirror_journal_poll_age"); - m_remote_replay_handler = new RemoteReplayHandler(this); - m_state_builder->remote_journaler->start_live_replay(m_remote_replay_handler, - poll_seconds); - - notify_status_updated(); + return true; } template @@ -810,19 +827,41 @@ void Replayer::replay_flush() { template void Replayer::handle_replay_flush_shut_down(int r) { - { - std::unique_lock locker{m_lock}; - ceph_assert(m_local_journal != nullptr); - m_local_journal->stop_external_replay(); - m_local_journal_replay = nullptr; - } - + std::unique_lock locker{m_lock}; dout(10) << "r=" << r << dendl; + + ceph_assert(m_local_journal != nullptr); + ceph_assert(m_local_journal_listener != nullptr); + + // blocks if listener notification is in-progress + m_local_journal->remove_listener(m_local_journal_listener); + delete m_local_journal_listener; + m_local_journal_listener = nullptr; + + m_local_journal->stop_external_replay(); + m_local_journal_replay = nullptr; + m_local_journal.reset(); + if (r < 0) { + locker.unlock(); + handle_replay_flush(r); return; } + // journal might have been closed now that we stopped external replay + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock local_image_locker{local_image_ctx->image_lock}; + m_local_journal = local_image_ctx->journal; + if (m_local_journal == nullptr) { + local_image_locker.unlock(); + locker.unlock(); + + derr << "local image journal closed" << dendl; + handle_replay_flush(-EINVAL); + return; + } + auto ctx = create_context_callback< Replayer, &Replayer::handle_replay_flush>(this); m_local_journal->start_external_replay(&m_local_journal_replay, ctx); @@ -830,16 +869,24 @@ void Replayer::handle_replay_flush_shut_down(int r) { template void Replayer::handle_replay_flush(int r) { + std::unique_lock locker{m_lock}; dout(10) << "r=" << r << dendl; if (r < 0) { derr << "replay flush encountered an error: " << cpp_strerror(r) << dendl; - handle_replay_complete(r, "replay flush encountered an error"); + handle_replay_complete(locker, r, "replay flush encountered an error"); + m_event_replay_tracker.finish_op(); + return; + } else if (is_replay_complete(locker)) { m_event_replay_tracker.finish_op(); return; - } else if (is_replay_complete()) { + } + + // check for resync/promotion state after adding listener + if (!add_local_journal_listener(locker)) { m_event_replay_tracker.finish_op(); return; } + locker.unlock(); get_remote_tag(); } diff --git a/src/tools/rbd_mirror/image_replayer/journal/Replayer.h b/src/tools/rbd_mirror/image_replayer/journal/Replayer.h index f5d59a07f8a47..ec6e44f1ae649 100644 --- a/src/tools/rbd_mirror/image_replayer/journal/Replayer.h +++ b/src/tools/rbd_mirror/image_replayer/journal/Replayer.h @@ -235,9 +235,11 @@ private: void init_remote_journaler(); void handle_init_remote_journaler(int r); - void start_external_replay(); + void start_external_replay(std::unique_lock& locker); void handle_start_external_replay(int r); + bool add_local_journal_listener(std::unique_lock& locker); + bool notify_init_complete(std::unique_lock& locker); void shut_down_local_journal_replay(); -- 2.39.5