From 91b8a24ea58aee01ec8567195b5aabe11f74b87f Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Fri, 22 Feb 2019 13:22:55 -0500 Subject: [PATCH] rbd-mirror: fixed potential bootstrap cancel race condition If the image replay was canceled prior to the start of the bootstrap stage, the image replayer would be stuck attempting to shut down if the bootstrap is paused behind an image sync. Signed-off-by: Jason Dillaman --- .../rbd_mirror/test_mock_ImageReplayer.cc | 37 ++++++- src/tools/rbd_mirror/ImageReplayer.cc | 97 ++++++++++--------- src/tools/rbd_mirror/ImageReplayer.h | 1 + 3 files changed, 90 insertions(+), 45 deletions(-) diff --git a/src/test/rbd_mirror/test_mock_ImageReplayer.cc b/src/test/rbd_mirror/test_mock_ImageReplayer.cc index e68455360e7..46adeff2a1a 100644 --- a/src/test/rbd_mirror/test_mock_ImageReplayer.cc +++ b/src/test/rbd_mirror/test_mock_ImageReplayer.cc @@ -850,7 +850,6 @@ TEST_F(TestMockImageReplayer, GetRemoteImageIdError) { } TEST_F(TestMockImageReplayer, BootstrapError) { - create_local_image(); librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx); @@ -885,6 +884,42 @@ TEST_F(TestMockImageReplayer, BootstrapError) { ASSERT_EQ(-EINVAL, start_ctx.wait()); } +TEST_F(TestMockImageReplayer, StopBeforeBootstrap) { + create_local_image(); + librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx); + + journal::MockJournaler mock_remote_journaler; + MockThreads mock_threads(m_threads); + expect_work_queue_repeatedly(mock_threads); + expect_add_event_after_repeatedly(mock_threads); + + MockImageDeleter mock_image_deleter; + MockPrepareLocalImageRequest mock_prepare_local_image_request; + MockPrepareRemoteImageRequest mock_prepare_remote_image_request; + MockReplayStatusFormatter mock_replay_status_formatter; + + expect_get_or_send_update(mock_replay_status_formatter); + + InSequence seq; + expect_send(mock_prepare_local_image_request, mock_local_image_ctx.id, + mock_local_image_ctx.name, "remote mirror uuid", 0); + expect_send(mock_prepare_remote_image_request, "remote mirror uuid", + m_remote_image_ctx->id, 0); + EXPECT_CALL(mock_remote_journaler, construct()) + .WillOnce(Invoke([this]() { + m_image_replayer->stop(nullptr, true); + })); + + EXPECT_CALL(mock_remote_journaler, remove_listener(_)); + expect_shut_down(mock_remote_journaler, 0); + + create_image_replayer(mock_threads); + + C_SaferCond start_ctx; + m_image_replayer->start(&start_ctx); + ASSERT_EQ(-ECANCELED, start_ctx.wait()); +} + TEST_F(TestMockImageReplayer, StartExternalReplayError) { // START diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index eb094ea6050..d1c00af989b 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -334,7 +334,7 @@ void ImageReplayer::add_peer(const std::string &peer_uuid, template void ImageReplayer::set_state_description(int r, const std::string &desc) { - dout(20) << r << " " << desc << dendl; + dout(10) << r << " " << desc << dendl; Mutex::Locker l(m_lock); m_last_r = r; @@ -344,7 +344,7 @@ void ImageReplayer::set_state_description(int r, const std::string &desc) { template void ImageReplayer::start(Context *on_finish, bool manual) { - dout(20) << "on_finish=" << on_finish << dendl; + dout(10) << "on_finish=" << on_finish << dendl; int r = 0; { @@ -498,18 +498,21 @@ void ImageReplayer::bootstrap() { return; } - Context *ctx = create_context_callback< - ImageReplayer, &ImageReplayer::handle_bootstrap>(this); - - BootstrapRequest *request = BootstrapRequest::create( - m_threads, *m_local_ioctx, m_remote_image.io_ctx, m_instance_watcher, - &m_local_image_ctx, m_local_image_id, m_remote_image.image_id, - m_global_image_id, m_local_mirror_uuid, m_remote_image.mirror_uuid, - m_remote_journaler, &m_client_state, &m_client_meta, ctx, - &m_resync_requested, &m_progress_cxt); - + BootstrapRequest *request = nullptr; { Mutex::Locker locker(m_lock); + if (on_start_interrupted(m_lock)) { + return; + } + + auto ctx = create_context_callback< + ImageReplayer, &ImageReplayer::handle_bootstrap>(this); + request = BootstrapRequest::create( + m_threads, *m_local_ioctx, m_remote_image.io_ctx, m_instance_watcher, + &m_local_image_ctx, m_local_image_id, m_remote_image.image_id, + m_global_image_id, m_local_mirror_uuid, m_remote_image.mirror_uuid, + m_remote_journaler, &m_client_state, &m_client_meta, ctx, + &m_resync_requested, &m_progress_cxt); request->get(); m_bootstrap_request = request; } @@ -674,9 +677,9 @@ void ImageReplayer::handle_start_replay(int r) { dout(10) << "m_remote_journaler=" << *m_remote_journaler << dendl; } - dout(20) << "start succeeded" << dendl; + dout(10) << "start succeeded" << dendl; if (on_finish != nullptr) { - dout(20) << "on finish complete, r=" << r << dendl; + dout(10) << "on finish complete, r=" << r << dendl; on_finish->complete(r); } } @@ -684,7 +687,7 @@ void ImageReplayer::handle_start_replay(int r) { template void ImageReplayer::on_start_fail(int r, const std::string &desc) { - dout(20) << "r=" << r << dendl; + dout(10) << "r=" << r << dendl; Context *ctx = new FunctionContext([this, r, desc](int _r) { { Mutex::Locker locker(m_lock); @@ -693,7 +696,7 @@ void ImageReplayer::on_start_fail(int r, const std::string &desc) if (r < 0 && r != -ECANCELED && r != -EREMOTEIO && r != -ENOENT) { derr << "start failed: " << cpp_strerror(r) << dendl; } else { - dout(20) << "start canceled" << dendl; + dout(10) << "start canceled" << dendl; } } @@ -708,11 +711,16 @@ void ImageReplayer::on_start_fail(int r, const std::string &desc) } template -bool ImageReplayer::on_start_interrupted() -{ +bool ImageReplayer::on_start_interrupted() { Mutex::Locker locker(m_lock); + return on_start_interrupted(m_lock); +} + +template +bool ImageReplayer::on_start_interrupted(Mutex& lock) { + ceph_assert(m_lock.is_locked()); ceph_assert(m_state == STATE_STARTING); - if (m_on_stop_finish == nullptr) { + if (!m_stop_requested) { return false; } @@ -724,7 +732,7 @@ template void ImageReplayer::stop(Context *on_finish, bool manual, int r, const std::string& desc) { - dout(20) << "on_finish=" << on_finish << ", manual=" << manual + dout(10) << "on_finish=" << on_finish << ", manual=" << manual << ", desc=" << desc << dendl; image_replayer::BootstrapRequest *bootstrap_request = nullptr; @@ -738,13 +746,13 @@ void ImageReplayer::stop(Context *on_finish, bool manual, int r, } else { if (!is_stopped_()) { if (m_state == STATE_STARTING) { - dout(20) << "canceling start" << dendl; - if (m_bootstrap_request) { + dout(10) << "canceling start" << dendl; + if (m_bootstrap_request != nullptr) { bootstrap_request = m_bootstrap_request; bootstrap_request->get(); } } else { - dout(20) << "interrupting replay" << dendl; + dout(10) << "interrupting replay" << dendl; shut_down_replay = true; } @@ -758,6 +766,7 @@ void ImageReplayer::stop(Context *on_finish, bool manual, int r, // avoid holding lock since bootstrap request will update status if (bootstrap_request != nullptr) { + dout(10) << "canceling bootstrap" << dendl; bootstrap_request->cancel(); bootstrap_request->put(); } @@ -780,7 +789,7 @@ void ImageReplayer::stop(Context *on_finish, bool manual, int r, template void ImageReplayer::on_stop_journal_replay(int r, const std::string &desc) { - dout(20) << "enter" << dendl; + dout(10) << dendl; { Mutex::Locker locker(m_lock); @@ -801,7 +810,7 @@ void ImageReplayer::on_stop_journal_replay(int r, const std::string &desc) template void ImageReplayer::handle_replay_ready() { - dout(20) << "enter" << dendl; + dout(20) << dendl; if (on_replay_interrupted()) { return; } @@ -846,7 +855,7 @@ void ImageReplayer::restart(Context *on_finish) template void ImageReplayer::flush(Context *on_finish) { - dout(20) << "enter" << dendl; + dout(10) << dendl; { Mutex::Locker locker(m_lock); @@ -870,7 +879,7 @@ void ImageReplayer::flush(Context *on_finish) template void ImageReplayer::on_flush_local_replay_flush_start(Context *on_flush) { - dout(20) << "enter" << dendl; + dout(10) << dendl; FunctionContext *ctx = new FunctionContext( [this, on_flush](int r) { on_flush_local_replay_flush_finish(on_flush, r); @@ -885,7 +894,7 @@ template void ImageReplayer::on_flush_local_replay_flush_finish(Context *on_flush, int r) { - dout(20) << "r=" << r << dendl; + dout(10) << "r=" << r << dendl; if (r < 0) { derr << "error flushing local replay: " << cpp_strerror(r) << dendl; on_flush->complete(r); @@ -939,7 +948,7 @@ bool ImageReplayer::on_replay_interrupted() template void ImageReplayer::print_status(Formatter *f, stringstream *ss) { - dout(20) << "enter" << dendl; + dout(10) << dendl; Mutex::Locker l(m_lock); @@ -957,7 +966,7 @@ void ImageReplayer::print_status(Formatter *f, stringstream *ss) template void ImageReplayer::handle_replay_complete(int r, const std::string &error_desc) { - dout(20) << "r=" << r << dendl; + dout(10) << "r=" << r << dendl; if (r < 0) { derr << "replay encountered an error: " << cpp_strerror(r) << dendl; set_state_description(r, error_desc); @@ -972,13 +981,13 @@ void ImageReplayer::handle_replay_complete(int r, const std::string &error_de template void ImageReplayer::replay_flush() { - dout(20) << dendl; + dout(10) << dendl; bool interrupted = false; { Mutex::Locker locker(m_lock); if (m_state != STATE_REPLAYING) { - dout(20) << "replay interrupted" << dendl; + dout(10) << "replay interrupted" << dendl; interrupted = true; } else { m_state = STATE_REPLAY_FLUSHING; @@ -1010,7 +1019,7 @@ void ImageReplayer::replay_flush() { template void ImageReplayer::handle_replay_flush(int r) { - dout(20) << "r=" << r << dendl; + dout(10) << "r=" << r << dendl; { Mutex::Locker locker(m_lock); @@ -1279,7 +1288,7 @@ void ImageReplayer::handle_process_entry_safe(const ReplayEntry &replay_entry template bool ImageReplayer::update_mirror_image_status(bool force, const OptionalState &state) { - dout(20) << dendl; + dout(15) << dendl; { Mutex::Locker locker(m_lock); if (!start_mirror_image_status_update(force, false)) { @@ -1298,10 +1307,10 @@ bool ImageReplayer::start_mirror_image_status_update(bool force, if (!force && !is_stopped_()) { if (!is_running_()) { - dout(20) << "shut down in-progress: ignoring update" << dendl; + dout(15) << "shut down in-progress: ignoring update" << dendl; return false; } else if (m_in_flight_status_updates > (restarting ? 1 : 0)) { - dout(20) << "already sending update" << dendl; + dout(15) << "already sending update" << dendl; m_update_status_requested = true; return false; } @@ -1337,7 +1346,7 @@ void ImageReplayer::finish_mirror_image_status_update() { template void ImageReplayer::queue_mirror_image_status_update(const OptionalState &state) { - dout(20) << dendl; + dout(15) << dendl; FunctionContext *ctx = new FunctionContext( [this, state](int r) { send_mirror_status_update(state); @@ -1399,7 +1408,7 @@ void ImageReplayer::send_mirror_status_update(const OptionalState &opt_state) { Context *on_req_finish = new FunctionContext( [this](int r) { - dout(20) << "replay status ready: r=" << r << dendl; + dout(15) << "replay status ready: r=" << r << dendl; if (r >= 0) { send_mirror_status_update(boost::none); } else if (r == -EAGAIN) { @@ -1412,7 +1421,7 @@ void ImageReplayer::send_mirror_status_update(const OptionalState &opt_state) ceph_assert(m_replay_status_formatter != nullptr); if (!m_replay_status_formatter->get_or_send_update(&desc, on_req_finish)) { - dout(20) << "waiting for replay status" << dendl; + dout(15) << "waiting for replay status" << dendl; return; } status.description = "replaying, " + desc; @@ -1567,7 +1576,7 @@ void ImageReplayer::shut_down(int r) { // before proceeding if (m_in_flight_status_updates > 0) { if (m_on_update_status_finish == nullptr) { - dout(20) << "waiting for in-flight status update" << dendl; + dout(15) << "waiting for in-flight status update" << dendl; m_on_update_status_finish = new FunctionContext( [this, r](int _r) { shut_down(r); @@ -1682,7 +1691,7 @@ void ImageReplayer::handle_shut_down(int r) { // before proceeding if (m_in_flight_status_updates > 0) { if (m_on_update_status_finish == nullptr) { - dout(20) << "waiting for in-flight status update" << dendl; + dout(15) << "waiting for in-flight status update" << dendl; m_on_update_status_finish = new FunctionContext( [this, r](int _r) { handle_shut_down(r); @@ -1740,12 +1749,12 @@ void ImageReplayer::handle_shut_down(int r) { } if (on_start != nullptr) { - dout(20) << "on start finish complete, r=" << r << dendl; + dout(10) << "on start finish complete, r=" << r << dendl; on_start->complete(r); r = 0; } if (on_stop != nullptr) { - dout(20) << "on stop finish complete, r=" << r << dendl; + dout(10) << "on stop finish complete, r=" << r << dendl; on_stop->complete(r); } } @@ -1795,7 +1804,7 @@ std::string ImageReplayer::to_string(const State state) { template void ImageReplayer::resync_image(Context *on_finish) { - dout(20) << dendl; + dout(10) << dendl; m_resync_requested = true; stop(on_finish); diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h index ec9544ef5c7..0852c44d11b 100644 --- a/src/tools/rbd_mirror/ImageReplayer.h +++ b/src/tools/rbd_mirror/ImageReplayer.h @@ -199,6 +199,7 @@ protected: virtual void on_start_fail(int r, const std::string &desc); virtual bool on_start_interrupted(); + virtual bool on_start_interrupted(Mutex& lock); virtual void on_stop_journal_replay(int r = 0, const std::string &desc = ""); -- 2.39.5