From: N Balachandran Date: Fri, 14 Jul 2023 12:56:15 +0000 (+0530) Subject: rbd-mirror: fix race preventing local image deletion X-Git-Tag: v19.0.0~840^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=1df7921b916579c273cb597e234c9c6b721674d7;p=ceph-ci.git rbd-mirror: fix race preventing local image deletion On primary image deletion, a race between InstanceReplayer::release_image() (which calls ImageReplayer::stop())and ImageReplayer::handle_bootstrap() may prevent the non-primary image from being deleted. Because ImageReplayer::handle_bootstrap() checks if the start has been canceled before processing -ENOLINK, m_delete_requested is not set to true and the local image is not deleted. This commit sets m_delete_requested to true before checking if the start has been canceled to ensure that the image is deleted during shut_down, and updates ImageReplayer::bootstrap() to cancel the BootstrapRequest and prevent a potentially long time spent in an image sync if ImageReplayer stop is requested. Fixes: https://tracker.ceph.com/issues/61672 Signed-off-by: N Balachandran --- diff --git a/src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc b/src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc index 8a66cecaed9..d8d7ed2da56 100644 --- a/src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc +++ b/src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc @@ -636,6 +636,59 @@ TEST_F(TestMockImageReplayerBootstrapRequest, PrepareRemoteImageNotPrimaryLocalL ASSERT_EQ(0, ctx.wait()); } +TEST_F(TestMockImageReplayerBootstrapRequest, PrepareRemoteImageDNELocalLinked) { + InSequence seq; + + // prepare local image + MockPrepareLocalImageRequest mock_prepare_local_image_request; + MockStateBuilder mock_state_builder; + expect_send(mock_prepare_local_image_request, mock_state_builder, + m_local_image_ctx->id, m_local_image_ctx->name, 0); + + // prepare remote image + MockPrepareRemoteImageRequest mock_prepare_remote_image_request; + expect_send(mock_prepare_remote_image_request, mock_state_builder, + "remote mirror uuid", m_remote_image_ctx->id, -ENOENT); + expect_is_local_primary(mock_state_builder, false); + expect_is_linked(mock_state_builder, true); + + C_SaferCond ctx; + MockThreads mock_threads(m_threads); + MockInstanceWatcher mock_instance_watcher; + MockBootstrapRequest *request = create_request( + &mock_threads, &mock_instance_watcher, "global image id", + "local mirror uuid", &ctx); + request->send(); + ASSERT_EQ(-ENOLINK, ctx.wait()); +} + +TEST_F(TestMockImageReplayerBootstrapRequest, PrepareRemoteImageDNELocalLinkedCanceled) { + InSequence seq; + + // prepare local image + MockPrepareLocalImageRequest mock_prepare_local_image_request; + MockStateBuilder mock_state_builder; + expect_send(mock_prepare_local_image_request, mock_state_builder, + m_local_image_ctx->id, m_local_image_ctx->name, 0); + + // prepare remote image + MockPrepareRemoteImageRequest mock_prepare_remote_image_request; + expect_send(mock_prepare_remote_image_request, mock_state_builder, + "remote mirror uuid", m_remote_image_ctx->id, -ENOENT); + expect_is_local_primary(mock_state_builder, false); + expect_is_linked(mock_state_builder, true); + + C_SaferCond ctx; + MockThreads mock_threads(m_threads); + MockInstanceWatcher mock_instance_watcher; + MockBootstrapRequest *request = create_request( + &mock_threads, &mock_instance_watcher, "global image id", + "local mirror uuid", &ctx); + request->cancel(); + request->send(); + ASSERT_EQ(-ENOLINK, ctx.wait()); +} + TEST_F(TestMockImageReplayerBootstrapRequest, OpenLocalImageError) { InSequence seq; diff --git a/src/test/rbd_mirror/test_mock_ImageReplayer.cc b/src/test/rbd_mirror/test_mock_ImageReplayer.cc index 177b71a1584..32a4f82b53b 100644 --- a/src/test/rbd_mirror/test_mock_ImageReplayer.cc +++ b/src/test/rbd_mirror/test_mock_ImageReplayer.cc @@ -620,6 +620,45 @@ TEST_F(TestMockImageReplayer, BootstrapCancel) { ASSERT_EQ(-ECANCELED, start_ctx.wait()); } +TEST_F(TestMockImageReplayer, BootstrapRemoteDeletedCancel) { + create_local_image(); + librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx); + + MockThreads mock_threads(m_threads); + expect_work_queue_repeatedly(mock_threads); + expect_add_event_after_repeatedly(mock_threads); + + MockImageDeleter mock_image_deleter; + + expect_set_mirror_image_status_repeatedly(); + + InSequence seq; + + MockBootstrapRequest mock_bootstrap_request; + MockStateBuilder mock_state_builder; + EXPECT_CALL(mock_bootstrap_request, send()) + .WillOnce(Invoke([this, &mock_bootstrap_request, &mock_state_builder, + &mock_local_image_ctx]() { + mock_state_builder.local_image_id = mock_local_image_ctx.id; + mock_state_builder.remote_image_id = ""; + *mock_bootstrap_request.state_builder = &mock_state_builder; + m_image_replayer->stop(nullptr); + mock_bootstrap_request.on_finish->complete(-ENOLINK); + })); + EXPECT_CALL(mock_bootstrap_request, cancel()); + + expect_close(mock_state_builder, 0); + + expect_trash_move(mock_image_deleter, "global image id", false, 0); + expect_mirror_image_status_exists(false); + + create_image_replayer(mock_threads); + + C_SaferCond start_ctx; + m_image_replayer->start(&start_ctx); + ASSERT_EQ(-ECANCELED, start_ctx.wait()); +} + TEST_F(TestMockImageReplayer, StopError) { // START diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index 0f909b206dd..1e88c3262f1 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -348,10 +348,6 @@ void ImageReplayer::bootstrap() { ceph_assert(!m_peers.empty()); m_remote_image_peer = *m_peers.begin(); - if (on_start_interrupted(m_lock)) { - return; - } - ceph_assert(m_state_builder == nullptr); auto ctx = create_context_callback< ImageReplayer, &ImageReplayer::handle_bootstrap>(this); @@ -364,6 +360,13 @@ void ImageReplayer::bootstrap() { request->get(); m_bootstrap_request = request; + + // proceed even if stop was requested to allow for m_delete_requested + // to get set; cancel() would prevent BootstrapRequest from going into + // image sync + if (m_stop_requested) { + request->cancel(); + } locker.unlock(); update_mirror_image_status(false, boost::none); @@ -379,6 +382,14 @@ void ImageReplayer::handle_bootstrap(int r) { m_bootstrap_request = nullptr; } + // set m_delete_requested early to ensure that in case remote + // image no longer exists local image gets deleted even if start + // is interrupted + if (r == -ENOLINK) { + dout(5) << "remote image no longer exists" << dendl; + m_delete_requested = true; + } + if (on_start_interrupted()) { return; } else if (r == -ENOMSG) { @@ -393,7 +404,6 @@ void ImageReplayer::handle_bootstrap(int r) { on_start_fail(r, "split-brain detected"); return; } else if (r == -ENOLINK) { - m_delete_requested = true; on_start_fail(0, "remote image no longer exists"); return; } else if (r == -ERESTART) {