From bd885d75b2e4d728086f744e0d10e7cd12d3f15b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 4 Sep 2022 17:52:51 +0200 Subject: [PATCH] librbd: retry ENOENT in V2_REFRESH_PARENT as well With auto-deletion of trashed snapshots, it is relatively easy to lose a race to "rbd flatten" as follows: - when V2_GET_PARENT runs, the image is technically still a clone - when V2_REFRESH_PARENT runs, the image is fully flattened and the snapshot in the parent image is deleted This results in a spurious ENOENT error, mainly when trying to open the image (e.g. for "rbd info"). This race condition has always been there but auto-deletion of trashed snapshots makes it much worse. Retry ENOENT in V2_REFRESH_PARENT the same way as in V2_GET_SNAPSHOTS. Fixes: https://tracker.ceph.com/issues/52810 Signed-off-by: Ilya Dryomov --- src/librbd/image/RefreshRequest.cc | 9 +- src/librbd/image/RefreshRequest.h | 6 +- .../librbd/image/test_mock_RefreshRequest.cc | 159 +++++++++++++++++- 3 files changed, 161 insertions(+), 13 deletions(-) diff --git a/src/librbd/image/RefreshRequest.cc b/src/librbd/image/RefreshRequest.cc index 19a57b9a511d0..24159c55bf282 100644 --- a/src/librbd/image/RefreshRequest.cc +++ b/src/librbd/image/RefreshRequest.cc @@ -865,7 +865,14 @@ Context *RefreshRequest::handle_v2_refresh_parent(int *result) { CephContext *cct = m_image_ctx.cct; ldout(cct, 10) << this << " " << __func__ << ": r=" << *result << dendl; - if (*result < 0) { + if (*result == -ENOENT && m_enoent_retries++ < MAX_ENOENT_RETRIES) { + ldout(cct, 10) << "out-of-sync parent info detected, retrying" << dendl; + ceph_assert(m_refresh_parent != nullptr); + delete m_refresh_parent; + m_refresh_parent = nullptr; + send_v2_get_mutable_metadata(); + return nullptr; + } else if (*result < 0) { lderr(cct) << "failed to refresh parent image: " << cpp_strerror(*result) << dendl; save_result(result); diff --git a/src/librbd/image/RefreshRequest.h b/src/librbd/image/RefreshRequest.h index ab7b5089295d1..42f4b4669ef2c 100644 --- a/src/librbd/image/RefreshRequest.h +++ b/src/librbd/image/RefreshRequest.h @@ -80,9 +80,9 @@ private: * * v v * | * * * V2_GET_SNAPSHOTS (skip if no snaps) | * (ENOENT) | | - * v | - * V2_REFRESH_PARENT (skip if no parent or | - * | refresh not needed) | + * * v | + * * * V2_REFRESH_PARENT (skip if no parent or | + * (ENOENT) | refresh not needed) | * v | * V2_INIT_EXCLUSIVE_LOCK (skip if lock | * | active or disabled) | diff --git a/src/test/librbd/image/test_mock_RefreshRequest.cc b/src/test/librbd/image/test_mock_RefreshRequest.cc index 261604c43375c..206b8d7b68c4c 100644 --- a/src/test/librbd/image/test_mock_RefreshRequest.cc +++ b/src/test/librbd/image/test_mock_RefreshRequest.cc @@ -20,7 +20,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include -#include +#include #include namespace librbd { @@ -69,26 +69,32 @@ struct GetMetadataRequest { template <> struct RefreshParentRequest { - static RefreshParentRequest* s_instance; + static std::queue s_instances; static RefreshParentRequest* create(MockRefreshImageCtx &mock_image_ctx, const ParentImageInfo &parent_md, const MigrationInfo &migration_info, Context *on_finish) { - ceph_assert(s_instance != nullptr); - s_instance->on_finish = on_finish; - return s_instance; + ceph_assert(!s_instances.empty()); + auto instance = s_instances.front(); + instance->on_finish = on_finish; + return instance; } static bool is_refresh_required(MockRefreshImageCtx &mock_image_ctx, const ParentImageInfo& parent_md, const MigrationInfo &migration_info) { - ceph_assert(s_instance != nullptr); - return s_instance->is_refresh_required(); + ceph_assert(!s_instances.empty()); + return s_instances.front()->is_refresh_required(); } Context *on_finish = nullptr; RefreshParentRequest() { - s_instance = this; + s_instances.push(this); + } + + ~RefreshParentRequest() { + ceph_assert(this == s_instances.front()); + s_instances.pop(); } MOCK_CONST_METHOD0(is_refresh_required, bool()); @@ -98,7 +104,7 @@ struct RefreshParentRequest { }; GetMetadataRequest* GetMetadataRequest::s_instance = nullptr; -RefreshParentRequest* RefreshParentRequest::s_instance = nullptr; +std::queue*> RefreshParentRequest::s_instances; } // namespace image @@ -909,6 +915,141 @@ TEST_F(TestMockImageRefreshRequest, SuccessChildDontOpenParent) { ASSERT_EQ(0, ctx.wait()); } +TEST_F(TestMockImageRefreshRequest, SuccessChildBeingFlattened) { + REQUIRE_FEATURE(RBD_FEATURE_LAYERING); + + librbd::ImageCtx *ictx; + librbd::ImageCtx *ictx2 = nullptr; + std::string clone_name = get_temp_image_name(); + + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + ASSERT_EQ(0, snap_create(*ictx, "snap")); + ASSERT_EQ(0, snap_protect(*ictx, "snap")); + BOOST_SCOPE_EXIT_ALL((&)) { + if (ictx2 != nullptr) { + close_image(ictx2); + } + + librbd::NoOpProgressContext no_op; + ASSERT_EQ(0, librbd::api::Image<>::remove(m_ioctx, clone_name, no_op)); + ASSERT_EQ(0, ictx->operations->snap_unprotect( + cls::rbd::UserSnapshotNamespace(), "snap")); + }; + + int order = ictx->order; + ASSERT_EQ(0, librbd::clone(m_ioctx, m_image_name.c_str(), "snap", m_ioctx, + clone_name.c_str(), ictx->features, &order, 0, 0)); + + ASSERT_EQ(0, open_image(clone_name, &ictx2)); + + MockRefreshImageCtx mock_image_ctx(*ictx2); + auto mock_refresh_parent_request = new MockRefreshParentRequest(); + MockRefreshParentRequest mock_refresh_parent_request_ext; + MockExclusiveLock mock_exclusive_lock; + expect_op_work_queue(mock_image_ctx); + expect_test_features(mock_image_ctx); + + mock_image_ctx.features &= ~RBD_FEATURE_OPERATIONS; + + InSequence seq; + expect_get_mutable_metadata(mock_image_ctx, mock_image_ctx.features, 0); + expect_get_parent(mock_image_ctx, 0); + MockGetMetadataRequest mock_get_metadata_request; + expect_get_metadata(mock_image_ctx, mock_get_metadata_request, + mock_image_ctx.header_oid, {}, 0); + expect_get_metadata(mock_image_ctx, mock_get_metadata_request, RBD_INFO, {}, + 0); + expect_apply_metadata(mock_image_ctx, 0); + expect_get_group(mock_image_ctx, 0); + expect_refresh_parent_is_required(*mock_refresh_parent_request, true); + expect_refresh_parent_send(mock_image_ctx, *mock_refresh_parent_request, + -ENOENT); + expect_get_mutable_metadata(mock_image_ctx, mock_image_ctx.features, 0); + expect_get_parent(mock_image_ctx, 0); + expect_get_metadata(mock_image_ctx, mock_get_metadata_request, + mock_image_ctx.header_oid, {}, 0); + expect_get_metadata(mock_image_ctx, mock_get_metadata_request, RBD_INFO, {}, + 0); + expect_apply_metadata(mock_image_ctx, 0); + expect_get_group(mock_image_ctx, 0); + expect_refresh_parent_is_required(mock_refresh_parent_request_ext, false); + if (ictx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) { + expect_init_exclusive_lock(mock_image_ctx, mock_exclusive_lock, 0); + } + EXPECT_CALL(mock_image_ctx, rebuild_data_io_context()); + + C_SaferCond ctx; + auto req = new MockRefreshRequest(mock_image_ctx, false, false, &ctx); + req->send(); + + ASSERT_EQ(0, ctx.wait()); +} + +TEST_F(TestMockImageRefreshRequest, ChildEnoentRetriesLimit) { + REQUIRE_FEATURE(RBD_FEATURE_LAYERING); + + librbd::ImageCtx *ictx; + librbd::ImageCtx *ictx2 = nullptr; + std::string clone_name = get_temp_image_name(); + + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + ASSERT_EQ(0, snap_create(*ictx, "snap")); + ASSERT_EQ(0, snap_protect(*ictx, "snap")); + BOOST_SCOPE_EXIT_ALL((&)) { + if (ictx2 != nullptr) { + close_image(ictx2); + } + + librbd::NoOpProgressContext no_op; + ASSERT_EQ(0, librbd::api::Image<>::remove(m_ioctx, clone_name, no_op)); + ASSERT_EQ(0, ictx->operations->snap_unprotect( + cls::rbd::UserSnapshotNamespace(), "snap")); + }; + + int order = ictx->order; + ASSERT_EQ(0, librbd::clone(m_ioctx, m_image_name.c_str(), "snap", m_ioctx, + clone_name.c_str(), ictx->features, &order, 0, 0)); + + ASSERT_EQ(0, open_image(clone_name, &ictx2)); + + MockRefreshImageCtx mock_image_ctx(*ictx2); + constexpr int num_tries = RefreshRequest<>::MAX_ENOENT_RETRIES + 1; + MockRefreshParentRequest* mock_refresh_parent_requests[num_tries]; + for (auto& mock_refresh_parent_request : mock_refresh_parent_requests) { + mock_refresh_parent_request = new MockRefreshParentRequest(); + } + MockGetMetadataRequest mock_get_metadata_request; + expect_op_work_queue(mock_image_ctx); + expect_test_features(mock_image_ctx); + + mock_image_ctx.features &= ~RBD_FEATURE_OPERATIONS; + + InSequence seq; + for (auto mock_refresh_parent_request : mock_refresh_parent_requests) { + expect_get_mutable_metadata(mock_image_ctx, mock_image_ctx.features, 0); + expect_get_parent(mock_image_ctx, 0); + expect_get_metadata(mock_image_ctx, mock_get_metadata_request, + mock_image_ctx.header_oid, {}, 0); + expect_get_metadata(mock_image_ctx, mock_get_metadata_request, RBD_INFO, {}, + 0); + expect_apply_metadata(mock_image_ctx, 0); + expect_get_group(mock_image_ctx, 0); + expect_refresh_parent_is_required(*mock_refresh_parent_request, true); + expect_refresh_parent_send(mock_image_ctx, *mock_refresh_parent_request, + -ENOENT); + } + expect_refresh_parent_apply(*mock_refresh_parent_requests[num_tries - 1]); + EXPECT_CALL(mock_image_ctx, rebuild_data_io_context()); + expect_refresh_parent_finalize( + mock_image_ctx, *mock_refresh_parent_requests[num_tries - 1], 0); + + C_SaferCond ctx; + auto req = new MockRefreshRequest(mock_image_ctx, false, false, &ctx); + req->send(); + + ASSERT_EQ(-ENOENT, ctx.wait()); +} + TEST_F(TestMockImageRefreshRequest, SuccessOpFeatures) { REQUIRE_FORMAT_V2(); -- 2.39.5