]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: retry ENOENT in V2_REFRESH_PARENT as well
authorIlya Dryomov <idryomov@gmail.com>
Sun, 4 Sep 2022 15:52:51 +0000 (17:52 +0200)
committerIlya Dryomov <idryomov@gmail.com>
Mon, 5 Sep 2022 21:02:39 +0000 (23:02 +0200)
With auto-deletion of trashed snapshots, it is relatively easy to lose
a race to "rbd flatten" as follows:

- when V2_GET_PARENT runs, the image is technically still a clone
- when V2_REFRESH_PARENT runs, the image is fully flattened and the
  snapshot in the parent image is deleted

This results in a spurious ENOENT error, mainly when trying to open the
image (e.g. for "rbd info").  This race condition has always been there
but auto-deletion of trashed snapshots makes it much worse.

Retry ENOENT in V2_REFRESH_PARENT the same way as in V2_GET_SNAPSHOTS.

Fixes: https://tracker.ceph.com/issues/52810
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
src/librbd/image/RefreshRequest.cc
src/librbd/image/RefreshRequest.h
src/test/librbd/image/test_mock_RefreshRequest.cc

index 19a57b9a511d0c9cfdc55ba96e2242c8da9cf588..24159c55bf2825dfa24f555b1493937b28ff8999 100644 (file)
@@ -865,7 +865,14 @@ Context *RefreshRequest<I>::handle_v2_refresh_parent(int *result) {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 10) << this << " " << __func__ << ": r=" << *result << dendl;
 
-  if (*result < 0) {
+  if (*result == -ENOENT && m_enoent_retries++ < MAX_ENOENT_RETRIES) {
+    ldout(cct, 10) << "out-of-sync parent info detected, retrying" << dendl;
+    ceph_assert(m_refresh_parent != nullptr);
+    delete m_refresh_parent;
+    m_refresh_parent = nullptr;
+    send_v2_get_mutable_metadata();
+    return nullptr;
+  } else if (*result < 0) {
     lderr(cct) << "failed to refresh parent image: " << cpp_strerror(*result)
                << dendl;
     save_result(result);
index ab7b5089295d1fda6c270d4fe21134ed902cfe61..42f4b4669ef2c98b48694759851255ad3790fea8 100644 (file)
@@ -80,9 +80,9 @@ private:
    *       *        v     v   *                               |
    *        * * V2_GET_SNAPSHOTS (skip if no snaps)           |
    *     (ENOENT)   |                                         |
-   *                v                                         |
-   *            V2_REFRESH_PARENT (skip if no parent or       |
-   *                |              refresh not needed)        |
+   *       *        v                                         |
+   *        * * V2_REFRESH_PARENT (skip if no parent or       |
+   *     (ENOENT)   |              refresh not needed)        |
    *                v                                         |
    *            V2_INIT_EXCLUSIVE_LOCK (skip if lock          |
    *                |                   active or disabled)   |
index 261604c43375cc3c310a62e0dc4a0d5a05c43751..206b8d7b68c4c9a784761dc1af5d69f00bc9cd08 100644 (file)
@@ -20,7 +20,7 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include <arpa/inet.h>
-#include <list>
+#include <queue>
 #include <boost/scope_exit.hpp>
 
 namespace librbd {
@@ -69,26 +69,32 @@ struct GetMetadataRequest<MockRefreshImageCtx> {
 
 template <>
 struct RefreshParentRequest<MockRefreshImageCtx> {
-  static RefreshParentRequest* s_instance;
+  static std::queue<RefreshParentRequest*> s_instances;
   static RefreshParentRequest* create(MockRefreshImageCtx &mock_image_ctx,
                                       const ParentImageInfo &parent_md,
                                       const MigrationInfo &migration_info,
                                       Context *on_finish) {
-    ceph_assert(s_instance != nullptr);
-    s_instance->on_finish = on_finish;
-    return s_instance;
+    ceph_assert(!s_instances.empty());
+    auto instance = s_instances.front();
+    instance->on_finish = on_finish;
+    return instance;
   }
   static bool is_refresh_required(MockRefreshImageCtx &mock_image_ctx,
                                   const ParentImageInfo& parent_md,
                                   const MigrationInfo &migration_info) {
-    ceph_assert(s_instance != nullptr);
-    return s_instance->is_refresh_required();
+    ceph_assert(!s_instances.empty());
+    return s_instances.front()->is_refresh_required();
   }
 
   Context *on_finish = nullptr;
 
   RefreshParentRequest() {
-    s_instance = this;
+    s_instances.push(this);
+  }
+
+  ~RefreshParentRequest() {
+    ceph_assert(this == s_instances.front());
+    s_instances.pop();
   }
 
   MOCK_CONST_METHOD0(is_refresh_required, bool());
@@ -98,7 +104,7 @@ struct RefreshParentRequest<MockRefreshImageCtx> {
 };
 
 GetMetadataRequest<MockRefreshImageCtx>* GetMetadataRequest<MockRefreshImageCtx>::s_instance = nullptr;
-RefreshParentRequest<MockRefreshImageCtx>* RefreshParentRequest<MockRefreshImageCtx>::s_instance = nullptr;
+std::queue<RefreshParentRequest<MockRefreshImageCtx>*> RefreshParentRequest<MockRefreshImageCtx>::s_instances;
 
 } // namespace image
 
@@ -909,6 +915,141 @@ TEST_F(TestMockImageRefreshRequest, SuccessChildDontOpenParent) {
   ASSERT_EQ(0, ctx.wait());
 }
 
+TEST_F(TestMockImageRefreshRequest, SuccessChildBeingFlattened) {
+  REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
+
+  librbd::ImageCtx *ictx;
+  librbd::ImageCtx *ictx2 = nullptr;
+  std::string clone_name = get_temp_image_name();
+
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ASSERT_EQ(0, snap_create(*ictx, "snap"));
+  ASSERT_EQ(0, snap_protect(*ictx, "snap"));
+  BOOST_SCOPE_EXIT_ALL((&)) {
+    if (ictx2 != nullptr) {
+      close_image(ictx2);
+    }
+
+    librbd::NoOpProgressContext no_op;
+    ASSERT_EQ(0, librbd::api::Image<>::remove(m_ioctx, clone_name, no_op));
+    ASSERT_EQ(0, ictx->operations->snap_unprotect(
+        cls::rbd::UserSnapshotNamespace(), "snap"));
+  };
+
+  int order = ictx->order;
+  ASSERT_EQ(0, librbd::clone(m_ioctx, m_image_name.c_str(), "snap", m_ioctx,
+                             clone_name.c_str(), ictx->features, &order, 0, 0));
+
+  ASSERT_EQ(0, open_image(clone_name, &ictx2));
+
+  MockRefreshImageCtx mock_image_ctx(*ictx2);
+  auto mock_refresh_parent_request = new MockRefreshParentRequest();
+  MockRefreshParentRequest mock_refresh_parent_request_ext;
+  MockExclusiveLock mock_exclusive_lock;
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+
+  mock_image_ctx.features &= ~RBD_FEATURE_OPERATIONS;
+
+  InSequence seq;
+  expect_get_mutable_metadata(mock_image_ctx, mock_image_ctx.features, 0);
+  expect_get_parent(mock_image_ctx, 0);
+  MockGetMetadataRequest mock_get_metadata_request;
+  expect_get_metadata(mock_image_ctx, mock_get_metadata_request,
+                      mock_image_ctx.header_oid, {}, 0);
+  expect_get_metadata(mock_image_ctx, mock_get_metadata_request, RBD_INFO, {},
+                      0);
+  expect_apply_metadata(mock_image_ctx, 0);
+  expect_get_group(mock_image_ctx, 0);
+  expect_refresh_parent_is_required(*mock_refresh_parent_request, true);
+  expect_refresh_parent_send(mock_image_ctx, *mock_refresh_parent_request,
+                             -ENOENT);
+  expect_get_mutable_metadata(mock_image_ctx, mock_image_ctx.features, 0);
+  expect_get_parent(mock_image_ctx, 0);
+  expect_get_metadata(mock_image_ctx, mock_get_metadata_request,
+                      mock_image_ctx.header_oid, {}, 0);
+  expect_get_metadata(mock_image_ctx, mock_get_metadata_request, RBD_INFO, {},
+                      0);
+  expect_apply_metadata(mock_image_ctx, 0);
+  expect_get_group(mock_image_ctx, 0);
+  expect_refresh_parent_is_required(mock_refresh_parent_request_ext, false);
+  if (ictx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) {
+    expect_init_exclusive_lock(mock_image_ctx, mock_exclusive_lock, 0);
+  }
+  EXPECT_CALL(mock_image_ctx, rebuild_data_io_context());
+
+  C_SaferCond ctx;
+  auto req = new MockRefreshRequest(mock_image_ctx, false, false, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageRefreshRequest, ChildEnoentRetriesLimit) {
+  REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
+
+  librbd::ImageCtx *ictx;
+  librbd::ImageCtx *ictx2 = nullptr;
+  std::string clone_name = get_temp_image_name();
+
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ASSERT_EQ(0, snap_create(*ictx, "snap"));
+  ASSERT_EQ(0, snap_protect(*ictx, "snap"));
+  BOOST_SCOPE_EXIT_ALL((&)) {
+    if (ictx2 != nullptr) {
+      close_image(ictx2);
+    }
+
+    librbd::NoOpProgressContext no_op;
+    ASSERT_EQ(0, librbd::api::Image<>::remove(m_ioctx, clone_name, no_op));
+    ASSERT_EQ(0, ictx->operations->snap_unprotect(
+        cls::rbd::UserSnapshotNamespace(), "snap"));
+  };
+
+  int order = ictx->order;
+  ASSERT_EQ(0, librbd::clone(m_ioctx, m_image_name.c_str(), "snap", m_ioctx,
+                             clone_name.c_str(), ictx->features, &order, 0, 0));
+
+  ASSERT_EQ(0, open_image(clone_name, &ictx2));
+
+  MockRefreshImageCtx mock_image_ctx(*ictx2);
+  constexpr int num_tries = RefreshRequest<>::MAX_ENOENT_RETRIES + 1;
+  MockRefreshParentRequest* mock_refresh_parent_requests[num_tries];
+  for (auto& mock_refresh_parent_request : mock_refresh_parent_requests) {
+    mock_refresh_parent_request = new MockRefreshParentRequest();
+  }
+  MockGetMetadataRequest mock_get_metadata_request;
+  expect_op_work_queue(mock_image_ctx);
+  expect_test_features(mock_image_ctx);
+
+  mock_image_ctx.features &= ~RBD_FEATURE_OPERATIONS;
+
+  InSequence seq;
+  for (auto mock_refresh_parent_request : mock_refresh_parent_requests) {
+    expect_get_mutable_metadata(mock_image_ctx, mock_image_ctx.features, 0);
+    expect_get_parent(mock_image_ctx, 0);
+    expect_get_metadata(mock_image_ctx, mock_get_metadata_request,
+                        mock_image_ctx.header_oid, {}, 0);
+    expect_get_metadata(mock_image_ctx, mock_get_metadata_request, RBD_INFO, {},
+                        0);
+    expect_apply_metadata(mock_image_ctx, 0);
+    expect_get_group(mock_image_ctx, 0);
+    expect_refresh_parent_is_required(*mock_refresh_parent_request, true);
+    expect_refresh_parent_send(mock_image_ctx, *mock_refresh_parent_request,
+                               -ENOENT);
+  }
+  expect_refresh_parent_apply(*mock_refresh_parent_requests[num_tries - 1]);
+  EXPECT_CALL(mock_image_ctx, rebuild_data_io_context());
+  expect_refresh_parent_finalize(
+      mock_image_ctx, *mock_refresh_parent_requests[num_tries - 1], 0);
+
+  C_SaferCond ctx;
+  auto req = new MockRefreshRequest(mock_image_ctx, false, false, &ctx);
+  req->send();
+
+  ASSERT_EQ(-ENOENT, ctx.wait());
+}
+
 TEST_F(TestMockImageRefreshRequest, SuccessOpFeatures) {
   REQUIRE_FORMAT_V2();