]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rbd: remove the incomplete primary snapshot
authorPrasanna Kumar Kalever <prasanna.kalever@redhat.com>
Mon, 6 Mar 2023 09:58:03 +0000 (15:28 +0530)
committerPrasanna Kumar Kalever <prasanna.kalever@redhat.com>
Tue, 18 Apr 2023 15:04:18 +0000 (20:34 +0530)
Problem:
-------
At a high level, creating a primary snapshot consists of three steps:

1. actually creating a snapshot in the mirror namespace
2. generating a set of image state objects with additional metadata for
   the snapshot
3. marking the snapshot as complete after the image state objects are
   written out

Depending on the circumstances, a request to create a primary snapshot
can be forwarded to rbd-mirror daemon.  If that happens and rbd-mirror
daemon gets axed for some practical reason after completing steps (1)
and/or (2) but before completing step (3), we are left with a
permanently incomplete primary snapshot because upon retrying that
primary snapshot creation request, librbd notices that such snapshot
already exists.  It does not check whether this "pre-existing" snapshot
is complete.

Solution:
--------
As part of the next mirror snapshot create (say triggered by the
scheduler) the unlink_peer() is called, it checks if there exists any
incomplete snapshot and delete them accordingly.

Fixes: https://tracker.ceph.com/issues/58887
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2120624
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
src/librbd/mirror/snapshot/CreatePrimaryRequest.cc
src/test/librbd/mirror/snapshot/test_mock_CreatePrimaryRequest.cc

index 36cd3bddfc794d630037ec3ae28833f1fb2b0c7b..5a6115cd4f4ec56d483114ecde01c5ad5b85dd7d 100644 (file)
@@ -209,14 +209,15 @@ void CreatePrimaryRequest<I>::unlink_peer() {
       // or if it's not linked with any peer (happens if mirroring is enabled
       // on a pool with no peers configured or if UnlinkPeerRequest gets
       // interrupted)
-      if (info->mirror_peer_uuids.size() == 0) {
+      if (!info->mirror_peer_uuids.empty() &&
+          info->mirror_peer_uuids.count(peer) == 0) {
+        continue;
+      }
+      if (info->mirror_peer_uuids.empty() || !info->complete) {
         peer_uuid = peer;
         snap_id = snap_it.first;
         break;
       }
-      if (info->mirror_peer_uuids.count(peer) == 0) {
-        continue;
-      }
       count++;
       if (count == max_snapshots) {
         unlink_snap_id = snap_it.first;
index 0966810ea8d8b6f4c872b70c69c14642312c6425..f2169671bb688b151c0a44c1e83be9fd256b04c5 100644 (file)
@@ -157,7 +157,10 @@ public:
                            if (r != 0) {
                              return;
                            }
-                           snap_create(mock_image_ctx, ns, snap_name);
+                           auto mirror_ns =
+                             std::get<cls::rbd::MirrorSnapshotNamespace>(ns);
+                           mirror_ns.complete = true;
+                           snap_create(mock_image_ctx, mirror_ns, snap_name);
                          }),
                   WithArg<4>(CompleteContext(
                                r, mock_image_ctx.image_ctx->op_work_queue))
@@ -167,10 +170,10 @@ public:
   void expect_unlink_peer(MockTestImageCtx &mock_image_ctx,
                           MockUnlinkPeerRequest &mock_unlink_peer_request,
                           uint64_t snap_id, const std::string &peer_uuid,
-                          bool is_linked, int r) {
+                          bool is_linked, bool complete, int r) {
     EXPECT_CALL(mock_unlink_peer_request, send())
       .WillOnce(Invoke([&mock_image_ctx, &mock_unlink_peer_request,
-                        snap_id, peer_uuid, is_linked, r]() {
+                        snap_id, peer_uuid, is_linked, complete, r]() {
                          ASSERT_EQ(mock_unlink_peer_request.mirror_peer_uuid,
                                    peer_uuid);
                          ASSERT_EQ(mock_unlink_peer_request.snap_id, snap_id);
@@ -181,6 +184,7 @@ public:
                              std::get_if<cls::rbd::MirrorSnapshotNamespace>(
                                &it->second.snap_namespace);
                            ASSERT_NE(nullptr, info);
+                           ASSERT_EQ(complete, info->complete);
                            ASSERT_EQ(is_linked, info->mirror_peer_uuids.erase(
                                      peer_uuid));
                            if (info->mirror_peer_uuids.empty()) {
@@ -288,6 +292,40 @@ TEST_F(TestMockMirrorSnapshotCreatePrimaryRequest, CreateSnapshotError) {
   ASSERT_EQ(-EINVAL, ctx.wait());
 }
 
+TEST_F(TestMockMirrorSnapshotCreatePrimaryRequest, SuccessUnlinkIncomplete) {
+  REQUIRE_FORMAT_V2();
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ictx->config.set_val("rbd_mirroring_max_mirroring_snapshots", "3");
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  cls::rbd::MirrorSnapshotNamespace ns{
+    cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {"uuid"}, "", CEPH_NOSNAP};
+  ns.complete = false;
+  snap_create(mock_image_ctx, ns, "mirror_snap");
+
+  InSequence seq;
+
+  expect_clone_md_ctx(mock_image_ctx);
+  MockUtils mock_utils;
+  expect_can_create_primary_snapshot(mock_utils, false, false, true);
+  expect_get_mirror_peers(mock_image_ctx,
+                          {{"uuid", cls::rbd::MIRROR_PEER_DIRECTION_TX, "ceph",
+                            "mirror", "mirror uuid"}}, 0);
+  expect_create_snapshot(mock_image_ctx, 0);
+  MockUnlinkPeerRequest mock_unlink_peer_request;
+  auto it = mock_image_ctx.snap_info.rbegin();
+  auto snap_id = it->first;
+  expect_unlink_peer(mock_image_ctx, mock_unlink_peer_request, snap_id, "uuid",
+                     true, false, 0);
+  C_SaferCond ctx;
+  auto req = new MockCreatePrimaryRequest(&mock_image_ctx, "gid", CEPH_NOSNAP,
+                                          0U, 0U, nullptr, &ctx);
+  req->send();
+  ASSERT_EQ(0, ctx.wait());
+}
+
 TEST_F(TestMockMirrorSnapshotCreatePrimaryRequest, SuccessUnlinkPeer) {
   REQUIRE_FORMAT_V2();
 
@@ -299,6 +337,7 @@ TEST_F(TestMockMirrorSnapshotCreatePrimaryRequest, SuccessUnlinkPeer) {
   for (int i = 0; i < 3; i++) {
     cls::rbd::MirrorSnapshotNamespace ns{
       cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {"uuid"}, "", CEPH_NOSNAP};
+    ns.complete = true;
     snap_create(mock_image_ctx, ns, "mirror_snap");
   }
 
@@ -315,7 +354,7 @@ TEST_F(TestMockMirrorSnapshotCreatePrimaryRequest, SuccessUnlinkPeer) {
   auto it = mock_image_ctx.snap_info.rbegin();
   auto snap_id = it->first;
   expect_unlink_peer(mock_image_ctx, mock_unlink_peer_request, snap_id, "uuid",
-                     true, 0);
+                     true, true, 0);
   C_SaferCond ctx;
   auto req = new MockCreatePrimaryRequest(&mock_image_ctx, "gid", CEPH_NOSNAP,
                                           0U, 0U, nullptr, &ctx);
@@ -333,6 +372,7 @@ TEST_F(TestMockMirrorSnapshotCreatePrimaryRequest, SuccessUnlinkNoPeer) {
   MockTestImageCtx mock_image_ctx(*ictx);
   cls::rbd::MirrorSnapshotNamespace ns{
     cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {}, "", CEPH_NOSNAP};
+  ns.complete = true;
   snap_create(mock_image_ctx, ns, "mirror_snap");
 
   InSequence seq;
@@ -347,9 +387,8 @@ TEST_F(TestMockMirrorSnapshotCreatePrimaryRequest, SuccessUnlinkNoPeer) {
   MockUnlinkPeerRequest mock_unlink_peer_request;
   auto it = mock_image_ctx.snap_info.rbegin();
   auto snap_id = it->first;
-  std::list<std::string> peer_uuids = {"uuid"};
   expect_unlink_peer(mock_image_ctx, mock_unlink_peer_request, snap_id, "uuid",
-                     false, 0);
+                     false, true, 0);
 
   C_SaferCond ctx;
   auto req = new MockCreatePrimaryRequest(&mock_image_ctx, "gid", CEPH_NOSNAP,
@@ -370,6 +409,7 @@ TEST_F(TestMockMirrorSnapshotCreatePrimaryRequest, SuccessUnlinkMultiplePeers) {
     cls::rbd::MirrorSnapshotNamespace ns{
       cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {"uuid1", "uuid2"}, "",
       CEPH_NOSNAP};
+    ns.complete = true;
     snap_create(mock_image_ctx, ns, "mirror_snap");
   }
 
@@ -388,9 +428,9 @@ TEST_F(TestMockMirrorSnapshotCreatePrimaryRequest, SuccessUnlinkMultiplePeers) {
   auto it = mock_image_ctx.snap_info.rbegin();
   auto snap_id = it->first;
   expect_unlink_peer(mock_image_ctx, mock_unlink_peer_request, snap_id, "uuid1",
-                     true, 0);
+                     true, true, 0);
   expect_unlink_peer(mock_image_ctx, mock_unlink_peer_request, snap_id, "uuid2",
-                     true, 0);
+                     true, true, 0);
   C_SaferCond ctx;
   auto req = new MockCreatePrimaryRequest(&mock_image_ctx, "gid", CEPH_NOSNAP,
                                           0U, 0U, nullptr, &ctx);