]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rbd-mirror: validate that remote start snapshot still exists 39961/head
authorJason Dillaman <dillaman@redhat.com>
Fri, 11 Dec 2020 00:31:45 +0000 (19:31 -0500)
committerJason Dillaman <dillaman@redhat.com>
Tue, 9 Mar 2021 21:14:01 +0000 (16:14 -0500)
Perform a basic sanity check to verify that the remote start snapshot
still exists. This was previosly being deleted as part of the unlink
process due to a race condition between the remote side completing
a sync between snapshots 1 and 2 and snapshot 2 being unlinked due
to reaching max snapshots.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
(cherry picked from commit fb69efc6c19e6ee2bd8947129fb7f35442acb907)

src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc
src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc

index 4ce139d21f4e0da70052b4432a6c5cc8ff496141..fe1a0e3f92e632679680b6c127f4068e0a1150b7 100644 (file)
@@ -786,6 +786,10 @@ TEST_F(TestMockImageReplayerSnapshotReplayer, SyncSnapshot) {
   expect_is_refresh_required(mock_remote_image_ctx, true);
   expect_refresh(
     mock_remote_image_ctx, {
+      {1U, librbd::SnapInfo{"snap1", cls::rbd::MirrorSnapshotNamespace{
+         cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {"remote mirror peer uuid"},
+         "", CEPH_NOSNAP, true, 0, {}},
+       0, {}, 0, 0, {}}},
       {2U, librbd::SnapInfo{"snap2", cls::rbd::UserSnapshotNamespace{},
        0, {}, 0, 0, {}}},
       {3U, librbd::SnapInfo{"snap3", cls::rbd::MirrorSnapshotNamespace{
@@ -839,6 +843,10 @@ TEST_F(TestMockImageReplayerSnapshotReplayer, SyncSnapshot) {
   expect_is_refresh_required(mock_remote_image_ctx, true);
   expect_refresh(
     mock_remote_image_ctx, {
+      {1U, librbd::SnapInfo{"snap1", cls::rbd::MirrorSnapshotNamespace{
+         cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {"remote mirror peer uuid"},
+         "", CEPH_NOSNAP, true, 0, {}},
+       0, {}, 0, 0, {}}},
       {2U, librbd::SnapInfo{"snap2", cls::rbd::UserSnapshotNamespace{},
        0, {}, 0, 0, {}}},
       {3U, librbd::SnapInfo{"snap3", cls::rbd::MirrorSnapshotNamespace{
@@ -862,7 +870,14 @@ TEST_F(TestMockImageReplayerSnapshotReplayer, SyncSnapshot) {
          4, true, 0, {}},
        0, {}, 0, 0, {}}},
     }, 0);
-  expect_is_refresh_required(mock_remote_image_ctx, false);
+  expect_is_refresh_required(mock_remote_image_ctx, true);
+  expect_refresh(
+    mock_remote_image_ctx, {
+      {4U, librbd::SnapInfo{"snap4", cls::rbd::MirrorSnapshotNamespace{
+         cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {"remote mirror peer uuid"},
+         "", CEPH_NOSNAP, true, 0, {}},
+       0, {}, 0, 0, {}}}
+    }, 0);
 
   // fire init
   C_SaferCond init_ctx;
@@ -2010,6 +2025,74 @@ TEST_F(TestMockImageReplayerSnapshotReplayer, SplitBrain) {
                                         mock_remote_image_ctx));
 }
 
+TEST_F(TestMockImageReplayerSnapshotReplayer, RemoteSnapshotMissingSplitBrain) {
+  librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx};
+  librbd::MockTestImageCtx mock_remote_image_ctx{*m_remote_image_ctx};
+
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+
+  MockReplayerListener mock_replayer_listener;
+  expect_notification(mock_threads, mock_replayer_listener);
+
+  InSequence seq;
+
+  MockInstanceWatcher mock_instance_watcher;
+  MockImageMeta mock_image_meta;
+  MockStateBuilder mock_state_builder(mock_local_image_ctx,
+                                      mock_remote_image_ctx,
+                                      mock_image_meta);
+  MockReplayer mock_replayer{&mock_threads, &mock_instance_watcher,
+                             "local mirror uuid", &m_pool_meta_cache,
+                             &mock_state_builder, &mock_replayer_listener};
+  m_pool_meta_cache.set_remote_pool_meta(
+    m_remote_io_ctx.get_id(),
+    {"remote mirror uuid", "remote mirror peer uuid"});
+
+  librbd::UpdateWatchCtx* update_watch_ctx = nullptr;
+  ASSERT_EQ(0, init_entry_replayer(mock_replayer, mock_threads,
+                                   mock_local_image_ctx,
+                                   mock_remote_image_ctx,
+                                   mock_replayer_listener,
+                                   mock_image_meta,
+                                   &update_watch_ctx));
+
+  // inject a missing remote start snap (deleted)
+  mock_local_image_ctx.snap_info = {
+    {11U, librbd::SnapInfo{"snap3", cls::rbd::MirrorSnapshotNamespace{
+       cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY, {},
+       "remote mirror uuid", 1, true, 0,
+       {{1, CEPH_NOSNAP}}},
+     0, {}, 0, 0, {}}}};
+  mock_remote_image_ctx.snap_info = {
+    {2U, librbd::SnapInfo{"snap2", cls::rbd::MirrorSnapshotNamespace{
+       cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {"remote mirror peer uuid"},
+       "", CEPH_NOSNAP, true, 0, {}},
+     0, {}, 0, 0, {}}},
+    {3U, librbd::SnapInfo{"snap3", cls::rbd::MirrorSnapshotNamespace{
+       cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {"remote mirror peer uuid"},
+       "", CEPH_NOSNAP, true, 0, {}},
+     0, {}, 0, 0, {}}}};
+
+  // split-brain due to missing snapshot 1
+  expect_load_image_meta(mock_image_meta, false, 0);
+  expect_is_refresh_required(mock_local_image_ctx, false);
+  expect_is_refresh_required(mock_remote_image_ctx, false);
+
+  // wake-up replayer
+  update_watch_ctx->handle_notify();
+
+  // wait for sync to complete and expect replay complete
+  ASSERT_EQ(0, wait_for_notification(1));
+  ASSERT_FALSE(mock_replayer.is_replaying());
+  ASSERT_EQ(-EEXIST, mock_replayer.get_error_code());
+  ASSERT_EQ(std::string{"split-brain"}, mock_replayer.get_error_description());
+
+  ASSERT_EQ(0, shut_down_entry_replayer(mock_replayer, mock_threads,
+                                        mock_local_image_ctx,
+                                        mock_remote_image_ctx));
+}
+
 TEST_F(TestMockImageReplayerSnapshotReplayer, RemoteFailover) {
   librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx};
   librbd::MockTestImageCtx mock_remote_image_ctx{*m_remote_image_ctx};
index f46270c927fa554515c988efa4d5750f6bf43be7..b2a1c150cb08020d2c543acf6db961ae7d4ed7d8 100644 (file)
@@ -616,43 +616,54 @@ void Replayer<I>::scan_remote_mirror_snapshots(
     m_remote_snap_id_end = remote_snap_id;
     m_remote_mirror_snap_ns = *mirror_ns;
   }
-  image_locker.unlock();
-
-  unlink_snap_ids.erase(m_remote_snap_id_start);
-  unlink_snap_ids.erase(m_remote_snap_id_end);
-  if (!unlink_snap_ids.empty()) {
-    locker->unlock();
 
-    // retry the unlinking process for a remote snapshot that we do not
-    // need anymore
-    auto remote_snap_id = *unlink_snap_ids.begin();
-    dout(10) << "unlinking from remote snapshot " << remote_snap_id << dendl;
-    unlink_peer(remote_snap_id);
-    return;
+  if (m_remote_snap_id_start != 0 &&
+      remote_image_ctx->snap_info.count(m_remote_snap_id_start) == 0) {
+    // the remote start snapshot was deleted out from under us
+    derr << "failed to locate remote start snapshot: "
+         << "snap_id=" << m_remote_snap_id_start << dendl;
+    split_brain = true;
   }
 
-  if (m_remote_snap_id_end != CEPH_NOSNAP) {
-    dout(10) << "found remote mirror snapshot: "
-             << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
-             << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
-             << "remote_snap_ns=" << m_remote_mirror_snap_ns << dendl;
-    if (m_remote_mirror_snap_ns.complete) {
+  image_locker.unlock();
+
+  if (!split_brain) {
+    unlink_snap_ids.erase(m_remote_snap_id_start);
+    unlink_snap_ids.erase(m_remote_snap_id_end);
+    if (!unlink_snap_ids.empty()) {
       locker->unlock();
 
-      if (m_local_snap_id_end != CEPH_NOSNAP &&
-          !m_local_mirror_snap_ns.complete) {
-        // attempt to resume image-sync
-        dout(10) << "local image contains in-progress mirror snapshot"
-                 << dendl;
-        get_local_image_state();
+      // retry the unlinking process for a remote snapshot that we do not
+      // need anymore
+      auto remote_snap_id = *unlink_snap_ids.begin();
+      dout(10) << "unlinking from remote snapshot " << remote_snap_id << dendl;
+      unlink_peer(remote_snap_id);
+      return;
+    }
+
+    if (m_remote_snap_id_end != CEPH_NOSNAP) {
+      dout(10) << "found remote mirror snapshot: "
+               << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
+               << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
+               << "remote_snap_ns=" << m_remote_mirror_snap_ns << dendl;
+      if (m_remote_mirror_snap_ns.complete) {
+        locker->unlock();
+
+        if (m_local_snap_id_end != CEPH_NOSNAP &&
+            !m_local_mirror_snap_ns.complete) {
+          // attempt to resume image-sync
+          dout(10) << "local image contains in-progress mirror snapshot"
+                   << dendl;
+          get_local_image_state();
+        } else {
+          copy_snapshots();
+        }
+        return;
       } else {
-        copy_snapshots();
+        // might have raced with the creation of a remote mirror snapshot
+        // so we will need to refresh and rescan once it completes
+        dout(15) << "remote mirror snapshot not complete" << dendl;
       }
-      return;
-    } else {
-      // might have raced with the creation of a remote mirror snapshot
-      // so we will need to refresh and rescan once it completes
-      dout(15) << "remote mirror snapshot not complete" << dendl;
     }
   }