]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rbd-mirror: avoid altering local group snap vector elements
authorPrasanna Kumar Kalever <prasanna.kalever@redhat.com>
Tue, 27 May 2025 07:52:37 +0000 (13:22 +0530)
committerIlya Dryomov <idryomov@gmail.com>
Sun, 28 Sep 2025 18:25:05 +0000 (20:25 +0200)
Now the m_local_group_snaps is dedicated to ListSnapshotsRequest() locally
and is unaltered.

Note, this patch fixes the crash, that can be seen with user snap
created, then remove and then created with the same name.

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
qa/workunits/rbd/rbd_mirror_group_simple.sh
src/tools/rbd_mirror/group_replayer/Replayer.cc
src/tools/rbd_mirror/group_replayer/Replayer.h

index 5735307991452b932b340cea79f80310614cf34c..ac184eb2f43c0bbcaefcf7dac2a7a99f142b4d98 100755 (executable)
@@ -277,6 +277,7 @@ test_group_rename()
 
   wait_for_group_present "${secondary_cluster}" "${pool}" "${group}" "${image_count}"
   wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group}" "${image_count}"
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}/${group}" 'up+replaying' "${image_count}"
 
   group_rename "${primary_cluster}" "${pool}/${group}" "${pool}/${group}_renamed"
 
@@ -299,6 +300,7 @@ test_group_rename()
 
   wait_for_group_present "${secondary_cluster}" "${pool}" "${group1}" "${image_count}"
   wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group1}" "${image_count}"
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}/${group1}" 'up+replaying' "${image_count}"
 
   group_rename "${primary_cluster}" "${pool}/${group1}" "${pool}/${group1}_renamed"
 
index d1c08a8a7b60350ef75c42ba733be4322fe2de79..db0eec3282fe796cefd0ffc70566cf6fbfd09c4b 100644 (file)
@@ -349,23 +349,21 @@ void Replayer<I>::load_local_group_snapshots() {
   //FIXME: This is not accessed under a lock
   if (!m_local_group_snaps.empty()) {
     for (auto &local_snap : m_local_group_snaps) {
+      // skip validation for already complete snapshots
       if (local_snap.state == cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE) {
         continue;
       }
+
+      // skip validation for primary snapshots
       auto ns = std::get_if<cls::rbd::GroupSnapshotNamespaceMirror>(
           &local_snap.snapshot_namespace);
       if (ns != nullptr &&
           ns->state == cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY) {
         continue;
       }
-      // do not forward until previous local mirror group snapshot is COMPLETE
+
+      // validate incomplete non-primary mirror or regular snapshots
       validate_image_snaps_sync_complete(local_snap.id);
-      auto snap_type = cls::rbd::get_group_snap_namespace_type(
-          local_snap.snapshot_namespace);
-      if (snap_type == cls::rbd::GROUP_SNAPSHOT_NAMESPACE_TYPE_MIRROR) {
-        m_retry_validate_snap = true;
-        break;
-      }
     }
   }
 
@@ -415,7 +413,8 @@ void Replayer<I>::handle_load_local_group_snapshots(int r) {
         locker.unlock();
         handle_replay_complete(0, "orphan (force promoting)");
         return;
-      } else if (m_retry_validate_snap) {
+      } else if (m_retry_validate_snap ||
+                 it->state == cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE) {
         m_retry_validate_snap = false;
         locker.unlock();
         schedule_load_group_snapshots();
@@ -967,11 +966,10 @@ void Replayer<I>::create_mirror_snapshot(
     {}, cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE};
   local_snap.name = prepare_non_primary_mirror_snap_name(m_global_group_id,
       group_snap_id);
-  m_local_group_snaps.push_back(local_snap);
 
   auto comp = create_rados_callback(
-      new LambdaContext([this, snap, on_finish](int r) {
-        handle_create_mirror_snapshot(r, snap, on_finish);
+      new LambdaContext([this, group_snap_id, on_finish](int r) {
+        handle_create_mirror_snapshot(r, group_snap_id, on_finish);
         }));
 
   librados::ObjectWriteOperation op;
@@ -985,19 +983,8 @@ void Replayer<I>::create_mirror_snapshot(
 
 template <typename I>
 void Replayer<I>::handle_create_mirror_snapshot(
-    int r, cls::rbd::GroupSnapshot *snap, Context *on_finish) {
-  dout(10) << snap->id << ", r=" << r << dendl;
-
-  if (r < 0) { // clean the group snapshot here
-    dout(10) << "cleaning snapshot as failed group_snap_set previously with : "
-             << snap->id << ", this will be attempted to sync later" << dendl;
-    r = librbd::cls_client::group_snap_remove(&m_local_io_ctx,
-        librbd::util::group_header_name(m_local_group_id), snap->id);
-    if (r < 0) {
-      derr << "failed to remove group snapshot : "
-           << snap->id << " : " << cpp_strerror(r) << dendl;
-    }
-  }
+    int r, const std::string &group_snap_id, Context *on_finish) {
+  dout(10) << group_snap_id << ", r=" << r << dendl;
 
   on_finish->complete(r);
 }
@@ -1253,7 +1240,6 @@ void Replayer<I>::create_regular_snapshot(
     cls::rbd::GroupSnapshotNamespaceUser{},
       snap->name,
       cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE};
-  m_local_group_snaps.push_back(group_snap);
 
   librbd::cls_client::group_snap_set(&op, group_snap);
   auto comp = create_rados_callback(
@@ -1272,25 +1258,6 @@ void Replayer<I>::handle_create_regular_snapshot(
     int r, const std::string &group_snap_id, Context *on_finish) {
   dout(10) << group_snap_id << ", r=" << r << dendl;
 
-  if (r < 0) { // clean the group snapshot here
-    dout(10) << "cleaning snapshot as failed group_snap_set previously with : "
-             << group_snap_id << ", this will be attempted to sync later"
-             << dendl;
-    r = librbd::cls_client::group_snap_remove(&m_local_io_ctx,
-        librbd::util::group_header_name(m_local_group_id), group_snap_id);
-    if (r < 0) {
-      derr << "failed to remove group snapshot : "
-           << group_snap_id << " : " << cpp_strerror(r) << dendl;
-    }
-    for (auto local_snap = m_local_group_snaps.begin();
-        local_snap != m_local_group_snaps.end(); ++local_snap) {
-      if (local_snap->id != group_snap_id) {
-        continue;
-      }
-      m_local_group_snaps.erase(local_snap);
-    }
-  } // Note IR's don't need setting set_image_replayer_limits() for regular snaps.
-
   on_finish->complete(r);
 }
 
@@ -1472,6 +1439,7 @@ template <typename I>
 void Replayer<I>::handle_regular_snapshot_complete(
     int r, const std::string &group_snap_id, Context *on_finish) {
   dout(10) << group_snap_id << ", r=" << r << dendl;
+
   on_finish->complete(r);
 }
 
index 521ec95112e3ce547b9a861b96cdf0783c56785e..6a1179a80b5ad294cee25d15e6ff11c483cc32a7 100644 (file)
@@ -161,7 +161,7 @@ private:
     std::unique_lock<ceph::mutex> &locker,
     Context *on_finish);
   void handle_create_mirror_snapshot(
-    int r, cls::rbd::GroupSnapshot *snap, Context *on_finish);
+    int r, const std::string &group_snap_id, Context *on_finish);
 
   std::string prepare_non_primary_mirror_snap_name(
     const std::string &global_group_id, const std::string &snap_id);