]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
librbd/api: fix group promote completely
authorPrasanna Kumar Kalever <prasanna.kalever@redhat.com>
Thu, 17 Apr 2025 13:23:05 +0000 (18:53 +0530)
committerIlya Dryomov <idryomov@gmail.com>
Sun, 28 Sep 2025 18:25:04 +0000 (20:25 +0200)
* do not group/image demote on undo (promote failure)
* wait for demote snapshot sync to complete  locally before accepting and continue
  to actually perform group promote.

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
src/librbd/api/Mirror.cc

index 8c4b443f019aef7edd2d4b3f92d2af79dbe38aef..83e72fa4f4c7625f44dd4a00452e44ed4d0416b1 100644 (file)
@@ -520,7 +520,8 @@ std::string prepare_primary_mirror_snap_name(CephContext *cct,
 
 int get_last_mirror_snapshot_state(librados::IoCtx &group_ioctx,
                                    const std::string &group_id,
-                                   cls::rbd::MirrorSnapshotState *state) {
+                                   cls::rbd::MirrorSnapshotState *state,
+                                   cls::rbd::GroupSnapshotState *sync) {
   std::vector<cls::rbd::GroupSnapshot> snaps;
 
   C_SaferCond cond;
@@ -539,6 +540,9 @@ int get_last_mirror_snapshot_state(librados::IoCtx &group_ioctx,
     if (ns != nullptr) {
       // XXXMG: check primary_mirror_uuid matches?
       *state = ns->state;
+      if (sync != nullptr) {
+        *sync = it->state;
+      }
       return 0;
     }
   }
@@ -2601,13 +2605,6 @@ void remove_interim_snapshots(IoCtx& group_ioctx,
   CephContext *cct = (CephContext *)group_ioctx.cct();
   ldout(cct, 20) << dendl;
 
-  int r = cls_client::group_snap_remove(&group_ioctx, group_header_oid,
-      group_snap->id);
-  if (r < 0) {
-    lderr(cct) << "failed to remove group snapshot metadata: "
-               << cpp_strerror(r) << dendl;
-  }
-
   std::vector<C_SaferCond*> on_finishes(image_ctxs->size(), nullptr);
   for (size_t i = 0; i < image_ctxs->size(); ++i) {
     auto snap_id = group_snap->snaps[i].snap_id;
@@ -2640,6 +2637,7 @@ void remove_interim_snapshots(IoCtx& group_ioctx,
     on_finishes[i] = on_finish;
   }
 
+  int r;
   for (int i = 0, n = image_ctxs->size(); i < n; ++i) {
     if (!on_finishes[i]) {
       continue;
@@ -2654,6 +2652,13 @@ void remove_interim_snapshots(IoCtx& group_ioctx,
       // just report error, but don't abort the process
     }
   }
+
+  r = cls_client::group_snap_remove(&group_ioctx, group_header_oid,
+                                    group_snap->id);
+  if (r < 0) {
+    lderr(cct) << "failed to remove group snapshot metadata: "
+               << cpp_strerror(r) << dendl;
+  }
 }
 
 template <typename I>
@@ -2958,7 +2963,7 @@ int Mirror<I>::group_disable(IoCtx& group_ioctx, const char *group_name,
   }
 
   cls::rbd::MirrorSnapshotState state;
-  r = get_last_mirror_snapshot_state(group_ioctx, group_id, &state);
+  r = get_last_mirror_snapshot_state(group_ioctx, group_id, &state, nullptr);
   if (r == -ENOENT) {
     ldout(cct, 10) << "mirroring for group " << group_name
                    << " already disabled" << dendl;
@@ -3098,7 +3103,8 @@ int Mirror<I>::group_promote(IoCtx& group_ioctx, const char *group_name,
   }
 
   cls::rbd::MirrorSnapshotState state;
-  r = get_last_mirror_snapshot_state(group_ioctx, group_id, &state);
+  cls::rbd::GroupSnapshotState sync;
+  r = get_last_mirror_snapshot_state(group_ioctx, group_id, &state, &sync);
   if (r == -ENOENT) {
     state = cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED; // XXXMG?
     r = 0;
@@ -3110,9 +3116,11 @@ int Mirror<I>::group_promote(IoCtx& group_ioctx, const char *group_name,
   if (state == cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY) {
     lderr(cct) << "group " << group_name << " is already primary" << dendl;
     return -EINVAL;
-  } else if (state == cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY && !force) {
+  } else if (!force && (state == cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY ||
+                        sync != cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE)) {
     lderr(cct) << "group " << group_name
-               << " is still primary within a remote cluster" << dendl;
+               << " is primary within a remote cluster or demotion is not propagated yet"
+               << dendl;
     return -EBUSY;
   }
 
@@ -3277,10 +3285,9 @@ int Mirror<I>::group_promote(IoCtx& group_ioctx, const char *group_name,
   int ret_code = 0;
   std::vector<uint64_t> snap_ids(image_ctxs.size(), CEPH_NOSNAP);
   std::vector<C_SaferCond*> on_finishes(image_ctxs.size(), nullptr);
-  C_SaferCond* on_finish;
 
   for (size_t i = 0; i < image_ctxs.size(); i++) {
-    on_finish = new C_SaferCond;
+    C_SaferCond* on_finish = new C_SaferCond;
     image_promote(image_ctxs[i], group_snap_id, force, &snap_ids[i], on_finish);
     on_finishes[i] = on_finish;
   }
@@ -3303,44 +3310,7 @@ int Mirror<I>::group_promote(IoCtx& group_ioctx, const char *group_name,
     // undo
     ldout(cct, 20) << "undoing group promote: " << ret_code << dendl;
     remove_interim_snapshots(group_ioctx, group_header_oid, &image_ctxs, &group_snap);
-    std::fill(snap_ids.begin(), snap_ids.end(), CEPH_NOSNAP);
-    group_snap.snaps.clear();
-    close_images(&image_ctxs);
-
-    r = prepare_group_images(group_ioctx, group_id, mirror_group.state,
-                             &image_ctxs, &group_snap, quiesce_requests,
-                             cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED,
-                             &mirror_peer_uuids,
-                             SNAP_CREATE_FLAG_SKIP_NOTIFY_QUIESCE);
-    if (r != 0) {
-      return r;
-    }
-    for (size_t i = 0; i < image_ctxs.size(); ++i) {
-      ImageCtx *ictx = image_ctxs[i];
-      ldout(cct, 20) << "demoting individual image: "
-                     << ictx->name.c_str() << dendl;
-      on_finish = new C_SaferCond;
-      image_demote(ictx, group_snap_id, &snap_ids[i], on_finish);
-      on_finishes[i] = on_finish;
-    }
-
-    for (int i = 0, n = image_ctxs.size(); i < n; ++i) {
-      if (!on_finishes[i]) {
-        continue;
-      }
-      r = on_finishes[i]->wait();
-      delete on_finishes[i];
-      if (r < 0) {
-        lderr(cct) << "failed demoting image: " << image_ctxs[i]->name << ": "
-                   << cpp_strerror(r) << dendl;
-        // just report error, but don't abort the process
-      } else{
-        group_snap.snaps[i].snap_id = snap_ids[i];
-      }
-    }
-  }
-
-  if (!ret_code) {
+  } else if (!ret_code) {
     group_snap.state = cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE;
     r = cls_client::group_snap_set(&group_ioctx, group_header_oid, group_snap);
     if (r < 0) {
@@ -3392,7 +3362,7 @@ int Mirror<I>::group_demote(IoCtx& group_ioctx,
   }
 
   cls::rbd::MirrorSnapshotState state;
-  r = get_last_mirror_snapshot_state(group_ioctx, group_id, &state);
+  r = get_last_mirror_snapshot_state(group_ioctx, group_id, &state, nullptr);
   if (r == -ENOENT) {
     state = cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY;
     r = 0;
@@ -3550,7 +3520,7 @@ int Mirror<I>::group_resync(IoCtx& group_ioctx, const char *group_name) {
   }
 
   cls::rbd::MirrorSnapshotState state;
-  r = get_last_mirror_snapshot_state(group_ioctx, group_id, &state);
+  r = get_last_mirror_snapshot_state(group_ioctx, group_id, &state, nullptr);
   if (r == -ENOENT) {
     state = cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY;
     r = 0;
@@ -3815,7 +3785,7 @@ int Mirror<I>::group_info_list(librados::IoCtx& io_ctx,
       info.state = static_cast<rbd_mirror_group_state_t>(group.state);
 
       cls::rbd::MirrorSnapshotState promotion_state;
-      r = get_last_mirror_snapshot_state(io_ctx, group_id, &promotion_state);
+      r = get_last_mirror_snapshot_state(io_ctx, group_id, &promotion_state, nullptr);
       if (r == -ENOENT) {
        promotion_state = cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY;
       } else if (r < 0) {