]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
librbd: support group rollback to a snapshot with different membership wip-rbd-cgsm-base-plus-semi-dyn
authorPrasanna Kumar Kalever <prasanna.kalever@redhat.com>
Tue, 13 Jan 2026 07:27:58 +0000 (12:57 +0530)
committerPrasanna Kumar Kalever <prasanna.kalever@redhat.com>
Thu, 19 Feb 2026 08:35:32 +0000 (14:05 +0530)
support rollback to a snapshot with a different group membership, which is
required for the semi-dynamic groups feature.

one scenario where this is needed is when a force-promote operation is issued
on a secondary node while it is synchronizing a snapshot that contains
add-image information.

this commit also adds the required test cases for the semi-dynamic groups
add-image capability, along with a dedicated test covering rollback after
an image has been added to the group.

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
qa/workunits/rbd/rbd_mirror_group_simple.sh
qa/workunits/rbd/rbd_mirror_helpers.sh
src/librbd/api/Group.cc
src/librbd/api/Group.h
src/librbd/api/Migration.cc
src/librbd/api/Mirror.cc
src/librbd/api/Mirror.h
src/librbd/librbd.cc
src/tools/rbd_mirror/GroupReplayer.cc
src/tools/rbd_mirror/InstanceReplayer.cc
src/tools/rbd_mirror/PoolWatcher.cc

index cdc1d366524e3b6bca76e20647a79aabbd2a24b4..ba79d62fb1ecbd34f0ccf55774ad30d3af04b7de 100755 (executable)
@@ -968,12 +968,10 @@ test_mirrored_group_remove_all_images()
   check_daemon_running "${secondary_cluster}"
 }
 
-# create group then enable mirroring before adding images to the group.  Disable mirroring before removing group
-declare -a test_create_group_mirror_then_add_images_1=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${group0}" "${image_prefix}" 'false' 5)
-# create group then enable mirroring before adding images to the group.  Remove group with mirroring enabled
-declare -a test_create_group_mirror_then_add_images_2=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${group0}" "${image_prefix}" 'true' 5)
+# create group then enable mirroring before adding images to the group.
+declare -a test_create_group_mirror_then_add_images_1=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${group0}" "${image_prefix}" 5)
 
-test_create_group_mirror_then_add_images_scenarios=2
+test_create_group_mirror_then_add_images_scenarios=1
 
 test_create_group_mirror_then_add_images()
 {
@@ -982,54 +980,35 @@ test_create_group_mirror_then_add_images()
   local pool=$1 ; shift
   local group=$1 ; shift
   local image_prefix=$1 ; shift
-  local disable_before_remove=$1 ; shift
   local image_count=$(($1*"${image_multiplier}")) ; shift
 
+  check_daemon_running "${primary_cluster}" true
+  check_daemon_running "${secondary_cluster}" true
+
   group_create "${primary_cluster}" "${pool}/${group}"
   mirror_group_enable "${primary_cluster}" "${pool}/${group}"
 
   wait_for_group_present "${secondary_cluster}" "${pool}" "${group}" 0
   wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group}" 0
   wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+replaying' 0
-  if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
-    wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'down+unknown' 0
-  fi
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+stopped' 0
 
   images_create "${primary_cluster}" "${pool}/${image_prefix}" "${image_count}"
   group_images_add "${primary_cluster}" "${pool}/${group}" "${pool}/${image_prefix}" "${image_count}"
 
-  if [ -n "${RBD_MIRROR_NEW_IMPLICIT_BEHAVIOUR}" ]; then
-    # check secondary cluster sees 0 images
-    wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+replaying' 0
-    mirror_group_snapshot_and_wait_for_sync_complete "${secondary_cluster}" "${primary_cluster}" "${pool}"/"${group}"
-  fi
-
+  get_newest_complete_mirror_group_snapshot_id "${primary_cluster}" "${pool}/${group}" group_snap_id
+  wait_for_test_group_snap_present "${secondary_cluster}" "${pool}/${group}" "${group_snap_id}" 1
+  wait_for_group_snap_sync_complete "${secondary_cluster}" "${pool}/${group}" "${group_snap_id}"
   wait_for_group_present "${secondary_cluster}" "${pool}" "${group}" "${image_count}"
-  check_daemon_running "${secondary_cluster}"
-
   wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group}" "${image_count}"
-  check_daemon_running "${secondary_cluster}"
-
   wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+replaying' "${image_count}"
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+stopped' "${image_count}"
 
-  check_daemon_running "${secondary_cluster}"
-  if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
-    wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'down+unknown' 0
-  fi
-  check_daemon_running "${secondary_cluster}"
-
-  if [ 'false' != "${disable_before_remove}" ]; then
-    mirror_group_disable "${primary_cluster}" "${pool}/${group}"
-  fi
-
+  # tidy up
+  mirror_group_disable "${primary_cluster}" "${pool}/${group}"
   group_remove "${primary_cluster}" "${pool}/${group}"
-  check_daemon_running "${secondary_cluster}"
-
   wait_for_group_not_present "${primary_cluster}" "${pool}" "${group}"
-  check_daemon_running "${secondary_cluster}"
   wait_for_group_not_present "${secondary_cluster}" "${pool}" "${group}"
-  check_daemon_running "${secondary_cluster}"
-
   images_remove "${primary_cluster}" "${pool}/${image_prefix}" "${image_count}"
 }
 
@@ -3895,6 +3874,101 @@ test_demote_snap_sync_after_restart()
   check_daemon_running "${secondary_cluster}"
 }
 
+declare -a test_rollback_after_add_image_1=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${image_prefix}" 2)
+
+test_rollback_after_add_image_scenarios=1
+
+test_rollback_after_add_image()
+{
+  local primary_cluster=$1 ; shift
+  local secondary_cluster=$1 ; shift
+  local pool=$1 ; shift
+  local image_prefix=$1 ; shift
+  local image_count=$(($1*"${image_multiplier}")) ; shift
+
+  local snap0='snap_0'
+  check_daemon_running "${primary_cluster}" true
+  check_daemon_running "${secondary_cluster}" true
+
+  group_create "${primary_cluster}" "${pool}/${group0}"
+  image_create "${primary_cluster}" "${pool}/${image_prefix}0" 1G
+  image_create "${primary_cluster}" "${pool}/${image_prefix}1" 4G
+  group_images_add "${primary_cluster}" "${pool}/${group0}" "${pool}/${image_prefix}" "${image_count}"
+  mirror_group_enable "${primary_cluster}" "${pool}/${group0}"
+  wait_for_group_present "${secondary_cluster}" "${pool}" "${group0}" "${image_count}"
+  wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group0}" "${image_count}"
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+replaying' "${image_count}"
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group0}" 'up+stopped'
+  wait_for_group_synced "${primary_cluster}" "${pool}"/"${group0}" "${secondary_cluster}" "${pool}"/"${group0}"
+
+  write_image "${primary_cluster}" "${pool}" "${image_prefix}0" 10 4096
+  write_image "${primary_cluster}" "${pool}" "${image_prefix}1" 10 4096
+
+  create_snapshot "${primary_cluster}" "${pool}" "${image_prefix}0" "${snap0}"
+  create_snapshot "${primary_cluster}" "${pool}" "${image_prefix}1" "${snap0}"
+  mirror_group_snapshot_and_wait_for_sync_complete "${secondary_cluster}" "${primary_cluster}" "${pool}"/"${group0}"
+  write_image "${primary_cluster}" "${pool}" "${image_prefix}1" 1024 4194304
+
+  # add image to already enabled group for mirroring
+  new_image=test-image-new
+  image_create "${primary_cluster}" "${pool}/${new_image}" 1G
+  group_image_add "${primary_cluster}" "${pool}/${group0}" "${pool}/${new_image}"
+  wait_for_image_present "${secondary_cluster}" "${pool}" "${new_image}" 'present'
+  get_image_mirroring_global_id "${primary_cluster}" "${pool}/${new_image}" global_id
+  test_image_with_global_id_present "${secondary_cluster}" "${pool}" "${new_image}" "${global_id}"
+  local group_snap_id
+  get_newest_created_group_snapshot_id "${primary_cluster}" "${pool}/${group0}" group_snap_id
+  # make sure snapshot is present on secondary cluster
+  wait_for_group_snap_present "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}"
+
+  # stop the daemon to prevent further syncing of snapshots
+  stop_mirrors "${secondary_cluster}" '-9'
+  # make sure the snapshot remains in an incomplete state after the daemon is stopped
+  test_group_snap_sync_incomplete "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}"
+
+  # force promote the group on the secondary - should rollback to the last complete snapshot with only two images in it.
+  local old_primary_cluster
+  mirror_group_promote "${secondary_cluster}" "${pool}/${group0}" '--force'
+
+  old_primary_cluster="${primary_cluster}"
+  primary_cluster="${secondary_cluster}"
+  secondary_cluster="${old_primary_cluster}"
+
+  # check that we rolled back to snap0 state
+  compare_image_with_snapshot "${primary_cluster}" "${pool}/${image_prefix}0" "${primary_cluster}" "${pool}/${image_prefix}0@${snap0}"
+  compare_image_with_snapshot "${primary_cluster}" "${pool}/${image_prefix}1" "${primary_cluster}" "${pool}/${image_prefix}1@${snap0}"
+  # check that new image is not present
+  test_image_with_global_id_not_present "${primary_cluster}" "${pool}" "${new_image}" "${global_id}"
+
+  # restart daemon and wait for the right status to reflect
+  start_mirrors "${primary_cluster}"
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group0}" 'up+stopped'
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+stopped'
+
+  # demote and wait for split-brain to follow
+  mirror_group_demote "${secondary_cluster}" "${pool}/${group0}"
+  # Note the group contain 3 images
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+error' $(("${image_count}"+1)) 'split-brain'
+
+  # resync and verify status of mirror group snapshot
+  local group_id_before_resync
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_before_resync
+  mirror_group_resync "${secondary_cluster}" "${pool}/${group0}"
+  wait_for_group_id_changed  "${secondary_cluster}" "${pool}/${group0}" "${group_id_before_resync}"
+  wait_for_group_synced "${primary_cluster}" "${pool}"/"${group0}" "${secondary_cluster}" "${pool}"/"${group0}"
+
+  # TEST group membership has 2 images only and the respective groups reflect right mirror status
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group0}" 'up+stopped' ${image_count}
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+replaying' ${image_count}
+
+  # cleanup
+  mirror_group_disable "${primary_cluster}" "${pool}/${group0}"
+  group_remove "${primary_cluster}" "${pool}/${group0}"
+  wait_for_group_not_present "${primary_cluster}" "${pool}" "${group0}"
+  wait_for_group_not_present "${secondary_cluster}" "${pool}" "${group0}"
+  images_remove "${primary_cluster}" "${pool}/${image_prefix}" "${image_count}"
+}
+
 check_for_no_keys()
 {
   local primary_cluster=$1
@@ -4046,7 +4120,8 @@ run_all_tests()
   # This next test also requires dynamic groups - TODO enable
   # run_test_all_scenarios test_mirrored_group_add_and_remove_images
   # This next also requires dynamic groups - TODO enable
-  # run_test_all_scenarios test_create_group_mirror_then_add_images
+  run_test_all_scenarios test_create_group_mirror_then_add_images
+  run_test_all_scenarios test_rollback_after_add_image
   run_test_all_scenarios test_create_group_with_images_then_mirror
   # TODO: add the capabilty to have image from different pool in the mirror group
   run_test_all_scenarios test_images_different_pools
index 3ebc500d7a9cc69ed31269e3221682d37b5d10a8..925d31b36361cce3683bc53db7b03a7efaf340d4 100755 (executable)
@@ -2722,7 +2722,7 @@ test_group_snap_present()
     local group_snap_id=$3
     local expected_snap_count=$4
 
-    run_cmd "rbd --cluster ${cluster} group snap list ${group_spec} --format xml --pretty-format" 
+    try_cmd "rbd --cluster ${cluster} group snap list ${group_spec} --format xml --pretty-format" || :
 
     test "${expected_snap_count}" = "$(xmlstarlet sel -t -v "count(//group_snaps/group_snap[id='${group_snap_id}'])" < "$CMD_STDOUT")" || { fail; return 1; }
 }
index e351f5ce74399a08833c5c734d51aacfcdd5866b..0fad99cc3cff2fb34348d3d9c1eee032cb14fcbe 100644 (file)
@@ -304,7 +304,8 @@ template <typename I>
 int Group<I>::image_remove_by_id(librados::IoCtx& group_ioctx,
                                  const char *group_name,
                                  librados::IoCtx& image_ioctx,
-                                 const char *image_id)
+                                 const char *image_id,
+                                 bool force)
 {
   CephContext *cct = (CephContext *)group_ioctx.cct();
   ldout(cct, 20) << "io_ctx=" << &group_ioctx
@@ -332,11 +333,11 @@ int Group<I>::image_remove_by_id(librados::IoCtx& group_ioctx,
                << cpp_strerror(r) << dendl;
     return r;
   } else if (r == 0) {
-    if (mirror_group.state != cls::rbd::MIRROR_GROUP_STATE_DISABLED) {
+    if (!force && mirror_group.state != cls::rbd::MIRROR_GROUP_STATE_DISABLED) {
       lderr(cct) << "cannot remove image from mirror enabled group" << dendl;
       return -EINVAL;
     }
-    if (promotion_state != mirror::PROMOTION_STATE_PRIMARY) {
+    if (!force && promotion_state != mirror::PROMOTION_STATE_PRIMARY) {
       lderr(cct) << "group is not primary, cannot remove image" << dendl;
       return -EINVAL;
     }
index ae7cc4cd64044a85abe6d7ab3fb9a1febbbdb35d..2379b992dcec84416b66e39d0ac332ce0892b30b 100644 (file)
@@ -38,7 +38,8 @@ struct Group {
   static int image_remove_by_id(librados::IoCtx& group_ioctx,
                                 const char *group_name,
                                 librados::IoCtx& image_ioctx,
-                                const char *image_id);
+                                const char *image_id,
+                                bool force);
   static int image_list(librados::IoCtx& group_ioctx, const char *group_name,
                        std::vector<group_image_info_t> *images);
 
index 34cd0f1336903cc23ce323c4b8802843fbe4408c..835ade8ac2c193791dc033dc62ee34e347e3f296 100644 (file)
@@ -1647,7 +1647,8 @@ int Migration<I>::remove_group(I *image_ctx, group_info_t *group_info) {
   r = librbd::api::Group<I>::image_remove_by_id(group_ioctx,
                                                 group_info->name.c_str(),
                                                 image_ctx->md_ctx,
-                                                image_ctx->id.c_str());
+                                                image_ctx->id.c_str(),
+                                                false);
   if (r < 0) {
     lderr(m_cct) << "failed to remove image from group: " << cpp_strerror(r)
                  << dendl;
index 90793a8568afaa4efbac4d49347663f7575766f1..8b0a449181f8420a89f4816fe8ad53b58a7e57dc 100644 (file)
@@ -3071,6 +3071,192 @@ int Mirror<I>::group_image_add(IoCtx &group_ioctx,
   }
   return 0;
 }
+template <typename I>
+int Mirror<I>::group_revert_membership_to_snapshot(
+    IoCtx& group_ioctx,
+    const char *group_name,
+    std::string &global_group_id,
+    bool* requires_orphan,
+    std::string &rollback_snapname) {
+
+  CephContext *cct = (CephContext *)group_ioctx.cct();
+  ldout(cct, 20) << "io_ctx=" << &group_ioctx
+                 << ", group_name=" << group_name << dendl;
+
+  if (requires_orphan) {
+    *requires_orphan = false;
+  }
+
+  std::string group_id;
+  int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY,
+                                 group_name, &group_id);
+  if (r < 0) {
+    lderr(cct) << "error getting the group id: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  std::vector<cls::rbd::GroupImageStatus> images;
+  r = Group<I>::group_image_list_by_id(group_ioctx, group_id, &images);
+  if (r < 0) {
+    lderr(cct) << "failed listing images in the group: " << group_name
+               << " :" << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  std::vector<cls::rbd::GroupImageSpec> current_membership;
+  for (const auto& image : images) {
+    if (image.state == cls::rbd::GROUP_IMAGE_LINK_STATE_ATTACHED) {
+      current_membership.push_back(image.spec);
+    }
+  }
+
+  // rollback to last good group snapshot
+  std::vector<cls::rbd::GroupSnapshot> snaps;
+  C_SaferCond cond;
+  auto req = group::ListSnapshotsRequest<>::create(group_ioctx, group_id,
+                                                   true, true, &snaps, &cond);
+  req->send();
+  r = cond.wait();
+  if (r < 0) {
+    lderr(cct) << "failed to list snapshots in the group " << group_name
+               << " :" << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  bool need_rollback = false;
+  auto rollback_snap = snaps.rbegin();
+  bool tmp_requires_orphan = false;
+  for (; rollback_snap != snaps.rend(); ++rollback_snap) {
+    auto mirror_ns = std::get_if<cls::rbd::GroupSnapshotNamespaceMirror>(
+        &rollback_snap->snapshot_namespace);
+    if (mirror_ns == nullptr || mirror_ns->is_orphan()) {
+      continue;
+    }
+
+    tmp_requires_orphan = !mirror_ns->is_demoted();
+
+    if (!is_mirror_group_snapshot_complete(rollback_snap->state,
+                                           mirror_ns->complete)) {
+      need_rollback = true;
+      continue;
+    }
+    break;
+  }
+
+  if (rollback_snap == snaps.rend()) {
+    lderr(cct) << "cannot rollback, no complete mirror group snapshot available on group: "
+               << group_name << dendl;
+    return -EINVAL;
+  }
+
+  if (need_rollback) {
+    // rollback membership
+    if (requires_orphan) {
+      *requires_orphan = tmp_requires_orphan;
+    }
+    std::vector<cls::rbd::GroupImageSpec> rollback_membership;
+    for (auto& it : rollback_snap->snaps) {
+      rollback_membership.emplace_back(it.image_id, it.pool);
+    }
+
+    if (rollback_membership != current_membership) {
+      ldout(cct, 10) << "rollback group snapshot membership does not match current group membership"
+                     << dendl;
+      // Fix the group membership
+      std::vector<cls::rbd::GroupImageSpec> delta_images;
+      std::set_difference(current_membership.begin(), current_membership.end(),
+                          rollback_membership.begin(), rollback_membership.end(),
+                          std::back_inserter(delta_images));
+
+      std::vector<cls::rbd::GroupImageSpec> removed_images; // from current membership
+      std::vector<cls::rbd::GroupImageSpec> inserted_images; // to current membership
+
+      if (!delta_images.empty()) {
+        for (auto &s : delta_images) {
+          if (std::find(rollback_membership.begin(),
+                        rollback_membership.end(), s)
+              != rollback_membership.end()) {
+            removed_images.push_back(s);
+          } else {
+            inserted_images.push_back(s);
+          }
+        }
+      }
+
+      // insert back to membership
+      for (auto &s : removed_images) {
+        IoCtx image_ioctx;
+        r = librbd::util::create_ioctx(group_ioctx, "image", s.pool_id, {},
+                                       &image_ioctx);
+        if (r < 0) {
+          return r;
+        }
+
+        r = Group<I>::image_add(group_ioctx, group_name,
+                                image_ioctx, s.image_id.c_str());
+        if (r < 0) {
+          lderr(cct) << "error inserting image to group: " << group_name
+                     << " :" << cpp_strerror(r) << dendl;
+          return r;
+        }
+      }
+
+      // remove from the membership
+      for (auto &s : inserted_images) {
+        IoCtx image_ioctx;
+        r = librbd::util::create_ioctx(group_ioctx, "image", s.pool_id, {},
+                                       &image_ioctx);
+        if (r < 0) {
+          return r;
+        }
+
+        librbd::ImageCtx* image_ctx = new ImageCtx("", s.image_id,
+                                                   nullptr, image_ioctx, false);
+
+        C_SaferCond open_cond;
+        image_ctx->state->open(0, &open_cond);
+        r = open_cond.wait();
+        if (r < 0) {
+          lderr(cct) << "failed opening group image: "
+                     << cpp_strerror(r) << dendl;
+          return r;
+        }
+
+        r = image_disable(image_ctx, true /* force */, true);
+        if (r < 0) {
+          lderr(cct) << "failed to disable mirroring on image: " << s.image_id
+                     << " : " << cpp_strerror(r) << dendl;
+          image_ctx->state->close();
+          return r;
+        }
+
+        r = Group<I>::image_remove_by_id(group_ioctx, group_name, image_ioctx,
+                                         s.image_id.c_str(), true /* force */);
+        if (r < 0 && r != -ENOENT) {
+          lderr(cct) << "error removing image from a group: " << group_name
+                     << " :" << cpp_strerror(r) << dendl;
+          image_ctx->state->close();
+          return r;
+        }
+        image_ctx->state->close();
+      }
+
+      r = MirroringWatcher<I>::notify_group_updated(
+          group_ioctx, cls::rbd::MIRROR_GROUP_STATE_ENABLED, group_id,
+          global_group_id, rollback_membership.size());
+      if (r < 0) {
+        lderr(cct) << "failed to notify mirroring group=" << group_name
+                   << " updated: " << cpp_strerror(r) << dendl;
+        // not fatal
+      }
+    } // Fixing membership done.
+    rollback_snapname = rollback_snap->name;
+  } else {
+    ldout(cct, 10) << "no rollback and no orphan snapshot required" << dendl;
+  }
+
+  return 0;
+}
 
 template <typename I>
 int Mirror<I>::group_promote(IoCtx& group_ioctx, const char *group_name,
@@ -3148,72 +3334,18 @@ int Mirror<I>::group_promote(IoCtx& group_ioctx, const char *group_name,
   }
 
   if (force) {
-    std::vector<cls::rbd::GroupImageStatus> images;
-    r = Group<I>::group_image_list_by_id(group_ioctx, group_id, &images);
-    if (r < 0) {
-      lderr(cct) << "failed listing images in the group: " << group_name
-                 << " :" << cpp_strerror(r) << dendl;
-      return r;
-    }
-    std::vector<cls::rbd::GroupImageSpec> current_images;
-    for (const auto& image : images) {
-      if (image.state == cls::rbd::GROUP_IMAGE_LINK_STATE_ATTACHED) {
-        current_images.push_back(image.spec);
-      }
-    }
-    // rollback to last good group snapshot
-    std::vector<cls::rbd::GroupSnapshot> snaps;
-    C_SaferCond cond;
-    auto req = group::ListSnapshotsRequest<>::create(group_ioctx, group_id,
-                                                     true, true, &snaps, &cond);
-    req->send();
-    int r = cond.wait();
+    std::string rollback_snap;
+    bool requires_orphan = false;
+    r = group_revert_membership_to_snapshot(group_ioctx, group_name,
+                                            mirror_group.global_group_id,
+                                            &requires_orphan, rollback_snap);
     if (r < 0) {
-      lderr(cct) << "failed to list snapshots in the group " << group_name
-                 << " :" << cpp_strerror(r) << dendl;
+      lderr(cct) << "failed to revert to membership: "
+                 << cpp_strerror(r) << dendl;
       return r;
     }
 
-    if (snaps.empty()) {
-      lderr(cct) << "cannot rollback, no mirror group snapshot available on group: "
-                 << group_name << dendl;
-      return -EINVAL;
-    }
-
-    bool need_rollback = false;
-    auto snap = snaps.rbegin();
-    for (; snap != snaps.rend(); ++snap) {
-      auto mirror_ns = std::get_if<cls::rbd::GroupSnapshotNamespaceMirror>(
-          &snap->snapshot_namespace);
-      if (mirror_ns == nullptr || mirror_ns->is_orphan()) {
-        continue;
-      }
-      if (!is_mirror_group_snapshot_complete(snap->state, mirror_ns->complete)) {
-        need_rollback = true;
-        continue;
-      }
-      break;
-    }
-
-    if (snap == snaps.rend()) {
-      lderr(cct) << "cannot rollback, no complete mirror group snapshot available on group: "
-                 << group_name << dendl;
-      return -EINVAL;
-    }
-
-    if (need_rollback) {
-      // Check for group membership match
-      std::vector<cls::rbd::GroupImageSpec> rollback_images;
-      for (auto& it : snap->snaps) {
-        rollback_images.emplace_back(it.image_id, it.pool);
-      }
-
-      if (rollback_images != current_images) {
-        lderr(cct) << "group membership does not match snapshot membership with rollback_snap_id: "
-                   << snap->id << dendl;
-        return -EINVAL;
-      }
-
+    if (requires_orphan) {
       r = create_orphan_group_snapshot(group_ioctx, group_id,
                                        mirror_group.global_group_id);
       if (r < 0 ) {
@@ -3221,20 +3353,21 @@ int Mirror<I>::group_promote(IoCtx& group_ioctx, const char *group_name,
                    << cpp_strerror(r) << dendl;
         return r;
       }
+    }
 
+    if (!rollback_snap.empty()) {
+      ldout(cct, 5) << "rolling back to: " << rollback_snap << dendl;
       librbd::NoOpProgressContext prog_ctx;
-      r = Group<I>::snap_rollback(group_ioctx,
-                                  group_name, snap->name.c_str(), prog_ctx, false);
+      r = Group<I>::snap_rollback(group_ioctx, group_name,
+                                  rollback_snap.c_str(), prog_ctx, false);
       if (r < 0) {
-        lderr(cct) << "failed to rollback to group snapshot: " << snap->id
+        lderr(cct) << "failed to rollback to group snapshot: " << rollback_snap
                    << " :" << cpp_strerror(r) << dendl;
         return r;
       }
-      ldout(cct, 5) << "successfully rolled back to group snapshot: "
-                    << snap->id << dendl;
+      ldout(cct, 5) << "successfully rollbacked to group snapshot: "
+                    << rollback_snap << dendl;
       // Rollback to last good snapshot done
-    } else {
-      ldout(cct, 10) << "no rollback and no orphan snapshot required" << dendl;
     }
   }
 
index 65ab4f0490056bd4db70ed2496086b82dac8228f..4563d5a008ebec0abfa9e3901da127fbc2150eaf 100644 (file)
@@ -142,6 +142,11 @@ struct Mirror {
                                     uint64_t *snap_id, Context *on_finish);
 
   static int group_list(IoCtx &io_ctx, std::vector<std::string> *names);
+  static int group_revert_membership_to_snapshot(IoCtx& group_ioctx,
+                                                 const char *group_name,
+                                                 std::string &global_group_id,
+                                                 bool* requires_orphan,
+                                                 std::string &rollback_snapname);
   static int group_enable(IoCtx &group_ioctx, const char *group_name,
                           mirror_image_mode_t group_image_mode);
   static int group_disable(IoCtx &group_ioctx, const char *group_name,
index 45c32d4c65598f91855010891706c0103b12d8e9..fa245e9a01358ced3138691229503bd9da06a051 100644 (file)
@@ -1556,7 +1556,8 @@ namespace librbd {
                image_ioctx.get_pool_name().c_str(),
                image_ioctx.get_id(), image_id);
     int r = librbd::api::Group<>::image_remove_by_id(group_ioctx, group_name,
-                                                     image_ioctx, image_id);
+                                                     image_ioctx, image_id,
+                                                     false);
     tracepoint(librbd, group_image_remove_by_id_exit, r);
     return r;
   }
@@ -7668,7 +7669,8 @@ extern "C" int rbd_group_image_remove_by_id(rados_ioctx_t group_p,
              image_ioctx.get_id(), image_id);
 
   int r = librbd::api::Group<>::image_remove_by_id(group_ioctx, group_name,
-                                                   image_ioctx, image_id);
+                                                   image_ioctx, image_id,
+                                                   false);
 
   tracepoint(librbd, group_image_remove_by_id_exit, r);
   return r;
index 1ba9caa8a3af5aba0b9308642e72b467cb605a38..9203f2bc17c4b40d82ee0e9aca1d06f3afe62897 100644 (file)
@@ -701,7 +701,9 @@ void GroupReplayer<I>::handle_start_image_replayers(int r) {
 
   if (finish_start_if_interrupted()) {
     return;
-  } else if (r < 0) {
+  } else if (r < 0 && r != -ENOENT) {
+    /* sinario for ENOENT could be, as part of the group snapshot rollback
+     * image might be disabled and hence ENOENT */
     finish_start_fail(r, "failed to start image replayers");
     return;
   }
index e7d29740111e50674b943e96baccddcb6179f8ae..2c79888bccda8eebedb0236f581218cb2222890f 100644 (file)
@@ -308,11 +308,14 @@ void InstanceReplayer<I>::release_group(const std::string &global_group_id,
   }
 
   auto group_replayer = it->second;
-  m_group_replayers.erase(it);
-
+  if (group_replayer->is_finished()) {
+    m_group_replayers.erase(it);
+  }
   on_finish = new LambdaContext(
     [group_replayer, on_finish] (int r) {
-      group_replayer->destroy();
+      if (group_replayer->is_finished()) {
+        group_replayer->destroy();
+      }
       on_finish->complete(0);
     });
   stop_group_replayer(group_replayer, on_finish);
index ad7ccd56c42b397d198640ee01b6a2de9bea7e83..d843b00937810076cbb558fb5c5fd9ba83aebcd8 100644 (file)
@@ -464,8 +464,8 @@ void PoolWatcher<I>::notify_listener() {
       auto it = m_entities.find(entity);
       if (it != m_entities.end()) {
         m_entities.erase(entity);
-        m_entities.insert({entity, id});
       }
+      m_entities.insert({entity, id});
     }
     m_pending_removed_entities.clear();