]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rbd-mirror: prune group snapshots stuck in CREATING state on restart
authorPrasanna Kumar Kalever <prasanna.kalever@redhat.com>
Tue, 16 Dec 2025 14:07:05 +0000 (19:37 +0530)
committerPrasanna Kumar Kalever <prasanna.kalever@redhat.com>
Thu, 29 Jan 2026 17:38:02 +0000 (23:08 +0530)
After a daemon restart, prune the entire group snapshot if it remains in
GROUP_SNAPSHOT_STATE_CREATING. This aligns group snapshot handling with the
image replayer logic and ensures that all member image snapshots are cleanly
deleted and recreated.

This is required because some image snapshots in the group may not have started
object copying prior to the restart, which can otherwise lead to missing image
state, object-map, or related metadata.

Also, add the necessary tests to validate interrupted synchronization during
group snapshot syncing. Specifically, cover the following scenarios:
Scenario 1: The snapshot on the secondary is in the creating phase when the
            daemon is restarted.
Scenario 2: The snapshot on the secondary is in the created phase when the
            daemon is restarted.

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
qa/workunits/rbd/rbd_mirror_group_simple.sh
qa/workunits/rbd/rbd_mirror_helpers.sh
src/tools/rbd_mirror/group_replayer/Replayer.cc
src/tools/rbd_mirror/group_replayer/Replayer.h
src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h

index 090690e2a03fb10b67c1832c263bcb2cf28bffe6..31389a585058599a4a0cb7348131418c7b6489ef 100755 (executable)
@@ -3086,6 +3086,80 @@ test_interrupted_sync_restarted_daemon()
   images_remove "${primary_cluster}" "${pool}/${image_prefix}" "${image_count}"
 }
 
+# Scenario 1: The snapshot on the secondary is in the creating phase when the daemon is restarted.
+declare -a test_interrupted_sync_1=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${image_prefix}" 'snap_creating' 2)
+# Scenario 2: The snapshot on the secondary is in the created phase when the daemon is restarted.
+declare -a test_interrupted_sync_2=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${image_prefix}" 'snap_created' 2)
+
+test_interrupted_sync_scenarios=2
+
+test_interrupted_sync()
+{
+  local primary_cluster=$1 ; shift
+  local secondary_cluster=$1 ; shift
+  local pool=$1 ; shift
+  local image_prefix=$1 ; shift
+  local scenario=$1 ; shift
+  local image_count=$(($1*"${image_multiplier}")) ; shift
+  local group0=test-group0
+
+  start_mirrors "${primary_cluster}"
+  start_mirrors "${secondary_cluster}"
+
+  group_create "${primary_cluster}" "${pool}/${group0}"
+  image_create "${primary_cluster}" "${pool}/${image_prefix}1" 1G
+
+  big_image=test-image-big
+  image_create "${primary_cluster}" "${pool}/${big_image}" 4G
+  group_image_add "${primary_cluster}" "${pool}/${group0}" "${pool}/${image_prefix}1"
+  group_image_add "${primary_cluster}" "${pool}/${group0}" "${pool}/${big_image}"
+  mirror_group_enable "${primary_cluster}" "${pool}/${group0}"
+  wait_for_group_present "${secondary_cluster}" "${pool}" "${group0}" "${image_count}"
+  wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group0}" "${image_count}"
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+replaying' "${image_count}"
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group0}" 'up+stopped' "${image_count}"
+  wait_for_group_synced "${primary_cluster}" "${pool}"/"${group0}" "${secondary_cluster}" "${pool}"/"${group0}"
+
+  write_image "${primary_cluster}" "${pool}" "${big_image}" 1024 4194304
+
+  local group_snap_id
+  mirror_group_snapshot "${primary_cluster}" "${pool}/${group0}" group_snap_id
+
+  local image_snap_id
+  wait_for_image_snapshot_with_group_snap_info "${secondary_cluster}" "${pool}" "${image_prefix}1" "${group_snap_id}" image_snap_id
+  if [ "${scenario}" = 'snap_creating' ]; then
+    stop_mirror_while_group_snapshot_incomplete "${secondary_cluster}" "${pool}" "${group0}" "${group_snap_id}" "creating"
+    test_group_snap_state "${secondary_cluster}" "${pool}" "${group0}" "${group_snap_id}" "creating"
+  elif [ "${scenario}" = 'snap_created' ]; then
+    stop_mirror_while_group_snapshot_incomplete "${secondary_cluster}" "${pool}" "${group0}" "${group_snap_id}" "created"
+    test_group_snap_state "${secondary_cluster}" "${pool}" "${group0}" "${group_snap_id}" "created"
+    test_group_snap_sync_incomplete "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}"
+  fi
+
+  start_mirrors "${secondary_cluster}"
+  wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group0}" "${image_count}"
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+replaying' "${image_count}"
+  wait_for_group_snap_present "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}"
+  wait_for_group_snap_sync_complete "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}"
+
+  local new_image_snap_id
+  get_image_snapshot_with_group_snap_info "${secondary_cluster}" "${pool}" "${image_prefix}1" "${group_snap_id}" new_image_snap_id
+  if [ "${scenario}" = 'snap_creating' ]; then
+    test "${image_snap_id}" != "${new_image_snap_id}" ||  { fail "failed to recreate image snapshot with a new snap_id after restart"; return 1; }
+  elif [ "${scenario}" = 'snap_created' ]; then
+    test "${image_snap_id}" == "${new_image_snap_id}" ||  { fail "image snapshot recreated with a new snap_id after restart"; return 1; }
+  fi
+
+  # tidy up
+  mirror_group_disable "${primary_cluster}" "${pool}/${group0}"
+  group_remove "${primary_cluster}" "${pool}/${group0}"
+
+  wait_for_group_not_present "${primary_cluster}" "${pool}" "${group0}"
+  wait_for_group_not_present "${secondary_cluster}" "${pool}" "${group0}"
+
+  image_remove "${primary_cluster}" "${pool}/${image_prefix}1"
+  image_remove "${primary_cluster}" "${pool}/${big_image}"
+}
 
 # test force unlink time
 declare -a test_multiple_mirror_group_snapshot_unlink_time_1=("${CLUSTER2}" "${CLUSTER1}" "${pool0}")
@@ -3902,6 +3976,7 @@ run_all_tests()
   # TODO: add the capabilty to have clone images support in the mirror group
   run_test_all_scenarios test_group_with_clone_image
   run_test_all_scenarios test_interrupted_sync_restarted_daemon
+  run_test_all_scenarios test_interrupted_sync
   run_test_all_scenarios test_resync_after_relocate_and_force_promote
   run_test_all_scenarios test_multiple_mirror_group_snapshot_unlink_time
   run_test_all_scenarios test_force_promote_delete_group
index d68bd42ed45f22c579b30ebd1b36eaddd5658c39..3ebc500d7a9cc69ed31269e3221682d37b5d10a8 100755 (executable)
@@ -1858,6 +1858,78 @@ compare_images()
     return ${ret}
 }
 
+wait_for_image_snapshot_with_group_snap_info() {
+    local cluster=$1
+    local pool=$2
+    local image=$3
+    local group_snap_id=$4
+    local -n _id=$5
+
+    for s in 0.1 0.1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 4 8 8 8 8 8 8 8 8 16 16 32 32; do
+        run_cmd "rbd --cluster ${cluster} snap list ${pool}/${image} --all --format xml --pretty-format"
+        local img_snap_id=$(xmlstarlet sel -t -v "(//snapshots/snapshot[contains(name, '${group_snap_id}')]/id)" < "$CMD_STDOUT") || { img_snap_id=""; }
+        if [[ -n "${img_snap_id}" ]]; then
+          _id="${img_snap_id}"
+          return 0
+        fi
+        sleep "${s}"
+    done
+    fail "failed to find image snapshot associated with group snap ${group_snap_id}"; return 1;
+}
+
+get_image_snapshot_with_group_snap_info() {
+    local cluster=$1
+    local pool=$2
+    local image=$3
+    local group_snap_id=$4
+    local -n _id=$5
+
+    run_cmd "rbd --cluster ${cluster} snap list ${pool}/${image} --all --format xml --pretty-format"
+    local img_snap_id=$(xmlstarlet sel -t -v "(//snapshots/snapshot[contains(name, '${group_snap_id}')]/id)" < "$CMD_STDOUT") || { img_snap_id=""; }
+    if [[ -n "${img_snap_id}" ]]; then
+        _id="${img_snap_id}"
+        return 0
+    fi
+    fail "failed to find image snapshot associated with group snap ${group_snap_id}"; return 1;
+}
+
+stop_mirror_while_group_snapshot_incomplete() {
+    local cluster=$1
+    local pool=$2
+    local group=$3
+    local group_snap_id=$4
+    # expected_state must be "creating" or "created"
+    local expected_state=$5
+
+    if [ "${expected_state}" != "creating" ] && [ "${expected_state}" != "created" ]; then
+        fail "invalid value for expected_state: '${expected_state}' (allowed: 'creating' or 'created')"; return 1;
+    fi
+
+    local count=0
+    local state=""
+    local snaps_synced="false"
+    while [ "${count}" -lt 60 ]; do
+        run_cmd "rbd --cluster ${cluster} group snap ls ${pool}/${group} --format xml --pretty-format"
+        state=$(xmlstarlet sel -t -v "//group_snaps/group_snap[id='${group_snap_id}']/state" < "$CMD_STDOUT") || { state=''; }
+        snaps_synced=$(xmlstarlet sel -t -v "//group_snaps/group_snap[id='${group_snap_id}']/namespace/complete" < "$CMD_STDOUT") || { snaps_synced='false'; }
+
+        if [[ "${expected_state}" == "${state}" && "${snaps_synced}" == "false" ]]; then
+            # stop the daemon to prevent further syncing of snapshots
+            stop_mirrors "${cluster}" '-9'
+            return 0;
+        fi
+
+        count=$((count+1))
+        sleep 1;
+    done
+
+    if [ -z "${state}" ]; then
+        fail "group snapshot ${group_snap_id} not found"; return 1;
+    elif [ "${expected_state}" != "${state}" ]; then
+        fail "group snapshot ${group_snap_id} state mismatch: expected state '${expected_state}', got '${state}'"; return 1;
+    fi
+}
+
 compare_image_snapshots()
 {
     local pool=$1
@@ -2714,6 +2786,29 @@ test_group_snap_sync_state()
     fi
 }
 
+test_group_snap_state()
+{
+    local cluster=$1
+    local pool=$2
+    local group=$3
+    local group_snap_id=$4
+    # expected_state must be "creating" or "created"
+    local expected_state=$5
+
+    if [ "${expected_state}" != "creating" ] && [ "${expected_state}" != "created" ]; then
+        fail "invalid value for expected_state: '${expected_state}' (allowed: 'creating' or 'created')"; return 1;
+    fi
+
+    run_cmd "rbd --cluster ${cluster} group snap ls ${pool}/${group} --format xml --pretty-format"
+    state=$(xmlstarlet sel -t -v "//group_snaps/group_snap[id='${group_snap_id}']/state" < "$CMD_STDOUT") || { state=''; }
+
+    if [ -z "${state}" ]; then
+        fail "group snapshot ${group_snap_id} not found"; return 1;
+    elif [ "${expected_state}" != "${state}" ]; then
+        fail "group snapshot ${group_snap_id} state mismatch: expected state '${expected_state}', got '${state}'"; return 1;
+    fi
+}
+
 test_group_snap_sync_complete()
 {
     local cluster=$1
index 9db3596781a334bcf90e88174a6169ab13395a1c..20227c90f052537f53c5197e7b41230b7f584598 100644 (file)
@@ -389,8 +389,7 @@ void Replayer<I>::validate_local_group_snapshots() {
   }
   m_state = STATE_REPLAYING;
 
-  // early exit if no snapshots to process
-  if (m_local_group_snaps.empty()) {
+  if (m_local_group_snaps.empty() || m_check_creating_snaps) {
     load_local_group_snapshots(&locker);
     return;
   }
@@ -481,7 +480,8 @@ void Replayer<I>::handle_load_local_group_snapshots(int r) {
   }
 
   auto last_local_snap = get_latest_mirror_group_snapshot(m_local_group_snaps);
-  if (!last_local_snap) {
+  if (!last_local_snap) { // no snaps synced yet
+    m_check_creating_snaps = false;
     load_remote_group_snapshots();
     return;
   }
@@ -495,6 +495,10 @@ void Replayer<I>::handle_load_local_group_snapshots(int r) {
       handle_replay_complete(&locker, 0, "orphan (force promoting)");
       return;
     }
+    if (m_check_creating_snaps) {
+      prune_creating_group_snapshots_if_any(&locker);
+      return;
+    }
     if (!is_mirror_group_snapshot_complete(last_local_snap->state, ns.complete)) {
       m_retry_validate_snap = true;
     }
@@ -1659,19 +1663,37 @@ void Replayer<I>::handle_mirror_group_snapshot_unlink_peer(
   return;
 }
 
+template <typename I>
+void Replayer<I>::get_replayers_by_image_id(
+    std::unique_lock<ceph::mutex>* locker) {
+  if (!m_replayers_by_image_id.empty()) {
+    return;
+  }
+  m_replayers_by_image_id.reserve(m_image_replayers->size());
+  for (auto& [_, image_replayer] : *m_image_replayers) {
+    if (!image_replayer) {
+      continue;
+    }
+    locker->unlock();
+    const auto& id = image_replayer->get_local_image_id();
+    locker->lock();
+    if (!id.empty()) {
+      m_replayers_by_image_id.emplace_back(id, image_replayer);
+    }
+  }
+}
+
 template <typename I>
 bool Replayer<I>::prune_all_image_snapshots(
     cls::rbd::GroupSnapshot *local_snap,
     std::unique_lock<ceph::mutex>* locker) {
-  bool retain = false;
-
+  bool prune_done = true;
   if (!local_snap) {
-    return retain;
+    return prune_done;
   }
-
-  dout(10) << "attempting to prune image snaps from group snap: "
-    << local_snap->id << dendl;
-
+  dout(10) << "attempting to prune image snapshots for group snapshot "
+           << local_snap->id << dendl;
+  get_replayers_by_image_id(locker);
   for (auto &spec : local_snap->snaps) {
     std::string image_header_oid = librbd::util::header_name(spec.image_id);
     cls::rbd::SnapshotInfo snap_info;
@@ -1681,34 +1703,182 @@ bool Replayer<I>::prune_all_image_snapshots(
       continue;
     } else if (r < 0) {
       derr << "failed getting snap info for snap id: " << spec.snap_id
-        << ", : " << cpp_strerror(r) << dendl;
+           << ", : " << cpp_strerror(r) << dendl;
     }
-    retain = true;
-    locker->unlock();
-    for (auto it = m_image_replayers->begin();
-        it != m_image_replayers->end(); ++it) {
-      auto image_replayer = it->second;
-      if (!image_replayer) {
+    prune_done = false;
+    auto ir = std::find_if(
+        m_replayers_by_image_id.begin(),
+        m_replayers_by_image_id.end(),
+        [&spec](const std::pair<std::string, ImageReplayer<I>*>& entry) {
+        return entry.first == spec.image_id;
+        });
+    if (ir != m_replayers_by_image_id.end()) {
+      ImageReplayer<I>* image_replayer = ir->second;
+      dout(10) << "pruning snapshot " << spec.snap_id
+               << " for image " << spec.image_id << dendl;
+      // The ImageReplayer can have m_replayer empty, so no guarantee that
+      // this will succeed in one shot, but we keep retrying until it is
+      // achieved anyway.
+      locker->unlock();
+      image_replayer->prune_snapshot(spec.snap_id);
+      locker->lock();
+    }
+  }
+
+  return prune_done;
+}
+
+// prune all image snapshots by matching group snapshot id
+template <typename I>
+bool Replayer<I>::prune_all_image_snapshots_by_gsid(
+    const std::string &group_snap_id,
+    const std::vector<cls::rbd::GroupImageStatus>& local_images,
+    std::unique_lock<ceph::mutex>* locker) {
+  bool prune_done = true;
+  if (local_images.empty()) {
+    return prune_done;
+  }
+  dout(10) << "attempting to prune image snapshots for group snapshot "
+           << group_snap_id << dendl;
+  get_replayers_by_image_id(locker);
+  for (const auto& image : local_images) {
+    const auto image_header_oid = librbd::util::header_name(image.spec.image_id);
+    ::SnapContext snapc;
+    int r = librbd::cls_client::get_snapcontext(&m_local_io_ctx, image_header_oid, &snapc);
+    if (r < 0) {
+      derr << "failed to get snap context for image "
+           << image.spec.image_id << ": "
+           << cpp_strerror(r) << dendl;
+      prune_done = false;
+      continue;
+    }
+    for (auto it = snapc.snaps.rbegin(); it != snapc.snaps.rend(); ++it) {
+      snapid_t snap_id = *it;
+      cls::rbd::SnapshotInfo snap_info;
+      r = librbd::cls_client::snapshot_get(&m_local_io_ctx, image_header_oid,
+          snap_id, &snap_info);
+      if (r == -ENOENT) {
+        continue;
+      } else if (r < 0) {
+        derr << "failed to get snapshot info for snap "
+             << snap_id << ": "
+             << cpp_strerror(r) << dendl;
+        prune_done = false;
         continue;
       }
-      auto local_image_id = image_replayer->get_local_image_id();
-      if (local_image_id.empty()) {
+      const auto* mirror_ns =
+        std::get_if<cls::rbd::MirrorSnapshotNamespace>(
+            &snap_info.snapshot_namespace);
+      if (mirror_ns && mirror_ns->group_snap_id != group_snap_id) {
         continue;
       }
-      if (local_image_id != spec.image_id) {
+      const auto* user_ns =
+        std::get_if<cls::rbd::ImageSnapshotNamespaceGroup>(
+            &snap_info.snapshot_namespace);
+      if (user_ns && user_ns->group_snapshot_id != group_snap_id) {
         continue;
       }
-      dout(10) << "pruning: " << spec.snap_id << dendl;
-      // The ImageReplayer can have m_replayer empty, so no guaranty that
-      // this will succeed in one shot, but we keep retry for this and
-      // acheive anyway.
-      image_replayer->prune_snapshot(spec.snap_id);
-      break;
+      prune_done = false;
+
+      auto ir = std::find_if(
+          m_replayers_by_image_id.begin(),
+          m_replayers_by_image_id.end(),
+          [&image](const std::pair<std::string, ImageReplayer<I>*>& entry) {
+          return entry.first == image.spec.image_id;
+          });
+
+      if (ir != m_replayers_by_image_id.end()) {
+        ImageReplayer<I>* image_replayer = ir->second;
+        dout(10) << "pruning snapshot " << snap_id
+                 << " for image " << image.spec.image_id << dendl;
+        locker->unlock();
+        image_replayer->prune_snapshot(snap_id);
+        locker->lock();
+      }
     }
-    locker->lock();
   }
 
-  return retain;
+  return prune_done;
+}
+
+// Check for non-primary mirror and user group snapshots in CREATING state,
+// and prune them on daemon restart.
+template <typename I>
+void Replayer<I>::prune_creating_group_snapshots_if_any(
+    std::unique_lock<ceph::mutex>* locker) {
+  auto prune_group_snaps =
+    std::make_shared<std::vector<cls::rbd::GroupSnapshot>>();
+  for (const auto& local_snap : m_local_group_snaps) {
+    if (local_snap.state != cls::rbd::GROUP_SNAPSHOT_STATE_CREATING) {
+      continue;
+    }
+    auto local_snap_ns = std::get_if<cls::rbd::GroupSnapshotNamespaceMirror>(
+        &local_snap.snapshot_namespace);
+    // skip primary snapshots;
+    // only prune non-primary mirror and user snapshots in CREATING state
+    if (local_snap_ns && local_snap_ns->is_primary()) {
+      continue;
+    }
+    prune_group_snaps->push_back(local_snap);
+  }
+
+  if (prune_group_snaps->empty()) {
+    // no group snapshots found in CREATING state
+    m_check_creating_snaps = false;
+    locker->unlock();
+    schedule_load_group_snapshots();
+    return;
+  }
+  locker->unlock();
+
+  m_out_bl.clear();
+  auto local_images =
+    std::make_shared<std::vector<cls::rbd::GroupImageStatus>>();
+  auto* ctx = new LambdaContext(
+    [this, local_images, prune_group_snaps](int r) {
+      if (r < 0) {
+        derr << "failed to list group images: "
+             << cpp_strerror(r) << dendl;
+        // reload will retry if needed
+        schedule_load_group_snapshots();
+        return;
+      }
+
+      handle_prune_creating_group_snapshots_if_any(
+        *local_images, *prune_group_snaps);
+    });
+
+  // initiate image listing
+  local_group_image_list_by_id(&m_out_bl, local_images.get(), ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_prune_creating_group_snapshots_if_any(
+    const std::vector<cls::rbd::GroupImageStatus>& local_images,
+    const std::vector<cls::rbd::GroupSnapshot>& prune_group_snaps) {
+  int r = 0;
+  {
+    std::unique_lock locker{m_lock};
+    for (const auto& local_snap : prune_group_snaps) {
+      if (!prune_all_image_snapshots_by_gsid(
+            local_snap.id, local_images, &locker)) {
+        // still pending image snapshots; retry later
+        continue;
+      }
+      dout(10) << "all image snapshots pruned; removing creating group snapshot: "
+               << local_snap.id << dendl;
+      r = librbd::cls_client::group_snap_remove(
+          &m_local_io_ctx,
+          librbd::util::group_header_name(m_local_group_id),
+          local_snap.id);
+      if (r < 0) {
+        derr << "failed to remove group snapshot "
+             << local_snap.id << ": "
+             << cpp_strerror(r) << dendl;
+      }
+    }
+  }
+  schedule_load_group_snapshots();
 }
 
 template <typename I>
@@ -1733,7 +1903,7 @@ void Replayer<I>::prune_user_group_snapshots(
       dout(10) << "pruning user group snap in-progress: "
                << local_snap->name << ", with id: " << local_snap->id << dendl;
       // prune all the image snaps of the group snap locally
-      if (prune_all_image_snapshots(&(*local_snap), locker)) {
+      if (!prune_all_image_snapshots(&(*local_snap), locker)) {
         continue;
       }
       dout(10) << "all image snaps are pruned, finally pruning user group snap: "
@@ -1829,7 +1999,7 @@ void Replayer<I>::prune_mirror_group_snapshots(
     const auto& ns = std::get<cls::rbd::GroupSnapshotNamespaceMirror>(
         prune_snap->snapshot_namespace);
     if (ns.is_primary() ||
-        prune_all_image_snapshots(prune_snap, locker)) {
+        !prune_all_image_snapshots(prune_snap, locker)) {
       prune_snap = nullptr;
       skip_next_snap_check = false;
       continue;
index df3a4e721d045cf8eb25796ca4ea1bcc468a3dee..e475f69b0049e3fbf3a7113d293fa49e8e766860 100644 (file)
@@ -108,6 +108,7 @@ private:
 
   std::vector<cls::rbd::GroupSnapshot> m_local_group_snaps;
   std::vector<cls::rbd::GroupSnapshot> m_remote_group_snaps;
+  std::vector<std::pair<std::string, ImageReplayer<ImageCtxT> *>> m_replayers_by_image_id;
 
   bool m_update_group_state = true;
 
@@ -127,6 +128,8 @@ private:
 
   uint64_t m_last_snapshot_bytes = 0;
 
+  bool m_check_creating_snaps = true; // check and identify creating snaps just after restart
+
   bool is_replay_interrupted(std::unique_lock<ceph::mutex>* locker);
 
   void schedule_load_group_snapshots();
@@ -235,12 +238,22 @@ private:
   void handle_mirror_group_snapshot_unlink_peer(
       int r, const std::string &snap_id);
 
+  void get_replayers_by_image_id(std::unique_lock<ceph::mutex>* locker);
+  bool prune_all_image_snapshots_by_gsid(
+      const std::string &group_snap_id,
+      const std::vector<cls::rbd::GroupImageStatus>& local_images,
+      std::unique_lock<ceph::mutex>* locker);
   bool prune_all_image_snapshots(
       cls::rbd::GroupSnapshot *local_snap,
       std::unique_lock<ceph::mutex>* locker);
   void prune_user_group_snapshots(std::unique_lock<ceph::mutex>* locker);
   void prune_mirror_group_snapshots(std::unique_lock<ceph::mutex>* locker);
   void prune_group_snapshots(std::unique_lock<ceph::mutex>* locker);
+  void prune_creating_group_snapshots_if_any(
+      std::unique_lock<ceph::mutex>* locker);
+  void handle_prune_creating_group_snapshots_if_any(
+      const std::vector<cls::rbd::GroupImageStatus>& local_images,
+      const std::vector<cls::rbd::GroupSnapshot>& prune_group_snaps);
 
   void set_image_replayer_limits(const std::string &image_id,
                                  const cls::rbd::GroupSnapshot *remote_snap,
index 2dfe79f37ac3be8589f90a564dcc32015b418084..e9110d96b8da57895097da104bbdb0ea92d582da 100644 (file)
@@ -100,8 +100,14 @@ public:
   }
 
   void prune_snapshot(uint64_t snap_id) {
-    std::unique_lock locker(m_lock);
-    m_prune_snap_ids_by_gr.insert(snap_id);
+    {
+      std::unique_lock locker(m_lock);
+      m_prune_snap_ids_by_gr.insert(snap_id);
+      if (m_state != STATE_IDLE) {
+        return;
+      }
+    }
+    handle_image_update_notify();
   }
 
   void set_remote_snap_id_end_limit(uint64_t snap_id) {