From 1959bfa1b5bdef118f980f6f6042af4ff7f8db80 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Fri, 20 Jun 2025 16:22:46 +0530 Subject: [PATCH] rbd-mirror: fix group replayer deleting the previous mirror group snapshot The prune logic in the group replayer currently holds the previous group snapshot until the current group snapshot syncing is complete. Hence there is always a previous group snapshot (and the respective image snapshots). But when there are user group snapshots group replayer takes them to account and prunes the previous mirror snapshots, which shouldn't be done. The group replayer should always hold the previous mirror group snapshot (irrespective of the user group snap). Also rename unlink_group_snapshots() -> prune_group_snapshots() and substitute the use of unlink with prune as required. Signed-off-by: Prasanna Kumar Kalever --- .../rbd_mirror/group_replayer/Replayer.cc | 115 ++++++++++++++---- .../rbd_mirror/group_replayer/Replayer.h | 4 +- 2 files changed, 97 insertions(+), 22 deletions(-) diff --git a/src/tools/rbd_mirror/group_replayer/Replayer.cc b/src/tools/rbd_mirror/group_replayer/Replayer.cc index c608e52f670..12bcde58b63 100644 --- a/src/tools/rbd_mirror/group_replayer/Replayer.cc +++ b/src/tools/rbd_mirror/group_replayer/Replayer.cc @@ -541,7 +541,7 @@ void Replayer::handle_load_remote_group_snapshots(int r) { } if (!m_local_group_snaps.empty()) { - unlink_group_snapshots(&locker); + prune_group_snapshots(&locker); auto last_local_snap = m_local_group_snaps.rbegin(); auto last_local_snap_ns = std::get_if( &last_local_snap->snapshot_namespace); @@ -1218,7 +1218,7 @@ bool Replayer::prune_all_image_snapshots( return retain; } - dout(10) << "attempting to unlink image snaps from group snap: " + dout(10) << "attempting to prune image snaps from group snap: " << local_snap->id << dendl; for (auto &spec : local_snap->snaps) { @@ -1261,36 +1261,90 @@ bool Replayer::prune_all_image_snapshots( } template -void Replayer::unlink_group_snapshots( +void Replayer::prune_user_group_snapshots( std::unique_lock* locker) { - if (m_local_group_snaps.empty()) { - return; - } int r; - auto last_local_snap_id = m_local_group_snaps.rbegin()->id; for (auto local_snap = m_local_group_snaps.begin(); local_snap != m_local_group_snaps.end(); ++local_snap) { - if (local_snap->id == last_local_snap_id) { + if (local_snap->state != cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE) { break; } auto snap_type = cls::rbd::get_group_snap_namespace_type( local_snap->snapshot_namespace); if (snap_type == cls::rbd::GROUP_SNAPSHOT_NAMESPACE_TYPE_USER) { - bool unlink_user_snap = true; + bool prune_user_snap = true; for (auto &remote_snap : m_remote_group_snaps) { - if (remote_snap.id == local_snap->id) { - unlink_user_snap = false; + if (remote_snap.name == local_snap->name) { + prune_user_snap = false; break; } } - if (!unlink_user_snap) { + if (!prune_user_snap) { continue; } - dout(10) << "unlinking regular group snap in-progress: " + dout(10) << "pruning regular group snap in-progress: " << local_snap->name << ", with id: " << local_snap->id << dendl; - } else { - // remove mirror peer uuid of the remote snap + // prune all the image snaps of the group snap locally + if (prune_all_image_snapshots(&(*local_snap), locker)) { + continue; + } + dout(10) << "all image snaps are pruned, finally pruning regular group snap: " + << local_snap->id << dendl; + r = librbd::cls_client::group_snap_remove(&m_local_io_ctx, + librbd::util::group_header_name(m_local_group_id), local_snap->id); + if (r < 0) { + derr << "failed to remove group snapshot : " + << local_snap->id << " : " << cpp_strerror(r) << dendl; + } + } + } +} + +template +void Replayer::prune_mirror_group_snapshots( + std::unique_lock* locker) { + int r; + bool is_prior_snap_user = false; + bool skip_next_snap_check = false; + cls::rbd::GroupSnapshot *prune_snap = nullptr; + auto last_local_snap_id = m_local_group_snaps.rbegin()->id; + for (auto local_snap = m_local_group_snaps.begin(); + local_snap != m_local_group_snaps.end(); ++local_snap) { + if (local_snap->state != cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE) { + break; + } + auto snap_type = cls::rbd::get_group_snap_namespace_type( + local_snap->snapshot_namespace); + if (snap_type != cls::rbd::GROUP_SNAPSHOT_NAMESPACE_TYPE_MIRROR) { + is_prior_snap_user = true; + if (prune_snap) { // snapshot before user snap exists ? + // delete snap before user snap, only if the user snap has next complete mirror snap + auto next_local_snap = std::next(local_snap); + if (next_local_snap == m_local_group_snaps.end()) { + break; // no next snap. + } + auto next_snap_type = cls::rbd::get_group_snap_namespace_type( + next_local_snap->snapshot_namespace); + if (next_snap_type != cls::rbd::GROUP_SNAPSHOT_NAMESPACE_TYPE_MIRROR) { + continue; // next snap is user snap again. + } else if (next_local_snap->state != cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE) { + break; // next snap is not complete yet. + } + } + continue; + } + if (!prune_snap) { + prune_snap = &(*local_snap); + } + + if (is_prior_snap_user) { + is_prior_snap_user = false; // free to continue prune on next iteratation + skip_next_snap_check = true; // as we are pruning mirror snap before user snap + continue; + } + + if (!skip_next_snap_check) { auto next_local_snap = std::next(local_snap); // If next local snap is end, or if it is the syncing in-progress snap, // then we still need this group snapshot. @@ -1298,22 +1352,41 @@ void Replayer::unlink_group_snapshots( (next_local_snap->id == last_local_snap_id && next_local_snap->state != cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE)) { break; + } else { + auto next_snap_type = cls::rbd::get_group_snap_namespace_type( + next_local_snap->snapshot_namespace); + if (next_snap_type == cls::rbd::GROUP_SNAPSHOT_NAMESPACE_TYPE_USER) { + continue; + } } - remove_mirror_peer_uuid(local_snap->id); } + remove_mirror_peer_uuid(prune_snap->id); // prune all the image snaps of the group snap locally - if (prune_all_image_snapshots(&(*local_snap), locker)) { + if (prune_all_image_snapshots(prune_snap, locker)) { + prune_snap = nullptr; + skip_next_snap_check = false; continue; } - dout(10) << "all image snaps are pruned, finally unlinking group snap: " - << local_snap->id << dendl; + dout(10) << "all image snaps are pruned, finally pruning mirror group snap: " + << prune_snap->id << dendl; r = librbd::cls_client::group_snap_remove(&m_local_io_ctx, - librbd::util::group_header_name(m_local_group_id), local_snap->id); + librbd::util::group_header_name(m_local_group_id), prune_snap->id); if (r < 0) { derr << "failed to remove group snapshot : " - << local_snap->id << " : " << cpp_strerror(r) << dendl; + << prune_snap->id << " : " << cpp_strerror(r) << dendl; } + prune_snap = nullptr; + skip_next_snap_check = false; + } +} + +template +void Replayer::prune_group_snapshots(std::unique_lock* locker) { + if (m_local_group_snaps.empty()) { + return; } + prune_user_group_snapshots(locker); + prune_mirror_group_snapshots(locker); } // set_image_replayer_limits, sets limits of remote_snap_id_end in the image diff --git a/src/tools/rbd_mirror/group_replayer/Replayer.h b/src/tools/rbd_mirror/group_replayer/Replayer.h index eb3fbb2f6f8..11b55f88c8e 100644 --- a/src/tools/rbd_mirror/group_replayer/Replayer.h +++ b/src/tools/rbd_mirror/group_replayer/Replayer.h @@ -190,7 +190,9 @@ private: bool prune_all_image_snapshots( cls::rbd::GroupSnapshot *local_snap, std::unique_lock* locker); - void unlink_group_snapshots(std::unique_lock* locker); + void prune_user_group_snapshots(std::unique_lock* locker); + void prune_mirror_group_snapshots(std::unique_lock* locker); + void prune_group_snapshots(std::unique_lock* locker); void set_image_replayer_limits(const std::string &image_id, cls::rbd::GroupSnapshot *remote_snap, -- 2.39.5