From 0bf8b5ef4494c5f91bb29b903ae70641921a2deb Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Thu, 20 Mar 2025 11:14:48 +0530 Subject: [PATCH] rbd-mirror: group-replayer check for remote demote state There are three possible situations when resync is flagged while the rbd-mirror daemon is working on the group: 1. No demotion on the primary while/just before the resync is played: there is no demote snap alongside the resync, so we can cancel syncing other snaps and start the resync as soon as it is flagged, because there is no point in syncing snaps when we are going to delete the whole group and resync afresh anyway. 2. Demote first, immediately followed by resync: the demote came first, which means that before proceeding with the resync we should always check that the last remote snap is PRIMARY (i.e. validate that the remote is still primary) and only then proceed. 3. Resync first, immediately followed by demote: the resync came first, so we head straight to resync. Signed-off-by: Prasanna Kumar Kalever --- .../rbd_mirror/group_replayer/Replayer.cc | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/tools/rbd_mirror/group_replayer/Replayer.cc b/src/tools/rbd_mirror/group_replayer/Replayer.cc index 63c2f097e9730..2fe96e8bba639 100644 --- a/src/tools/rbd_mirror/group_replayer/Replayer.cc +++ b/src/tools/rbd_mirror/group_replayer/Replayer.cc @@ -270,12 +270,6 @@ void Replayer::load_local_group_snapshots() { if (m_stop_requested) { return; - } else if (is_resync_requested()) { - m_stop_requested = true; - dout(10) << "local group resync requested" << dendl; - // send stop for Group Replayer - notify_group_listener_stop(); - return; } else if (is_rename_requested()) { m_stop_requested = true; dout(10) << "remote group rename requested" << dendl; @@ -378,22 +372,35 @@ void Replayer::handle_load_remote_group_snapshots(int r) { } m_in_flight_op_tracker.finish_op(); + auto last_local_snap = m_local_group_snaps.rbegin(); + auto last_remote_snap = m_remote_group_snaps.rbegin(); if (r < 
0) { // may be remote group is deleted? derr << "error listing remote mirror group snapshots: " << cpp_strerror(r) << dendl; notify_group_listener_stop(); return; + } else if (is_resync_requested()) { + dout(10) << "local group resync requested" << dendl; + auto last_remote_snap_ns = std::get_if( + &last_remote_snap->snapshot_namespace); + if (last_remote_snap_ns && + last_remote_snap_ns->state == cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY) { + m_stop_requested = true; + // send stop for Group Replayer + notify_group_listener_stop(); + return; + } + dout(10) << "turns out remote is not primary, we cannot resync, will retry later" + << dendl; } if (!m_local_group_snaps.empty()) { - auto last_local_snap = m_local_group_snaps.rbegin(); unlink_group_snapshots(last_local_snap->id); auto last_local_snap_ns = std::get_if( &last_local_snap->snapshot_namespace); if (last_local_snap_ns && last_local_snap_ns->state == cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY_DEMOTED && !m_remote_group_snaps.empty()) { - auto last_remote_snap = m_remote_group_snaps.rbegin(); if (last_local_snap->id == last_remote_snap->id) { m_stop_requested = true; notify_group_listener_stop(); -- 2.39.5