From: Prasanna Kumar Kalever
Date: Thu, 30 Jan 2025 11:39:30 +0000 (+0530)
Subject: rbd-mirror: more improvements in the group replayer
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=de088f71814604871e65cf6aaa160d00e996f8ba;p=ceph.git

rbd-mirror: more improvements in the group replayer

- Arresting the interruptions after shutdown
- Restart GroupReplayer if bootstrap returns remote group id as empty
- Fix a test failure in "regular group snapshots test"

Issue#23

Signed-off-by: Prasanna Kumar Kalever
---

diff --git a/src/tools/rbd_mirror/GroupReplayer.cc b/src/tools/rbd_mirror/GroupReplayer.cc
index cd3b83f5ff06e..a664b8fc79b47 100644
--- a/src/tools/rbd_mirror/GroupReplayer.cc
+++ b/src/tools/rbd_mirror/GroupReplayer.cc
@@ -595,9 +595,15 @@ void GroupReplayer::handle_bootstrap_group(int r) {
     return;
   }
 
-  C_SaferCond ctx;
-  create_group_replayer(&ctx);
-  ctx.wait();
+  if (!m_remote_group_id.empty()) {
+    C_SaferCond ctx;
+    create_group_replayer(&ctx);
+    ctx.wait();
+  } else {
+    r = -EINVAL;
+    finish_start(r, "remote is not ready yet"); // bootstrap again
+    return;
+  }
 }
 
 template
diff --git a/src/tools/rbd_mirror/group_replayer/Replayer.cc b/src/tools/rbd_mirror/group_replayer/Replayer.cc
index 2196ccaadff90..b22cd7d25ae10 100644
--- a/src/tools/rbd_mirror/group_replayer/Replayer.cc
+++ b/src/tools/rbd_mirror/group_replayer/Replayer.cc
@@ -69,6 +69,23 @@ Replayer::~Replayer() {
   ceph_assert(m_state == STATE_COMPLETE);
 }
 
+template
+bool Replayer::is_replay_interrupted() {
+  std::unique_lock locker{m_lock};
+  return is_replay_interrupted(&locker);
+}
+
+template
+bool Replayer::is_replay_interrupted(std::unique_lock* locker) {
+  if (m_state == STATE_COMPLETE) {
+    locker->unlock();
+
+    return true;
+  }
+
+  return false;
+}
+
 template
 void Replayer::schedule_load_group_snapshots() {
   dout(10) << dendl;
@@ -195,6 +212,10 @@ template
 void Replayer::load_local_group_snapshots() {
   dout(10) << "m_local_group_id=" << m_local_group_id << dendl;
 
+  if (is_replay_interrupted()) {
+    return;
+  }
+
   if (m_state != STATE_COMPLETE) {
     m_state = STATE_REPLAYING;
   }
@@ -258,6 +279,9 @@ void Replayer::load_remote_group_snapshots() {
   dout(10) << "m_remote_group_id=" << m_remote_group_id << dendl;
 
   std::unique_lock locker{m_lock};
+  if (is_replay_interrupted(&locker)) {
+    return;
+  }
   m_remote_group_snaps.clear();
   auto ctx = new LambdaContext(
     [this] (int r) {
@@ -311,6 +335,9 @@ template
 void Replayer::validate_image_snaps_sync_complete(
     const std::string &remote_group_snap_id) {
   std::unique_lock locker{m_lock};
+  if (is_replay_interrupted(&locker)) {
+    return;
+  }
   // 1. get group membership
   // 2. get snap list of each image and check any image snap has the group
   //    snapid and is set to complete. If yes call complete
@@ -437,7 +464,12 @@ void Replayer::scan_for_unsynced_group_snapshots() {
 
   bool found = false;
   bool syncs_upto_date = false;
+
   std::unique_lock locker{m_lock};
+  if (is_replay_interrupted(&locker)) {
+    return;
+  }
+
   if (m_remote_group_snaps.empty()) {
     goto out;
   }
@@ -545,6 +577,9 @@ template
 void Replayer::try_create_group_snapshot(cls::rbd::GroupSnapshot snap,
                                          std::unique_lock &locker) {
   dout(10) << snap.id << dendl;
+  if (is_replay_interrupted(&locker)) {
+    return;
+  }
   ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
 
   auto snap_type = cls::rbd::get_group_snap_namespace_type(
@@ -874,8 +909,6 @@ void Replayer::handle_create_regular_snapshot(
       << cpp_strerror(r) << dendl;
   }
   on_finish->complete(0);
-
-  schedule_load_group_snapshots();
 }
 
 template
diff --git a/src/tools/rbd_mirror/group_replayer/Replayer.h b/src/tools/rbd_mirror/group_replayer/Replayer.h
index cef4e9a5fa55a..e9c87115afb37 100644
--- a/src/tools/rbd_mirror/group_replayer/Replayer.h
+++ b/src/tools/rbd_mirror/group_replayer/Replayer.h
@@ -106,6 +106,8 @@ private:
   // map of >>
   std::map>> m_pending_group_snaps;
 
+  bool is_replay_interrupted();
+  bool is_replay_interrupted(std::unique_lock* locker);
   int local_group_image_list_by_id(
       std::vector *image_ids);
 