From 2e05fc9730a226df451588ae0b59100a89407e28 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Thu, 27 Mar 2025 13:09:47 -0400 Subject: [PATCH] rbd_mirror: add fields to mirror group status's description str Add the following fields to the mirror group status's description string: - last_snapshot_bytes - last_snapshot_complete_seconds - local_snapshot_timestamp - remote_snapshot_timestamp Signed-off-by: Ramana Raja --- src/tools/rbd_mirror/GroupReplayer.cc | 98 ++++++++++++- src/tools/rbd_mirror/GroupReplayer.h | 7 + src/tools/rbd_mirror/ImageReplayer.cc | 9 ++ src/tools/rbd_mirror/ImageReplayer.h | 1 + src/tools/rbd_mirror/MirrorStatusUpdater.cc | 19 ++- src/tools/rbd_mirror/MirrorStatusUpdater.h | 3 +- .../rbd_mirror/group_replayer/Replayer.cc | 136 +++++++++++++++++- .../rbd_mirror/group_replayer/Replayer.h | 7 + .../rbd_mirror/image_replayer/Replayer.h | 4 + .../image_replayer/snapshot/Replayer.h | 5 + 10 files changed, 282 insertions(+), 7 deletions(-) diff --git a/src/tools/rbd_mirror/GroupReplayer.cc b/src/tools/rbd_mirror/GroupReplayer.cc index d44987f40d4..d97a3511dec 100644 --- a/src/tools/rbd_mirror/GroupReplayer.cc +++ b/src/tools/rbd_mirror/GroupReplayer.cc @@ -520,6 +520,7 @@ void GroupReplayer::on_stop_replay(int r, const std::string &desc) m_state_desc = ""; } + cancel_update_mirror_group_replay_status(); cancel_image_replayers_check(); shut_down(r); } @@ -670,6 +671,7 @@ void GroupReplayer::handle_create_group_replayer(int r) { std::unique_lock timer_locker{m_threads->timer_lock}; schedule_image_replayers_check(); + schedule_update_mirror_group_replay_status(); } set_mirror_group_status_update( @@ -1065,6 +1067,62 @@ void GroupReplayer::remove_group_status_remote(bool force, Context *on_finish } } +template +void GroupReplayer::schedule_update_mirror_group_replay_status() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock)); + if (m_state != STATE_REPLAYING) { + return; + } + 
dout(10) << dendl; + + // periodically update the replaying status even if nothing changes + // so that we can adjust our performance stats + ceph_assert(m_update_status_task == nullptr); + m_update_status_task = create_context_callback< + GroupReplayer, + &GroupReplayer::handle_update_mirror_group_replay_status>(this); + m_threads->timer->add_event_after(10, m_update_status_task); +} + +template +void GroupReplayer::handle_update_mirror_group_replay_status(int r) { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock)); + + ceph_assert(m_update_status_task != nullptr); + m_update_status_task = nullptr; + + auto ctx = new LambdaContext([this](int) { + update_mirror_group_replay_status(); + + { + std::unique_lock locker{m_lock}; + std::unique_lock timer_locker{m_threads->timer_lock}; + + schedule_update_mirror_group_replay_status(); + } + m_in_flight_op_tracker.finish_op(); + }); + + m_in_flight_op_tracker.start_op(); + m_threads->work_queue->queue(ctx, 0); +} + +template +void GroupReplayer::cancel_update_mirror_group_replay_status() { + std::unique_lock timer_locker{m_threads->timer_lock}; + if (m_update_status_task != nullptr) { + dout(10) << dendl; + + if (m_threads->timer->cancel_event(m_update_status_task)) { + m_update_status_task = nullptr; + } + } +} + template void GroupReplayer::set_mirror_group_status_update( cls::rbd::MirrorGroupStatusState state, const std::string &desc) { @@ -1125,10 +1183,10 @@ void GroupReplayer::set_mirror_group_status_update( } m_local_status_updater->set_mirror_group_status(m_global_group_id, - local_status, true); + local_status, true, false); if (m_remote_group_peer.mirror_status_updater != nullptr) { m_remote_group_peer.mirror_status_updater->set_mirror_group_status( - m_global_group_id, remote_status, true); + m_global_group_id, remote_status, true, false); } { @@ -1138,6 +1196,42 @@ void GroupReplayer::set_mirror_group_status_update( } } +template +void 
GroupReplayer::update_mirror_group_replay_status() { + dout(10) << dendl; + + reregister_admin_socket_hook(); + + cls::rbd::MirrorGroupStatusState status_state; + + if (is_replaying()) { + status_state = cls::rbd::MIRROR_GROUP_STATUS_STATE_REPLAYING; + } else { + dout(10) << "not replaying: ignoring update" << dendl; + return; + } + + ceph_assert(m_replayer != nullptr); + std::string replay_desc; + if (!m_replayer->get_replay_status(&replay_desc)) { + dout(15) << "waiting for replay status" << dendl; + return; + } + + cls::rbd::MirrorGroupSiteStatus site_status; + site_status.state = status_state; + site_status.up = true; + site_status.description = "replaying, " + replay_desc; + + m_local_status_updater->set_mirror_group_status(m_global_group_id, + site_status, false, + true); + if (m_remote_group_peer.mirror_status_updater != nullptr) { + m_remote_group_peer.mirror_status_updater->set_mirror_group_status( + m_global_group_id, site_status, false, true); + } +} + } // namespace mirror } // namespace rbd diff --git a/src/tools/rbd_mirror/GroupReplayer.h b/src/tools/rbd_mirror/GroupReplayer.h index a5341d21654..b9f89fdb44a 100644 --- a/src/tools/rbd_mirror/GroupReplayer.h +++ b/src/tools/rbd_mirror/GroupReplayer.h @@ -223,6 +223,7 @@ private: AsyncOpTracker m_in_flight_op_tracker; Context* m_replayer_check_task = nullptr; + Context* m_update_status_task = nullptr; group_replayer::BootstrapRequest *m_bootstrap_request = nullptr; group_replayer::Replayer *m_replayer = nullptr; @@ -287,6 +288,12 @@ private: void set_mirror_group_status_update(cls::rbd::MirrorGroupStatusState state, const std::string &desc); + + void schedule_update_mirror_group_replay_status(); + void handle_update_mirror_group_replay_status(int r); + void cancel_update_mirror_group_replay_status(); + void update_mirror_group_replay_status(); + void wait_for_ops(); void handle_wait_for_ops(int r); diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index 
6a8d877dd27..1b7a4f98260 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -305,6 +305,15 @@ uint64_t ImageReplayer::get_remote_snap_id_end_limit() { return CEPH_NOSNAP; } +template +uint64_t ImageReplayer::get_last_snapshot_bytes() const { + std::unique_lock locker(m_lock); + if (m_replayer != nullptr) { + return m_replayer->get_last_snapshot_bytes(); + } + return 0; +} + template void ImageReplayer::set_state_description(int r, const std::string &desc) { dout(10) << "r=" << r << ", desc=" << desc << dendl; diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h index c2dbe995ee6..538b8d81900 100644 --- a/src/tools/rbd_mirror/ImageReplayer.h +++ b/src/tools/rbd_mirror/ImageReplayer.h @@ -148,6 +148,7 @@ public: void prune_snapshot(uint64_t snap_id); void set_remote_snap_id_end_limit(uint64_t snap_id); uint64_t get_remote_snap_id_end_limit(); + uint64_t get_last_snapshot_bytes() const; protected: /** diff --git a/src/tools/rbd_mirror/MirrorStatusUpdater.cc b/src/tools/rbd_mirror/MirrorStatusUpdater.cc index 8f453d1e5b2..d4de4302f52 100644 --- a/src/tools/rbd_mirror/MirrorStatusUpdater.cc +++ b/src/tools/rbd_mirror/MirrorStatusUpdater.cc @@ -264,13 +264,26 @@ template void MirrorStatusUpdater::set_mirror_group_status( const std::string& global_group_id, const cls::rbd::MirrorGroupSiteStatus& mirror_group_site_status, - bool immediate_update) { + bool immediate_update, + bool skip_image_statuses_update) { dout(15) << "global_group_id=" << global_group_id << ", " - << "mirror_group_site_status=" << mirror_group_site_status << dendl; + << "mirror_group_site_status=" << mirror_group_site_status << ", " + << "immediate_update=" << immediate_update << ", " + << "skip_image_statuses_update=" << skip_image_statuses_update << dendl; std::unique_lock locker(m_lock); - m_global_group_status[global_group_id] = mirror_group_site_status; + if (skip_image_statuses_update) { + auto it = 
m_global_group_status.find(global_group_id); + ceph_assert(it != m_global_group_status.end()); + // update only group status fields + it->second.description = mirror_group_site_status.description; + it->second.state = mirror_group_site_status.state; + it->second.up = mirror_group_site_status.up; + } else { + m_global_group_status[global_group_id] = mirror_group_site_status; + } + if (immediate_update) { m_update_global_group_ids.insert(global_group_id); queue_update_task(std::move(locker)); diff --git a/src/tools/rbd_mirror/MirrorStatusUpdater.h b/src/tools/rbd_mirror/MirrorStatusUpdater.h index 998dcb1628c..a2fc2017184 100644 --- a/src/tools/rbd_mirror/MirrorStatusUpdater.h +++ b/src/tools/rbd_mirror/MirrorStatusUpdater.h @@ -52,7 +52,8 @@ public: void set_mirror_group_status( const std::string& global_group_id, const cls::rbd::MirrorGroupSiteStatus& mirror_group_site_status, - bool immediate_update); + bool immediate_update, + bool skip_image_statuses_update); void remove_mirror_group_status(const std::string& global_group_id, bool immediate_update, Context* on_finish); void remove_refresh_mirror_group_status(const std::string& global_group_id, diff --git a/src/tools/rbd_mirror/group_replayer/Replayer.cc b/src/tools/rbd_mirror/group_replayer/Replayer.cc index 8c323ea9e94..c47195624b7 100644 --- a/src/tools/rbd_mirror/group_replayer/Replayer.cc +++ b/src/tools/rbd_mirror/group_replayer/Replayer.cc @@ -33,6 +33,48 @@ using librbd::util::create_async_context_callback; using librbd::util::create_context_callback; using librbd::util::create_rados_callback; +namespace { + +const cls::rbd::GroupSnapshot* get_latest_mirror_group_snapshot( + const std::vector& gp_snaps) { + for (auto it = gp_snaps.rbegin(); it != gp_snaps.rend(); ++it) { + if (it->state == cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE && + (cls::rbd::get_group_snap_namespace_type(it->snapshot_namespace) == + cls::rbd::GROUP_SNAPSHOT_NAMESPACE_TYPE_MIRROR)) { + return &*it; + } + } + return nullptr; +} + +int 
get_group_snapshot_timestamp(librados::IoCtx& group_ioctx, + const cls::rbd::GroupSnapshot& group_snap, + utime_t* timestamp) { + // Set timestamp for an empty group snapshot as zero + if (group_snap.snaps.empty()) { + *timestamp = utime_t(0, 0); + return 0; + } + + cls::rbd::SnapshotInfo image_snap_info; + std::vector timestamps; + for (const auto& image_snap : group_snap.snaps) { + // TODO: Fetch the member image's snapshot info using the member image's + // IoCtx. Below assumes that the member images are in the group's pool. + int r = librbd::cls_client::snapshot_get( + &group_ioctx, librbd::util::header_name(image_snap.image_id), + image_snap.snap_id, &image_snap_info); + if (r < 0) { + return r; + } + timestamps.push_back(image_snap_info.timestamp); + } + *timestamp = *std::min_element(timestamps.begin(), timestamps.end()); + return 0; +} + +} // anonymous namespace + template Replayer::Replayer( Threads* threads, @@ -423,6 +465,15 @@ void Replayer::validate_image_snaps_sync_complete( } dout(10) << "group snap_id: " << group_snap_id << dendl; + if (m_snapshot_start.is_zero()) { + // The mirror group snapshot's start time being zero and the group snapshot + // being incomplete indicate that the mirror daemon was restarted. So reset + // the mirror group snap's start time that is used to calculate the total + // time taken to complete the syncing of the mirror group snap as best as + // we can. 
+ m_snapshot_start = ceph_clock_now(); + } + auto itr = std::find_if( m_remote_group_snaps.begin(), m_remote_group_snaps.end(), [group_snap_id](const cls::rbd::GroupSnapshot &s) { @@ -682,6 +733,10 @@ void Replayer::try_create_group_snapshot(cls::rbd::GroupSnapshot snap, } ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + if (m_snapshot_start.is_zero()) { + m_snapshot_start = ceph_clock_now(); + } + auto snap_type = cls::rbd::get_group_snap_namespace_type( snap.snapshot_namespace); if (snap_type == cls::rbd::GROUP_SNAPSHOT_NAMESPACE_TYPE_MIRROR) { @@ -898,7 +953,24 @@ void Replayer::handle_mirror_snapshot_complete( int r, const std::string &group_snap_id, Context *on_finish) { dout(10) << group_snap_id << ", r=" << r << dendl; - on_finish->complete(r); + if (r < 0) { + on_finish->complete(r); + return; + } + + utime_t duration = ceph_clock_now() - m_snapshot_start; + m_last_snapshot_complete_seconds = duration.sec(); + m_snapshot_start = utime_t(0, 0); + + uint64_t last_snapshot_bytes = 0; + for (const auto& ir : *m_image_replayers) { + if (ir.second != nullptr) { + last_snapshot_bytes += ir.second->get_last_snapshot_bytes(); + } + } + m_last_snapshot_bytes = last_snapshot_bytes; + + on_finish->complete(0); } template @@ -1331,6 +1403,68 @@ void Replayer::finish_shut_down() { } } +template +bool Replayer::get_replay_status(std::string* description) { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + if (m_state != STATE_REPLAYING) { + derr << "replay not running" << dendl; + return false; + } + + json_spirit::mObject root_obj; + root_obj["last_snapshot_complete_seconds"] = + m_last_snapshot_complete_seconds; + root_obj["last_snapshot_bytes"] = m_last_snapshot_bytes; + + auto remote_gp_snap_ptr = get_latest_mirror_group_snapshot( + m_remote_group_snaps); + if (remote_gp_snap_ptr != nullptr) { + utime_t timestamp; + int r = get_group_snapshot_timestamp(m_remote_io_ctx, *remote_gp_snap_ptr, + ×tamp); + if (r < 0) { + derr << "error getting timestamp of 
remote group snapshot ID: " + << remote_gp_snap_ptr->id << ", r=" << cpp_strerror(r) << dendl; + return false; + } + root_obj["remote_snapshot_timestamp"] = timestamp.sec(); + } else { + return false; + } + + auto latest_local_gp_snap_ptr = get_latest_mirror_group_snapshot( + m_local_group_snaps); + if (latest_local_gp_snap_ptr != nullptr) { + // find remote group snap with ID matching that of the latest local + // group snap + remote_gp_snap_ptr = nullptr; + for (const auto& remote_group_snap: m_remote_group_snaps) { + if (remote_group_snap.id == latest_local_gp_snap_ptr->id) { + remote_gp_snap_ptr = &remote_group_snap; + break; + } + } + if (remote_gp_snap_ptr != nullptr) { + utime_t timestamp; + int r = get_group_snapshot_timestamp(m_remote_io_ctx, + *remote_gp_snap_ptr, + ×tamp); + if (r < 0) { + derr << "error getting timestamp of matching remote group snapshot ID: " + << remote_gp_snap_ptr->id << ", r=" << cpp_strerror(r) << dendl; + } else { + root_obj["local_snapshot_timestamp"] = timestamp.sec(); + } + } + } + + *description = json_spirit::write(root_obj, + json_spirit::remove_trailing_zeros); + return true; +} + } // namespace group_replayer } // namespace mirror } // namespace rbd diff --git a/src/tools/rbd_mirror/group_replayer/Replayer.h b/src/tools/rbd_mirror/group_replayer/Replayer.h index 1779fa47567..9c4215166fd 100644 --- a/src/tools/rbd_mirror/group_replayer/Replayer.h +++ b/src/tools/rbd_mirror/group_replayer/Replayer.h @@ -69,6 +69,8 @@ public: return (m_state == STATE_REPLAYING || m_state == STATE_IDLE); } + bool get_replay_status(std::string* description); + private: enum State { STATE_INIT, @@ -105,6 +107,11 @@ private: bool m_stop_requested = false; bool m_retry_validate_snap = false; + utime_t m_snapshot_start; + uint64_t m_last_snapshot_complete_seconds = 0; + + uint64_t m_last_snapshot_bytes = 0; + bool is_replay_interrupted(); bool is_replay_interrupted(std::unique_lock* locker); int local_group_image_list_by_id( diff --git 
a/src/tools/rbd_mirror/image_replayer/Replayer.h b/src/tools/rbd_mirror/image_replayer/Replayer.h index dc9ebe5ca97..f267d87c9e3 100644 --- a/src/tools/rbd_mirror/image_replayer/Replayer.h +++ b/src/tools/rbd_mirror/image_replayer/Replayer.h @@ -35,6 +35,10 @@ struct Replayer { virtual void prune_snapshot(uint64_t) = 0; virtual void set_remote_snap_id_end_limit(uint64_t) = 0; virtual uint64_t get_remote_snap_id_end_limit() = 0; + + virtual uint64_t get_last_snapshot_bytes() const { + return 0; + } }; } // namespace image_replayer diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h index c1936c8a1ea..8b0a78e8c00 100644 --- a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h @@ -120,6 +120,11 @@ public: return m_remote_group_image_snap_id; } + uint64_t get_last_snapshot_bytes() const override { + std::unique_lock locker(m_lock); + return m_last_snapshot_bytes; + } + private: /** * @verbatim -- 2.39.5