rbd_mirror: add fields to mirror group status's description str
author     Ramana Raja <rraja@redhat.com>
           Thu, 27 Mar 2025 17:09:47 +0000 (13:09 -0400)
committer  Ilya Dryomov <idryomov@gmail.com>
           Sun, 28 Sep 2025 18:25:03 +0000 (20:25 +0200)
Add the following fields to the mirror group status's description string:
- last_snapshot_bytes
- last_snapshot_complete_seconds
- local_snapshot_timestamp
- remote_snapshot_timestamp

Signed-off-by: Ramana Raja <rraja@redhat.com>
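
With this change, a replaying group's status description is the fixed prefix "replaying, " followed by a JSON object produced by group_replayer::Replayer::get_replay_status(). An illustrative example (values invented; local_snapshot_timestamp is omitted when the latest local group snapshot has no matching remote snapshot):

    replaying, {"last_snapshot_bytes":16777216,"last_snapshot_complete_seconds":42,"local_snapshot_timestamp":1743095387,"remote_snapshot_timestamp":1743095387}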
src/tools/rbd_mirror/GroupReplayer.cc
src/tools/rbd_mirror/GroupReplayer.h
src/tools/rbd_mirror/ImageReplayer.cc
src/tools/rbd_mirror/ImageReplayer.h
src/tools/rbd_mirror/MirrorStatusUpdater.cc
src/tools/rbd_mirror/MirrorStatusUpdater.h
src/tools/rbd_mirror/group_replayer/Replayer.cc
src/tools/rbd_mirror/group_replayer/Replayer.h
src/tools/rbd_mirror/image_replayer/Replayer.h
src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h

index d44987f40d4cf28de4359ac4ca55dc900eb92d72..d97a3511decf6058128b156743b05158ce50016e 100644 (file)
@@ -520,6 +520,7 @@ void GroupReplayer<I>::on_stop_replay(int r, const std::string &desc)
     m_state_desc = "";
   }
 
+  cancel_update_mirror_group_replay_status();
   cancel_image_replayers_check();
   shut_down(r);
 }
@@ -670,6 +671,7 @@ void GroupReplayer<I>::handle_create_group_replayer(int r) {
 
     std::unique_lock timer_locker{m_threads->timer_lock};
     schedule_image_replayers_check();
+    schedule_update_mirror_group_replay_status();
   }
 
   set_mirror_group_status_update(
@@ -1065,6 +1067,62 @@ void GroupReplayer<I>::remove_group_status_remote(bool force, Context *on_finish
   }
 }
 
+template <typename I>
+void GroupReplayer<I>::schedule_update_mirror_group_replay_status() {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+  ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock));
+  if (m_state != STATE_REPLAYING) {
+    return;
+  }
+
+  dout(10) << dendl;
+
+  // periodically update the replaying status even if nothing changes
+  // so that we can adjust our performance stats
+  ceph_assert(m_update_status_task == nullptr);
+  m_update_status_task = create_context_callback<
+    GroupReplayer<I>,
+    &GroupReplayer<I>::handle_update_mirror_group_replay_status>(this);
+  m_threads->timer->add_event_after(10, m_update_status_task);
+}
+
+template <typename I>
+void GroupReplayer<I>::handle_update_mirror_group_replay_status(int r) {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock));
+
+  ceph_assert(m_update_status_task != nullptr);
+  m_update_status_task = nullptr;
+
+  auto ctx = new LambdaContext([this](int) {
+      update_mirror_group_replay_status();
+
+      {
+       std::unique_lock locker{m_lock};
+       std::unique_lock timer_locker{m_threads->timer_lock};
+
+       schedule_update_mirror_group_replay_status();
+      }
+      m_in_flight_op_tracker.finish_op();
+    });
+
+  m_in_flight_op_tracker.start_op();
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void GroupReplayer<I>::cancel_update_mirror_group_replay_status() {
+  std::unique_lock timer_locker{m_threads->timer_lock};
+  if (m_update_status_task != nullptr) {
+    dout(10) << dendl;
+
+    if (m_threads->timer->cancel_event(m_update_status_task)) {
+      m_update_status_task = nullptr;
+    }
+  }
+}
+
 template <typename I>
 void GroupReplayer<I>::set_mirror_group_status_update(
     cls::rbd::MirrorGroupStatusState state, const std::string &desc) {
@@ -1125,10 +1183,10 @@ void GroupReplayer<I>::set_mirror_group_status_update(
   }
 
   m_local_status_updater->set_mirror_group_status(m_global_group_id,
-                                                  local_status, true);
+                                                  local_status, true, false);
   if (m_remote_group_peer.mirror_status_updater != nullptr) {
     m_remote_group_peer.mirror_status_updater->set_mirror_group_status(
-        m_global_group_id, remote_status, true);
+        m_global_group_id, remote_status, true, false);
   }
 
   {
@@ -1138,6 +1196,42 @@ void GroupReplayer<I>::set_mirror_group_status_update(
   }
 }
 
+template <typename I>
+void GroupReplayer<I>::update_mirror_group_replay_status() {
+  dout(10) << dendl;
+
+  reregister_admin_socket_hook();
+
+  cls::rbd::MirrorGroupStatusState status_state;
+
+  if (is_replaying()) {
+    status_state = cls::rbd::MIRROR_GROUP_STATUS_STATE_REPLAYING;
+  } else {
+    dout(10) << "not replaying: ignoring update" << dendl;
+    return;
+  }
+
+  ceph_assert(m_replayer != nullptr);
+  std::string replay_desc;
+  if (!m_replayer->get_replay_status(&replay_desc)) {
+    dout(15) << "waiting for replay status" << dendl;
+    return;
+  }
+
+  cls::rbd::MirrorGroupSiteStatus site_status;
+  site_status.state = status_state;
+  site_status.up = true;
+  site_status.description = "replaying, " + replay_desc;
+
+  m_local_status_updater->set_mirror_group_status(m_global_group_id,
+                                                  site_status, false,
+                                                  true);
+  if (m_remote_group_peer.mirror_status_updater != nullptr) {
+    m_remote_group_peer.mirror_status_updater->set_mirror_group_status(
+        m_global_group_id, site_status, false, true);
+  }
+}
+
 } // namespace mirror
 } // namespace rbd
 
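The periodic status update added above follows the usual rbd-mirror timer idiom: the timer completion runs under timer_lock and must not block, so the actual update is bounced to the work queue, and the queued context re-arms the timer for the next 10-second tick. A condensed sketch of the idiom (generic names, not part of this commit):

    // timer completion: invoked under m_threads->timer_lock, must not block
    void handle_tick(int r) {
      m_tick_task = nullptr;
      m_in_flight_op_tracker.start_op();
      // defer the real work: the update may take m_lock and perform I/O
      m_threads->work_queue->queue(new LambdaContext([this](int) {
          do_periodic_work();
          {
            std::unique_lock locker{m_lock};
            std::unique_lock timer_locker{m_threads->timer_lock};
            schedule_tick();  // re-arm the timer
          }
          m_in_flight_op_tracker.finish_op();
        }), 0);
    }

Note that cancel_update_mirror_group_replay_status() clears m_update_status_task only when cancel_event() succeeds: if the event has already fired, the handler itself has cleared the pointer before queuing the update.
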
index a5341d21654e71d4f984626a15d3c9c36ccf81f6..b9f89fdb44a619c75e773f10336ef5d76ff163c9 100644 (file)
@@ -223,6 +223,7 @@ private:
 
   AsyncOpTracker m_in_flight_op_tracker;
   Context* m_replayer_check_task = nullptr;
+  Context* m_update_status_task = nullptr;
 
   group_replayer::BootstrapRequest<ImageCtxT> *m_bootstrap_request = nullptr;
   group_replayer::Replayer<ImageCtxT> *m_replayer = nullptr;
@@ -287,6 +288,12 @@ private:
 
   void set_mirror_group_status_update(cls::rbd::MirrorGroupStatusState state,
                                       const std::string &desc);
+
+  void schedule_update_mirror_group_replay_status();
+  void handle_update_mirror_group_replay_status(int r);
+  void cancel_update_mirror_group_replay_status();
+  void update_mirror_group_replay_status();
+
   void wait_for_ops();
   void handle_wait_for_ops(int r);
 
index 6a8d877dd273f4fe531df83b25c86ca0d0fb5fc5..1b7a4f98260f40133d9ef63d3664474c348d5bc5 100644 (file)
@@ -305,6 +305,15 @@ uint64_t ImageReplayer<I>::get_remote_snap_id_end_limit() {
   return CEPH_NOSNAP;
 }
 
+template <typename I>
+uint64_t ImageReplayer<I>::get_last_snapshot_bytes() const {
+  std::unique_lock locker(m_lock);
+  if (m_replayer != nullptr) {
+    return m_replayer->get_last_snapshot_bytes();
+  }
+  return 0;
+}
+
 template <typename I>
 void ImageReplayer<I>::set_state_description(int r, const std::string &desc) {
   dout(10) << "r=" << r << ", desc=" << desc << dendl;
index c2dbe995ee6186a5076fafec52423048aa6bf3ee..538b8d819006eeef95165d426d6281f1f7d84c49 100644 (file)
@@ -148,6 +148,7 @@ public:
   void prune_snapshot(uint64_t snap_id);
   void set_remote_snap_id_end_limit(uint64_t snap_id);
   uint64_t get_remote_snap_id_end_limit();
+  uint64_t get_last_snapshot_bytes() const;
 
 protected:
   /**
index 8f453d1e5b24cda1ceed7afd6c27cbf9730486e4..d4de4302f5244dc585d7015ca3f84d4bc71a3ffd 100644 (file)
@@ -264,13 +264,26 @@ template <typename I>
 void MirrorStatusUpdater<I>::set_mirror_group_status(
     const std::string& global_group_id,
     const cls::rbd::MirrorGroupSiteStatus& mirror_group_site_status,
-    bool immediate_update) {
+    bool immediate_update,
+    bool skip_image_statuses_update) {
   dout(15) << "global_group_id=" << global_group_id << ", "
-           << "mirror_group_site_status=" << mirror_group_site_status << dendl;
+           << "mirror_group_site_status=" << mirror_group_site_status << ", "
+           << "immediate_update=" << immediate_update << ", "
+           << "skip_image_statuses_update=" << skip_image_statuses_update << dendl;
 
   std::unique_lock locker(m_lock);
 
-  m_global_group_status[global_group_id] = mirror_group_site_status;
+  if (skip_image_statuses_update) {
+    auto it = m_global_group_status.find(global_group_id);
+    ceph_assert(it != m_global_group_status.end());
+    // update only group status fields
+    it->second.description = mirror_group_site_status.description;
+    it->second.state = mirror_group_site_status.state;
+    it->second.up = mirror_group_site_status.up;
+  } else {
+    m_global_group_status[global_group_id] = mirror_group_site_status;
+  }
+
   if (immediate_update) {
     m_update_global_group_ids.insert(global_group_id);
     queue_update_task(std::move(locker));
index 998dcb1628c23189eb5e063a8cc33e87403f921f..a2fc2017184084da414f9bc398d78c29a7d9feb8 100644 (file)
@@ -52,7 +52,8 @@ public:
   void set_mirror_group_status(
       const std::string& global_group_id,
       const cls::rbd::MirrorGroupSiteStatus& mirror_group_site_status,
-      bool immediate_update);
+      bool immediate_update,
+      bool skip_image_statuses_update);
   void remove_mirror_group_status(const std::string& global_group_id,
                                   bool immediate_update, Context* on_finish);
   void remove_refresh_mirror_group_status(const std::string& global_group_id,
index 8c323ea9e940a6861b9d0e077572dad2a592ae46..c47195624b7d027b123254d7798286ad56064454 100644 (file)
@@ -33,6 +33,48 @@ using librbd::util::create_async_context_callback;
 using librbd::util::create_context_callback;
 using librbd::util::create_rados_callback;
 
+namespace {
+
+const cls::rbd::GroupSnapshot* get_latest_mirror_group_snapshot(
+    const std::vector<cls::rbd::GroupSnapshot>& gp_snaps) {
+  for (auto it = gp_snaps.rbegin(); it != gp_snaps.rend(); ++it) {
+    if (it->state == cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE &&
+        (cls::rbd::get_group_snap_namespace_type(it->snapshot_namespace) ==
+         cls::rbd::GROUP_SNAPSHOT_NAMESPACE_TYPE_MIRROR)) {
+         return &*it;
+    }
+  }
+  return nullptr;
+}
+
+int get_group_snapshot_timestamp(librados::IoCtx& group_ioctx,
+                                 const cls::rbd::GroupSnapshot& group_snap,
+                                 utime_t* timestamp) {
+  // Set the timestamp to zero for an empty group snapshot
+  if (group_snap.snaps.empty()) {
+    *timestamp = utime_t(0, 0);
+    return 0;
+  }
+
+  cls::rbd::SnapshotInfo image_snap_info;
+  std::vector<utime_t> timestamps;
+  for (const auto& image_snap : group_snap.snaps) {
+    // TODO: Fetch the member image's snapshot info using the member image's
+    // IoCtx. Below assumes that the member images are in the group's pool.
+    int r = librbd::cls_client::snapshot_get(
+      &group_ioctx, librbd::util::header_name(image_snap.image_id),
+      image_snap.snap_id, &image_snap_info);
+    if (r < 0) {
+      return r;
+    }
+    timestamps.push_back(image_snap_info.timestamp);
+  }
+  *timestamp = *std::min_element(timestamps.begin(), timestamps.end());
+  return 0;
+}
+
+} // anonymous namespace
+
 template <typename I>
 Replayer<I>::Replayer(
     Threads<I>* threads,
@@ -423,6 +465,15 @@ void Replayer<I>::validate_image_snaps_sync_complete(
   }
   dout(10) << "group snap_id: " << group_snap_id << dendl;
 
+  if (m_snapshot_start.is_zero()) {
+    // A zero start time while the group snapshot is still incomplete
+    // means the mirror daemon was restarted mid-sync. Reset the start
+    // time, which is used to compute the total time taken to sync the
+    // mirror group snapshot, so the reported duration is a best-effort
+    // approximation.
+    m_snapshot_start = ceph_clock_now();
+  }
+
   auto itr = std::find_if(
       m_remote_group_snaps.begin(), m_remote_group_snaps.end(),
       [group_snap_id](const cls::rbd::GroupSnapshot &s) {
@@ -682,6 +733,10 @@ void Replayer<I>::try_create_group_snapshot(cls::rbd::GroupSnapshot snap,
   }
   ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
 
+  if (m_snapshot_start.is_zero()) {
+    m_snapshot_start = ceph_clock_now();
+  }
+
   auto snap_type = cls::rbd::get_group_snap_namespace_type(
       snap.snapshot_namespace);
   if (snap_type == cls::rbd::GROUP_SNAPSHOT_NAMESPACE_TYPE_MIRROR) {
@@ -898,7 +953,24 @@ void Replayer<I>::handle_mirror_snapshot_complete(
     int r, const std::string &group_snap_id, Context *on_finish) {
   dout(10) << group_snap_id << ", r=" << r << dendl;
 
-  on_finish->complete(r);
+  if (r < 0) {
+    on_finish->complete(r);
+    return;
+  }
+
+  utime_t duration = ceph_clock_now() - m_snapshot_start;
+  m_last_snapshot_complete_seconds = duration.sec();
+  m_snapshot_start = utime_t(0, 0);
+
+  uint64_t last_snapshot_bytes = 0;
+  for (const auto& ir : *m_image_replayers) {
+    if (ir.second != nullptr) {
+      last_snapshot_bytes += ir.second->get_last_snapshot_bytes();
+    }
+  }
+  m_last_snapshot_bytes = last_snapshot_bytes;
+
+  on_finish->complete(0);
 }
 
 template <typename I>
@@ -1331,6 +1403,68 @@ void Replayer<I>::finish_shut_down() {
   }
 }
 
+template <typename I>
+bool Replayer<I>::get_replay_status(std::string* description) {
+  dout(10) << dendl;
+
+  std::unique_lock locker{m_lock};
+  if (m_state != STATE_REPLAYING) {
+    derr << "replay not running" << dendl;
+    return false;
+  }
+
+  json_spirit::mObject root_obj;
+  root_obj["last_snapshot_complete_seconds"] =
+    m_last_snapshot_complete_seconds;
+  root_obj["last_snapshot_bytes"] = m_last_snapshot_bytes;
+
+  auto remote_gp_snap_ptr = get_latest_mirror_group_snapshot(
+    m_remote_group_snaps);
+  if (remote_gp_snap_ptr != nullptr) {
+    utime_t timestamp;
+    int r = get_group_snapshot_timestamp(m_remote_io_ctx, *remote_gp_snap_ptr,
+                                         &timestamp);
+    if (r < 0) {
+      derr << "error getting timestamp of remote group snapshot ID: "
+           << remote_gp_snap_ptr->id << ", r=" << cpp_strerror(r) << dendl;
+      return false;
+    }
+    root_obj["remote_snapshot_timestamp"] = timestamp.sec();
+  } else {
+    return false;
+  }
+
+  auto latest_local_gp_snap_ptr = get_latest_mirror_group_snapshot(
+    m_local_group_snaps);
+  if (latest_local_gp_snap_ptr != nullptr) {
+    // find remote group snap with ID matching that of the latest local
+    // group snap
+    remote_gp_snap_ptr = nullptr;
+    for (const auto& remote_group_snap: m_remote_group_snaps) {
+      if (remote_group_snap.id == latest_local_gp_snap_ptr->id) {
+       remote_gp_snap_ptr = &remote_group_snap;
+       break;
+      }
+    }
+    if (remote_gp_snap_ptr != nullptr) {
+      utime_t timestamp;
+      int r = get_group_snapshot_timestamp(m_remote_io_ctx,
+                                           *remote_gp_snap_ptr,
+                                           &timestamp);
+      if (r < 0) {
+       derr << "error getting timestamp of matching remote group snapshot ID: "
+             << remote_gp_snap_ptr->id << ", r=" << cpp_strerror(r) << dendl;
+      } else {
+       root_obj["local_snapshot_timestamp"] = timestamp.sec();
+      }
+    }
+  }
+
+  *description = json_spirit::write(root_obj,
+                                    json_spirit::remove_trailing_zeros);
+  return true;
+}
+
 } // namespace group_replayer
 } // namespace mirror
 } // namespace rbd
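
get_replay_status() reads both timestamps from the remote cluster: remote_snapshot_timestamp comes from the latest complete remote mirror group snapshot, while local_snapshot_timestamp comes from the remote counterpart of the latest complete local one, i.e. the creation time of the snapshot that the local site has fully synced. A consumer of the resulting description string could strip the "replaying, " prefix and parse the payload with json_spirit; a minimal sketch under that assumption (helper name hypothetical):

    // hypothetical helper, not part of this commit; assumes
    // #include <optional> and "json_spirit/json_spirit.h"
    std::optional<json_spirit::mObject> parse_replay_status(
        const std::string& desc) {
      auto pos = desc.find(", ");
      if (pos == std::string::npos) {
        return std::nullopt;  // no JSON payload appended
      }
      json_spirit::mValue value;
      if (!json_spirit::read(desc.substr(pos + 2), value) ||
          value.type() != json_spirit::obj_type) {
        return std::nullopt;
      }
      return value.get_obj();
    }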
index 1779fa47567a5169a2968e382bee677b1c72fcdf..9c4215166fd411c2f7cceb1ecd64824e394edfe6 100644 (file)
@@ -69,6 +69,8 @@ public:
     return (m_state == STATE_REPLAYING || m_state == STATE_IDLE);
   }
 
+  bool get_replay_status(std::string* description);
+
 private:
   enum State {
     STATE_INIT,
@@ -105,6 +107,11 @@ private:
   bool m_stop_requested = false;
   bool m_retry_validate_snap = false;
 
+  utime_t m_snapshot_start;
+  uint64_t m_last_snapshot_complete_seconds = 0;
+
+  uint64_t m_last_snapshot_bytes = 0;
+
   bool is_replay_interrupted();
   bool is_replay_interrupted(std::unique_lock<ceph::mutex>* locker);
   int local_group_image_list_by_id(
index dc9ebe5ca972ab9f055d50322db52ec0500a3104..f267d87c9e352e1cd5699bce6c119745f0f3afd4 100644 (file)
@@ -35,6 +35,10 @@ struct Replayer {
   virtual void prune_snapshot(uint64_t) = 0;
   virtual void set_remote_snap_id_end_limit(uint64_t) = 0;
   virtual uint64_t get_remote_snap_id_end_limit() = 0;
+
+  virtual uint64_t get_last_snapshot_bytes() const {
+    return 0;
+  }
 };
 
 } // namespace image_replayer
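
Defaulting the new accessor to 0 in the interface keeps the journal-based replayer untouched; only the snapshot-based replayer overrides it (next hunk). Callers can therefore query any image replayer uniformly (sketch):

    // safe for journal- and snapshot-based replayers alike: a replayer
    // that does not track snapshot sizes inherits the 0 default
    uint64_t bytes = replayer->get_last_snapshot_bytes();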
index c1936c8a1ea7234138192c061fda3525ee9403f9..8b0a78e8c00c6bc2a6ae626e0d6da624513cf0af 100644 (file)
@@ -120,6 +120,11 @@ public:
     return m_remote_group_image_snap_id;
   }
 
+  uint64_t get_last_snapshot_bytes() const override {
+    std::unique_lock locker(m_lock);
+    return m_last_snapshot_bytes;
+  }
+
 private:
   /**
    * @verbatim