git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rbd-mirror: add and rename perf counters for journal and snapshot mirroring 50302/head
author Divyansh Kamboj <dkamboj@redhat.com>
Fri, 17 Mar 2023 09:19:14 +0000 (14:49 +0530)
committer Divyansh Kamboj <dkamboj@redhat.com>
Tue, 21 Mar 2023 11:00:53 +0000 (16:30 +0530)
This commit renames the existing snapshot-based and journal-based performance
counters in the rbd-mirror daemon to better reflect their purpose.
Additionally, new perf counters have been added to provide more detailed
information about the synchronization of snapshots between the source
and destination clusters.

This commit also switches to using labels (pool, namespace, image) instead of
embedding the image spec in the name of the counter.

Signed-off-by: Divyansh Kamboj <dkamboj@redhat.com>
src/tools/rbd_mirror/Types.h
src/tools/rbd_mirror/image_replayer/journal/Replayer.cc
src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc
src/tools/rbd_mirror/main.cc

index 7b2a3b5cea5fa2a834e954409fb6365fafdf51ec..9bba58fb130c3ad93425da4c3e69f76cd815ca98 100644 (file)
@@ -21,14 +21,19 @@ template <typename> struct MirrorStatusUpdater;
 // Performance counters
 enum {
   l_rbd_mirror_journal_first = 27000,
-  l_rbd_mirror_replay,
-  l_rbd_mirror_replay_bytes,
-  l_rbd_mirror_replay_latency,
+  l_rbd_mirror_journal_entries,
+  l_rbd_mirror_journal_replay_bytes,
+  l_rbd_mirror_journal_replay_latency,
   l_rbd_mirror_journal_last,
   l_rbd_mirror_snapshot_first,
-  l_rbd_mirror_snapshot_replay_snapshots,
-  l_rbd_mirror_snapshot_replay_snapshots_time,
-  l_rbd_mirror_snapshot_replay_bytes,
+  l_rbd_mirror_snapshot_snapshots,
+  l_rbd_mirror_snapshot_sync_time,
+  l_rbd_mirror_snapshot_sync_bytes,
+  // per-image only counters below
+  l_rbd_mirror_snapshot_remote_timestamp,
+  l_rbd_mirror_snapshot_local_timestamp,
+  l_rbd_mirror_snapshot_last_sync_time,
+  l_rbd_mirror_snapshot_last_sync_bytes,
   l_rbd_mirror_snapshot_last,
 };
 
index 3ce9104d247bd48ace93d53eb18863fe714be039..20560038ca7d9e504e162b8541e06a07a4faf248 100644 (file)
@@ -4,6 +4,8 @@
 #include "Replayer.h"
 #include "common/debug.h"
 #include "common/errno.h"
+#include "common/perf_counters.h"
+#include "common/perf_counters_key.h"
 #include "common/Timer.h"
 #include "librbd/Journal.h"
 #include "librbd/Utils.h"
@@ -1159,9 +1161,11 @@ void Replayer<I>::handle_process_entry_safe(
 
   auto latency = ceph_clock_now() - replay_start_time;
   if (g_journal_perf_counters) {
-    g_journal_perf_counters->inc(l_rbd_mirror_replay);
-    g_journal_perf_counters->inc(l_rbd_mirror_replay_bytes, replay_bytes);
-    g_journal_perf_counters->tinc(l_rbd_mirror_replay_latency, latency);
+    g_journal_perf_counters->inc(l_rbd_mirror_journal_entries);
+    g_journal_perf_counters->inc(l_rbd_mirror_journal_replay_bytes,
+                                 replay_bytes);
+    g_journal_perf_counters->tinc(l_rbd_mirror_journal_replay_latency,
+                                  latency);
   }
 
   auto ctx = new LambdaContext(
@@ -1170,9 +1174,9 @@ void Replayer<I>::handle_process_entry_safe(
       schedule_flush_local_replay_task();
 
       if (m_perf_counters) {
-        m_perf_counters->inc(l_rbd_mirror_replay);
-        m_perf_counters->inc(l_rbd_mirror_replay_bytes, replay_bytes);
-        m_perf_counters->tinc(l_rbd_mirror_replay_latency, latency);
+        m_perf_counters->inc(l_rbd_mirror_journal_entries);
+        m_perf_counters->inc(l_rbd_mirror_journal_replay_bytes, replay_bytes);
+        m_perf_counters->tinc(l_rbd_mirror_journal_replay_latency, latency);
       }
 
       m_event_replay_tracker.finish_op();
@@ -1270,13 +1274,23 @@ void Replayer<I>::register_perf_counters() {
 
   auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
   auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio");
-  PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_image_" + m_image_spec,
-                          l_rbd_mirror_journal_first, l_rbd_mirror_journal_last);
-  plb.add_u64_counter(l_rbd_mirror_replay, "replay", "Replays", "r", prio);
-  plb.add_u64_counter(l_rbd_mirror_replay_bytes, "replay_bytes",
-                      "Replayed data", "rb", prio, unit_t(UNIT_BYTES));
-  plb.add_time_avg(l_rbd_mirror_replay_latency, "replay_latency",
-                   "Replay latency", "rl", prio);
+
+  auto local_image_ctx = m_state_builder->local_image_ctx;
+  std::string labels = ceph::perf_counters::key_create(
+      "rbd_mirror_journal_image",
+      {{"pool", local_image_ctx->md_ctx.get_pool_name()},
+       {"namespace", local_image_ctx->md_ctx.get_namespace()},
+       {"image", local_image_ctx->name}});
+
+  PerfCountersBuilder plb(g_ceph_context, labels, l_rbd_mirror_journal_first,
+                          l_rbd_mirror_journal_last);
+  plb.add_u64_counter(l_rbd_mirror_journal_entries, "entries",
+                      "Number of entries replayed", nullptr, prio);
+  plb.add_u64_counter(l_rbd_mirror_journal_replay_bytes, "replay_bytes",
+                      "Total bytes replayed", nullptr, prio,
+                      unit_t(UNIT_BYTES));
+  plb.add_time_avg(l_rbd_mirror_journal_replay_latency, "replay_latency",
+                   "Replay latency", nullptr, prio);
   m_perf_counters = plb.create_perf_counters();
   g_ceph_context->get_perfcounters_collection()->add(m_perf_counters);
 }
index 65caf28cff12a13959db411cb28ae26b690c59ff..b4da280bf1be88d8d1a981096f717eefa0c574ba 100644 (file)
@@ -4,6 +4,8 @@
 #include "Replayer.h"
 #include "common/debug.h"
 #include "common/errno.h"
+#include "common/perf_counters.h"
+#include "common/perf_counters_key.h"
 #include "include/stringify.h"
 #include "common/Timer.h"
 #include "cls/rbd/cls_rbd_client.h"
@@ -254,6 +256,10 @@ bool Replayer<I>::get_replay_status(std::string* description,
   json_spirit::mObject root_obj;
   root_obj["replay_state"] = replay_state;
   root_obj["remote_snapshot_timestamp"] = remote_snap_info->timestamp.sec();
+  if (m_perf_counters) {
+    m_perf_counters->tset(l_rbd_mirror_snapshot_remote_timestamp,
+                          remote_snap_info->timestamp);
+  }
 
   auto matching_remote_snap_id = util::compute_remote_snap_id(
     m_state_builder->local_image_ctx->image_lock,
@@ -269,6 +275,10 @@ bool Replayer<I>::get_replay_status(std::string* description,
     // synced and not the consistency point in time.
     root_obj["local_snapshot_timestamp"] =
       matching_remote_snap_it->second.timestamp.sec();
+    if (m_perf_counters) {
+      m_perf_counters->tset(l_rbd_mirror_snapshot_local_timestamp,
+                            matching_remote_snap_it->second.timestamp);
+    }
   }
 
   matching_remote_snap_it = m_state_builder->remote_image_ctx->snap_info.find(
@@ -1119,16 +1129,19 @@ void Replayer<I>::handle_copy_image(int r) {
     m_last_snapshot_sync_seconds = duration.sec();
 
     if (g_snapshot_perf_counters) {
-      g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_replay_bytes,
+      g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_sync_bytes,
                                     m_snapshot_bytes);
-      g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_replay_snapshots);
-      g_snapshot_perf_counters->tinc(
-        l_rbd_mirror_snapshot_replay_snapshots_time, duration);
+      g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_snapshots);
+      g_snapshot_perf_counters->tinc(l_rbd_mirror_snapshot_sync_time,
+                                     duration);
     }
     if (m_perf_counters) {
-      m_perf_counters->inc(l_rbd_mirror_snapshot_replay_bytes, m_snapshot_bytes);
-      m_perf_counters->inc(l_rbd_mirror_snapshot_replay_snapshots);
-      m_perf_counters->tinc(l_rbd_mirror_snapshot_replay_snapshots_time, duration);
+      m_perf_counters->inc(l_rbd_mirror_snapshot_sync_bytes, m_snapshot_bytes);
+      m_perf_counters->inc(l_rbd_mirror_snapshot_snapshots);
+      m_perf_counters->tinc(l_rbd_mirror_snapshot_sync_time, duration);
+      m_perf_counters->tset(l_rbd_mirror_snapshot_last_sync_time, duration);
+      m_perf_counters->set(l_rbd_mirror_snapshot_last_sync_bytes,
+                           m_snapshot_bytes);
     }
   }
 
@@ -1563,16 +1576,32 @@ void Replayer<I>::register_perf_counters() {
 
   auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
   auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio");
-  PerfCountersBuilder plb(g_ceph_context,
-                          "rbd_mirror_snapshot_image_" + m_image_spec,
-                          l_rbd_mirror_snapshot_first,
+
+  auto local_image_ctx = m_state_builder->local_image_ctx;
+  std::string labels = ceph::perf_counters::key_create(
+      "rbd_mirror_snapshot_image",
+      {{"pool", local_image_ctx->md_ctx.get_pool_name()},
+       {"namespace", local_image_ctx->md_ctx.get_namespace()},
+       {"image", local_image_ctx->name}});
+
+  PerfCountersBuilder plb(g_ceph_context, labels, l_rbd_mirror_snapshot_first,
                           l_rbd_mirror_snapshot_last);
-  plb.add_u64_counter(l_rbd_mirror_snapshot_replay_snapshots,
-                      "snapshots", "Snapshots", "r", prio);
-  plb.add_time_avg(l_rbd_mirror_snapshot_replay_snapshots_time,
-                   "snapshots_time", "Snapshots time", "rl", prio);
-  plb.add_u64_counter(l_rbd_mirror_snapshot_replay_bytes, "replay_bytes",
-                      "Replayed data", "rb", prio, unit_t(UNIT_BYTES));
+  plb.add_u64_counter(l_rbd_mirror_snapshot_snapshots, "snapshots",
+                      "Number of snapshots synced", nullptr, prio);
+  plb.add_time_avg(l_rbd_mirror_snapshot_sync_time, "sync_time",
+                   "Average sync time", nullptr, prio);
+  plb.add_u64_counter(l_rbd_mirror_snapshot_sync_bytes, "sync_bytes",
+                      "Total bytes synced", nullptr, prio, unit_t(UNIT_BYTES));
+  plb.add_time(l_rbd_mirror_snapshot_remote_timestamp, "remote_timestamp",
+               "Timestamp of the remote snapshot", nullptr, prio);
+  plb.add_time(l_rbd_mirror_snapshot_local_timestamp, "local_timestamp",
+               "Timestamp of the local snapshot", nullptr, prio);
+  plb.add_time(l_rbd_mirror_snapshot_last_sync_time, "last_sync_time",
+               "Time taken to sync the last snapshot", nullptr, prio);
+  plb.add_u64(l_rbd_mirror_snapshot_last_sync_bytes, "last_sync_bytes",
+              "Bytes synced for the last snapshot", nullptr, prio,
+              unit_t(UNIT_BYTES));
+
   m_perf_counters = plb.create_perf_counters();
   g_ceph_context->get_perfcounters_collection()->add(m_perf_counters);
 }
index 9fecb17f44da3f25ec4b4b46807b25974f08f982..85e95e6b6c3c1e8d68ec74eb5588c5d0aa541ee1 100644 (file)
@@ -68,27 +68,30 @@ int main(int argc, const char **argv)
   auto prio =
       g_ceph_context->_conf.get_val<int64_t>("rbd_mirror_perf_stats_prio");
   {
-    PerfCountersBuilder plb(g_ceph_context, "rbd_mirror",
+    PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_journal",
                             rbd::mirror::l_rbd_mirror_journal_first,
                             rbd::mirror::l_rbd_mirror_journal_last);
-    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay, "replay", "Replays",
-                        "r", prio);
-    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay_bytes, "replay_bytes",
-                        "Replayed data", "rb", prio, unit_t(UNIT_BYTES));
-    plb.add_time_avg(rbd::mirror::l_rbd_mirror_replay_latency, "replay_latency",
-                     "Replay latency", "rl", prio);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_journal_entries, "entries",
+                        "Number of entries replayed", nullptr, prio);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_journal_replay_bytes,
+                        "replay_bytes", "Total bytes replayed", nullptr, prio,
+                        unit_t(UNIT_BYTES));
+    plb.add_time_avg(rbd::mirror::l_rbd_mirror_journal_replay_latency,
+                     "replay_latency", "Replay latency", nullptr, prio);
     g_journal_perf_counters = plb.create_perf_counters();
   }
   {
-    PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_snapshot",
-                            rbd::mirror::l_rbd_mirror_snapshot_first,
-                            rbd::mirror::l_rbd_mirror_snapshot_last);
-    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_replay_snapshots,
-                        "snapshots", "Snapshots", "r", prio);
-    plb.add_time_avg(rbd::mirror::l_rbd_mirror_snapshot_replay_snapshots_time,
-                     "snapshots_time", "Snapshots time", "rl", prio);
-    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_replay_bytes,
-                        "replay_bytes", "Replayed data", "rb", prio,
+    PerfCountersBuilder plb(
+        g_ceph_context, "rbd_mirror_snapshot",
+        rbd::mirror::l_rbd_mirror_snapshot_first,
+        rbd::mirror::l_rbd_mirror_snapshot_remote_timestamp);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_snapshots,
+                        "snapshots", "Number of snapshots synced", nullptr,
+                        prio);
+    plb.add_time_avg(rbd::mirror::l_rbd_mirror_snapshot_sync_time, "sync_time",
+                     "Average sync time", nullptr, prio);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_sync_bytes,
+                        "sync_bytes", "Total bytes synced", nullptr, prio,
                         unit_t(UNIT_BYTES));
     g_snapshot_perf_counters = plb.create_perf_counters();
   }