]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rbd-mirror: add perf counters to snapshot replayer 42987/head
authorArthur Outhenin-Chalandre <arthur.outhenin-chalandre@cern.ch>
Thu, 27 May 2021 09:44:45 +0000 (11:44 +0200)
committerArthur Outhenin-Chalandre <arthur.outhenin-chalandre@cern.ch>
Tue, 31 Aug 2021 07:44:53 +0000 (09:44 +0200)
Add bytes replayed and statistics about number of snapshots synchronized
and how long it took to do so.

Fixes: https://tracker.ceph.com/issues/50973
Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@cern.ch>
(cherry picked from commit cf9317b58f966c64554f46092fc4162b5e8fd5fb)

src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc
src/test/rbd_mirror/test_main.cc
src/tools/rbd_mirror/ImageReplayer.cc
src/tools/rbd_mirror/Types.h
src/tools/rbd_mirror/image_replayer/journal/Replayer.cc
src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc
src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h
src/tools/rbd_mirror/main.cc

index 5567312f2acfae657ca5e6e0a2b5713a81d617d6..bd287a12e0d2661bd76d74503e9e97727fad459c 100644 (file)
@@ -2506,6 +2506,60 @@ TEST_F(TestMockImageReplayerSnapshotReplayer, SkipImageSync) {
                                         mock_remote_image_ctx));
 }
 
+TEST_F(TestMockImageReplayerSnapshotReplayer, ImageNameUpdated) {
+  librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx};
+  librbd::MockTestImageCtx mock_remote_image_ctx{*m_remote_image_ctx};
+
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+
+  MockReplayerListener mock_replayer_listener;
+  expect_notification(mock_threads, mock_replayer_listener);
+
+  InSequence seq;
+
+  MockInstanceWatcher mock_instance_watcher;
+  MockImageMeta mock_image_meta;
+  MockStateBuilder mock_state_builder(mock_local_image_ctx,
+                                      mock_remote_image_ctx,
+                                      mock_image_meta);
+  MockReplayer mock_replayer{&mock_threads, &mock_instance_watcher,
+                             "local mirror uuid", &m_pool_meta_cache,
+                             &mock_state_builder, &mock_replayer_listener};
+  m_pool_meta_cache.set_remote_pool_meta(
+    m_remote_io_ctx.get_id(),
+    {"remote mirror uuid", "remote mirror peer uuid"});
+
+  librbd::UpdateWatchCtx* update_watch_ctx = nullptr;
+  ASSERT_EQ(0, init_entry_replayer(mock_replayer, mock_threads,
+                                   mock_local_image_ctx,
+                                   mock_remote_image_ctx,
+                                   mock_replayer_listener,
+                                   mock_image_meta,
+                                   &update_watch_ctx));
+
+  // change the name of the image
+  mock_local_image_ctx.name = "NEW NAME";
+
+  // idle
+  expect_load_image_meta(mock_image_meta, true, 0);
+
+  // wake-up replayer
+  update_watch_ctx->handle_notify();
+
+  // wait for sync to complete and expect replay complete
+  ASSERT_EQ(0, wait_for_notification(2));
+  auto image_spec = image_replayer::util::compute_image_spec(m_local_io_ctx,
+                                                             "NEW NAME");
+  ASSERT_EQ(image_spec, mock_replayer.get_image_spec());
+  ASSERT_FALSE(mock_replayer.is_replaying());
+
+  // shut down
+  ASSERT_EQ(0, shut_down_entry_replayer(mock_replayer, mock_threads,
+                                        mock_local_image_ctx,
+                                        mock_remote_image_ctx));
+}
+
 } // namespace snapshot
 } // namespace image_replayer
 } // namespace mirror
index e6c708c97db080324bf09ea437d5d01360bf74e2..ed6641e2ea3e08a72a933ce458854865f33cddd2 100644 (file)
@@ -9,7 +9,8 @@
 #include <iostream>
 #include <string>
 
-PerfCounters *g_perf_counters = nullptr;
+PerfCounters *g_journal_perf_counters = nullptr;
+PerfCounters *g_snapshot_perf_counters = nullptr;
 
 extern void register_test_cluster_watcher();
 extern void register_test_image_policy();
index f5cd65fe821ecde22b3988f714ec504b1159af9d..dc0545048ba098d79de8f9f00292e4ff9654e429 100644 (file)
@@ -36,8 +36,6 @@
 #define dout_prefix *_dout << "rbd::mirror::" << *this << " " \
                            << __func__ << ": "
 
-extern PerfCounters *g_perf_counters;
-
 namespace rbd {
 namespace mirror {
 
index c299aabe3145b4583d31350d143a7191d5e502ee..7b2a3b5cea5fa2a834e954409fb6365fafdf51ec 100644 (file)
@@ -20,11 +20,16 @@ template <typename> struct MirrorStatusUpdater;
 
 // Performance counters
 enum {
-  l_rbd_mirror_first = 27000,
+  l_rbd_mirror_journal_first = 27000,
   l_rbd_mirror_replay,
   l_rbd_mirror_replay_bytes,
   l_rbd_mirror_replay_latency,
-  l_rbd_mirror_last,
+  l_rbd_mirror_journal_last,
+  l_rbd_mirror_snapshot_first,
+  l_rbd_mirror_snapshot_replay_snapshots,
+  l_rbd_mirror_snapshot_replay_snapshots_time,
+  l_rbd_mirror_snapshot_replay_bytes,
+  l_rbd_mirror_snapshot_last,
 };
 
 typedef std::shared_ptr<librados::Rados> RadosRef;
index a2d47f5505155156ef5a6b5776b1394cf0682adc..3ce9104d247bd48ace93d53eb18863fe714be039 100644 (file)
@@ -27,7 +27,7 @@
 #define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
                            << "Replayer: " << this << " " << __func__ << ": "
 
-extern PerfCounters *g_perf_counters;
+extern PerfCounters *g_journal_perf_counters;
 
 namespace rbd {
 namespace mirror {
@@ -1158,10 +1158,10 @@ void Replayer<I>::handle_process_entry_safe(
   }
 
   auto latency = ceph_clock_now() - replay_start_time;
-  if (g_perf_counters) {
-    g_perf_counters->inc(l_rbd_mirror_replay);
-    g_perf_counters->inc(l_rbd_mirror_replay_bytes, replay_bytes);
-    g_perf_counters->tinc(l_rbd_mirror_replay_latency, latency);
+  if (g_journal_perf_counters) {
+    g_journal_perf_counters->inc(l_rbd_mirror_replay);
+    g_journal_perf_counters->inc(l_rbd_mirror_replay_bytes, replay_bytes);
+    g_journal_perf_counters->tinc(l_rbd_mirror_replay_latency, latency);
   }
 
   auto ctx = new LambdaContext(
@@ -1271,7 +1271,7 @@ void Replayer<I>::register_perf_counters() {
   auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
   auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio");
   PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_image_" + m_image_spec,
-                          l_rbd_mirror_first, l_rbd_mirror_last);
+                          l_rbd_mirror_journal_first, l_rbd_mirror_journal_last);
   plb.add_u64_counter(l_rbd_mirror_replay, "replay", "Replays", "r", prio);
   plb.add_u64_counter(l_rbd_mirror_replay_bytes, "replay_bytes",
                       "Replayed data", "rb", prio, unit_t(UNIT_BYTES));
index bde5150847506236f662c251e5a82c0f546c71e7..e94ec71dfe2d3275f2b7300a60c7170325bc6a30 100644 (file)
@@ -39,7 +39,7 @@
 #define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
                            << "Replayer: " << this << " " << __func__ << ": "
 
-extern PerfCounters *g_perf_counters;
+extern PerfCounters *g_snapshot_perf_counters;
 
 namespace rbd {
 namespace mirror {
@@ -127,6 +127,12 @@ Replayer<I>::Replayer(
 template <typename I>
 Replayer<I>::~Replayer() {
   dout(10) << dendl;
+
+  {
+    std::unique_lock locker{m_lock};
+    unregister_perf_counters();
+  }
+
   ceph_assert(m_state == STATE_COMPLETE);
   ceph_assert(m_update_watch_ctx == nullptr);
   ceph_assert(m_deep_copy_handler == nullptr);
@@ -151,6 +157,18 @@ void Replayer<I>::init(Context* on_finish) {
   m_remote_mirror_peer_uuid = remote_pool_meta.mirror_peer_uuid;
   dout(10) << "remote_mirror_peer_uuid=" << m_remote_mirror_peer_uuid << dendl;
 
+  {
+    auto local_image_ctx = m_state_builder->local_image_ctx;
+    std::shared_lock image_locker{local_image_ctx->image_lock};
+    m_image_spec = image_replayer::util::compute_image_spec(
+      local_image_ctx->md_ctx, local_image_ctx->name);
+  }
+
+  {
+    std::unique_lock locker{m_lock};
+    register_perf_counters();
+  }
+
   ceph_assert(m_on_init_shutdown == nullptr);
   m_on_init_shutdown = on_finish;
 
@@ -303,6 +321,24 @@ void Replayer<I>::load_local_image_meta() {
     m_image_updated = false;
   }
 
+  bool update_status = false;
+  {
+    auto local_image_ctx = m_state_builder->local_image_ctx;
+    std::shared_lock image_locker{local_image_ctx->image_lock};
+    auto image_spec = image_replayer::util::compute_image_spec(
+      local_image_ctx->md_ctx, local_image_ctx->name);
+    if (m_image_spec != image_spec) {
+      m_image_spec = image_spec;
+      update_status = true;
+    }
+  }
+  if (update_status) {
+    std::unique_lock locker{m_lock};
+    unregister_perf_counters();
+    register_perf_counters();
+    notify_status_updated();
+  }
+
   ceph_assert(m_state_builder->local_image_meta != nullptr);
   auto ctx = create_context_callback<
     Replayer<I>, &Replayer<I>::handle_load_local_image_meta>(this);
@@ -1046,6 +1082,7 @@ void Replayer<I>::copy_image() {
            << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl;
 
   m_snapshot_bytes = 0;
+  m_snapshot_replay_start = ceph_clock_now();
   m_deep_copy_handler = new DeepCopyHandler(this);
   auto ctx = create_context_callback<
     Replayer<I>, &Replayer<I>::handle_copy_image>(this);
@@ -1077,6 +1114,19 @@ void Replayer<I>::handle_copy_image(int r) {
   {
     std::unique_lock locker{m_lock};
     m_bytes_per_snapshot(m_snapshot_bytes);
+    auto time = ceph_clock_now() - m_snapshot_replay_start;
+    if (g_snapshot_perf_counters) {
+      g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_replay_bytes,
+                                    m_snapshot_bytes);
+      g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_replay_snapshots);
+      g_snapshot_perf_counters->tinc(
+        l_rbd_mirror_snapshot_replay_snapshots_time, time);
+    }
+    if (m_perf_counters) {
+      m_perf_counters->inc(l_rbd_mirror_snapshot_replay_bytes, m_snapshot_bytes);
+      m_perf_counters->inc(l_rbd_mirror_snapshot_replay_snapshots);
+      m_perf_counters->tinc(l_rbd_mirror_snapshot_replay_snapshots_time, time);
+    }
     m_snapshot_bytes = 0;
   }
 
@@ -1500,6 +1550,43 @@ bool Replayer<I>::is_replay_interrupted(std::unique_lock<ceph::mutex>* locker) {
   return false;
 }
 
+template <typename I>
+void Replayer<I>::register_perf_counters() {
+  dout(5) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+  ceph_assert(m_perf_counters == nullptr);
+
+  auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
+  auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio");
+  PerfCountersBuilder plb(g_ceph_context,
+                          "rbd_mirror_snapshot_image_" + m_image_spec,
+                          l_rbd_mirror_snapshot_first,
+                          l_rbd_mirror_snapshot_last);
+  plb.add_u64_counter(l_rbd_mirror_snapshot_replay_snapshots,
+                      "snapshots", "Snapshots", "r", prio);
+  plb.add_time_avg(l_rbd_mirror_snapshot_replay_snapshots_time,
+                   "snapshots_time", "Snapshots time", "rl", prio);
+  plb.add_u64_counter(l_rbd_mirror_snapshot_replay_bytes, "replay_bytes",
+                      "Replayed data", "rb", prio, unit_t(UNIT_BYTES));
+  m_perf_counters = plb.create_perf_counters();
+  g_ceph_context->get_perfcounters_collection()->add(m_perf_counters);
+}
+
+template <typename I>
+void Replayer<I>::unregister_perf_counters() {
+  dout(5) << dendl;
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  PerfCounters *perf_counters = nullptr;
+  std::swap(perf_counters, m_perf_counters);
+
+  if (perf_counters != nullptr) {
+    g_ceph_context->get_perfcounters_collection()->remove(perf_counters);
+    delete perf_counters;
+  }
+}
+
 } // namespace snapshot
 } // namespace image_replayer
 } // namespace mirror
index c2c28171f87dcb63d79480e8f5f5553d004bb891..e3c4c20890287a471d0783fcf0c4cf45e0fb5d5e 100644 (file)
@@ -94,6 +94,11 @@ public:
     return m_error_description;
   }
 
+  std::string get_image_spec() const {
+    std::unique_lock locker(m_lock);
+    return m_image_spec;
+  }
+
 private:
   /**
    * @verbatim
@@ -205,6 +210,7 @@ private:
 
   State m_state = STATE_INIT;
 
+  std::string m_image_spec;
   Context* m_on_init_shutdown = nullptr;
 
   bool m_resync_requested = false;
@@ -238,6 +244,7 @@ private:
     uint64_t, boost::accumulators::stats<
       boost::accumulators::tag::rolling_mean>> m_bytes_per_snapshot{
     boost::accumulators::tag::rolling_window::window_size = 2};
+  utime_t m_snapshot_replay_start;
 
   uint32_t m_pending_snapshots = 0;
 
@@ -245,6 +252,8 @@ private:
   bool m_updating_sync_point = false;
   bool m_sync_in_progress = false;
 
+  PerfCounters *m_perf_counters = nullptr;
+
   void load_local_image_meta();
   void handle_load_local_image_meta(int r);
 
@@ -323,6 +332,8 @@ private:
   bool is_replay_interrupted();
   bool is_replay_interrupted(std::unique_lock<ceph::mutex>* lock);
 
+  void register_perf_counters();
+  void unregister_perf_counters();
 };
 
 } // namespace snapshot
index ab350a014cf8030e254f49543a3e76c35598aa99..74c97272e15908e579b29018147d6839a7d4ac81 100644 (file)
@@ -14,7 +14,8 @@
 #include <vector>
 
 rbd::mirror::Mirror *mirror = nullptr;
-PerfCounters *g_perf_counters = nullptr;
+PerfCounters *g_journal_perf_counters = nullptr;
+PerfCounters *g_snapshot_perf_counters = nullptr;
 
 void usage() {
   std::cout << "usage: rbd-mirror [options...]" << std::endl;
@@ -68,17 +69,33 @@ int main(int argc, const char **argv)
 
   auto prio =
       g_ceph_context->_conf.get_val<int64_t>("rbd_mirror_perf_stats_prio");
-  PerfCountersBuilder plb(g_ceph_context, "rbd_mirror",
-                          rbd::mirror::l_rbd_mirror_first,
-                          rbd::mirror::l_rbd_mirror_last);
-  plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay, "replay", "Replays",
-                      "r", prio);
-  plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay_bytes, "replay_bytes",
-                      "Replayed data", "rb", prio, unit_t(UNIT_BYTES));
-  plb.add_time_avg(rbd::mirror::l_rbd_mirror_replay_latency, "replay_latency",
-                   "Replay latency", "rl", prio);
-  g_perf_counters = plb.create_perf_counters();
-  g_ceph_context->get_perfcounters_collection()->add(g_perf_counters);
+  {
+    PerfCountersBuilder plb(g_ceph_context, "rbd_mirror",
+                            rbd::mirror::l_rbd_mirror_journal_first,
+                            rbd::mirror::l_rbd_mirror_journal_last);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay, "replay", "Replays",
+                        "r", prio);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay_bytes, "replay_bytes",
+                        "Replayed data", "rb", prio, unit_t(UNIT_BYTES));
+    plb.add_time_avg(rbd::mirror::l_rbd_mirror_replay_latency, "replay_latency",
+                     "Replay latency", "rl", prio);
+    g_journal_perf_counters = plb.create_perf_counters();
+  }
+  {
+    PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_snapshot",
+                            rbd::mirror::l_rbd_mirror_snapshot_first,
+                            rbd::mirror::l_rbd_mirror_snapshot_last);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_replay_snapshots,
+                        "snapshots", "Snapshots", "r", prio);
+    plb.add_time_avg(rbd::mirror::l_rbd_mirror_snapshot_replay_snapshots_time,
+                     "snapshots_time", "Snapshots time", "rl", prio);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_replay_bytes,
+                        "replay_bytes", "Replayed data", "rb", prio,
+                        unit_t(UNIT_BYTES));
+    g_snapshot_perf_counters = plb.create_perf_counters();
+  }
+  g_ceph_context->get_perfcounters_collection()->add(g_journal_perf_counters);
+  g_ceph_context->get_perfcounters_collection()->add(g_snapshot_perf_counters);
 
   mirror = new rbd::mirror::Mirror(g_ceph_context, cmd_args);
   int r = mirror->init();
@@ -95,10 +112,12 @@ int main(int argc, const char **argv)
   unregister_async_signal_handler(SIGTERM, handle_signal);
   shutdown_async_signal_handler();
 
-  g_ceph_context->get_perfcounters_collection()->remove(g_perf_counters);
+  g_ceph_context->get_perfcounters_collection()->remove(g_journal_perf_counters);
+  g_ceph_context->get_perfcounters_collection()->remove(g_snapshot_perf_counters);
 
   delete mirror;
-  delete g_perf_counters;
+  delete g_journal_perf_counters;
+  delete g_snapshot_perf_counters;
 
   return r < 0 ? EXIT_SUCCESS : EXIT_FAILURE;
 }