From 433d984aee058cad953077651b3ad7f58e224722 Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Thu, 27 May 2021 11:44:45 +0200 Subject: [PATCH] rbd-mirror: add perf counters to snapshot replayer Add bytes replayed and statistics about number of snapshots synchronized and how long it took to do so. Fixes: https://tracker.ceph.com/issues/50973 Signed-off-by: Arthur Outhenin-Chalandre (cherry picked from commit cf9317b58f966c64554f46092fc4162b5e8fd5fb) --- .../snapshot/test_mock_Replayer.cc | 54 +++++++++++ src/test/rbd_mirror/test_main.cc | 3 +- src/tools/rbd_mirror/ImageReplayer.cc | 2 - src/tools/rbd_mirror/Types.h | 9 +- .../image_replayer/journal/Replayer.cc | 12 +-- .../image_replayer/snapshot/Replayer.cc | 89 ++++++++++++++++++- .../image_replayer/snapshot/Replayer.h | 11 +++ src/tools/rbd_mirror/main.cc | 47 +++++++--- 8 files changed, 201 insertions(+), 26 deletions(-) diff --git a/src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc b/src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc index 5567312f2acfa..bd287a12e0d26 100644 --- a/src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc +++ b/src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc @@ -2506,6 +2506,60 @@ TEST_F(TestMockImageReplayerSnapshotReplayer, SkipImageSync) { mock_remote_image_ctx)); } +TEST_F(TestMockImageReplayerSnapshotReplayer, ImageNameUpdated) { + librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx}; + librbd::MockTestImageCtx mock_remote_image_ctx{*m_remote_image_ctx}; + + MockThreads mock_threads(m_threads); + expect_work_queue_repeatedly(mock_threads); + + MockReplayerListener mock_replayer_listener; + expect_notification(mock_threads, mock_replayer_listener); + + InSequence seq; + + MockInstanceWatcher mock_instance_watcher; + MockImageMeta mock_image_meta; + MockStateBuilder mock_state_builder(mock_local_image_ctx, + mock_remote_image_ctx, + mock_image_meta); + MockReplayer mock_replayer{&mock_threads, &mock_instance_watcher, + "local mirror uuid", &m_pool_meta_cache, + &mock_state_builder, &mock_replayer_listener}; + m_pool_meta_cache.set_remote_pool_meta( + m_remote_io_ctx.get_id(), + {"remote mirror uuid", "remote mirror peer uuid"}); + + librbd::UpdateWatchCtx* update_watch_ctx = nullptr; + ASSERT_EQ(0, init_entry_replayer(mock_replayer, mock_threads, + mock_local_image_ctx, + mock_remote_image_ctx, + mock_replayer_listener, + mock_image_meta, + &update_watch_ctx)); + + // change the name of the image + mock_local_image_ctx.name = "NEW NAME"; + + // idle + expect_load_image_meta(mock_image_meta, true, 0); + + // wake-up replayer + update_watch_ctx->handle_notify(); + + // wait for sync to complete and expect replay complete + ASSERT_EQ(0, wait_for_notification(2)); + auto image_spec = image_replayer::util::compute_image_spec(m_local_io_ctx, + "NEW NAME"); + ASSERT_EQ(image_spec, mock_replayer.get_image_spec()); + ASSERT_FALSE(mock_replayer.is_replaying()); + + // shut down + ASSERT_EQ(0, shut_down_entry_replayer(mock_replayer, mock_threads, + mock_local_image_ctx, + mock_remote_image_ctx)); +} + } // namespace snapshot } // namespace image_replayer } // namespace mirror diff --git a/src/test/rbd_mirror/test_main.cc b/src/test/rbd_mirror/test_main.cc index e6c708c97db08..ed6641e2ea3e0 100644 --- a/src/test/rbd_mirror/test_main.cc +++ b/src/test/rbd_mirror/test_main.cc @@ -9,7 +9,8 @@ #include #include -PerfCounters *g_perf_counters = nullptr; +PerfCounters *g_journal_perf_counters = nullptr; +PerfCounters *g_snapshot_perf_counters = nullptr; extern void register_test_cluster_watcher(); extern void register_test_image_policy(); diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index f5cd65fe821ec..dc0545048ba09 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -36,8 +36,6 @@ #define dout_prefix *_dout << "rbd::mirror::" << *this << " " \ << __func__ << ": " -extern PerfCounters *g_perf_counters; - namespace rbd { namespace mirror { diff --git a/src/tools/rbd_mirror/Types.h b/src/tools/rbd_mirror/Types.h index c299aabe3145b..7b2a3b5cea5fa 100644 --- a/src/tools/rbd_mirror/Types.h +++ b/src/tools/rbd_mirror/Types.h @@ -20,11 +20,16 @@ template struct MirrorStatusUpdater; // Performance counters enum { - l_rbd_mirror_first = 27000, + l_rbd_mirror_journal_first = 27000, l_rbd_mirror_replay, l_rbd_mirror_replay_bytes, l_rbd_mirror_replay_latency, - l_rbd_mirror_last, + l_rbd_mirror_journal_last, + l_rbd_mirror_snapshot_first, + l_rbd_mirror_snapshot_replay_snapshots, + l_rbd_mirror_snapshot_replay_snapshots_time, + l_rbd_mirror_snapshot_replay_bytes, + l_rbd_mirror_snapshot_last, }; typedef std::shared_ptr RadosRef; diff --git a/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc index a2d47f5505155..3ce9104d247bd 100644 --- a/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc +++ b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc @@ -27,7 +27,7 @@ #define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \ << "Replayer: " << this << " " << __func__ << ": " -extern PerfCounters *g_perf_counters; +extern PerfCounters *g_journal_perf_counters; namespace rbd { namespace mirror { @@ -1158,10 +1158,10 @@ void Replayer::handle_process_entry_safe( } auto latency = ceph_clock_now() - replay_start_time; - if (g_perf_counters) { - g_perf_counters->inc(l_rbd_mirror_replay); - g_perf_counters->inc(l_rbd_mirror_replay_bytes, replay_bytes); - g_perf_counters->tinc(l_rbd_mirror_replay_latency, latency); + if (g_journal_perf_counters) { + g_journal_perf_counters->inc(l_rbd_mirror_replay); + g_journal_perf_counters->inc(l_rbd_mirror_replay_bytes, replay_bytes); + g_journal_perf_counters->tinc(l_rbd_mirror_replay_latency, latency); } auto ctx = new LambdaContext( @@ -1271,7 +1271,7 @@ void Replayer::register_perf_counters() { auto cct = static_cast(m_state_builder->local_image_ctx->cct); auto prio = cct->_conf.get_val("rbd_mirror_image_perf_stats_prio"); PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_image_" + m_image_spec, - l_rbd_mirror_first, l_rbd_mirror_last); + l_rbd_mirror_journal_first, l_rbd_mirror_journal_last); plb.add_u64_counter(l_rbd_mirror_replay, "replay", "Replays", "r", prio); plb.add_u64_counter(l_rbd_mirror_replay_bytes, "replay_bytes", "Replayed data", "rb", prio, unit_t(UNIT_BYTES)); diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc index bde5150847506..e94ec71dfe2d3 100644 --- a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc @@ -39,7 +39,7 @@ #define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \ << "Replayer: " << this << " " << __func__ << ": " -extern PerfCounters *g_perf_counters; +extern PerfCounters *g_snapshot_perf_counters; namespace rbd { namespace mirror { @@ -127,6 +127,12 @@ Replayer::Replayer( template Replayer::~Replayer() { dout(10) << dendl; + + { + std::unique_lock locker{m_lock}; + unregister_perf_counters(); + } + ceph_assert(m_state == STATE_COMPLETE); ceph_assert(m_update_watch_ctx == nullptr); ceph_assert(m_deep_copy_handler == nullptr); @@ -151,6 +157,18 @@ void Replayer::init(Context* on_finish) { m_remote_mirror_peer_uuid = remote_pool_meta.mirror_peer_uuid; dout(10) << "remote_mirror_peer_uuid=" << m_remote_mirror_peer_uuid << dendl; + { + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock image_locker{local_image_ctx->image_lock}; + m_image_spec = image_replayer::util::compute_image_spec( + local_image_ctx->md_ctx, local_image_ctx->name); + } + + { + std::unique_lock locker{m_lock}; + register_perf_counters(); + } + ceph_assert(m_on_init_shutdown == nullptr); m_on_init_shutdown = on_finish; @@ -303,6 +321,24 @@ void Replayer::load_local_image_meta() { m_image_updated = false; } + bool update_status = false; + { + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock image_locker{local_image_ctx->image_lock}; + auto image_spec = image_replayer::util::compute_image_spec( + local_image_ctx->md_ctx, local_image_ctx->name); + if (m_image_spec != image_spec) { + m_image_spec = image_spec; + update_status = true; + } + } + if (update_status) { + std::unique_lock locker{m_lock}; + unregister_perf_counters(); + register_perf_counters(); + notify_status_updated(); + } + ceph_assert(m_state_builder->local_image_meta != nullptr); auto ctx = create_context_callback< Replayer, &Replayer::handle_load_local_image_meta>(this); @@ -1046,6 +1082,7 @@ void Replayer::copy_image() { << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl; m_snapshot_bytes = 0; + m_snapshot_replay_start = ceph_clock_now(); m_deep_copy_handler = new DeepCopyHandler(this); auto ctx = create_context_callback< Replayer, &Replayer::handle_copy_image>(this); @@ -1077,6 +1114,19 @@ void Replayer::handle_copy_image(int r) { { std::unique_lock locker{m_lock}; m_bytes_per_snapshot(m_snapshot_bytes); + auto time = ceph_clock_now() - m_snapshot_replay_start; + if (g_snapshot_perf_counters) { + g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_replay_bytes, + m_snapshot_bytes); + g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_replay_snapshots); + g_snapshot_perf_counters->tinc( + l_rbd_mirror_snapshot_replay_snapshots_time, time); + } + if (m_perf_counters) { + m_perf_counters->inc(l_rbd_mirror_snapshot_replay_bytes, m_snapshot_bytes); + m_perf_counters->inc(l_rbd_mirror_snapshot_replay_snapshots); + m_perf_counters->tinc(l_rbd_mirror_snapshot_replay_snapshots_time, time); + } m_snapshot_bytes = 0; } @@ -1500,6 +1550,43 @@ bool Replayer::is_replay_interrupted(std::unique_lock* locker) { return false; } +template +void Replayer::register_perf_counters() { + dout(5) << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + ceph_assert(m_perf_counters == nullptr); + + auto cct = static_cast(m_state_builder->local_image_ctx->cct); + auto prio = cct->_conf.get_val("rbd_mirror_image_perf_stats_prio"); + PerfCountersBuilder plb(g_ceph_context, + "rbd_mirror_snapshot_image_" + m_image_spec, + l_rbd_mirror_snapshot_first, + l_rbd_mirror_snapshot_last); + plb.add_u64_counter(l_rbd_mirror_snapshot_replay_snapshots, + "snapshots", "Snapshots", "r", prio); + plb.add_time_avg(l_rbd_mirror_snapshot_replay_snapshots_time, + "snapshots_time", "Snapshots time", "rl", prio); + plb.add_u64_counter(l_rbd_mirror_snapshot_replay_bytes, "replay_bytes", + "Replayed data", "rb", prio, unit_t(UNIT_BYTES)); + m_perf_counters = plb.create_perf_counters(); + g_ceph_context->get_perfcounters_collection()->add(m_perf_counters); +} + +template +void Replayer::unregister_perf_counters() { + dout(5) << dendl; + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + PerfCounters *perf_counters = nullptr; + std::swap(perf_counters, m_perf_counters); + + if (perf_counters != nullptr) { + g_ceph_context->get_perfcounters_collection()->remove(perf_counters); + delete perf_counters; + } +} + } // namespace snapshot } // namespace image_replayer } // namespace mirror diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h index c2c28171f87dc..e3c4c20890287 100644 --- a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h @@ -94,6 +94,11 @@ public: return m_error_description; } + std::string get_image_spec() const { + std::unique_lock locker(m_lock); + return m_image_spec; + } + private: /** * @verbatim @@ -205,6 +210,7 @@ private: State m_state = STATE_INIT; + std::string m_image_spec; Context* m_on_init_shutdown = nullptr; bool m_resync_requested = false; @@ -238,6 +244,7 @@ private: uint64_t, boost::accumulators::stats< boost::accumulators::tag::rolling_mean>> m_bytes_per_snapshot{ boost::accumulators::tag::rolling_window::window_size = 2}; + utime_t m_snapshot_replay_start; uint32_t m_pending_snapshots = 0; @@ -245,6 +252,8 @@ private: bool m_updating_sync_point = false; bool m_sync_in_progress = false; + PerfCounters *m_perf_counters = nullptr; + void load_local_image_meta(); void handle_load_local_image_meta(int r); @@ -323,6 +332,8 @@ private: bool is_replay_interrupted(); bool is_replay_interrupted(std::unique_lock* lock); + void register_perf_counters(); + void unregister_perf_counters(); }; } // namespace snapshot diff --git a/src/tools/rbd_mirror/main.cc b/src/tools/rbd_mirror/main.cc index ab350a014cf80..74c97272e1590 100644 --- a/src/tools/rbd_mirror/main.cc +++ b/src/tools/rbd_mirror/main.cc @@ -14,7 +14,8 @@ #include rbd::mirror::Mirror *mirror = nullptr; -PerfCounters *g_perf_counters = nullptr; +PerfCounters *g_journal_perf_counters = nullptr; +PerfCounters *g_snapshot_perf_counters = nullptr; void usage() { std::cout << "usage: rbd-mirror [options...]" << std::endl; @@ -68,17 +69,33 @@ int main(int argc, const char **argv) auto prio = g_ceph_context->_conf.get_val("rbd_mirror_perf_stats_prio"); - PerfCountersBuilder plb(g_ceph_context, "rbd_mirror", - rbd::mirror::l_rbd_mirror_first, - rbd::mirror::l_rbd_mirror_last); - plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay, "replay", "Replays", - "r", prio); - plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay_bytes, "replay_bytes", - "Replayed data", "rb", prio, unit_t(UNIT_BYTES)); - plb.add_time_avg(rbd::mirror::l_rbd_mirror_replay_latency, "replay_latency", - "Replay latency", "rl", prio); - g_perf_counters = plb.create_perf_counters(); - g_ceph_context->get_perfcounters_collection()->add(g_perf_counters); + { + PerfCountersBuilder plb(g_ceph_context, "rbd_mirror", + rbd::mirror::l_rbd_mirror_journal_first, + rbd::mirror::l_rbd_mirror_journal_last); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay, "replay", "Replays", + "r", prio); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay_bytes, "replay_bytes", + "Replayed data", "rb", prio, unit_t(UNIT_BYTES)); + plb.add_time_avg(rbd::mirror::l_rbd_mirror_replay_latency, "replay_latency", + "Replay latency", "rl", prio); + g_journal_perf_counters = plb.create_perf_counters(); + } + { + PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_snapshot", + rbd::mirror::l_rbd_mirror_snapshot_first, + rbd::mirror::l_rbd_mirror_snapshot_last); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_replay_snapshots, + "snapshots", "Snapshots", "r", prio); + plb.add_time_avg(rbd::mirror::l_rbd_mirror_snapshot_replay_snapshots_time, + "snapshots_time", "Snapshots time", "rl", prio); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_replay_bytes, + "replay_bytes", "Replayed data", "rb", prio, + unit_t(UNIT_BYTES)); + g_snapshot_perf_counters = plb.create_perf_counters(); + } + g_ceph_context->get_perfcounters_collection()->add(g_journal_perf_counters); + g_ceph_context->get_perfcounters_collection()->add(g_snapshot_perf_counters); mirror = new rbd::mirror::Mirror(g_ceph_context, cmd_args); int r = mirror->init(); @@ -95,10 +112,12 @@ int main(int argc, const char **argv) unregister_async_signal_handler(SIGTERM, handle_signal); shutdown_async_signal_handler(); - g_ceph_context->get_perfcounters_collection()->remove(g_perf_counters); + g_ceph_context->get_perfcounters_collection()->remove(g_journal_perf_counters); + g_ceph_context->get_perfcounters_collection()->remove(g_snapshot_perf_counters); delete mirror; - delete g_perf_counters; + delete g_journal_perf_counters; + delete g_snapshot_perf_counters; return r < 0 ? EXIT_SUCCESS : EXIT_FAILURE; } -- 2.39.5