git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson: report dynamic perf stats to mgr
author Xuehan Xu <xuxuehan@qianxin.com>
Sat, 28 Dec 2024 09:56:34 +0000 (17:56 +0800)
committer Xuehan Xu <xuxuehan@qianxin.com>
Fri, 7 Feb 2025 03:41:14 +0000 (11:41 +0800)
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/mgr/client.cc
src/crimson/mgr/client.h
src/crimson/osd/CMakeLists.txt
src/crimson/osd/osd.cc
src/crimson/osd/osd.h
src/crimson/osd/pg.h
src/crimson/osd/shard_services.h

index 32366e637544be8d479f3f9b67d5146e382e1d7d..25159729611c42e549c8e25923b0b80707efeba4 100644 (file)
@@ -26,10 +26,14 @@ namespace crimson::mgr
 {
 
 Client::Client(crimson::net::Messenger& msgr,
-                 WithStats& with_stats)
+              WithStats& with_stats,
+              set_perf_queries_cb_t cb_set,
+              get_perf_report_cb_t cb_get)
   : msgr{msgr},
     with_stats{with_stats},
-    report_timer{[this] {report();}}
+    report_timer{[this] {report();}},
+    set_perf_queries_cb(cb_set),
+    get_perf_report_cb(cb_get)
 {}
 
 seastar::future<> Client::start()
@@ -152,6 +156,10 @@ seastar::future<> Client::handle_mgr_conf(crimson::net::ConnectionRef,
   } else {
     report_timer.cancel();
   }
+  if (!m->osd_perf_metric_queries.empty()) {
+    ceph_assert(set_perf_queries_cb);
+    return set_perf_queries_cb(m->osd_perf_metric_queries);
+  }
   return seastar::now();
 }
 
@@ -202,6 +210,13 @@ void Client::_send_report()
     report->daemon_health_metrics = std::move(daemon_health_metrics);
     local_conf().get_config_bl(last_config_bl_version, &report->config_bl,
                              &last_config_bl_version);
+    if (get_perf_report_cb) {
+      return get_perf_report_cb(
+      ).then([report=std::move(report), this](auto payload) mutable {
+       report->metric_report_message = MetricReportMessage(std::move(payload));
+       return conn->send(std::move(report));
+      });
+    }
     return conn->send(std::move(report));
   });
 }
index 5d27bd2c0b37d167912a325f3e1e48f5cb288595..001c59e19d9620c2695ac40364dfba526d22890a 100644 (file)
@@ -10,6 +10,7 @@
 #include "crimson/net/Fwd.h"
 #include "mgr/DaemonHealthMetric.h"
 #include "mon/MgrMap.h"
+#include "mgr/MetricTypes.h"
 
 template<typename Message> using Ref = boost::intrusive_ptr<Message>;
 namespace crimson::net {
@@ -30,9 +31,14 @@ public:
 };
 
 class Client : public crimson::net::Dispatcher {
+  using get_perf_report_cb_t = std::function<seastar::future<MetricPayload> ()>;
+  using set_perf_queries_cb_t =
+    std::function<seastar::future<> (const ConfigPayload &)>;
 public:
   Client(crimson::net::Messenger& msgr,
-        WithStats& with_stats);
+        WithStats& with_stats,
+        set_perf_queries_cb_t cb_set,
+        get_perf_report_cb_t cb_get);
   seastar::future<> start();
   seastar::future<> stop();
   void report();
@@ -60,6 +66,8 @@ private:
   crimson::common::gate_per_shard gates;
   uint64_t last_config_bl_version = 0;
   std::string service_name, daemon_name;
+  set_perf_queries_cb_t set_perf_queries_cb;
+  get_perf_report_cb_t get_perf_report_cb;
 
   std::vector<DaemonHealthMetric> daemon_health_metrics;
 
index 50011adbcecfb34ddfd3b89f9eb7e7cb3c6f78f3..5ef6b3f36d2cc2e2503936d57ef8b1e13ca369cc 100644 (file)
@@ -56,6 +56,7 @@ add_executable(crimson-osd
   ${PROJECT_SOURCE_DIR}/src/osd/SnapMapper.cc
   ${PROJECT_SOURCE_DIR}/src/osd/recovery_types.cc
   ${PROJECT_SOURCE_DIR}/src/osd/osd_perf_counters.cc
+  ${PROJECT_SOURCE_DIR}/src/mgr/OSDPerfMetricTypes.cc
   watch.cc
   )
 if(HAS_VTA)
index 71ec30c1d5bc01d1e3b9682cd6959afb41b6e634..29d57378041051f39fb50411f2248596467d3b8f 100644 (file)
@@ -95,7 +95,16 @@ OSD::OSD(int id, uint32_t nonce,
     hb_front_msgr{hb_front_msgr},
     hb_back_msgr{hb_back_msgr},
     monc{new crimson::mon::Client{*public_msgr, *this}},
-    mgrc{new crimson::mgr::Client{*public_msgr, *this}},
+    mgrc{new crimson::mgr::Client{
+      *public_msgr,
+      *this,
+      [this](const ConfigPayload &config_payload) {
+       return set_perf_queries(config_payload);
+      },
+      [this] {
+       return get_perf_reports();
+      }
+    }},
     store{store},
     pg_shard_manager{osd_singleton_state,
                      shard_services,
@@ -176,6 +185,70 @@ seastar::future<> OSD::open_meta_coll()
   });
 }
 
+seastar::future<> OSD::set_perf_queries(const ConfigPayload &config_payload) {
+  LOG_PREFIX(OSD::set_perf_queries);
+  const OSDConfigPayload &osd_config_payload =
+    boost::get<OSDConfigPayload>(config_payload);
+  const std::map<OSDPerfMetricQuery, OSDPerfMetricLimits> &queries =
+    osd_config_payload.config;
+  DEBUG("setting {} queries", queries.size());
+
+  std::list<OSDPerfMetricQuery> supported_queries;
+  for (auto &it : queries) {
+    auto &query = it.first;
+    if (!query.key_descriptor.empty()) {
+      supported_queries.push_back(query);
+    }
+  }
+  if (supported_queries.size() < queries.size()) {
+    DEBUG("{} unsupported queries", queries.size() - supported_queries.size());
+  }
+
+  return shard_services.invoke_on_all(
+    [supported_queries, queries](auto &local_service) {
+    auto &pgs = local_service.local_state.pg_map.get_pgs();
+    local_service.local_state.m_perf_queries = supported_queries;
+    local_service.local_state.m_perf_limits = queries;
+    for (auto &[id, pg] : pgs) {
+      pg->set_dynamic_perf_stats_queries(supported_queries);
+    }
+  });
+}
+
+seastar::future<MetricPayload> OSD::get_perf_reports() {
+  LOG_PREFIX(OSD::get_perf_reports);
+  OSDMetricPayload payload;
+  std::map<OSDPerfMetricQuery, OSDPerfMetricReport> &reports = payload.report;
+
+  auto dps = co_await shard_services.map_reduce0(
+    [FNAME](auto &local_service) {
+      auto &pgs = local_service.local_state.pg_map.get_pgs();
+      auto &m_perf_queries = local_service.local_state.m_perf_queries;
+      DynamicPerfStats dps;
+      for (auto &[id, pg] : pgs) {
+       // m_perf_queries can be modified only in set_perf_queries by mgr client
+       // request, and it is protected by the mgr client's lock, which is held
+       // when set_perf_queries/get_perf_reports are called, so we may not hold
+       // m_perf_queries_lock here.
+       DynamicPerfStats pg_dps(m_perf_queries);
+       pg->get_dynamic_perf_stats(&pg_dps);
+       dps.merge(pg_dps);
+       DEBUG("reporting for pg {}", pg->get_pgid());
+      }
+      return dps;
+    },
+    DynamicPerfStats(shard_services.local().local_state.m_perf_queries),
+    [](auto left, auto right) {
+      left.merge(right);
+      return left;
+    });
+
+  dps.add_to_reports(shard_services.local().local_state.m_perf_limits, &reports);
+  DEBUG("reports for {} queries", reports.size());
+
+  co_return payload;
+}
+
 seastar::future<OSDMeta> OSD::open_or_create_meta_coll(FuturizedStore &store)
 {
   return store.get_sharded_store().open_collection(coll_t::meta()).then([&store](auto ch) {
index 1cf88bea5d9da6cc867c9fbd7c010c362e21fb9f..24804277c3274db670f364fdb44e21b5d741eba8 100644 (file)
@@ -236,6 +236,8 @@ private:
 
   std::vector<DaemonHealthMetric> get_health_metrics();
 
+  seastar::future<> set_perf_queries(const ConfigPayload &config_payload);
+  seastar::future<MetricPayload> get_perf_reports();
 private:
   crimson::common::gate_per_shard gate;
 
index ca84b24583e2c914df004077458797d42793ebda..70bbf7fb4ea33f2b771d61807b941cef67984d4d 100644 (file)
@@ -774,7 +774,16 @@ public:
     size_t inb,
     size_t outb,
     const utime_t &lat) {
-    dp_stats.add(pg_whoami.osd, get_info(), req, inb, outb, lat);
+    if (dp_stats.is_enabled()) {
+      dp_stats.add(pg_whoami.osd, get_info(), req, inb, outb, lat);
+    }
+  }
+  void set_dynamic_perf_stats_queries(
+    const std::list<OSDPerfMetricQuery> &queries) {
+    dp_stats.set_queries(queries);
+  }
+  void get_dynamic_perf_stats(DynamicPerfStats *stats) {
+    std::swap(dp_stats, *stats);
   }
   OSDriver &get_osdriver() final {
     return osdriver;
index f1ed9b8d911294368f90d2dc8ce3cc36841432aa..c548027ed0097b27ff05e60e89e482509f6c468e 100644 (file)
@@ -25,6 +25,7 @@
 #include "crimson/osd/state.h"
 #include "common/AsyncReserver.h"
 #include "crimson/net/Connection.h"
+#include "mgr/OSDPerfMetricTypes.h"
 
 namespace crimson::net {
   class Messenger;
@@ -197,6 +198,8 @@ class PerShardState {
   }
 
   OSDSuperblock per_shard_superblock;
+  std::list<OSDPerfMetricQuery> m_perf_queries;
+  std::map<OSDPerfMetricQuery, OSDPerfMetricLimits> m_perf_limits;
 
 public:
   PerShardState(