From 88fb54e99b7fce5fa69ae6b60fd2575a4a6142ab Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Sat, 28 Dec 2024 17:56:34 +0800 Subject: [PATCH] crimson: report dynamic perf stats to mgr Signed-off-by: Xuehan Xu --- src/crimson/mgr/client.cc | 19 +++++++- src/crimson/mgr/client.h | 10 ++++- src/crimson/osd/CMakeLists.txt | 1 + src/crimson/osd/osd.cc | 75 +++++++++++++++++++++++++++++++- src/crimson/osd/osd.h | 2 + src/crimson/osd/pg.h | 11 ++++- src/crimson/osd/shard_services.h | 3 ++ 7 files changed, 116 insertions(+), 5 deletions(-) diff --git a/src/crimson/mgr/client.cc b/src/crimson/mgr/client.cc index 32366e637544b..25159729611c4 100644 --- a/src/crimson/mgr/client.cc +++ b/src/crimson/mgr/client.cc @@ -26,10 +26,14 @@ namespace crimson::mgr { Client::Client(crimson::net::Messenger& msgr, - WithStats& with_stats) + WithStats& with_stats, + set_perf_queries_cb_t cb_set, + get_perf_report_cb_t cb_get) : msgr{msgr}, with_stats{with_stats}, - report_timer{[this] {report();}} + report_timer{[this] {report();}}, + set_perf_queries_cb(cb_set), + get_perf_report_cb(cb_get) {} seastar::future<> Client::start() @@ -152,6 +156,10 @@ seastar::future<> Client::handle_mgr_conf(crimson::net::ConnectionRef, } else { report_timer.cancel(); } + if (!m->osd_perf_metric_queries.empty()) { + ceph_assert(set_perf_queries_cb); + return set_perf_queries_cb(m->osd_perf_metric_queries); + } return seastar::now(); } @@ -202,6 +210,13 @@ void Client::_send_report() report->daemon_health_metrics = std::move(daemon_health_metrics); local_conf().get_config_bl(last_config_bl_version, &report->config_bl, &last_config_bl_version); + if (get_perf_report_cb) { + return get_perf_report_cb( + ).then([report=std::move(report), this](auto payload) mutable { + report->metric_report_message = MetricReportMessage(std::move(payload)); + return conn->send(std::move(report)); + }); + } return conn->send(std::move(report)); }); } diff --git a/src/crimson/mgr/client.h b/src/crimson/mgr/client.h index 
5d27bd2c0b37d..001c59e19d962 100644 --- a/src/crimson/mgr/client.h +++ b/src/crimson/mgr/client.h @@ -10,6 +10,7 @@ #include "crimson/net/Fwd.h" #include "mgr/DaemonHealthMetric.h" #include "mon/MgrMap.h" +#include "mgr/MetricTypes.h" template using Ref = boost::intrusive_ptr; namespace crimson::net { @@ -30,9 +31,14 @@ public: }; class Client : public crimson::net::Dispatcher { + using get_perf_report_cb_t = std::function ()>; + using set_perf_queries_cb_t = + std::function (const ConfigPayload &)>; public: Client(crimson::net::Messenger& msgr, - WithStats& with_stats); + WithStats& with_stats, + set_perf_queries_cb_t cb_set, + get_perf_report_cb_t cb_get); seastar::future<> start(); seastar::future<> stop(); void report(); @@ -60,6 +66,8 @@ private: crimson::common::gate_per_shard gates; uint64_t last_config_bl_version = 0; std::string service_name, daemon_name; + set_perf_queries_cb_t set_perf_queries_cb; + get_perf_report_cb_t get_perf_report_cb; std::vector daemon_health_metrics; diff --git a/src/crimson/osd/CMakeLists.txt b/src/crimson/osd/CMakeLists.txt index 50011adbcecfb..5ef6b3f36d2cc 100644 --- a/src/crimson/osd/CMakeLists.txt +++ b/src/crimson/osd/CMakeLists.txt @@ -56,6 +56,7 @@ add_executable(crimson-osd ${PROJECT_SOURCE_DIR}/src/osd/SnapMapper.cc ${PROJECT_SOURCE_DIR}/src/osd/recovery_types.cc ${PROJECT_SOURCE_DIR}/src/osd/osd_perf_counters.cc + ${PROJECT_SOURCE_DIR}/src/mgr/OSDPerfMetricTypes.cc watch.cc ) if(HAS_VTA) diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 71ec30c1d5bc0..29d5737804105 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -95,7 +95,16 @@ OSD::OSD(int id, uint32_t nonce, hb_front_msgr{hb_front_msgr}, hb_back_msgr{hb_back_msgr}, monc{new crimson::mon::Client{*public_msgr, *this}}, - mgrc{new crimson::mgr::Client{*public_msgr, *this}}, + mgrc{new crimson::mgr::Client{ + *public_msgr, + *this, + [this](const ConfigPayload &config_payload) { + return set_perf_queries(config_payload); + }, + [this] 
{ + return get_perf_reports(); + } + }}, store{store}, pg_shard_manager{osd_singleton_state, shard_services, @@ -176,6 +185,70 @@ seastar::future<> OSD::open_meta_coll() }); } +seastar::future<> OSD::set_perf_queries(const ConfigPayload &config_payload) { + LOG_PREFIX(OSD::set_perf_queries); + const OSDConfigPayload &osd_config_payload = + boost::get(config_payload); + const std::map &queries = + osd_config_payload.config; + DEBUG("setting {} queries", queries.size()); + + std::list supported_queries; + for (auto &it : queries) { + auto &query = it.first; + if (!query.key_descriptor.empty()) { + supported_queries.push_back(query); + } + } + if (supported_queries.size() < queries.size()) { + DEBUG("{} unsupported queries", queries.size() - supported_queries.size()); + } + + return shard_services.invoke_on_all( + [supported_queries, queries](auto &local_service) { + auto &pgs = local_service.local_state.pg_map.get_pgs(); + local_service.local_state.m_perf_queries = supported_queries; + local_service.local_state.m_perf_limits = queries; + for (auto &[id, pg] : pgs) { + pg->set_dynamic_perf_stats_queries(supported_queries); + } + }); +} + +seastar::future OSD::get_perf_reports() { + LOG_PREFIX(OSD::get_perf_reports); + OSDMetricPayload payload; + std::map &reports = payload.report; + + auto dps = co_await shard_services.map_reduce0( + [FNAME](auto &local_service) { + auto &pgs = local_service.local_state.pg_map.get_pgs(); + auto &m_perf_queries = local_service.local_state.m_perf_queries; + DynamicPerfStats dps; + for (auto &[id, pg] : pgs) { + // m_perf_queries can be modified only in set_perf_queries by mgr client + // request, and it is protected by mgr client's lock, which is held + // when set_perf_queries/get_perf_reports are called, so we need not hold + // m_perf_queries_lock here. 
+ DynamicPerfStats pg_dps(m_perf_queries); + pg->get_dynamic_perf_stats(&pg_dps); + dps.merge(pg_dps); + DEBUG("reporting for pg {}", pg->get_pgid()); + } + return dps; + }, + DynamicPerfStats(shard_services.local().local_state.m_perf_queries), + [](auto left, auto right) { + left.merge(right); + return left; + }); + + dps.add_to_reports(shard_services.local().local_state.m_perf_limits, &reports); + DEBUG("reports for {} queries", reports.size()); + + co_return payload; +} + seastar::future OSD::open_or_create_meta_coll(FuturizedStore &store) { return store.get_sharded_store().open_collection(coll_t::meta()).then([&store](auto ch) { diff --git a/src/crimson/osd/osd.h b/src/crimson/osd/osd.h index 1cf88bea5d9da..24804277c3274 100644 --- a/src/crimson/osd/osd.h +++ b/src/crimson/osd/osd.h @@ -236,6 +236,8 @@ private: std::vector get_health_metrics(); + seastar::future<> set_perf_queries(const ConfigPayload &config_payload); + seastar::future get_perf_reports(); private: crimson::common::gate_per_shard gate; diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index ca84b24583e2c..70bbf7fb4ea33 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -774,7 +774,16 @@ public: size_t inb, size_t outb, const utime_t &lat) { - dp_stats.add(pg_whoami.osd, get_info(), req, inb, outb, lat); + if (dp_stats.is_enabled()) { + dp_stats.add(pg_whoami.osd, get_info(), req, inb, outb, lat); + } + } + void set_dynamic_perf_stats_queries( + const std::list &queries) { + dp_stats.set_queries(queries); + } + void get_dynamic_perf_stats(DynamicPerfStats *stats) { + std::swap(dp_stats, *stats); } OSDriver &get_osdriver() final { return osdriver; diff --git a/src/crimson/osd/shard_services.h b/src/crimson/osd/shard_services.h index f1ed9b8d91129..c548027ed0097 100644 --- a/src/crimson/osd/shard_services.h +++ b/src/crimson/osd/shard_services.h @@ -25,6 +25,7 @@ #include "crimson/osd/state.h" #include "common/AsyncReserver.h" #include "crimson/net/Connection.h" +#include 
"mgr/OSDPerfMetricTypes.h" namespace crimson::net { class Messenger; @@ -197,6 +198,8 @@ class PerShardState { } OSDSuperblock per_shard_superblock; + std::list m_perf_queries; + std::map m_perf_limits; public: PerShardState( -- 2.47.3