From f6ba1eea4cde2354e639518210a56d157081ecac Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Tue, 10 Sep 2019 09:53:09 -0400 Subject: [PATCH] mds: forward mds metrics to ceph manager w/ querying interfaces `MetricAggregator` sets up manager callback to forward metrics data to ceph-mgr. Also, add querying interfaces for adding and removing user queries and a simple interface to fetch MDS perf metrics. Fixes: http://tracker.ceph.com/issues/36253 Signed-off-by: Venky Shankar --- src/mds/MetricAggregator.cc | 59 +++++++++++++++++++++++++++++++ src/mds/MetricAggregator.h | 4 +++ src/mgr/CMakeLists.txt | 1 + src/mgr/DaemonServer.cc | 24 ++++++++++++- src/mgr/DaemonServer.h | 22 +++++++++++- src/mgr/MDSPerfMetricCollector.cc | 36 +++++++++++++++++++ src/mgr/MDSPerfMetricCollector.h | 22 ++++++++++++ src/mgr/MetricCollector.cc | 3 ++ src/mgr/MetricTypes.h | 47 ++++++++++++++++++++++++ src/mgr/MgrClient.h | 7 ++++ 10 files changed, 223 insertions(+), 2 deletions(-) create mode 100644 src/mgr/MDSPerfMetricCollector.cc create mode 100644 src/mgr/MDSPerfMetricCollector.h diff --git a/src/mds/MetricAggregator.cc b/src/mds/MetricAggregator.cc index cf90de60e92..382e511a676 100644 --- a/src/mds/MetricAggregator.cc +++ b/src/mds/MetricAggregator.cc @@ -43,6 +43,14 @@ int MetricAggregator::init() { } }); + mgrc->set_perf_metric_query_cb( + [this](const ConfigPayload &config_payload) { + set_perf_queries(config_payload); + }, + [this]() { + return get_perf_reports(); + }); + return 0; } @@ -292,3 +300,54 @@ void MetricAggregator::notify_mdsmap(const MDSMap &mdsmap) { dout(10) << ": active set=[" << active_rank_addrs << "]" << dendl; } + +void MetricAggregator::set_perf_queries(const ConfigPayload &config_payload) { + const MDSConfigPayload &mds_config_payload = boost::get(config_payload); + const std::map &queries = mds_config_payload.config; + + dout(10) << ": setting " << queries.size() << " queries" << dendl; + + std::scoped_lock locker(lock); + std::map> new_data; + for (auto 
&p : queries) { + std::swap(new_data[p.first], query_metrics_map[p.first]); + } + std::swap(query_metrics_map, new_data); +} + +MetricPayload MetricAggregator::get_perf_reports() { + MDSMetricPayload payload; + MDSPerfMetricReport &metric_report = payload.metric_report; + std::map &reports = metric_report.reports; + + std::scoped_lock locker(lock); + + for (auto& [query, counters] : query_metrics_map) { + auto &report = reports[query]; + + query.get_performance_counter_descriptors(&report.performance_counter_descriptors); + + auto &descriptors = report.performance_counter_descriptors; + ceph_assert(descriptors.size() > 0); + + dout(20) << ": descriptors=" << descriptors << dendl; + + for (auto &p : counters) { + dout(20) << ": packing perf_metric_key=" << p.first << ", perf_counter=" + << p.second << dendl; + auto &bl = report.group_packed_performance_counters[p.first]; + query.pack_counters(p.second, &bl); + } + } + + // stash a copy of delayed and failed ranks. mgr culls out metrics + // for failed ranks and tags metrics for delayed ranks as "stale". 
+ for (auto &p : active_rank_addrs) { + auto rank = p.first; + if (mds_pinger.is_rank_lagging(rank)) { + metric_report.rank_metrics_delayed.insert(rank); + } + } + + return payload; +} diff --git a/src/mds/MetricAggregator.h b/src/mds/MetricAggregator.h index 2da8f86b9c2..69a1f75e9e2 100644 --- a/src/mds/MetricAggregator.h +++ b/src/mds/MetricAggregator.h @@ -13,6 +13,7 @@ #include "common/ceph_mutex.h" #include "messages/MMDSMetrics.h" +#include "mgr/MetricTypes.h" #include "mgr/MDSPerfMetricTypes.h" #include "mdstypes.h" @@ -80,6 +81,9 @@ private: void cull_metrics_for_rank(mds_rank_t rank); void ping_all_active_ranks(); + + void set_perf_queries(const ConfigPayload &config_payload); + MetricPayload get_perf_reports(); }; #endif // CEPH_MDS_METRIC_AGGREGATOR_H diff --git a/src/mgr/CMakeLists.txt b/src/mgr/CMakeLists.txt index fc6564cef55..0d9cbdf3444 100644 --- a/src/mgr/CMakeLists.txt +++ b/src/mgr/CMakeLists.txt @@ -21,6 +21,7 @@ if(WITH_MGR) OSDPerfMetricTypes.cc OSDPerfMetricCollector.cc MDSPerfMetricTypes.cc + MDSPerfMetricCollector.cc PyFormatter.cc PyUtil.cc PyModule.cc diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index f5115740de8..c6f5779158b 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -22,6 +22,7 @@ #include "mgr/mgr_commands.h" #include "mgr/DaemonHealthMetricCollector.h" #include "mgr/OSDPerfMetricCollector.h" +#include "mgr/MDSPerfMetricCollector.h" #include "mon/MonCommand.h" #include "messages/MMgrOpen.h" @@ -90,7 +91,9 @@ DaemonServer::DaemonServer(MonClient *monc_, shutting_down(false), tick_event(nullptr), osd_perf_metric_collector_listener(this), - osd_perf_metric_collector(osd_perf_metric_collector_listener) + osd_perf_metric_collector(osd_perf_metric_collector_listener), + mds_perf_metric_collector_listener(this), + mds_perf_metric_collector(mds_perf_metric_collector_listener) { g_conf().add_observer(this); } @@ -364,6 +367,22 @@ void DaemonServer::handle_osd_perf_metric_query_updated() })); } +void 
DaemonServer::handle_mds_perf_metric_query_updated() +{ + dout(10) << dendl; + + // Send a fresh MMgrConfigure to all clients, so that they can follow + // the new policy for transmitting stats + finisher.queue(new LambdaContext([this](int r) { + std::lock_guard l(lock); + for (auto &c : daemon_connections) { + if (c->peer_is_mds()) { + _send_configure(c); + } + } + })); +} + void DaemonServer::shutdown() { dout(10) << "begin" << dendl; @@ -2888,6 +2907,9 @@ void DaemonServer::_send_configure(ConnectionRef c) if (c->peer_is_osd()) { configure->osd_perf_metric_queries = osd_perf_metric_collector.get_queries(); + } else if (c->peer_is_mds()) { + configure->metric_config_message = + MetricConfigMessage(MDSConfigPayload(mds_perf_metric_collector.get_queries())); } c->send_message2(configure); diff --git a/src/mgr/DaemonServer.h b/src/mgr/DaemonServer.h index 185d0d246c1..4c8647beeed 100644 --- a/src/mgr/DaemonServer.h +++ b/src/mgr/DaemonServer.h @@ -32,6 +32,7 @@ #include "DaemonState.h" #include "MetricCollector.h" #include "OSDPerfMetricCollector.h" +#include "MDSPerfMetricCollector.h" class MMgrReport; class MMgrOpen; @@ -42,7 +43,7 @@ class MMgrCommand; struct MonCommand; class CommandContext; struct OSDPerfMetricQuery; - +struct MDSPerfMetricQuery; /** * Server used in ceph-mgr to communicate with Ceph daemons like @@ -124,10 +125,29 @@ private: OSDPerfMetricCollector osd_perf_metric_collector; void handle_osd_perf_metric_query_updated(); + class MDSPerfMetricCollectorListener : public MetricListener { + public: + MDSPerfMetricCollectorListener(DaemonServer *server) + : server(server) { + } + void handle_query_updated() override { + server->handle_mds_perf_metric_query_updated(); + } + private: + DaemonServer *server; + }; + MDSPerfMetricCollectorListener mds_perf_metric_collector_listener; + MDSPerfMetricCollector mds_perf_metric_collector; + void handle_mds_perf_metric_query_updated(); + void handle_metric_payload(const OSDMetricPayload &payload) { 
osd_perf_metric_collector.process_reports(payload); } + void handle_metric_payload(const MDSMetricPayload &payload) { + mds_perf_metric_collector.process_reports(payload); + } + void handle_metric_payload(const UnknownMetricPayload &payload) { ceph_abort(); } diff --git a/src/mgr/MDSPerfMetricCollector.cc b/src/mgr/MDSPerfMetricCollector.cc new file mode 100644 index 00000000000..7d08f3431d4 --- /dev/null +++ b/src/mgr/MDSPerfMetricCollector.cc @@ -0,0 +1,36 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "messages/MMgrReport.h" +#include "mgr/MDSPerfMetricTypes.h" +#include "mgr/MDSPerfMetricCollector.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_mgr +#undef dout_prefix +#define dout_prefix *_dout << "mgr.mds_perf_metric_collector " << __func__ << " " + +MDSPerfMetricCollector::MDSPerfMetricCollector(MetricListener &listener) + : MetricCollector(listener) { +} + +void MDSPerfMetricCollector::process_reports(const MetricPayload &payload) { + const MDSPerfMetricReport &metric_report = boost::get(payload).metric_report; + + std::lock_guard locker(lock); + process_reports_generic( + metric_report.reports, [](PerformanceCounter *counter, const PerformanceCounter &update) { + counter->first = update.first; + counter->second = update.second; + }); + + // update delayed rank set + delayed_ranks = metric_report.rank_metrics_delayed; + dout(20) << ": delayed ranks=[" << delayed_ranks << "]" << dendl; +} diff --git a/src/mgr/MDSPerfMetricCollector.h b/src/mgr/MDSPerfMetricCollector.h new file mode 100644 index 00000000000..777e4c8fc8a --- /dev/null +++ b/src/mgr/MDSPerfMetricCollector.h @@ -0,0 +1,22 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_MGR_MDS_PERF_COLLECTOR_H +#define CEPH_MGR_MDS_PERF_COLLECTOR_H + +#include "mgr/MetricCollector.h" +#include 
"mgr/MDSPerfMetricTypes.h" + +// MDS performance query class +class MDSPerfMetricCollector + : public MetricCollector { +private: + std::set delayed_ranks; +public: + MDSPerfMetricCollector(MetricListener &listener); + + void process_reports(const MetricPayload &payload) override; +}; + +#endif // CEPH_MGR_MDS_PERF_COLLECTOR_H diff --git a/src/mgr/MetricCollector.cc b/src/mgr/MetricCollector.cc index 1eb26a8faad..836662a165d 100644 --- a/src/mgr/MetricCollector.cc +++ b/src/mgr/MetricCollector.cc @@ -6,6 +6,7 @@ #include "mgr/MetricCollector.h" #include "mgr/OSDPerfMetricTypes.h" +#include "mgr/MDSPerfMetricTypes.h" #define dout_context g_ceph_context #define dout_subsys ceph_subsys_mgr @@ -181,3 +182,5 @@ void MetricCollector::process_reports_generic( template class MetricCollector; +template class +MetricCollector; diff --git a/src/mgr/MetricTypes.h b/src/mgr/MetricTypes.h index 1bcc0170659..5f1adc6cf19 100644 --- a/src/mgr/MetricTypes.h +++ b/src/mgr/MetricTypes.h @@ -7,9 +7,11 @@ #include #include "include/denc.h" #include "mgr/OSDPerfMetricTypes.h" +#include "mgr/MDSPerfMetricTypes.h" enum class MetricReportType { METRIC_REPORT_TYPE_OSD = 0, + METRIC_REPORT_TYPE_MDS = 1, }; struct OSDMetricPayload { @@ -29,6 +31,23 @@ struct OSDMetricPayload { } }; +struct MDSMetricPayload { + static const MetricReportType METRIC_REPORT_TYPE = MetricReportType::METRIC_REPORT_TYPE_MDS; + MDSPerfMetricReport metric_report; + + MDSMetricPayload() { + } + MDSMetricPayload(const MDSPerfMetricReport &metric_report) + : metric_report(metric_report) { + } + + DENC(MDSMetricPayload, v, p) { + DENC_START(1, 1, p); + denc(v.metric_report, p); + DENC_FINISH(p); + } +}; + struct UnknownMetricPayload { static const MetricReportType METRIC_REPORT_TYPE = static_cast(-1); @@ -40,9 +59,11 @@ struct UnknownMetricPayload { }; WRITE_CLASS_DENC(OSDMetricPayload) +WRITE_CLASS_DENC(MDSMetricPayload) WRITE_CLASS_DENC(UnknownMetricPayload) typedef boost::variant MetricPayload; class 
EncodeMetricPayloadVisitor : public boost::static_visitor { @@ -97,6 +118,9 @@ struct MetricReportMessage { case MetricReportType::METRIC_REPORT_TYPE_OSD: payload = OSDMetricPayload(); break; + case MetricReportType::METRIC_REPORT_TYPE_MDS: + payload = MDSMetricPayload(); + break; default: payload = UnknownMetricPayload(); break; @@ -112,6 +136,7 @@ WRITE_CLASS_ENCODER(MetricReportMessage); enum MetricConfigType { METRIC_CONFIG_TYPE_OSD = 0, + METRIC_CONFIG_TYPE_MDS = 1, }; struct OSDConfigPayload { @@ -131,6 +156,23 @@ struct OSDConfigPayload { } }; +struct MDSConfigPayload { + static const MetricConfigType METRIC_CONFIG_TYPE = MetricConfigType::METRIC_CONFIG_TYPE_MDS; + std::map config; + + MDSConfigPayload() { + } + MDSConfigPayload(const std::map &config) + : config(config) { + } + + DENC(MDSConfigPayload, v, p) { + DENC_START(1, 1, p); + denc(v.config, p); + DENC_FINISH(p); + } +}; + struct UnknownConfigPayload { static const MetricConfigType METRIC_CONFIG_TYPE = static_cast(-1); @@ -142,9 +184,11 @@ struct UnknownConfigPayload { }; WRITE_CLASS_DENC(OSDConfigPayload) +WRITE_CLASS_DENC(MDSConfigPayload) WRITE_CLASS_DENC(UnknownConfigPayload) typedef boost::variant ConfigPayload; class EncodeConfigPayloadVisitor : public boost::static_visitor { @@ -199,6 +243,9 @@ struct MetricConfigMessage { case MetricConfigType::METRIC_CONFIG_TYPE_OSD: payload = OSDConfigPayload(); break; + case MetricConfigType::METRIC_CONFIG_TYPE_MDS: + payload = MDSConfigPayload(); + break; default: payload = UnknownConfigPayload(); break; diff --git a/src/mgr/MgrClient.h b/src/mgr/MgrClient.h index aed1072c674..6bd2b26e11a 100644 --- a/src/mgr/MgrClient.h +++ b/src/mgr/MgrClient.h @@ -1,3 +1,4 @@ + // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab /* @@ -175,6 +176,12 @@ private: } } + void handle_config_payload(const MDSConfigPayload &payload) { + if (set_perf_queries_cb) { + set_perf_queries_cb(payload); + } + } + void 
handle_config_payload(const UnknownConfigPayload &payload) { ceph_abort(); } -- 2.39.5