From: lvshanchun Date: Tue, 13 Mar 2018 06:41:14 +0000 (+0800) Subject: mgr,osd: make osd_metric more popular X-Git-Tag: v13.1.0~466^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=714ffe0d5f078e4106ddbd8b57918ae2a0c899af;p=ceph.git mgr,osd: make osd_metric more popular Signed-off-by: lvshanchun --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 33a8de9e4078..73a3cc435643 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -732,7 +732,7 @@ if (WITH_MGR) mgr/DaemonServer.cc mgr/ClusterState.cc mgr/ActivePyModules.cc - mgr/OSDHealthMetricCollector.cc + mgr/DaemonHealthMetricCollector.cc mgr/StandbyPyModules.cc mgr/PyModule.cc mgr/PyModuleRegistry.cc diff --git a/src/messages/MMgrReport.h b/src/messages/MMgrReport.h index ecc9c4ad7f76..14bf89b0a01f 100644 --- a/src/messages/MMgrReport.h +++ b/src/messages/MMgrReport.h @@ -20,7 +20,7 @@ #include "msg/Message.h" #include "common/perf_counters.h" -#include "osd/OSDHealthMetric.h" +#include "mgr/DaemonHealthMetric.h" class PerfCounterType { @@ -98,7 +98,7 @@ public: // for service registration boost::optional> daemon_status; - std::vector osd_health_metrics; + std::vector daemon_health_metrics; // encode map> of current config bufferlist config_bl; @@ -116,7 +116,7 @@ public: decode(daemon_status, p); } if (header.version >= 5) { - decode(osd_health_metrics, p); + decode(daemon_health_metrics, p); } if (header.version >= 6) { decode(config_bl, p); @@ -131,7 +131,7 @@ public: encode(undeclare_types, payload); encode(service_name, payload); encode(daemon_status, payload); - encode(osd_health_metrics, payload); + encode(daemon_health_metrics, payload); encode(config_bl, payload); } @@ -150,8 +150,8 @@ public: if (daemon_status) { out << " status=" << daemon_status->size(); } - if (!osd_health_metrics.empty()) { - out << " osd_metrics=" << osd_health_metrics.size(); + if (!daemon_health_metrics.empty()) { + out << " daemon_metrics=" << daemon_health_metrics.size(); } out << ")"; } diff --git a/src/mgr/DaemonHealthMetric.h b/src/mgr/DaemonHealthMetric.h new file mode 100644 index 000000000000..e67815d2fd96 --- /dev/null +++ b/src/mgr/DaemonHealthMetric.h @@ -0,0 +1,61 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include +#include "include/denc.h" + +enum class daemon_metric : uint8_t { + SLOW_OPS, + PENDING_CREATING_PGS, + NONE, +}; + +union daemon_metric_t { + struct { + uint32_t n1; + uint32_t n2; + }; + uint64_t n; + daemon_metric_t(uint32_t x, uint32_t y) + : n1(x), n2(y) + {} + daemon_metric_t(uint64_t x = 0) + : n(x) + {} +}; + +class DaemonHealthMetric +{ +public: + DaemonHealthMetric() = default; + DaemonHealthMetric(daemon_metric type_, uint64_t n) + : type(type_), value(n) + {} + DaemonHealthMetric(daemon_metric type_, uint32_t n1, uint32_t n2) + : type(type_), value(n1, n2) + {} + daemon_metric get_type() const { + return type; + } + uint64_t get_n() const { + return value.n; + } + uint32_t get_n1() const { + return value.n1; + } + uint32_t get_n2() const { + return value.n2; + } + DENC(DaemonHealthMetric, v, p) { + DENC_START(1, 1, p); + denc(v.type, p); + denc(v.value.n, p); + DENC_FINISH(p); + } +private: + daemon_metric type = daemon_metric::NONE; + daemon_metric_t value; +}; +WRITE_CLASS_DENC(DaemonHealthMetric) diff --git a/src/mgr/DaemonHealthMetricCollector.cc b/src/mgr/DaemonHealthMetricCollector.cc new file mode 100644 index 000000000000..fb38e0de9b55 --- /dev/null +++ b/src/mgr/DaemonHealthMetricCollector.cc @@ -0,0 +1,101 @@ +#include + +#include "include/health.h" +#include "include/types.h" +#include "DaemonHealthMetricCollector.h" + + + +ostream& operator<<(ostream& os, + const DaemonHealthMetricCollector::DaemonKey& daemon) { + return os << daemon.first << "." << daemon.second; +} + +namespace { + +class SlowOps final : public DaemonHealthMetricCollector { + bool _is_relevant(daemon_metric type) const override { + return type == daemon_metric::SLOW_OPS; + } + health_check_t& _get_check(health_check_map_t& cm) const override { + return cm.get_or_add("SLOW_OPS", HEALTH_WARN, ""); + } + bool _update(const DaemonKey& daemon, + const DaemonHealthMetric& metric) override { + auto num_slow = metric.get_n1(); + auto blocked_time = metric.get_n2(); + value.n1 += num_slow; + value.n2 = std::max(value.n2, blocked_time); + if (num_slow || blocked_time) { + daemons.push_back(daemon); + return true; + } else { + return false; + } + } + void _summarize(health_check_t& check) const override { + if (daemons.empty()) { + return; + } + static const char* fmt = "%1% slow ops, oldest one blocked for %2% sec"; + check.summary = boost::str(boost::format(fmt) % value.n1 % value.n2); + ostringstream ss; + if (daemons.size() > 1) { + ss << "daemons " << daemons << " have slow ops."; + } else { + ss << daemons.front() << " has slow ops"; + } + check.detail.push_back(ss.str()); + } + vector daemons; +}; + + +class PendingPGs final : public DaemonHealthMetricCollector { + bool _is_relevant(daemon_metric type) const override { + return type == daemon_metric::PENDING_CREATING_PGS; + } + health_check_t& _get_check(health_check_map_t& cm) const override { + return cm.get_or_add("PENDING_CREATING_PGS", HEALTH_WARN, ""); + } + bool _update(const DaemonKey& osd, + const DaemonHealthMetric& metric) override { + value.n += metric.get_n(); + if (metric.get_n()) { + osds.push_back(osd); + return true; + } else { + return false; + } + } + void _summarize(health_check_t& check) const override { + if (osds.empty()) { + return; + } + static const char* fmt = "%1% PGs pending on creation"; + check.summary = boost::str(boost::format(fmt) % value.n); + ostringstream ss; + if (osds.size() > 1) { + ss << "osds " << osds << " have pending PGs."; + } else { + ss << osds.front() << " has pending PGs"; + } + check.detail.push_back(ss.str()); + } + vector osds; +}; + +} // anonymous namespace + +unique_ptr +DaemonHealthMetricCollector::create(daemon_metric m) +{ + switch (m) { + case daemon_metric::SLOW_OPS: + return unique_ptr{new SlowOps}; + case daemon_metric::PENDING_CREATING_PGS: + return unique_ptr{new PendingPGs}; + default: + return unique_ptr{}; + } +} diff --git a/src/mgr/DaemonHealthMetricCollector.h b/src/mgr/DaemonHealthMetricCollector.h new file mode 100644 index 000000000000..731b4c43b5c8 --- /dev/null +++ b/src/mgr/DaemonHealthMetricCollector.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include + +#include "DaemonHealthMetric.h" +#include "mon/health_check.h" + +class DaemonHealthMetricCollector { +public: + using DaemonKey = std::pair; + static std::unique_ptr create(daemon_metric m); + void update(const DaemonKey& daemon, const DaemonHealthMetric& metric) { + if (_is_relevant(metric.get_type())) { + reported = _update(daemon, metric); + } + } + void summarize(health_check_map_t& cm) { + if (reported) { + _summarize(_get_check(cm)); + } + } + virtual ~DaemonHealthMetricCollector() {} +private: + virtual bool _is_relevant(daemon_metric type) const = 0; + virtual health_check_t& _get_check(health_check_map_t& cm) const = 0; + virtual bool _update(const DaemonKey& daemon, const DaemonHealthMetric& metric) = 0; + virtual void _summarize(health_check_t& check) const = 0; +protected: + daemon_metric_t value; + bool reported = false; +}; diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index 0d5dac7a095a..ad73112c4890 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -20,7 +20,7 @@ #include "json_spirit/json_spirit_writer.h" #include "mgr/mgr_commands.h" -#include "mgr/OSDHealthMetricCollector.h" +#include "mgr/DaemonHealthMetricCollector.h" #include "mon/MonCommand.h" #include "messages/MMgrOpen.h" @@ -524,7 +524,7 @@ bool DaemonServer::handle_report(MMgrReport *m) } if (m->get_connection()->peer_is_osd()) { // only OSD sends health_checks to me now - daemon->osd_health_metrics = std::move(m->osd_health_metrics); + daemon->daemon_health_metrics = std::move(m->daemon_health_metrics); } } @@ -1701,13 +1701,13 @@ void DaemonServer::send_report() }); auto osds = daemon_state.get_by_service("osd"); - map> accumulated; + map> accumulated; for (const auto& osd : osds) { Mutex::Locker l(osd.second->lock); - for (const auto& metric : osd.second->osd_health_metrics) { + for (const auto& metric : osd.second->daemon_health_metrics) { auto acc = accumulated.find(metric.get_type()); if (acc == accumulated.end()) { - auto collector = OSDHealthMetricCollector::create(metric.get_type()); + auto collector = DaemonHealthMetricCollector::create(metric.get_type()); if (!collector) { derr << __func__ << " " << osd.first << "." << osd.second << " sent me an unknown health metric: " diff --git a/src/mgr/DaemonState.h b/src/mgr/DaemonState.h index 545d4b1bebcb..15106f7f07b7 100644 --- a/src/mgr/DaemonState.h +++ b/src/mgr/DaemonState.h @@ -98,7 +98,7 @@ class DaemonState std::map metadata; // TODO: this can be generalized to other daemons - std::vector osd_health_metrics; + std::vector daemon_health_metrics; // Ephemeral state bool service_daemon = false; diff --git a/src/mgr/MgrClient.cc b/src/mgr/MgrClient.cc index 25b0eea2cc8c..2bf5fa0165f3 100644 --- a/src/mgr/MgrClient.cc +++ b/src/mgr/MgrClient.cc @@ -329,7 +329,7 @@ void MgrClient::send_report() daemon_dirty_status = false; } - report->osd_health_metrics = std::move(osd_health_metrics); + report->daemon_health_metrics = std::move(daemon_health_metrics); cct->_conf->get_config_bl(last_config_bl_version, &report->config_bl, &last_config_bl_version); @@ -476,7 +476,8 @@ int MgrClient::service_daemon_update_status( return 0; } -void MgrClient::update_osd_health(std::vector&& metrics) +void MgrClient::update_daemon_health(std::vector&& metrics) { - osd_health_metrics = std::move(metrics); + daemon_health_metrics = std::move(metrics); } + diff --git a/src/mgr/MgrClient.h b/src/mgr/MgrClient.h index 0f4d3c32e69f..885a74259c9c 100644 --- a/src/mgr/MgrClient.h +++ b/src/mgr/MgrClient.h @@ -17,7 +17,7 @@ #include "msg/Connection.h" #include "msg/Dispatcher.h" #include "mon/MgrMap.h" -#include "osd/OSDHealthMetric.h" +#include "mgr/DaemonHealthMetric.h" #include "common/perf_counters.h" #include "common/Timer.h" @@ -81,7 +81,7 @@ protected: std::string service_name, daemon_name; std::map daemon_metadata; std::map daemon_status; - std::vector osd_health_metrics; + std::vector daemon_health_metrics; void reconnect(); void _send_open(); @@ -120,7 +120,7 @@ public: const std::map& metadata); int service_daemon_update_status( std::map&& status); - void update_osd_health(std::vector&& metrics); + void update_daemon_health(std::vector&& metrics); private: void send_stats(); diff --git a/src/mgr/OSDHealthMetricCollector.cc b/src/mgr/OSDHealthMetricCollector.cc deleted file mode 100644 index d86ad07be063..000000000000 --- a/src/mgr/OSDHealthMetricCollector.cc +++ /dev/null @@ -1,101 +0,0 @@ -#include - -#include "include/health.h" -#include "include/types.h" -#include "OSDHealthMetricCollector.h" - - - -ostream& operator<<(ostream& os, - const OSDHealthMetricCollector::DaemonKey& daemon) { - return os << daemon.first << "." << daemon.second; -} - -namespace { - -class SlowOps final : public OSDHealthMetricCollector { - bool _is_relevant(osd_metric type) const override { - return type == osd_metric::SLOW_OPS; - } - health_check_t& _get_check(health_check_map_t& cm) const override { - return cm.get_or_add("SLOW_OPS", HEALTH_WARN, ""); - } - bool _update(const DaemonKey& osd, - const OSDHealthMetric& metric) override { - auto num_slow = metric.get_n1(); - auto blocked_time = metric.get_n2(); - value.n1 += num_slow; - value.n2 = std::max(value.n2, blocked_time); - if (num_slow || blocked_time) { - osds.push_back(osd); - return true; - } else { - return false; - } - } - void _summarize(health_check_t& check) const override { - if (osds.empty()) { - return; - } - static const char* fmt = "%1% slow ops, oldest one blocked for %2% sec"; - check.summary = boost::str(boost::format(fmt) % value.n1 % value.n2); - ostringstream ss; - if (osds.size() > 1) { - ss << "osds " << osds << " have slow ops."; - } else { - ss << osds.front() << " has slow ops"; - } - check.detail.push_back(ss.str()); - } - vector osds; -}; - - -class PendingPGs final : public OSDHealthMetricCollector { - bool _is_relevant(osd_metric type) const override { - return type == osd_metric::PENDING_CREATING_PGS; - } - health_check_t& _get_check(health_check_map_t& cm) const override { - return cm.get_or_add("PENDING_CREATING_PGS", HEALTH_WARN, ""); - } - bool _update(const DaemonKey& osd, - const OSDHealthMetric& metric) override { - value.n += metric.get_n(); - if (metric.get_n()) { - osds.push_back(osd); - return true; - } else { - return false; - } - } - void _summarize(health_check_t& check) const override { - if (osds.empty()) { - return; - } - static const char* fmt = "%1% PGs pending on creation"; - check.summary = boost::str(boost::format(fmt) % value.n); - ostringstream ss; - if (osds.size() > 1) { - ss << "osds " << osds << " have pending PGs."; - } else { - ss << osds.front() << " has pending PGs"; - } - check.detail.push_back(ss.str()); - } - vector osds; -}; - -} // anonymous namespace - -unique_ptr -OSDHealthMetricCollector::create(osd_metric m) -{ - switch (m) { - case osd_metric::SLOW_OPS: - return unique_ptr{new SlowOps}; - case osd_metric::PENDING_CREATING_PGS: - return unique_ptr{new PendingPGs}; - default: - return unique_ptr{}; - } -} diff --git a/src/mgr/OSDHealthMetricCollector.h b/src/mgr/OSDHealthMetricCollector.h deleted file mode 100644 index c28872ef13b0..000000000000 --- a/src/mgr/OSDHealthMetricCollector.h +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include - -#include "osd/OSDHealthMetric.h" -#include "mon/health_check.h" - -class OSDHealthMetricCollector { -public: - using DaemonKey = std::pair; - static std::unique_ptr create(osd_metric m); - void update(const DaemonKey& osd, const OSDHealthMetric& metric) { - if (_is_relevant(metric.get_type())) { - reported = _update(osd, metric); - } - } - void summarize(health_check_map_t& cm) { - if (reported) { - _summarize(_get_check(cm)); - } - } - virtual ~OSDHealthMetricCollector() {} -private: - virtual bool _is_relevant(osd_metric type) const = 0; - virtual health_check_t& _get_check(health_check_map_t& cm) const = 0; - virtual bool _update(const DaemonKey& osd, const OSDHealthMetric& metric) = 0; - virtual void _summarize(health_check_t& check) const = 0; -protected: - osd_metric_t value; - bool reported = false; -}; diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 2babb4e289af..c7f1fd3b4499 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4985,7 +4985,7 @@ void OSD::tick_without_osd_lock() } } - mgrc.update_osd_health(get_health_metrics()); + mgrc.update_daemon_health(get_health_metrics()); service.kick_recovery_queue(); tick_timer_without_osd_lock.add_event_after(OSD_TICK_INTERVAL, new C_Tick_WithoutOSDLock(this)); } @@ -7065,9 +7065,9 @@ MPGStats* OSD::collect_pg_stats() return m; } -vector OSD::get_health_metrics() +vector OSD::get_health_metrics() { - vector metrics; + vector metrics; { utime_t oldest_secs; const utime_t now = ceph_clock_now(); @@ -7083,10 +7083,10 @@ vector OSD::get_health_metrics() } }; if (op_tracker.visit_ops_in_flight(&oldest_secs, count_slow_ops)) { - metrics.emplace_back(osd_metric::SLOW_OPS, slow, oldest_secs); + metrics.emplace_back(daemon_metric::SLOW_OPS, slow, oldest_secs); } else { // no news is not good news. - metrics.emplace_back(osd_metric::SLOW_OPS, 0, 0); + metrics.emplace_back(daemon_metric::SLOW_OPS, 0, 0); } } with_unique_lock(pending_creates_lock, [&]() { @@ -7096,7 +7096,7 @@ vector OSD::get_health_metrics() n_primaries++; } } - metrics.emplace_back(osd_metric::PENDING_CREATING_PGS, n_primaries); + metrics.emplace_back(daemon_metric::PENDING_CREATING_PGS, n_primaries); }); return metrics; } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 83676b686900..47d7ac22f0c9 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -2091,7 +2091,7 @@ protected: // -- status reporting -- MPGStats *collect_pg_stats(); - std::vector get_health_metrics(); + std::vector get_health_metrics(); private: bool ms_can_fast_dispatch_any() const override { return true; } diff --git a/src/osd/OSDHealthMetric.h b/src/osd/OSDHealthMetric.h deleted file mode 100644 index ec16cffe812b..000000000000 --- a/src/osd/OSDHealthMetric.h +++ /dev/null @@ -1,61 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#pragma once - -#include -#include "include/denc.h" - -enum class osd_metric : uint8_t { - SLOW_OPS, - PENDING_CREATING_PGS, - NONE, -}; - -union osd_metric_t { - struct { - uint32_t n1; - uint32_t n2; - }; - uint64_t n; - osd_metric_t(uint32_t x, uint32_t y) - : n1(x), n2(y) - {} - osd_metric_t(uint64_t x = 0) - : n(x) - {} -}; - -class OSDHealthMetric -{ -public: - OSDHealthMetric() = default; - OSDHealthMetric(osd_metric type_, uint64_t n) - : type(type_), value(n) - {} - OSDHealthMetric(osd_metric type_, uint32_t n1, uint32_t n2) - : type(type_), value(n1, n2) - {} - osd_metric get_type() const { - return type; - } - uint64_t get_n() const { - return value.n; - } - uint32_t get_n1() const { - return value.n1; - } - uint32_t get_n2() const { - return value.n2; - } - DENC(OSDHealthMetric, v, p) { - DENC_START(1, 1, p); - denc(v.type, p); - denc(v.value.n, p); - DENC_FINISH(p); - } -private: - osd_metric type = osd_metric::NONE; - osd_metric_t value; -}; -WRITE_CLASS_DENC(OSDHealthMetric)