From: Kefu Chai
Date: Mon, 23 Oct 2017 04:29:31 +0000 (+0800)
Subject: osd: send health-checks to mgr
X-Git-Tag: v13.0.1~103^2~3
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f4b74125e44fe78154fb377fa06fc08b3325859d;p=ceph.git

osd: send health-checks to mgr

Signed-off-by: Kefu Chai
---

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 97b829a53339..b9afa353d6e9 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -5019,23 +5019,11 @@ void OSD::tick_without_osd_lock()
     }
   }
 
-  check_ops_in_flight();
+  mgrc.update_osd_health(get_health_metrics());
   service.kick_recovery_queue();
   tick_timer_without_osd_lock.add_event_after(OSD_TICK_INTERVAL, new C_Tick_WithoutOSDLock(this));
 }
 
-void OSD::check_ops_in_flight()
-{
-  string summary;
-  vector<string> warnings;
-  if (op_tracker.check_ops_in_flight(&summary, warnings)) {
-    clog->warn() << summary;
-    for (const auto& warning : warnings) {
-      clog->warn() << warning;
-    }
-  }
-}
-
 // Usage:
 //   setomapval [namespace/]
 //   rmomapkey [namespace/]
@@ -7096,6 +7084,42 @@ MPGStats* OSD::collect_pg_stats()
   return m;
 }
 
+vector<OSDHealthMetric> OSD::get_health_metrics()
+{
+  vector<OSDHealthMetric> metrics;
+  {
+    utime_t oldest_secs;
+    const utime_t now = ceph_clock_now();
+    auto too_old = now;
+    too_old -= cct->_conf->get_val<double>("osd_op_complaint_time");
+    int slow = 0;
+    auto count_slow_ops = [&](TrackedOp& op) {
+      if (op.get_initiated() < too_old) {
+        slow++;
+        return true;
+      } else {
+        return false;
+      }
+    };
+    if (op_tracker.visit_ops_in_flight(&oldest_secs, count_slow_ops)) {
+      metrics.emplace_back(osd_metric::SLOW_OPS, slow, oldest_secs);
+    } else {
+      // no news is not good news.
+      metrics.emplace_back(osd_metric::SLOW_OPS, 0, 0);
+    }
+  }
+  with_unique_lock(pending_creates_lock, [&]() {
+    auto n_primaries = pending_creates_from_mon;
+    for (const auto& create : pending_creates_from_osd) {
+      if (create.second) {
+        n_primaries++;
+      }
+    }
+    metrics.emplace_back(osd_metric::PENDING_CREATING_PGS, n_primaries);
+  });
+  return metrics;
+}
+
 // =====================================================
 // MAP
 
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index c0591a4d1ffc..bc20926a590d 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -1554,7 +1554,6 @@ private:
 
   // -- op tracking --
   OpTracker op_tracker;
-  void check_ops_in_flight();
   void test_ops(std::string command, std::string args, ostream& ss);
   friend class TestOpsSocketHook;
   TestOpsSocketHook *test_ops_hook;
@@ -2233,6 +2232,7 @@ protected:
 
   // -- status reporting --
   MPGStats *collect_pg_stats();
+  std::vector<OSDHealthMetric> get_health_metrics();
 
 private:
   bool ms_can_fast_dispatch_any() const override { return true; }
diff --git a/src/osd/OSDHealthMetric.h b/src/osd/OSDHealthMetric.h
new file mode 100644
index 000000000000..ec16cffe812b
--- /dev/null
+++ b/src/osd/OSDHealthMetric.h
@@ -0,0 +1,61 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <cstdint>
+#include "include/denc.h"
+
+enum class osd_metric : uint8_t {
+  SLOW_OPS,
+  PENDING_CREATING_PGS,
+  NONE,
+};
+
+union osd_metric_t {
+  struct {
+    uint32_t n1;
+    uint32_t n2;
+  };
+  uint64_t n;
+  osd_metric_t(uint32_t x, uint32_t y)
+    : n1(x), n2(y)
+  {}
+  osd_metric_t(uint64_t x = 0)
+    : n(x)
+  {}
+};
+
+class OSDHealthMetric
+{
+public:
+  OSDHealthMetric() = default;
+  OSDHealthMetric(osd_metric type_, uint64_t n)
+    : type(type_), value(n)
+  {}
+  OSDHealthMetric(osd_metric type_, uint32_t n1, uint32_t n2)
+    : type(type_), value(n1, n2)
+  {}
+  osd_metric get_type() const {
+    return type;
+  }
+  uint64_t get_n() const {
+    return value.n;
+  }
+  uint32_t get_n1() const {
+    return value.n1;
+  }
+  uint32_t get_n2() const {
+    return value.n2;
+  }
+  DENC(OSDHealthMetric, v, p) {
+    DENC_START(1, 1, p);
+    denc(v.type, p);
+    denc(v.value.n, p);
+    DENC_FINISH(p);
+  }
+private:
+  osd_metric type = osd_metric::NONE;
+  osd_metric_t value;
+};
+WRITE_CLASS_DENC(OSDHealthMetric)