From: Sage Weil Date: Mon, 12 Jun 2017 22:39:15 +0000 (-0400) Subject: mon: new health check framework X-Git-Tag: v12.1.1~58^2~40 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=8e815abe4ef92e3da74c4ab2ad3e84017810e234;p=ceph-ci.git mon: new health check framework Signed-off-by: Sage Weil --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index d879a2c38a8..a684e536f9e 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -309,7 +309,6 @@ OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for cl OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock drift check) interval (seconds) OPTION(mon_timecheck_skew_interval, OPT_FLOAT, 30.0) // on leader, timecheck (clock drift check) interval when in presence of a skew (seconds) OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info) -OPTION(mon_health_max_detail, OPT_INT, 50) // max detailed pgs to report in health detail OPTION(mon_pg_min_inactive, OPT_U64, 1) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR. 
OPTION(mon_pg_warn_min_per_osd, OPT_INT, 30) // min # pgs per (in) osd before we warn the admin OPTION(mon_pg_warn_max_per_osd, OPT_INT, 300) // max # pgs per (in) osd before we warn the admin @@ -352,6 +351,8 @@ OPTION(mon_health_data_update_interval, OPT_FLOAT, 60.0) OPTION(mon_health_to_clog, OPT_BOOL, true) OPTION(mon_health_to_clog_interval, OPT_INT, 3600) OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE, 60.0) +OPTION(mon_health_preluminous_compat, OPT_BOOL, false) +OPTION(mon_health_max_detail, OPT_INT, 50) // max detailed pgs to report in health detail OPTION(mon_data_avail_crit, OPT_INT, 5) OPTION(mon_data_avail_warn, OPT_INT, 30) OPTION(mon_data_size_warn, OPT_U64, 15*1024*1024*1024) // issue a warning when the monitor's data store goes over 15GB (in bytes) diff --git a/src/messages/MMonHealthChecks.h b/src/messages/MMonHealthChecks.h new file mode 100644 index 00000000000..6b66847633f --- /dev/null +++ b/src/messages/MMonHealthChecks.h @@ -0,0 +1,47 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_MMON_HEALTH_CHECKS_H +#define CEPH_MMON_HEALTH_CHECKS_H + +#include "messages/PaxosServiceMessage.h" +#include "mon/health_check.h" + +struct MMonHealthChecks : public PaxosServiceMessage +{ + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; + + health_check_map_t health_checks; + + MMonHealthChecks() + : PaxosServiceMessage(MSG_MON_HEALTH_CHECKS, HEAD_VERSION, COMPAT_VERSION) { + } + MMonHealthChecks(health_check_map_t& m) + : PaxosServiceMessage(MSG_MON_HEALTH_CHECKS, HEAD_VERSION, COMPAT_VERSION), + health_checks(m) { + } + +private: + ~MMonHealthChecks() override { } + +public: + const char *get_type_name() const override { return "mon_health_checks"; } + void print(ostream &o) const override { + o << "mon_health_checks(" << health_checks.checks.size() << " checks)"; + } + + void decode_payload() override { + bufferlist::iterator p = payload.begin(); + 
paxos_decode(p); + ::decode(health_checks, p); + } + + void encode_payload(uint64_t features) override { + paxos_encode(); + ::encode(health_checks, payload); + } + +}; + +#endif diff --git a/src/mon/CMakeLists.txt b/src/mon/CMakeLists.txt index 1ba6802e120..556157132f8 100644 --- a/src/mon/CMakeLists.txt +++ b/src/mon/CMakeLists.txt @@ -15,6 +15,7 @@ set(lib_mon_srcs LogMonitor.cc AuthMonitor.cc Elector.cc + HealthMonitor.cc OldHealthMonitor.cc DataHealthService.cc PGMonitor.cc diff --git a/src/mon/HealthMonitor.cc b/src/mon/HealthMonitor.cc new file mode 100644 index 00000000000..2742751abf3 --- /dev/null +++ b/src/mon/HealthMonitor.cc @@ -0,0 +1,355 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Inktank, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include +#include +#include +#include + +#include "include/assert.h" +#include "include/stringify.h" + +#include "mon/Monitor.h" +#include "mon/HealthService.h" +#include "mon/HealthMonitor.h" +#include "mon/DataHealthService.h" + +#include "messages/MMonHealth.h" +#include "messages/MMonHealthChecks.h" + +#include "common/Formatter.h" + +#define dout_subsys ceph_subsys_mon +#undef dout_prefix +#define dout_prefix _prefix(_dout, mon, this) +static ostream& _prefix(std::ostream *_dout, const Monitor *mon, + const HealthMonitor *hmon) { + return *_dout << "mon." 
<< mon->name << "@" << mon->rank + << "(" << mon->get_state_name() << ").health "; +} + +HealthMonitor::HealthMonitor(Monitor *m, Paxos *p, const string& service_name) + : PaxosService(m, p, service_name) { +} + +void HealthMonitor::init() +{ + dout(10) << __func__ << dendl; +} + +void HealthMonitor::create_initial() +{ + dout(10) << __func__ << dendl; +} + +void HealthMonitor::update_from_paxos(bool *need_bootstrap) +{ + version = get_last_committed(); + dout(10) << __func__ << dendl; + load_health(); + + bufferlist qbl; + mon->store->get(service_name, "quorum", qbl); + if (qbl.length()) { + auto p = qbl.begin(); + ::decode(quorum_checks, p); + } else { + quorum_checks.clear(); + } + + bufferlist lbl; + mon->store->get(service_name, "leader", lbl); + if (lbl.length()) { + auto p = lbl.begin(); + ::decode(leader_checks, p); + } else { + leader_checks.clear(); + } + + dout(20) << "dump:"; + JSONFormatter jf(true); + jf.open_object_section("health"); + jf.open_object_section("quorum_health"); + for (auto& p : quorum_checks) { + string s = string("mon.") + stringify(p.first); + jf.dump_object(s.c_str(), p.second); + } + jf.close_section(); + jf.dump_object("leader_health", leader_checks); + jf.close_section(); + jf.flush(*_dout); + *_dout << dendl; +} + +void HealthMonitor::create_pending() +{ + dout(10) << " " << version << dendl; +} + +void HealthMonitor::encode_pending(MonitorDBStore::TransactionRef t) +{ + ++version; + dout(10) << " " << version << dendl; + put_last_committed(t, version); + + bufferlist qbl; + ::encode(quorum_checks, qbl); + t->put(service_name, "quorum", qbl); + bufferlist lbl; + ::encode(leader_checks, lbl); + t->put(service_name, "leader", lbl); + + health_check_map_t pending_health; + + // combine per-mon details carefully... 
+ map<string,set<string>> names; // code -> mon names + for (const auto& p : quorum_checks) { + for (const auto& q : p.second.checks) { + names[q.first].insert(mon->monmap->get_name(p.first)); + } + pending_health.merge(p.second); + } + for (auto& p : pending_health.checks) { // by reference: the substitutions below must update pending_health itself, not a copy + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%num%"), stringify(names[p.first].size())); + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%names%"), stringify(names[p.first])); + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%plurals%"), + names[p.first].size() > 1 ? "s" : ""); + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%isorare%"), + names[p.first].size() > 1 ? "are" : "is"); + } + + pending_health.merge(leader_checks); + encode_health(pending_health, t); +} + +version_t HealthMonitor::get_trim_to() +{ + // we don't actually need *any* old states, but keep a few. + if (version > 5) { + return version - 5; + } + return 0; +} + +bool HealthMonitor::preprocess_query(MonOpRequestRef op) +{ + switch (op->get_req()->get_type()) { + case MSG_MON_HEALTH: + { + MMonHealth *hm = static_cast(op->get_req()); + int service_type = hm->get_service_type(); + if (services.count(service_type) == 0) { + dout(1) << __func__ << " service type " << service_type + << " not registered -- drop message!" 
<< dendl; + return false; + } + return services[service_type]->service_dispatch(op); + } + + case MSG_MON_HEALTH_CHECKS: + return preprocess_health_checks(op); + } + return false; +} + +bool HealthMonitor::prepare_update(MonOpRequestRef op) +{ + return false; +} + +bool HealthMonitor::preprocess_health_checks(MonOpRequestRef op) +{ + MMonHealthChecks *m = static_cast(op->get_req()); + quorum_checks[m->get_source().num()] = m->health_checks; + return true; +} + +void HealthMonitor::tick() +{ + if (!is_active()) { + return; + } + dout(10) << __func__ << dendl; + bool changed = false; + if (check_member_health()) { + changed = true; + } + if (mon->is_leader()) { + if (check_leader_health()) { + changed = true; + } + } + if (changed) { + propose_pending(); + } +} + +bool HealthMonitor::check_member_health() +{ + dout(20) << __func__ << dendl; + bool changed = false; + + // snapshot of usage + DataStats stats; + get_fs_stats(stats.fs_stats, g_conf->mon_data.c_str()); + map extra; + uint64_t store_size = mon->store->get_estimated_size(extra); + assert(store_size > 0); + stats.store_stats.bytes_total = store_size; + stats.store_stats.bytes_sst = extra["sst"]; + stats.store_stats.bytes_log = extra["log"]; + stats.store_stats.bytes_misc = extra["misc"]; + stats.last_update = ceph_clock_now(); + dout(10) << __func__ << " avail " << stats.fs_stats.avail_percent << "%" + << " total " << prettybyte_t(stats.fs_stats.byte_total) + << ", used " << prettybyte_t(stats.fs_stats.byte_used) + << ", avail " << prettybyte_t(stats.fs_stats.byte_avail) << dendl; + + // MON_DISK_{LOW,CRIT,BIG} + health_check_map_t next; + if (stats.fs_stats.avail_percent <= g_conf->mon_data_avail_crit) { + stringstream ss, ss2; + ss << "mon%plurals% %names% %isorare% very low on available space"; + auto& d = next.add("MON_DISK_CRIT", HEALTH_ERR, ss.str()); + ss2 << "mon." 
<< mon->name << " has " << stats.fs_stats.avail_percent + << "% avail"; + d.detail.push_back(ss2.str()); + } else if (stats.fs_stats.avail_percent <= g_conf->mon_data_avail_warn) { + stringstream ss, ss2; + ss << "mon%plurals% %names% %isorare% low on available space"; + auto& d = next.add("MON_DISK_LOW", HEALTH_WARN, ss.str()); // warn threshold only; the crit branch above raises HEALTH_ERR + ss2 << "mon." << mon->name << " has " << stats.fs_stats.avail_percent + << "% avail"; + d.detail.push_back(ss2.str()); + } + if (stats.store_stats.bytes_total >= g_conf->mon_data_size_warn) { + stringstream ss, ss2; + ss << "mon%plurals% %names% %isorare% using a lot of disk space"; + auto& d = next.add("MON_DISK_BIG", HEALTH_WARN, ss.str()); + ss2 << "mon." << mon->name << " is " + << prettybyte_t(stats.store_stats.bytes_total) + << " >= mon_data_size_warn (" + << prettybyte_t(g_conf->mon_data_size_warn) << ")"; + d.detail.push_back(ss2.str()); + } + + auto p = quorum_checks.find(mon->rank); + if (p == quorum_checks.end() || + p->second != next) { + if (mon->is_leader()) { + // prepare to propose + quorum_checks[mon->rank] = next; + changed = true; + } else { + // tell the leader + mon->messenger->send_message(new MMonHealthChecks(next), + mon->monmap->get_inst(mon->get_leader())); + } + } + return changed; +} + +bool HealthMonitor::check_leader_health() +{ + dout(20) << __func__ << dendl; + bool changed = false; + + // prune quorum_health + { + auto& qset = mon->get_quorum(); + auto p = quorum_checks.begin(); + while (p != quorum_checks.end()) { + if (qset.count(p->first) == 0) { + p = quorum_checks.erase(p); + changed = true; + } else { + ++p; + } + } + } + + health_check_map_t next; + + // MON_DOWN + { + int max = mon->monmap->size(); + int actual = mon->get_quorum().size(); + if (actual < max) { + ostringstream ss; + ss << (max-actual) << "/" << max << " mons down, quorum " + << mon->get_quorum_names(); + auto& d = next.add("MON_DOWN", HEALTH_WARN, ss.str()); + set<int> q = mon->get_quorum(); + for (int i=0; i<max; ++i) { // NOTE(review): loop header rebuilt from text damaged by markup stripping; confirm against upstream + if (q.count(i) == 0) { + ostringstream ss; + ss << "mon." << mon->monmap->get_name(i) << " (rank 
" << i + << ") addr " << mon->monmap->get_addr(i) + << " is down (out of quorum)"; + d.detail.push_back(ss.str()); + } + } + } + } + + // MON_CLOCK_SKEW + if (!mon->timecheck_skews.empty()) { + list<string> warns; + list<string> details; + for (map<entity_inst_t,double>::iterator i = mon->timecheck_skews.begin(); + i != mon->timecheck_skews.end(); ++i) { + entity_inst_t inst = i->first; + double skew = i->second; + double latency = mon->timecheck_latencies[inst]; + string name = mon->monmap->get_name(inst.addr); + ostringstream tcss; + health_status_t tcstatus = mon->timecheck_status(tcss, skew, latency); + if (tcstatus != HEALTH_OK) { + warns.push_back(name); + ostringstream tmp_ss; + tmp_ss << "mon." << name + << " addr " << inst.addr << " " << tcss.str() + << " (latency " << latency << "s)"; + details.push_back(tmp_ss.str()); + } + } + if (!warns.empty()) { + ostringstream ss; + ss << "clock skew detected on"; + while (!warns.empty()) { + ss << " mon." << warns.front(); + warns.pop_front(); + if (!warns.empty()) + ss << ","; + } + auto& d = next.add("MON_CLOCK_SKEW", HEALTH_WARN, + ss.str()); // use the per-mon summary built above instead of discarding it + d.detail.swap(details); + } + } + + if (next != leader_checks) { + changed = true; + leader_checks = next; + } + return changed; +} diff --git a/src/mon/HealthMonitor.h b/src/mon/HealthMonitor.h new file mode 100644 index 00000000000..5387ce0340a --- /dev/null +++ b/src/mon/HealthMonitor.h @@ -0,0 +1,71 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Inktank, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ +#ifndef CEPH_HEALTH_MONITOR_H +#define CEPH_HEALTH_MONITOR_H + +#include "mon/PaxosService.h" + +//forward declaration +namespace ceph { class Formatter; } +class HealthService; + +class HealthMonitor : public PaxosService +{ + map services; + version_t version = 0; + map quorum_checks; // for each quorum member + health_check_map_t leader_checks; // leader only + +public: + HealthMonitor(Monitor *m, Paxos *p, const string& service_name); + ~HealthMonitor() override { + assert(services.empty()); + } + + /** + * @defgroup HealthMonitor_Inherited_h Inherited abstract methods + * @{ + */ + void init() override; + + void get_health( + list >& summary, + list > *detail, + CephContext *cct) const override {} + + bool preprocess_query(MonOpRequestRef op) override; + bool prepare_update(MonOpRequestRef op) override; + + bool preprocess_health_checks(MonOpRequestRef op); + bool prepare_health_checks(MonOpRequestRef op); + + bool check_leader_health(); + bool check_member_health(); + + void create_initial() override; + void update_from_paxos(bool *need_bootstrap) override; + void create_pending() override; + void encode_pending(MonitorDBStore::TransactionRef t) override; + version_t get_trim_to() override; + + void encode_full(MonitorDBStore::TransactionRef t) override { } + + void tick() override; + + /** + * @} // HealthMonitor_Inherited_h + */ +}; + +#endif // CEPH_HEALTH_MONITOR_H diff --git a/src/mon/MgrMonitor.cc b/src/mon/MgrMonitor.cc index 234453c7a7e..b01028ef33e 100644 --- a/src/mon/MgrMonitor.cc +++ b/src/mon/MgrMonitor.cc @@ -60,6 +60,8 @@ void MgrMonitor::update_from_paxos(bool *need_bootstrap) dout(4) << "active server: " << map.active_addr << "(" << map.active_gid << ")" << dendl; + load_health(); + if (map.available) { first_seen_inactive = utime_t(); } else { @@ -86,6 +88,18 @@ void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t) pending_map.encode(bl, mon->get_quorum_con_features()); put_version(t, pending_map.epoch, bl); 
put_last_committed(t, pending_map.epoch); + + health_check_map_t next; + if (!pending_map.available) { + health_status_t level = HEALTH_WARN; + utime_t now = ceph_clock_now(); + if (first_seen_inactive != utime_t() && + now - first_seen_inactive > g_conf->mon_mgr_inactive_grace) { + level = HEALTH_ERR; + } + next.add("MGR_DOWN", level, "no active mgr"); + } + encode_health(next, t); } bool MgrMonitor::check_caps(MonOpRequestRef op, const uuid_d& fsid) diff --git a/src/mon/MgrStatMonitor.cc b/src/mon/MgrStatMonitor.cc index add84e278b7..5c37433458d 100644 --- a/src/mon/MgrStatMonitor.cc +++ b/src/mon/MgrStatMonitor.cc @@ -71,7 +71,7 @@ MonPGStatService *MgrStatMonitor::get_pg_stat_service() void MgrStatMonitor::create_initial() { - dout(10) << dendl; + dout(10) << __func__ << dendl; version = 0; service_map.epoch = 1; ::encode(service_map, pending_service_map_bl, CEPH_FEATURES_ALL); diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index acb50bc5081..0330236a840 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -78,6 +78,7 @@ #include "MgrStatMonitor.h" #include "mon/QuorumService.h" #include "mon/OldHealthMonitor.h" +#include "mon/HealthMonitor.h" #include "mon/ConfigKeyService.h" #include "common/config.h" #include "common/cmdparse.h" @@ -204,6 +205,7 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorDBStore *s, paxos_service[PAXOS_AUTH] = new AuthMonitor(this, paxos, "auth"); paxos_service[PAXOS_MGR] = new MgrMonitor(this, paxos, "mgr"); paxos_service[PAXOS_MGRSTAT] = new MgrStatMonitor(this, paxos, "mgrstat"); + paxos_service[PAXOS_HEALTH] = new HealthMonitor(this, paxos, "health"); health_monitor = new OldHealthMonitor(this); config_key_service = new ConfigKeyService(this, paxos); @@ -2445,6 +2447,115 @@ void Monitor::do_health_to_clog(bool force) health_status_cache.summary = summary; } +health_status_t Monitor::get_health_status( + bool want_detail, + Formatter *f, + std::string *plain, + const char *sep1, + const char *sep2) +{ + 
health_status_t r = HEALTH_OK; + bool compat = g_conf->mon_health_preluminous_compat; + if (f) { + f->open_object_section("health"); + f->open_object_section("checks"); + } + + string summary; + string *psummary = f ? nullptr : &summary; + for (auto& svc : paxos_service) { + r = std::min(r, svc->get_health_checks().dump_summary( + f, psummary, sep2, want_detail)); + } + + if (f) { + f->close_section(); + f->dump_stream("status") << r; + } else { + // one-liner: HEALTH_FOO[ thing1[; thing2 ...]] + *plain = stringify(r); + if (summary.size()) { + *plain += sep1; + *plain += summary; + } + *plain += "\n"; + } + + if (f && compat) { + f->open_array_section("summary"); + for (auto& svc : paxos_service) { + svc->get_health_checks().dump_summary_compat(f); + } + f->close_section(); + f->dump_stream("overall_status") << r; + } + + if (want_detail) { + if (f && compat) { + f->open_array_section("detail"); + } + + for (auto& svc : paxos_service) { + svc->get_health_checks().dump_detail(f, plain, compat); + } + + if (f && compat) { + f->close_section(); + } + } + if (f) { + f->close_section(); + } + return r; +} + +void Monitor::log_health( + const health_check_map_t& updated, + const health_check_map_t& previous, + MonitorDBStore::TransactionRef t) +{ + if (!g_conf->mon_health_to_clog) { + return; + } + // FIXME: log atomically as part of @t instead of using clog. 
+ dout(10) << __func__ << " updated " << updated.checks.size() + << " previous " << previous.checks.size() + << dendl; + for (auto& p : updated.checks) { + auto q = previous.checks.find(p.first); + if (q == previous.checks.end()) { + // new + ostringstream ss; + ss << p.second.severity << " " << p.first << ": " + << p.second.summary; + if (p.second.severity == HEALTH_WARN) + clog->warn() << ss.str(); + else + clog->error() << ss.str(); + } else { + if (p.second.summary != q->second.summary || + p.second.severity != q->second.severity) { + // summary or severity changed (ignore detail changes at this level) + ostringstream ss; + ss << p.second.severity << " " << p.first << " (update): " + << p.second.summary; + if (p.second.severity == HEALTH_WARN) + clog->warn() << ss.str(); + else + clog->error() << ss.str(); + } + } + } + for (auto& p : previous.checks) { + if (!updated.checks.count(p.first)) { + // cleared + ostringstream ss; + ss << HEALTH_OK << " " << p.first << ": " << p.second.summary; + clog->info() << ss.str(); + } + } +} + health_status_t Monitor::get_health(list& status, bufferlist *detailbl, Formatter *f) @@ -2550,12 +2661,9 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f) if (f) f->open_object_section("status"); - // reply with the status for all the components - list health; - get_health(health, NULL, f); - if (f) { f->dump_stream("fsid") << monmap->get_fsid(); + get_health_status(false, f, nullptr); f->dump_unsigned("election_epoch", get_epoch()); { f->open_array_section("quorum"); @@ -2579,7 +2687,6 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f) f->open_object_section("fsmap"); mdsmon()->get_fsmap().print_summary(f, NULL); f->close_section(); - f->open_object_section("mgrmap"); mgrmon()->get_map().print_summary(f, nullptr); f->close_section(); @@ -2587,11 +2694,21 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f) f->dump_object("servicemap", mgrstatmon()->get_service_map()); 
f->close_section(); } else { - ss << " cluster:\n"; ss << " id: " << monmap->get_fsid() << "\n"; - ss << " health: " << joinify(health.begin(), health.end(), - string("\n ")) << "\n"; + + string health; + if (osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) { + get_health_status(false, nullptr, &health, + "\n ", "\n "); + } else { + list ls; + get_health(ls, NULL, f); + health = joinify(ls.begin(), ls.end(), + string("\n ")); + } + ss << " health: " << health << "\n"; + ss << "\n \n services:\n"; { size_t maxlen = 3; @@ -3112,25 +3229,35 @@ void Monitor::handle_command(MonOpRequestRef op) } rdata.append(ds); } else if (prefix == "health") { - list health_str; - get_health(health_str, detail == "detail" ? &rdata : NULL, f.get()); - if (f) { - f->flush(ds); - ds << '\n'; + if (osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) { + string plain; + get_health_status(detail == "detail", f.get(), f ? nullptr : &plain); + if (f) { + f->flush(rdata); + } else { + rdata.append(plain); + } } else { - assert(!health_str.empty()); - ds << health_str.front(); - health_str.pop_front(); - if (!health_str.empty()) { - ds << ' '; - ds << joinify(health_str.begin(), health_str.end(), string("; ")); + list health_str; + get_health(health_str, detail == "detail" ? 
&rdata : NULL, f.get()); + if (f) { + f->flush(ds); + ds << '\n'; + } else { + assert(!health_str.empty()); + ds << health_str.front(); + health_str.pop_front(); + if (!health_str.empty()) { + ds << ' '; + ds << joinify(health_str.begin(), health_str.end(), string("; ")); + } } + bufferlist comb; + comb.append(ds); + if (detail == "detail") + comb.append(rdata); + rdata = comb; } - bufferlist comb; - comb.append(ds); - if (detail == "detail") - comb.append(rdata); - rdata = comb; } else if (prefix == "df") { bool verbose = (detail == "detail"); if (f) @@ -4119,6 +4246,11 @@ void Monitor::dispatch_op(MonOpRequestRef op) health_monitor->dispatch(op); break; + case MSG_MON_HEALTH_CHECKS: + op->set_type_service(); + paxos_service[PAXOS_HEALTH]->dispatch(op); + break; + default: dealt_with = false; break; diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 7b3aa0522c5..fa7f9e9acdd 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -32,6 +32,7 @@ #include "common/Timer.h" +#include "health_check.h" #include "MonMap.h" #include "Elector.h" #include "Paxos.h" @@ -497,6 +498,7 @@ private: version_t timecheck_round; unsigned int timecheck_acks; utime_t timecheck_round_start; + friend class HealthMonitor; /* When we hit a skew we will start a new round based off of * 'mon_timecheck_skew_interval'. 
Each new round will be backed off * until we hit 'mon_timecheck_interval' -- which is the typical @@ -649,6 +651,10 @@ public: return (class MgrStatMonitor*) paxos_service[PAXOS_MGRSTAT]; } + class HealthMonitor *healthmon() { // accessor for the health service (PAXOS_HEALTH slot) + return (class HealthMonitor*) paxos_service[PAXOS_HEALTH]; + } + friend class Paxos; friend class OSDMonitor; friend class MDSMonitor; @@ -738,6 +744,18 @@ public: */ health_status_t get_health(list& status, bufferlist *detailbl, Formatter *f); + + health_status_t get_health_status( + bool want_detail, + Formatter *f, + std::string *plain, + const char *sep1 = " ", + const char *sep2 = "; "); + void log_health( + const health_check_map_t& updated, + const health_check_map_t& previous, + MonitorDBStore::TransactionRef t); + void get_cluster_status(stringstream &ss, Formatter *f); void reply_command(MonOpRequestRef op, int rc, const string &rs, version_t version); diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index a22fd324495..c39556ac8a4 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -275,6 +275,8 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap) mapping_job.reset(); } + load_health(); + /* * We will possibly have a stashed latest that *we* wrote, and we will * always be sure to have the oldest full map in the first..last range @@ -1101,6 +1103,19 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t) ::encode(pending_creatings, creatings_bl); t->put(OSD_PG_CREATING_PREFIX, "creating", creatings_bl); } + + // health + _check_health(tmp, t); +} + +void OSDMonitor::_check_health( + const OSDMap& nextmap, + MonitorDBStore::TransactionRef t) +{ + dout(20) << __func__ << dendl; + health_check_map_t next; +#warning write me + encode_health(next, t); } void OSDMonitor::trim_creating_pgs(creating_pgs_t* creating_pgs, diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 9a944107970..0682abfdbfe 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -162,6 +162,8 @@ public: 
FAST_READ_DEFAULT }; + void _check_health(const OSDMap& next, MonitorDBStore::TransactionRef t); + // svc public: void create_initial() override; diff --git a/src/mon/PaxosService.cc b/src/mon/PaxosService.cc index b133fc1a582..91152943b06 100644 --- a/src/mon/PaxosService.cc +++ b/src/mon/PaxosService.cc @@ -431,3 +431,12 @@ void PaxosService::trim(MonitorDBStore::TransactionRef t, } } +void PaxosService::load_health() +{ + bufferlist bl; + mon->store->get("health", service_name, bl); + if (bl.length()) { + auto p = bl.begin(); + ::decode(health_checks, p); + } +} diff --git a/src/mon/PaxosService.h b/src/mon/PaxosService.h index ca75915841e..da3038ff1e9 100644 --- a/src/mon/PaxosService.h +++ b/src/mon/PaxosService.h @@ -77,15 +77,23 @@ protected: */ bool have_pending; -protected: + /** + * health checks for this service + * + * Child must populate this during encode_pending() by calling encode_health(). + */ + health_check_map_t health_checks; +public: + const health_check_map_t& get_health_checks() { + return health_checks; + } +protected: /** * format of our state in leveldb, 0 for default */ version_t format_version; - - /** * @defgroup PaxosService_h_callbacks Callback classes * @{ @@ -428,6 +436,15 @@ public: list > *detail, CephContext *cct) const { } + void encode_health(const health_check_map_t& next, + MonitorDBStore::TransactionRef t) { + bufferlist bl; + ::encode(next, bl); + t->put("health", service_name, bl); + mon->log_health(next, health_checks, t); + } + void load_health(); + private: /** * @defgroup PaxosService_h_store_keys Set of keys that are usually used on diff --git a/src/mon/health_check.h b/src/mon/health_check.h new file mode 100644 index 00000000000..e9e53836e47 --- /dev/null +++ b/src/mon/health_check.h @@ -0,0 +1,192 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include +#include + +#include "include/health.h" +#include "common/Formatter.h" + +struct 
health_check_t { + health_status_t severity; + std::string summary; + std::list detail; + + DENC(health_check_t, v, p) { + DENC_START(1, 1, p); + denc(v.severity, p); + denc(v.summary, p); + denc(v.detail, p); + DENC_FINISH(p); + } + + friend bool operator==(const health_check_t& l, + const health_check_t& r) { + return l.severity == r.severity && + l.summary == r.summary && + l.detail == r.detail; + } + friend bool operator!=(const health_check_t& l, + const health_check_t& r) { + return !(l == r); + } + + void dump(Formatter *f) const { + f->dump_stream("severity") << severity; + f->dump_string("summary", summary); + f->open_array_section("detail"); + for (auto& p : detail) { + f->dump_string("item", p); + } + f->close_section(); + } + + static void generate_test_instances(list& ls) { + ls.push_back(new health_check_t); + ls.push_back(new health_check_t); + ls.back()->severity = HEALTH_ERR; + ls.back()->summary = "summarization"; + ls.back()->detail = {"one", "two", "three"}; + } +}; +WRITE_CLASS_DENC(health_check_t) + + +struct health_check_map_t { + map checks; + + DENC(health_check_map_t, v, p) { + DENC_START(1, 1, p); + denc(v.checks, p); + DENC_FINISH(p); + } + + void dump(Formatter *f) const { + for (auto& p : checks) { + f->dump_object(p.first.c_str(), p.second); + } + } + + static void generate_test_instances(list& ls) { + ls.push_back(new health_check_map_t); + ls.push_back(new health_check_map_t); + { + auto& d = ls.back()->add("FOO", HEALTH_WARN, "foo"); + d.detail.push_back("a"); + d.detail.push_back("b"); + } + { + auto& d = ls.back()->add("BAR", HEALTH_ERR, "bar!"); + d.detail.push_back("c"); + d.detail.push_back("d"); + } + } + + void clear() { + checks.clear(); + } + void swap(health_check_map_t& other) { + checks.swap(other.checks); + } + + health_check_t& add(const std::string& code, + health_status_t severity, + const std::string& summary) { + assert(checks.count(code) == 0); + health_check_t& r = checks[code]; + r.severity = severity; + 
r.summary = summary; + return r; + } + + void merge(const health_check_map_t& o) { + for (auto& p : o.checks) { + auto q = checks.find(p.first); + if (q == checks.end()) { + // new check + checks[p.first] = p.second; + } else { + // merge details, and hope the summary matches! + q->second.detail.insert( + q->second.detail.end(), + p.second.detail.begin(), + p.second.detail.end()); + } + } + } + + health_status_t dump_summary(Formatter *f, std::string *plain, + const char *sep, bool detail) const { + health_status_t r = HEALTH_OK; + for (auto& p : checks) { + if (r > p.second.severity) { + r = p.second.severity; + } + if (f) { + f->open_object_section(p.first.c_str()); + f->dump_stream("severity") << p.second.severity; + f->dump_string("message", p.second.summary); + if (detail) { + f->open_array_section("detail"); + for (auto& d : p.second.detail) { + f->dump_string("item", d); + } + f->close_section(); + } + f->close_section(); + } else { + if (!plain->empty()) { + *plain += sep; + } + *plain += p.second.summary; + } + } + return r; + } + + void dump_summary_compat(Formatter *f) const { + for (auto& p : checks) { + f->open_object_section("item"); + f->dump_stream("severity") << p.second.severity; + f->dump_string("summary", p.second.summary); + f->close_section(); + } + } + + void dump_detail(Formatter *f, std::string *plain, bool compat) const { + for (auto& p : checks) { + if (f) { + if (compat) { + // this is sloppy, but the best we can do: just dump all of the + // individual checks' details together + for (auto& d : p.second.detail) { + f->dump_string("item", d); + } + } + } else { + if (!compat) { + *plain += p.first + " " + p.second.summary + "\n"; + } + for (auto& d : p.second.detail) { + if (!compat) { + *plain += " "; + } + *plain += d; + *plain += "\n"; + } + } + } + } + + friend bool operator==(const health_check_map_t& l, + const health_check_map_t& r) { + return l.checks == r.checks; + } + friend bool operator!=(const health_check_map_t& l, + const 
health_check_map_t& r) { + return !(l == r); + } +}; +WRITE_CLASS_DENC(health_check_map_t) diff --git a/src/mon/mon_types.h b/src/mon/mon_types.h index 883f4669e2b..a23238b7d0b 100644 --- a/src/mon/mon_types.h +++ b/src/mon/mon_types.h @@ -31,7 +31,8 @@ #define PAXOS_AUTH 5 #define PAXOS_MGR 6 #define PAXOS_MGRSTAT 7 -#define PAXOS_NUM 8 +#define PAXOS_HEALTH 8 +#define PAXOS_NUM 9 inline const char *get_paxos_name(int p) { switch (p) { @@ -43,6 +44,7 @@ inline const char *get_paxos_name(int p) { case PAXOS_AUTH: return "auth"; case PAXOS_MGR: return "mgr"; case PAXOS_MGRSTAT: return "mgrstat"; + case PAXOS_HEALTH: return "health"; default: ceph_abort(); return 0; } } diff --git a/src/msg/Message.cc b/src/msg/Message.cc index 4860889989f..9d1953d75b1 100644 --- a/src/msg/Message.cc +++ b/src/msg/Message.cc @@ -96,6 +96,7 @@ using namespace std; #include "messages/MMonGetVersion.h" #include "messages/MMonGetVersionReply.h" #include "messages/MMonHealth.h" +#include "messages/MMonHealthChecks.h" #include "messages/MMonMetadata.h" #include "messages/MDataPing.h" #include "messages/MAuth.h" @@ -783,6 +784,11 @@ Message *decode_message(CephContext *cct, int crcflags, case MSG_MON_HEALTH: m = new MMonHealth(); break; + + case MSG_MON_HEALTH_CHECKS: + m = new MMonHealthChecks(); + break; + #if defined(HAVE_XIO) case MSG_DATA_PING: m = new MDataPing(); diff --git a/src/msg/Message.h b/src/msg/Message.h index 611d691df99..d1b63ac1f21 100644 --- a/src/msg/Message.h +++ b/src/msg/Message.h @@ -183,6 +183,8 @@ // Special #define MSG_NOP 0x607 +#define MSG_MON_HEALTH_CHECKS 0x608 + // *** ceph-mgr <-> OSD/MDS daemons *** #define MSG_MGR_OPEN 0x700 #define MSG_MGR_CONFIGURE 0x701