]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mon: new health check framework
authorSage Weil <sage@redhat.com>
Mon, 12 Jun 2017 22:39:15 +0000 (18:39 -0400)
committerSage Weil <sage@redhat.com>
Wed, 12 Jul 2017 16:51:30 +0000 (12:51 -0400)
Signed-off-by: Sage Weil <sage@redhat.com>
17 files changed:
src/common/config_opts.h
src/messages/MMonHealthChecks.h [new file with mode: 0644]
src/mon/CMakeLists.txt
src/mon/HealthMonitor.cc [new file with mode: 0644]
src/mon/HealthMonitor.h [new file with mode: 0644]
src/mon/MgrMonitor.cc
src/mon/MgrStatMonitor.cc
src/mon/Monitor.cc
src/mon/Monitor.h
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h
src/mon/PaxosService.cc
src/mon/PaxosService.h
src/mon/health_check.h [new file with mode: 0644]
src/mon/mon_types.h
src/msg/Message.cc
src/msg/Message.h

index d879a2c38a85825e6cddab8c5c01df510d06e604..a684e536f9e5d1f442083d984820b43bb1129729 100644 (file)
@@ -309,7 +309,6 @@ OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for cl
 OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock drift check) interval (seconds)
 OPTION(mon_timecheck_skew_interval, OPT_FLOAT, 30.0) // on leader, timecheck (clock drift check) interval when in presence of a skew (seconds)
 OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info)
-OPTION(mon_health_max_detail, OPT_INT, 50) // max detailed pgs to report in health detail
 OPTION(mon_pg_min_inactive, OPT_U64, 1) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR.
 OPTION(mon_pg_warn_min_per_osd, OPT_INT, 30)  // min # pgs per (in) osd before we warn the admin
 OPTION(mon_pg_warn_max_per_osd, OPT_INT, 300)  // max # pgs per (in) osd before we warn the admin
@@ -352,6 +351,8 @@ OPTION(mon_health_data_update_interval, OPT_FLOAT, 60.0)
 OPTION(mon_health_to_clog, OPT_BOOL, true)
 OPTION(mon_health_to_clog_interval, OPT_INT, 3600)
 OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE, 60.0)
+OPTION(mon_health_preluminous_compat, OPT_BOOL, false)
+OPTION(mon_health_max_detail, OPT_INT, 50) // max detailed pgs to report in health detail
 OPTION(mon_data_avail_crit, OPT_INT, 5)
 OPTION(mon_data_avail_warn, OPT_INT, 30)
 OPTION(mon_data_size_warn, OPT_U64, 15*1024*1024*1024) // issue a warning when the monitor's data store goes over 15GB (in bytes)
diff --git a/src/messages/MMonHealthChecks.h b/src/messages/MMonHealthChecks.h
new file mode 100644 (file)
index 0000000..6b66847
--- /dev/null
@@ -0,0 +1,47 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_MMON_HEALTH_CHECKS_H
+#define CEPH_MMON_HEALTH_CHECKS_H
+
+#include "messages/PaxosServiceMessage.h"
+#include "mon/health_check.h"
+
+struct MMonHealthChecks : public PaxosServiceMessage
+{
+  static const int HEAD_VERSION = 1;
+  static const int COMPAT_VERSION = 1;
+
+  health_check_map_t health_checks;
+
+  MMonHealthChecks()
+    : PaxosServiceMessage(MSG_MON_HEALTH_CHECKS, HEAD_VERSION, COMPAT_VERSION) {
+  }
+  MMonHealthChecks(health_check_map_t& m)
+    : PaxosServiceMessage(MSG_MON_HEALTH_CHECKS, HEAD_VERSION, COMPAT_VERSION),
+      health_checks(m) {
+  }
+
+private:
+  ~MMonHealthChecks() override { }
+
+public:
+  const char *get_type_name() const override { return "mon_health_checks"; }
+  void print(ostream &o) const override {
+    o << "mon_health_checks(" << health_checks.checks.size() << " checks)";
+  }
+
+  void decode_payload() override {
+    bufferlist::iterator p = payload.begin();
+    paxos_decode(p);
+    ::decode(health_checks, p);
+  }
+
+  void encode_payload(uint64_t features) override {
+    paxos_encode();
+    ::encode(health_checks, payload);
+  }
+
+};
+
+#endif
index 1ba6802e120639adb852e33e3b33740b4eec826f..556157132f805da9c670de6dfe317baec9ddd17a 100644 (file)
@@ -15,6 +15,7 @@ set(lib_mon_srcs
   LogMonitor.cc
   AuthMonitor.cc
   Elector.cc
+  HealthMonitor.cc
   OldHealthMonitor.cc
   DataHealthService.cc
   PGMonitor.cc
diff --git a/src/mon/HealthMonitor.cc b/src/mon/HealthMonitor.cc
new file mode 100644 (file)
index 0000000..2742751
--- /dev/null
@@ -0,0 +1,355 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank, Inc
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include <stdlib.h>
+#include <limits.h>
+#include <sstream>
+#include <boost/regex.hpp>
+
+#include "include/assert.h"
+#include "include/stringify.h"
+
+#include "mon/Monitor.h"
+#include "mon/HealthService.h"
+#include "mon/HealthMonitor.h"
+#include "mon/DataHealthService.h"
+
+#include "messages/MMonHealth.h"
+#include "messages/MMonHealthChecks.h"
+
+#include "common/Formatter.h"
+
+#define dout_subsys ceph_subsys_mon
+#undef dout_prefix
+#define dout_prefix _prefix(_dout, mon, this)
+static ostream& _prefix(std::ostream *_dout, const Monitor *mon,
+                        const HealthMonitor *hmon) {
+  return *_dout << "mon." << mon->name << "@" << mon->rank
+               << "(" << mon->get_state_name() << ").health ";
+}
+
+HealthMonitor::HealthMonitor(Monitor *m, Paxos *p, const string& service_name)
+  : PaxosService(m, p, service_name) {
+}
+
+void HealthMonitor::init()
+{
+  dout(10) << __func__ << dendl;
+}
+
+void HealthMonitor::create_initial()
+{
+  dout(10) << __func__ << dendl;
+}
+
+void HealthMonitor::update_from_paxos(bool *need_bootstrap)
+{
+  version = get_last_committed();
+  dout(10) << __func__ << dendl;
+  load_health();
+
+  bufferlist qbl;
+  mon->store->get(service_name, "quorum", qbl);
+  if (qbl.length()) {
+    auto p = qbl.begin();
+    ::decode(quorum_checks, p);
+  } else {
+    quorum_checks.clear();
+  }
+
+  bufferlist lbl;
+  mon->store->get(service_name, "leader", lbl);
+  if (lbl.length()) {
+    auto p = lbl.begin();
+    ::decode(leader_checks, p);
+  } else {
+    leader_checks.clear();
+  }
+
+  dout(20) << "dump:";
+  JSONFormatter jf(true);
+  jf.open_object_section("health");
+  jf.open_object_section("quorum_health");
+  for (auto& p : quorum_checks) {
+    string s = string("mon.") + stringify(p.first);
+    jf.dump_object(s.c_str(), p.second);
+  }
+  jf.close_section();
+  jf.dump_object("leader_health", leader_checks);
+  jf.close_section();
+  jf.flush(*_dout);
+  *_dout << dendl;
+}
+
+void HealthMonitor::create_pending()
+{
+  dout(10) << " " << version << dendl;
+}
+
+void HealthMonitor::encode_pending(MonitorDBStore::TransactionRef t)
+{
+  ++version;
+  dout(10) << " " << version << dendl;
+  put_last_committed(t, version);
+
+  bufferlist qbl;
+  ::encode(quorum_checks, qbl);
+  t->put(service_name, "quorum", qbl);
+  bufferlist lbl;
+  ::encode(leader_checks, lbl);
+  t->put(service_name, "leader", lbl);
+
+  health_check_map_t pending_health;
+
+  // combine per-mon details carefully...
+  map<string,set<string>> names; // code -> <mon names>
+  for (auto p : quorum_checks) {
+    for (auto q : p.second.checks) {
+      names[q.first].insert(mon->monmap->get_name(p.first));
+    }
+    pending_health.merge(p.second);
+  }
+  for (auto p : pending_health.checks) {
+    p.second.summary = boost::regex_replace(
+      p.second.summary,
+      boost::regex("%num%"), stringify(names[p.first].size()));
+    p.second.summary = boost::regex_replace(
+      p.second.summary,
+      boost::regex("%names%"), stringify(names[p.first]));
+    p.second.summary = boost::regex_replace(
+      p.second.summary,
+      boost::regex("%plurals%"),
+      names[p.first].size() > 1 ? "s" : "");
+    p.second.summary = boost::regex_replace(
+      p.second.summary,
+      boost::regex("%isorare%"),
+      names[p.first].size() > 1 ? "are" : "is");
+  }
+
+  pending_health.merge(leader_checks);
+  encode_health(pending_health, t);
+}
+
+// Version up to which old states may be trimmed: everything except the five
+// most recent versions, or 0 (no trim) while there are five or fewer.
+version_t HealthMonitor::get_trim_to()
+{
+  // None of the old states is actually needed; a few are kept regardless.
+  const version_t keep = 5;
+  return version > keep ? version - keep : 0;
+}
+
+bool HealthMonitor::preprocess_query(MonOpRequestRef op)
+{
+  switch (op->get_req()->get_type()) {
+  case MSG_MON_HEALTH:
+    {
+      MMonHealth *hm = static_cast<MMonHealth*>(op->get_req());
+      int service_type = hm->get_service_type();
+      if (services.count(service_type) == 0) {
+       dout(1) << __func__ << " service type " << service_type
+               << " not registered -- drop message!" << dendl;
+       return false;
+      }
+      return services[service_type]->service_dispatch(op);
+    }
+
+  case MSG_MON_HEALTH_CHECKS:
+    return preprocess_health_checks(op);
+  }
+  return false;
+}
+
+bool HealthMonitor::prepare_update(MonOpRequestRef op)
+{
+  return false;
+}
+
+bool HealthMonitor::preprocess_health_checks(MonOpRequestRef op)
+{
+  MMonHealthChecks *m = static_cast<MMonHealthChecks*>(op->get_req());
+  quorum_checks[m->get_source().num()] = m->health_checks;
+  return true;
+}
+
+// Periodic work: while the service is active, re-evaluate this monitor's own
+// checks and, on the leader, the leader-only checks; propose if either
+// evaluation reported a change.
+void HealthMonitor::tick()
+{
+  if (is_active()) {
+    dout(10) << __func__ << dendl;
+    // the member checks always run; the leader checks only on the leader
+    bool dirty = check_member_health();
+    if (mon->is_leader() && check_leader_health()) {
+      dirty = true;
+    }
+    if (dirty) {
+      propose_pending();
+    }
+  }
+}
+
+bool HealthMonitor::check_member_health()
+{
+  dout(20) << __func__ << dendl;
+  bool changed = false;
+
+  // snapshot of usage
+  DataStats stats;
+  get_fs_stats(stats.fs_stats, g_conf->mon_data.c_str());
+  map<string,uint64_t> extra;
+  uint64_t store_size = mon->store->get_estimated_size(extra);
+  assert(store_size > 0);
+  stats.store_stats.bytes_total = store_size;
+  stats.store_stats.bytes_sst = extra["sst"];
+  stats.store_stats.bytes_log = extra["log"];
+  stats.store_stats.bytes_misc = extra["misc"];
+  stats.last_update = ceph_clock_now();
+  dout(10) << __func__ << " avail " << stats.fs_stats.avail_percent << "%"
+          << " total " << prettybyte_t(stats.fs_stats.byte_total)
+          << ", used " << prettybyte_t(stats.fs_stats.byte_used)
+          << ", avail " << prettybyte_t(stats.fs_stats.byte_avail) << dendl;
+
+  // MON_DISK_{LOW,CRIT,BIG}
+  health_check_map_t next;
+  if (stats.fs_stats.avail_percent <= g_conf->mon_data_avail_crit) {
+    stringstream ss, ss2;
+    ss << "mon%plurals% %names% %isorare% very low on available space";
+    auto& d = next.add("MON_DISK_CRIT", HEALTH_ERR, ss.str());
+    ss2 << "mon." << mon->name << " has " << stats.fs_stats.avail_percent
+       << "% avail";
+    d.detail.push_back(ss2.str());
+  } else if (stats.fs_stats.avail_percent <= g_conf->mon_data_avail_warn) {
+    stringstream ss, ss2;
+    ss << "mon%plurals% %names% %isorare% low on available space";
+    auto& d = next.add("MON_DISK_LOW", HEALTH_ERR, ss.str());
+    ss2 << "mon." << mon->name << " has " << stats.fs_stats.avail_percent
+       << "% avail";
+    d.detail.push_back(ss2.str());
+  }
+  if (stats.store_stats.bytes_total >= g_conf->mon_data_size_warn) {
+    stringstream ss, ss2;
+    ss << "mon%plurals% %names% %isorare% using a lot of disk space";
+    auto& d = next.add("MON_DISK_BIG", HEALTH_WARN, ss.str());
+    ss2 << "mon." << mon->name << " is "
+       << prettybyte_t(stats.store_stats.bytes_total)
+       << " >= mon_data_size_warn ("
+       << prettybyte_t(g_conf->mon_data_size_warn) << ")";
+    d.detail.push_back(ss2.str());
+  }
+
+  auto p = quorum_checks.find(mon->rank);
+  if (p == quorum_checks.end() ||
+      p->second != next) {
+    if (mon->is_leader()) {
+      // prepare to propose
+      quorum_checks[mon->rank] = next;
+      changed = true;
+    } else {
+      // tell the leader
+      mon->messenger->send_message(new MMonHealthChecks(next),
+                                  mon->monmap->get_inst(mon->get_leader()));
+    }
+  }
+  return changed;
+}
+
+bool HealthMonitor::check_leader_health()
+{
+  dout(20) << __func__ << dendl;
+  bool changed = false;
+
+  // prune quorum_health
+  {
+    auto& qset = mon->get_quorum();
+    auto p = quorum_checks.begin();
+    while (p != quorum_checks.end()) {
+      if (qset.count(p->first) == 0) {
+       p = quorum_checks.erase(p);
+       changed = true;
+      } else {
+       ++p;
+      }
+    }
+  }
+
+  health_check_map_t next;
+
+  // MON_DOWN
+  {
+    int max = mon->monmap->size();
+    int actual = mon->get_quorum().size();
+    if (actual < max) {
+      ostringstream ss;
+      ss << (max-actual) << "/" << max << " mons down, quorum "
+        << mon->get_quorum_names();
+      auto& d = next.add("MON_DOWN", HEALTH_WARN, ss.str());
+      set<int> q = mon->get_quorum();
+      for (int i=0; i<max; i++) {
+       if (q.count(i) == 0) {
+         ostringstream ss;
+         ss << "mon." << mon->monmap->get_name(i) << " (rank " << i
+            << ") addr " << mon->monmap->get_addr(i)
+            << " is down (out of quorum)";
+         d.detail.push_back(ss.str());
+       }
+      }
+    }
+  }
+
+  // MON_CLOCK_SKEW
+  if (!mon->timecheck_skews.empty()) {
+    list<string> warns;
+    list<string> details;
+    for (map<entity_inst_t,double>::iterator i = mon->timecheck_skews.begin();
+        i != mon->timecheck_skews.end(); ++i) {
+      entity_inst_t inst = i->first;
+      double skew = i->second;
+      double latency = mon->timecheck_latencies[inst];
+      string name = mon->monmap->get_name(inst.addr);
+      ostringstream tcss;
+      health_status_t tcstatus = mon->timecheck_status(tcss, skew, latency);
+      if (tcstatus != HEALTH_OK) {
+       warns.push_back(name);
+       ostringstream tmp_ss;
+       tmp_ss << "mon." << name
+              << " addr " << inst.addr << " " << tcss.str()
+              << " (latency " << latency << "s)";
+       details.push_back(tmp_ss.str());
+      }
+    }
+    if (!warns.empty()) {
+      ostringstream ss;
+      ss << "clock skew detected on";
+      while (!warns.empty()) {
+       ss << " mon." << warns.front();
+       warns.pop_front();
+       if (!warns.empty())
+         ss << ",";
+      }
+      auto& d = next.add("MON_CLOCK_SKEW", HEALTH_WARN,
+                        "monitor clock skew detected");
+      d.detail.swap(details);
+    }
+  }
+
+  if (next != leader_checks) {
+    changed = true;
+    leader_checks = next;
+  }
+  return changed;
+}
diff --git a/src/mon/HealthMonitor.h b/src/mon/HealthMonitor.h
new file mode 100644 (file)
index 0000000..5387ce0
--- /dev/null
@@ -0,0 +1,71 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank, Inc
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+#ifndef CEPH_HEALTH_MONITOR_H
+#define CEPH_HEALTH_MONITOR_H
+
+#include "mon/PaxosService.h"
+
+//forward declaration
+namespace ceph { class Formatter; }
+class HealthService;
+
+class HealthMonitor : public PaxosService
+{
+  map<int,HealthService*> services;
+  version_t version = 0;
+  map<int,health_check_map_t> quorum_checks;  // for each quorum member
+  health_check_map_t leader_checks;           // leader only
+
+public:
+  HealthMonitor(Monitor *m, Paxos *p, const string& service_name);
+  ~HealthMonitor() override {
+    assert(services.empty());
+  }
+
+  /**
+   * @defgroup HealthMonitor_Inherited_h Inherited abstract methods
+   * @{
+   */
+  void init() override;
+
+  void get_health(
+    list<pair<health_status_t,string> >& summary,
+    list<pair<health_status_t,string> > *detail,
+    CephContext *cct) const override {}
+
+  bool preprocess_query(MonOpRequestRef op) override;
+  bool prepare_update(MonOpRequestRef op) override;
+
+  bool preprocess_health_checks(MonOpRequestRef op);
+  bool prepare_health_checks(MonOpRequestRef op);
+
+  bool check_leader_health();
+  bool check_member_health();
+
+  void create_initial() override;
+  void update_from_paxos(bool *need_bootstrap) override;
+  void create_pending() override;
+  void encode_pending(MonitorDBStore::TransactionRef t) override;
+  version_t get_trim_to() override;
+
+  void encode_full(MonitorDBStore::TransactionRef t) override { }
+
+  void tick() override;
+
+  /**
+   * @} // HealthMonitor_Inherited_h
+   */
+};
+
+#endif // CEPH_HEALTH_MONITOR_H
index 234453c7a7e2d67f7e6d2ee22ca86b94aa4b71da..b01028ef33ea4aeb1442056b31bff57f9ddd49c3 100644 (file)
@@ -60,6 +60,8 @@ void MgrMonitor::update_from_paxos(bool *need_bootstrap)
     dout(4) << "active server: " << map.active_addr
            << "(" << map.active_gid << ")" << dendl;
 
+    load_health();
+
     if (map.available) {
       first_seen_inactive = utime_t();
     } else {
@@ -86,6 +88,18 @@ void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t)
   pending_map.encode(bl, mon->get_quorum_con_features());
   put_version(t, pending_map.epoch, bl);
   put_last_committed(t, pending_map.epoch);
+
+  health_check_map_t next;
+  if (!pending_map.available) {
+    health_status_t level = HEALTH_WARN;
+    utime_t now = ceph_clock_now();
+    if (first_seen_inactive != utime_t() &&
+       now - first_seen_inactive > g_conf->mon_mgr_inactive_grace) {
+      level = HEALTH_ERR;
+    }
+    next.add("MGR_DOWN", level, "no active mgr");
+  }
+  encode_health(next, t);
 }
 
 bool MgrMonitor::check_caps(MonOpRequestRef op, const uuid_d& fsid)
index add84e278b72af26809960e3bc0e82669fea33e8..5c37433458d08832afdb60a57a12b9dbbf6939c0 100644 (file)
@@ -71,7 +71,7 @@ MonPGStatService *MgrStatMonitor::get_pg_stat_service()
 
 void MgrStatMonitor::create_initial()
 {
-  dout(10) << dendl;
+  dout(10) << __func__ << dendl;
   version = 0;
   service_map.epoch = 1;
   ::encode(service_map, pending_service_map_bl, CEPH_FEATURES_ALL);
index acb50bc508125c45be8163adb2091d579a3f52fd..0330236a840800e34fdf4aa8a4cd006259df4720 100644 (file)
@@ -78,6 +78,7 @@
 #include "MgrStatMonitor.h"
 #include "mon/QuorumService.h"
 #include "mon/OldHealthMonitor.h"
+#include "mon/HealthMonitor.h"
 #include "mon/ConfigKeyService.h"
 #include "common/config.h"
 #include "common/cmdparse.h"
@@ -204,6 +205,7 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorDBStore *s,
   paxos_service[PAXOS_AUTH] = new AuthMonitor(this, paxos, "auth");
   paxos_service[PAXOS_MGR] = new MgrMonitor(this, paxos, "mgr");
   paxos_service[PAXOS_MGRSTAT] = new MgrStatMonitor(this, paxos, "mgrstat");
+  paxos_service[PAXOS_HEALTH] = new HealthMonitor(this, paxos, "health");
 
   health_monitor = new OldHealthMonitor(this);
   config_key_service = new ConfigKeyService(this, paxos);
@@ -2445,6 +2447,115 @@ void Monitor::do_health_to_clog(bool force)
   health_status_cache.summary = summary;
 }
 
+health_status_t Monitor::get_health_status(
+  bool want_detail,
+  Formatter *f,
+  std::string *plain,
+  const char *sep1,
+  const char *sep2)
+{
+  health_status_t r = HEALTH_OK;
+  bool compat = g_conf->mon_health_preluminous_compat;
+  if (f) {
+    f->open_object_section("health");
+    f->open_object_section("checks");
+  }
+
+  string summary;
+  string *psummary = f ? nullptr : &summary;
+  for (auto& svc : paxos_service) {
+    r = std::min(r, svc->get_health_checks().dump_summary(
+                  f, psummary, sep2, want_detail));
+  }
+
+  if (f) {
+    f->close_section();
+    f->dump_stream("status") << r;
+  } else {
+    // one-liner: HEALTH_FOO[ thing1[; thing2 ...]]
+    *plain = stringify(r);
+    if (summary.size()) {
+      *plain += sep1;
+      *plain += summary;
+    }
+    *plain += "\n";
+  }
+
+  if (f && compat) {
+    f->open_array_section("summary");
+    for (auto& svc : paxos_service) {
+      svc->get_health_checks().dump_summary_compat(f);
+    }
+    f->close_section();
+    f->dump_stream("overall_status") << r;
+  }
+
+  if (want_detail) {
+    if (f && compat) {
+      f->open_array_section("detail");
+    }
+
+    for (auto& svc : paxos_service) {
+      svc->get_health_checks().dump_detail(f, plain, compat);
+    }
+
+    if (f && compat) {
+      f->close_section();
+    }
+  }
+  if (f) {
+    f->close_section();
+  }
+  return r;
+}
+
+void Monitor::log_health(
+  const health_check_map_t& updated,
+  const health_check_map_t& previous,
+  MonitorDBStore::TransactionRef t)
+{
+  if (!g_conf->mon_health_to_clog) {
+    return;
+  }
+  // FIXME: log atomically as part of @t instead of using clog.
+  dout(10) << __func__ << " updated " << updated.checks.size()
+          << " previous " << previous.checks.size()
+          << dendl;
+  for (auto& p : updated.checks) {
+    auto q = previous.checks.find(p.first);
+    if (q == previous.checks.end()) {
+      // new
+      ostringstream ss;
+      ss << p.second.severity << " " << p.first << ": "
+        << p.second.summary;
+      if (p.second.severity == HEALTH_WARN)
+       clog->warn() << ss.str();
+      else
+       clog->error() << ss.str();
+    } else {
+      if (p.second.summary != q->second.summary ||
+         p.second.severity != q->second.severity) {
+       // summary or severity changed (ignore detail changes at this level)
+       ostringstream ss;
+       ss << p.second.severity << " " << p.first << " (update): "
+          << p.second.summary;
+       if (p.second.severity == HEALTH_WARN)
+         clog->warn() << ss.str();
+       else
+         clog->error() << ss.str();
+      }
+    }
+  }
+  for (auto& p : previous.checks) {
+    if (!updated.checks.count(p.first)) {
+      // cleared
+      ostringstream ss;
+      ss << HEALTH_OK << " " << p.first << ": " << p.second.summary;
+      clog->info() << ss.str();
+    }
+  }
+}
+
 health_status_t Monitor::get_health(list<string>& status,
                                     bufferlist *detailbl,
                                     Formatter *f)
@@ -2550,12 +2661,9 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f)
   if (f)
     f->open_object_section("status");
 
-  // reply with the status for all the components
-  list<string> health;
-  get_health(health, NULL, f);
-
   if (f) {
     f->dump_stream("fsid") << monmap->get_fsid();
+    get_health_status(false, f, nullptr);
     f->dump_unsigned("election_epoch", get_epoch());
     {
       f->open_array_section("quorum");
@@ -2579,7 +2687,6 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f)
     f->open_object_section("fsmap");
     mdsmon()->get_fsmap().print_summary(f, NULL);
     f->close_section();
-
     f->open_object_section("mgrmap");
     mgrmon()->get_map().print_summary(f, nullptr);
     f->close_section();
@@ -2587,11 +2694,21 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f)
     f->dump_object("servicemap", mgrstatmon()->get_service_map());
     f->close_section();
   } else {
-
     ss << "  cluster:\n";
     ss << "    id:     " << monmap->get_fsid() << "\n";
-    ss << "    health: " << joinify(health.begin(), health.end(), 
-                                 string("\n            ")) << "\n";
+
+    string health;
+    if (osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) {
+      get_health_status(false, nullptr, &health,
+                       "\n            ", "\n            ");
+    } else {
+      list<string> ls;
+      get_health(ls, NULL, f);
+      health = joinify(ls.begin(), ls.end(),
+                      string("\n            "));
+    }
+    ss << "    health: " << health << "\n";
+
     ss << "\n \n  services:\n";
     {
       size_t maxlen = 3;
@@ -3112,25 +3229,35 @@ void Monitor::handle_command(MonOpRequestRef op)
       }
       rdata.append(ds);
     } else if (prefix == "health") {
-      list<string> health_str;
-      get_health(health_str, detail == "detail" ? &rdata : NULL, f.get());
-      if (f) {
-        f->flush(ds);
-        ds << '\n';
+      if (osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) {
+       string plain;
+       get_health_status(detail == "detail", f.get(), f ? nullptr : &plain);
+       if (f) {
+         f->flush(rdata);
+       } else {
+         rdata.append(plain);
+       }
       } else {
-       assert(!health_str.empty());
-       ds << health_str.front();
-       health_str.pop_front();
-       if (!health_str.empty()) {
-         ds << ' ';
-         ds << joinify(health_str.begin(), health_str.end(), string("; "));
+       list<string> health_str;
+       get_health(health_str, detail == "detail" ? &rdata : NULL, f.get());
+       if (f) {
+         f->flush(ds);
+         ds << '\n';
+       } else {
+         assert(!health_str.empty());
+         ds << health_str.front();
+         health_str.pop_front();
+         if (!health_str.empty()) {
+           ds << ' ';
+           ds << joinify(health_str.begin(), health_str.end(), string("; "));
+         }
        }
+       bufferlist comb;
+       comb.append(ds);
+       if (detail == "detail")
+         comb.append(rdata);
+       rdata = comb;
       }
-      bufferlist comb;
-      comb.append(ds);
-      if (detail == "detail")
-       comb.append(rdata);
-      rdata = comb;
     } else if (prefix == "df") {
       bool verbose = (detail == "detail");
       if (f)
@@ -4119,6 +4246,11 @@ void Monitor::dispatch_op(MonOpRequestRef op)
       health_monitor->dispatch(op);
       break;
 
+    case MSG_MON_HEALTH_CHECKS:
+      op->set_type_service();
+      paxos_service[PAXOS_HEALTH]->dispatch(op);
+      break;
+
     default:
       dealt_with = false;
       break;
index 7b3aa0522c5f71b0f0679c4479b3d4171c968692..fa7f9e9acdd32f8cd99f25f32ae942253eb5db3b 100644 (file)
@@ -32,6 +32,7 @@
 
 #include "common/Timer.h"
 
+#include "health_check.h"
 #include "MonMap.h"
 #include "Elector.h"
 #include "Paxos.h"
@@ -497,6 +498,7 @@ private:
   version_t timecheck_round;
   unsigned int timecheck_acks;
   utime_t timecheck_round_start;
+  friend class HealthMonitor;
   /* When we hit a skew we will start a new round based off of
    * 'mon_timecheck_skew_interval'. Each new round will be backed off
    * until we hit 'mon_timecheck_interval' -- which is the typical
@@ -649,6 +651,10 @@ public:
     return (class MgrStatMonitor*) paxos_service[PAXOS_MGRSTAT];
   }
 
+  // Accessor for the HealthMonitor paxos service (slot PAXOS_HEALTH).  It
+  // previously returned the MgrStatMonitor slot under the wrong type.  The
+  // cast style matches the sibling accessors.
+  class HealthMonitor *healthmon() {
+    return (class HealthMonitor*) paxos_service[PAXOS_HEALTH];
+  }
+
   friend class Paxos;
   friend class OSDMonitor;
   friend class MDSMonitor;
@@ -738,6 +744,18 @@ public:
    */
   health_status_t get_health(list<string>& status, bufferlist *detailbl,
                              Formatter *f);
+
+  health_status_t get_health_status(
+    bool want_detail,
+    Formatter *f,
+    std::string *plain,
+    const char *sep1 = " ",
+    const char *sep2 = "; ");
+  void log_health(
+    const health_check_map_t& updated,
+    const health_check_map_t& previous,
+    MonitorDBStore::TransactionRef t);
+
   void get_cluster_status(stringstream &ss, Formatter *f);
 
   void reply_command(MonOpRequestRef op, int rc, const string &rs, version_t version);
index a22fd324495eb50c099c56552f62dde109a3333a..c39556ac8a4286e0f613048b054f4ad6f1c99bf5 100644 (file)
@@ -275,6 +275,8 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap)
     mapping_job.reset();
   }
 
+  load_health();
+
   /*
    * We will possibly have a stashed latest that *we* wrote, and we will
    * always be sure to have the oldest full map in the first..last range
@@ -1101,6 +1103,19 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t)
     ::encode(pending_creatings, creatings_bl);
     t->put(OSD_PG_CREATING_PREFIX, "creating", creatings_bl);
   }
+
+  // health
+  _check_health(tmp, t);
+}
+
+void OSDMonitor::_check_health(
+  const OSDMap& nextmap,
+  MonitorDBStore::TransactionRef t)
+{
+  dout(20) << __func__ << dendl;
+  health_check_map_t next;
+#warning write me
+  encode_health(next, t);
 }
 
 void OSDMonitor::trim_creating_pgs(creating_pgs_t* creating_pgs,
index 9a944107970b5046d6c1e94bee28a0afeb40272e..0682abfdbfec347eb0619fd00b9737b7729115fb 100644 (file)
@@ -162,6 +162,8 @@ public:
     FAST_READ_DEFAULT
   };
 
+  void _check_health(const OSDMap& next, MonitorDBStore::TransactionRef t);
+
   // svc
 public:  
   void create_initial() override;
index b133fc1a582bb84101366003bb438ad33bcb6f0c..91152943b0602e0f42858f99d76e216aa0c0bbdb 100644 (file)
@@ -431,3 +431,12 @@ void PaxosService::trim(MonitorDBStore::TransactionRef t,
   }
 }
 
+void PaxosService::load_health()
+{
+  bufferlist bl;
+  mon->store->get("health", service_name, bl);
+  if (bl.length()) {
+    auto p = bl.begin();
+    ::decode(health_checks, p);
+  }
+}
index ca75915841e591702b41a3d80444f52c78a99fa7..da3038ff1e9f9a77c6e45d453058a7bbf3ad4194 100644 (file)
@@ -77,15 +77,23 @@ protected:
    */
   bool have_pending; 
 
-protected:
+  /**
+   * health checks for this service
+   *
+   * Child must populate this during encode_pending() by calling encode_health().
+   */
+  health_check_map_t health_checks;
+public:
+  // Read-only view of this service's current health checks.  Const-qualified
+  // so that it can also be called through a const PaxosService.
+  const health_check_map_t& get_health_checks() const {
+    return health_checks;
+  }
 
+protected:
   /**
    * format of our state in leveldb, 0 for default
    */
   version_t format_version;
 
-
-
   /**
    * @defgroup PaxosService_h_callbacks Callback classes
    * @{
@@ -428,6 +436,15 @@ public:
                          list<pair<health_status_t,string> > *detail,
                          CephContext *cct) const { }
 
+  void encode_health(const health_check_map_t& next,
+                    MonitorDBStore::TransactionRef t) {
+    bufferlist bl;
+    ::encode(next, bl);
+    t->put("health", service_name, bl);
+    mon->log_health(next, health_checks, t);
+  }
+  void load_health();
+
  private:
   /**
    * @defgroup PaxosService_h_store_keys Set of keys that are usually used on
diff --git a/src/mon/health_check.h b/src/mon/health_check.h
new file mode 100644 (file)
index 0000000..e9e5383
--- /dev/null
@@ -0,0 +1,192 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <string>
+#include <map>
+
+#include "include/health.h"
+#include "common/Formatter.h"
+
+/**
+ * A single health check result: a severity, a one-line summary, and a
+ * list of detail strings.
+ */
+struct health_check_t {
+  // Default to HEALTH_OK so that a default-constructed check (e.g. the
+  // first test instance below, or a freshly inserted map entry) never
+  // encodes, dumps, or compares an indeterminate severity.
+  health_status_t severity = HEALTH_OK;
+  std::string summary;
+  std::list<std::string> detail;
+
+  // encode/decode severity, summary and detail, in that order
+  DENC(health_check_t, v, p) {
+    DENC_START(1, 1, p);
+    denc(v.severity, p);
+    denc(v.summary, p);
+    denc(v.detail, p);
+    DENC_FINISH(p);
+  }
+
+  /// Two checks are equal when severity, summary and detail all match.
+  friend bool operator==(const health_check_t& l,
+                         const health_check_t& r) {
+    return l.severity == r.severity &&
+      l.summary == r.summary &&
+      l.detail == r.detail;
+  }
+  friend bool operator!=(const health_check_t& l,
+                         const health_check_t& r) {
+    return !(l == r);
+  }
+
+  /**
+   * Dump this check to @p f as "severity", "summary" and a "detail"
+   * array holding one "item" string per detail line.
+   */
+  void dump(Formatter *f) const {
+    f->dump_stream("severity") << severity;
+    f->dump_string("summary", summary);
+    f->open_array_section("detail");
+    for (auto& p : detail) {
+      f->dump_string("item", p);
+    }
+    f->close_section();
+  }
+
+  /**
+   * Append test instances to @p ls: a default-constructed check and a
+   * fully populated HEALTH_ERR check.
+   *
+   * The instances are heap-allocated and stored as raw pointers;
+   * presumably the caller takes ownership -- confirm against the caller.
+   */
+  static void generate_test_instances(std::list<health_check_t*>& ls) {
+    ls.push_back(new health_check_t);
+    ls.push_back(new health_check_t);
+    ls.back()->severity = HEALTH_ERR;
+    ls.back()->summary = "summarization";
+    ls.back()->detail = {"one", "two", "three"};
+  }
+};
+WRITE_CLASS_DENC(health_check_t)
+
+
+/**
+ * A collection of health checks, keyed by check code.
+ */
+struct health_check_map_t {
+  std::map<std::string,health_check_t> checks;
+
+  // encode/decode the whole checks map
+  DENC(health_check_map_t, v, p) {
+    DENC_START(1, 1, p);
+    denc(v.checks, p);
+    DENC_FINISH(p);
+  }
+
+  /// Dump every check to @p f as an object named after its code.
+  void dump(Formatter *f) const {
+    for (auto& p : checks) {
+      f->dump_object(p.first.c_str(), p.second);
+    }
+  }
+
+  /**
+   * Append test instances to @p ls: an empty map and a map holding two
+   * checks ("FOO" and "BAR"), each with two detail lines.
+   *
+   * The instances are heap-allocated and stored as raw pointers;
+   * presumably the caller takes ownership -- confirm against the caller.
+   */
+  static void generate_test_instances(std::list<health_check_map_t*>& ls) {
+    ls.push_back(new health_check_map_t);
+    ls.push_back(new health_check_map_t);
+    {
+      auto& d = ls.back()->add("FOO", HEALTH_WARN, "foo");
+      d.detail.push_back("a");
+      d.detail.push_back("b");
+    }
+    {
+      auto& d = ls.back()->add("BAR", HEALTH_ERR, "bar!");
+      d.detail.push_back("c");
+      d.detail.push_back("d");
+    }
+  }
+
+  /// Remove all checks.
+  void clear() {
+    checks.clear();
+  }
+  /// Exchange the contents of this map with @p other.
+  void swap(health_check_map_t& other) {
+    checks.swap(other.checks);
+  }
+
+  /**
+   * Add a new check.
+   *
+   * @param code      key for the check; must not already be present
+   *                  (enforced with assert)
+   * @param severity  severity to record
+   * @param summary   summary to record
+   * @return reference to the new entry, so the caller can fill in detail
+   */
+  health_check_t& add(const std::string& code,
+                      health_status_t severity,
+                      const std::string& summary) {
+    assert(checks.count(code) == 0);
+    health_check_t& r = checks[code];
+    r.severity = severity;
+    r.summary = summary;
+    return r;
+  }
+
+  /**
+   * Merge the checks from @p o into this map.
+   *
+   * A check whose code is not present yet is copied as-is.  For a code
+   * that already exists, only the detail lines of @p o are appended; the
+   * existing severity and summary are kept.
+   */
+  void merge(const health_check_map_t& o) {
+    for (auto& p : o.checks) {
+      // insert() leaves an existing entry untouched and tells us so,
+      // which saves a second lookup
+      auto res = checks.insert(p);
+      if (!res.second) {
+        // merge details, and hope the summary matches!
+        auto& existing = res.first->second.detail;
+        existing.insert(
+          existing.end(),
+          p.second.detail.begin(),
+          p.second.detail.end());
+      }
+    }
+  }
+
+  /**
+   * Summarize all checks, either structured or as plain text.
+   *
+   * If @p f is set, each check is dumped as an object section named by
+   * its code, holding "severity" and "message", plus a "detail" array
+   * when @p detail is true.  Otherwise, if @p plain is set, the summaries
+   * are appended to it, separated by @p sep.
+   *
+   * @return the smallest severity value among the checks, or HEALTH_OK
+   *         when no check compares below it (presumably smaller values
+   *         are more severe -- confirm in include/health.h)
+   */
+  health_status_t dump_summary(Formatter *f, std::string *plain,
+                               const char *sep, bool detail) const {
+    health_status_t r = HEALTH_OK;
+    for (auto& p : checks) {
+      if (r > p.second.severity) {
+        r = p.second.severity;
+      }
+      if (f) {
+        f->open_object_section(p.first.c_str());
+        f->dump_stream("severity") << p.second.severity;
+        f->dump_string("message", p.second.summary);
+        if (detail) {
+          f->open_array_section("detail");
+          for (auto& d : p.second.detail) {
+            f->dump_string("item", d);
+          }
+          f->close_section();
+        }
+        f->close_section();
+      } else if (plain) {
+        // guard against a caller passing neither a formatter nor a string
+        if (!plain->empty()) {
+          *plain += sep;
+        }
+        *plain += p.second.summary;
+      }
+    }
+    return r;
+  }
+
+  /// Dump each check to @p f as an "item" object with severity and summary.
+  void dump_summary_compat(Formatter *f) const {
+    for (auto& p : checks) {
+      f->open_object_section("item");
+      f->dump_stream("severity") << p.second.severity;
+      f->dump_string("summary", p.second.summary);
+      f->close_section();
+    }
+  }
+
+  /**
+   * Emit the detail lines of all checks.
+   *
+   * With a formatter, details are only dumped in compat mode (as a flat
+   * run of "item" strings); nothing is emitted here for a formatter in
+   * non-compat mode.  Without a formatter, the lines are appended to
+   * @p plain; in non-compat mode each check gets a "<code> <summary>"
+   * header line and its detail lines are indented by four spaces.
+   */
+  void dump_detail(Formatter *f, std::string *plain, bool compat) const {
+    for (auto& p : checks) {
+      if (f) {
+        if (compat) {
+          // this is sloppy, but the best we can do: just dump all of the
+          // individual checks' details together
+          for (auto& d : p.second.detail) {
+            f->dump_string("item", d);
+          }
+        }
+      } else if (plain) {
+        // guard against a caller passing neither a formatter nor a string
+        if (!compat) {
+          *plain += p.first + " " + p.second.summary + "\n";
+        }
+        for (auto& d : p.second.detail) {
+          if (!compat) {
+            *plain += "    ";
+          }
+          *plain += d;
+          *plain += "\n";
+        }
+      }
+    }
+  }
+
+  /// Two maps are equal when their checks are equal.
+  friend bool operator==(const health_check_map_t& l,
+                         const health_check_map_t& r) {
+    return l.checks == r.checks;
+  }
+  friend bool operator!=(const health_check_map_t& l,
+                         const health_check_map_t& r) {
+    return !(l == r);
+  }
+};
+WRITE_CLASS_DENC(health_check_map_t)
index 883f4669e2b7f4ca52504a3240f7d111f07e7ea9..a23238b7d0b9c53c7f991e6d02d205e828329788 100644 (file)
@@ -31,7 +31,8 @@
 #define PAXOS_AUTH       5
 #define PAXOS_MGR        6
 #define PAXOS_MGRSTAT    7
-#define PAXOS_NUM        8
+#define PAXOS_HEALTH    8
+#define PAXOS_NUM        9
 
 inline const char *get_paxos_name(int p) {
   switch (p) {
@@ -43,6 +44,7 @@ inline const char *get_paxos_name(int p) {
   case PAXOS_AUTH: return "auth";
   case PAXOS_MGR: return "mgr";
   case PAXOS_MGRSTAT: return "mgrstat";
+  case PAXOS_HEALTH: return "health";
   default: ceph_abort(); return 0;
   }
 }
index 4860889989fe9c16719fe91d8fd84fe4cd22cc11..9d1953d75b195de2cba073061fa21be408294198 100644 (file)
@@ -96,6 +96,7 @@ using namespace std;
 #include "messages/MMonGetVersion.h"
 #include "messages/MMonGetVersionReply.h"
 #include "messages/MMonHealth.h"
+#include "messages/MMonHealthChecks.h"
 #include "messages/MMonMetadata.h"
 #include "messages/MDataPing.h"
 #include "messages/MAuth.h"
@@ -783,6 +784,11 @@ Message *decode_message(CephContext *cct, int crcflags,
   case MSG_MON_HEALTH:
     m = new MMonHealth();
     break;
+
+  case MSG_MON_HEALTH_CHECKS:
+    m = new MMonHealthChecks();
+    break;
+
 #if defined(HAVE_XIO)
   case MSG_DATA_PING:
     m = new MDataPing();
index 611d691df992c474bb67b311c553f7929e43b8ad..d1b63ac1f21994edaab9e0c99f2c4c0a47724242 100644 (file)
 // Special
 #define MSG_NOP                   0x607
 
+#define MSG_MON_HEALTH_CHECKS     0x608
+
 // *** ceph-mgr <-> OSD/MDS daemons ***
 #define MSG_MGR_OPEN              0x700
 #define MSG_MGR_CONFIGURE         0x701