From: Colin Patrick McCabe
Date: Mon, 11 Oct 2010 23:42:35 +0000 (-0700)
Subject: mon: add 'ceph health' command
X-Git-Tag: v0.23~171
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=fbb5a457bacc656cd31cb5cfeff7f461f829fa79;p=ceph.git

mon: add 'ceph health' command

Create MDSMonitor::get_health and OSDMonitor::get_health to check the
health of the MDSes and OSDs, respectively.

Signed-off-by: Colin McCabe
---

diff --git a/src/include/types.h b/src/include/types.h
index e439924867d8..b747ed579e27 100644
--- a/src/include/types.h
+++ b/src/include/types.h
@@ -477,4 +477,32 @@ inline ostream& operator<<(ostream& out, const ceph_mon_subscribe_item& i)
              << ((i.flags & CEPH_SUBSCRIBE_ONETIME) ? "" : "+");
 }
 
+/*
+ * Overall health level.  Values are ordered from worst (lowest) to
+ * best (highest), so the worse of two statuses is the smaller one.
+ */
+enum health_status_t {
+  HEALTH_ERR = 0,
+  HEALTH_WARN = 1,
+  HEALTH_OK = 2,
+};
+
+#ifdef __cplusplus
+// Write the symbolic name of a health status to the stream.
+inline ostream& operator<<(ostream &oss, health_status_t status) {
+  switch (status) {
+    case HEALTH_ERR:
+      oss << "HEALTH_ERR";
+      break;
+    case HEALTH_WARN:
+      oss << "HEALTH_WARN";
+      break;
+    case HEALTH_OK:
+      oss << "HEALTH_OK";
+      break;
+  }
+  return oss;
+}
+#endif
+
 #endif
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index 642d9f170764..4c87a150990c 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -414,6 +414,33 @@ void MDSMonitor::committed()
   tick();
 }
 
+/*
+ * Report the health of the MDS map.
+ *
+ * Appends a description of any problem to ss.  Returns HEALTH_ERR when
+ * the mdsmap is stopped, HEALTH_WARN when it is not full, and HEALTH_OK
+ * otherwise.
+ */
+enum health_status_t MDSMonitor::get_health(std::ostream &ss)
+{
+  health_status_t ret(HEALTH_OK);
+
+  if (mdsmap.is_stopped()) {
+    ss << " mdsmap: STATUS_ERROR. Mdsmap is stopped! ";
+    ret = HEALTH_ERR;
+  }
+  else if (!mdsmap.is_full()) {
+    ss << " mdsmap: STATUS_WARN. Mdsmap is not full. ";
+    ret = HEALTH_WARN;
+  }
+  // HEALTH_OK is the largest enumerator, so compare for inequality.
+  // No newline is written: the 'health' command reply is read back with
+  // getline(), which would drop everything after the first newline.
+  if (ret != HEALTH_OK) {
+    ss << "mdsmap: " << "'" << mdsmap << "'";
+  }
+  return ret;
+}
 
 bool MDSMonitor::preprocess_command(MMonCommand *m)
 {
diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h
index 3bb6e724bc02..ba05d048c87d 100644
--- a/src/mon/MDSMonitor.h
+++ b/src/mon/MDSMonitor.h
@@ -83,6 +83,7 @@ class MDSMonitor : public PaxosService {
   bool preprocess_offload_targets(MMDSLoadTargets *m);
   bool prepare_offload_targets(MMDSLoadTargets *m);
 
+  enum health_status_t get_health(std::ostream &ss);
   bool preprocess_command(MMonCommand *m);
   bool prepare_command(MMonCommand *m);
 
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index 37ee6db4f6a4..54a387edba03 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -298,6 +298,7 @@ void Monitor::handle_command(MMonCommand *m)
   bufferlist rdata;
   string rs;
   int r = -EINVAL;
+  rs = "unrecognized subsystem";
   if (!m->cmd.empty()) {
     if (m->cmd[0] == "mds") {
       mdsmon()->dispatch(m);
@@ -339,7 +340,10 @@ void Monitor::handle_command(MMonCommand *m)
       authmon()->dispatch(m);
       return;
     }
-    rs = "unrecognized subsystem";
+    if (m->cmd[0] == "health") {
+      monmon()->dispatch(m);
+      return;
+    }
   } else
     rs = "no command";
 
diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc
index fab922be690d..4027449c0067 100644
--- a/src/mon/MonmapMonitor.cc
+++ b/src/mon/MonmapMonitor.cc
@@ -18,6 +18,8 @@
 #include "messages/MMonCommand.h"
 #include "common/Timer.h"
 
+#include "mon/MDSMonitor.h"
+#include "mon/OSDMonitor.h"
 #include <sstream>
 #include "config.h"
 
@@ -150,10 +152,34 @@ bool MonmapMonitor::preprocess_command(MMonCommand *m)
     else if (m->cmd[1] == "remove")
       return false;
   }
+  else if (m->cmd[0] == "health") {
+    // Combine the MDS and OSD health checks; the overall status is the
+    // worst (numerically smallest) of the individual results.
+    ostringstream oss;
+    health_status_t overall = HEALTH_OK;
+    try {
+      health_status_t ret;
+      ret = mon->mdsmon()->get_health(oss);
+      if (ret < overall)
+        overall = ret;
+      ret = mon->osdmon()->get_health(oss);
+      if (ret < overall)
+        overall = ret;
+    }
+    catch (const std::exception &e) {
+      oss << " monmapmonitor: caught exception while "
+          << "checking health: '" << e.what() << "'";
+      // A check that failed to complete must not be reported as healthy.
+      overall = HEALTH_ERR;
+    }
+    ss << overall << oss.str();
+    r = 0;
+  }
 
   if (r != -1) {
     string rs;
     getline(ss, rs);
+
     mon->reply_command(m, r, rs, rdata, paxos->get_version());
     return true;
   } else
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index ca57c96f475f..5ab2d9555eeb 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -1016,7 +1016,34 @@ void OSDMonitor::mark_all_down()
   propose_pending();
 }
 
+/*
+ * Report the health of the OSD map.
+ *
+ * Appends a description of any problem to ss.  Returns HEALTH_ERR when
+ * the osdmap contains no OSDs, HEALTH_WARN when not every OSD is both
+ * up and in, and HEALTH_OK otherwise.
+ */
+enum health_status_t OSDMonitor::get_health(std::ostream &ss)
+{
+  enum health_status_t ret(HEALTH_OK);
+
+  int num_osds = osdmap.get_num_osds();
+  int num_up_osds = osdmap.get_num_up_osds();
+  int num_in_osds = osdmap.get_num_in_osds();
+  if (num_osds == 0) {
+    ss << " osdmonitor: no OSDS in osdmap!";
+    ret = HEALTH_ERR;
+  }
+  else if ((num_up_osds != num_osds) ||
+           (num_in_osds != num_osds)) {
+    ss << " osdmonitor: num_osds = " << num_osds << ", ";
+    ss << "num_up_osds = " << num_up_osds << ", num_in_osds = ";
+    ss << num_in_osds;
+    ret = HEALTH_WARN;
+  }
 
+  return ret;
+}
 
 bool OSDMonitor::preprocess_command(MMonCommand *m)
 {
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index 9be1ab30750c..193c209e9dc8 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -158,6 +158,7 @@ private:
 
   void tick();  // check state, take actions
 
+  enum health_status_t get_health(std::ostream &ss);
   bool preprocess_command(MMonCommand *m);
   bool prepare_command(MMonCommand *m);
 