From: John Spray Date: Wed, 27 Aug 2014 18:04:03 +0000 (+0100) Subject: mon: handle beacon health metrics in MDSMonitor X-Git-Tag: v0.86~152^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1058a9e18a2640825082784047bab4c3ce3693b1;p=ceph.git mon: handle beacon health metrics in MDSMonitor Fixes: #9151 Signed-off-by: John Spray --- diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index fea198cf3c2d..c066e4904af8 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -40,7 +40,7 @@ #define dout_subsys ceph_subsys_mon #undef dout_prefix #define dout_prefix _prefix(_dout, mon, mdsmap) -static ostream& _prefix(std::ostream *_dout, Monitor *mon, MDSMap& mdsmap) { +static ostream& _prefix(std::ostream *_dout, Monitor *mon, MDSMap const& mdsmap) { return *_dout << "mon." << mon->name << "@" << mon->rank << "(" << mon->get_state_name() << ").mds e" << mdsmap.get_epoch() << " "; @@ -137,6 +137,19 @@ void MDSMonitor::encode_pending(MonitorDBStore::TransactionRef t) /* put everything in the transaction */ put_version(t, pending_mdsmap.epoch, mdsmap_bl); put_last_committed(t, pending_mdsmap.epoch); + + // Encode MDSHealth data + for (std::map<uint64_t, MDSHealth>::iterator i = pending_daemon_health.begin(); + i != pending_daemon_health.end(); ++i) { + bufferlist bl; + i->second.encode(bl); + t->put(MDS_HEALTH_PREFIX, stringify(i->first), bl); + } + for (std::set<uint64_t>::iterator i = pending_daemon_health_rm.begin(); + i != pending_daemon_health_rm.end(); ++i) { + t->erase(MDS_HEALTH_PREFIX, stringify(*i)); + } + pending_daemon_health_rm.clear(); } version_t MDSMonitor::get_trim_to() @@ -386,6 +399,9 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m) return false; } + // Store health + pending_daemon_health[gid] = m->get_health(); + // boot? if (state == MDSMap::STATE_BOOT) { // zap previous instance of this name? 
@@ -549,6 +565,25 @@ void MDSMonitor::get_health(list<pair<health_status_t, string> >& summary, list<pair<health_status_t, string> > *detail) const { mdsmap.get_health(summary, detail); + + // For each MDS GID... + for (std::map<uint64_t, MDSMap::mds_info_t>::const_iterator i = pending_mdsmap.mds_info.begin(); + i != pending_mdsmap.mds_info.end(); ++i) { + // Decode MDSHealth + bufferlist bl; + mon->store->get(MDS_HEALTH_PREFIX, stringify(i->first), bl); + if (!bl.length()) { + derr << "Missing health data for MDS " << i->first << dendl; + continue; + } + MDSHealth health; + bufferlist::iterator bl_i = bl.begin(); + health.decode(bl_i); + + for (std::list<MDSHealthMetric>::iterator j = health.metrics.begin(); j != health.metrics.end(); ++j) { + summary.push_back(std::make_pair(j->sev, j->message)); + } + } } void MDSMonitor::dump_info(Formatter *f) @@ -1582,6 +1617,8 @@ void MDSMonitor::tick() propose_osdmap = true; } pending_mdsmap.mds_info.erase(gid); + pending_daemon_health.erase(gid); + pending_daemon_health_rm.insert(gid); last_beacon.erase(gid); do_propose = true; } else if (info.state == MDSMap::STATE_STANDBY_REPLAY) { @@ -1589,6 +1626,8 @@ void MDSMonitor::tick() << " " << ceph_mds_state_name(info.state) << dendl; pending_mdsmap.mds_info.erase(gid); + pending_daemon_health.erase(gid); + pending_daemon_health_rm.insert(gid); last_beacon.erase(gid); do_propose = true; } else { @@ -1599,6 +1638,8 @@ void MDSMonitor::tick() << " " << ceph_mds_state_name(info.state) << " (laggy)" << dendl; pending_mdsmap.mds_info.erase(gid); + pending_daemon_health.erase(gid); + pending_daemon_health_rm.insert(gid); do_propose = true; } else if (!info.laggy()) { dout(10) << " marking " << gid << " " << info.addr << " mds." << info.rank << "." << info.inc @@ -1613,10 +1654,8 @@ void MDSMonitor::tick() if (propose_osdmap) request_proposal(mon->osdmon()); - } - // have a standby take over? 
set<int> failed; pending_mdsmap.get_failed_mds_set(failed); diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h index b1c5a961709f..0343c3247f89 100644 --- a/src/mon/MDSMonitor.h +++ b/src/mon/MDSMonitor.h @@ -36,6 +36,8 @@ class MMDSGetMap; class MMonCommand; class MMDSLoadTargets; +#define MDS_HEALTH_PREFIX "mds_health" + class MDSMonitor : public PaxosService { public: // mds maps @@ -133,6 +135,10 @@ public: void check_subs(); void check_sub(Subscription *sub); +private: + // MDS daemon GID to latest health state from that GID + std::map<uint64_t, MDSHealth> pending_daemon_health; + std::set<uint64_t> pending_daemon_health_rm; }; #endif