git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: handle beacon health metrics in MDSMonitor
author: John Spray <john.spray@redhat.com>
Wed, 27 Aug 2014 18:04:03 +0000 (19:04 +0100)
committer: John Spray <john.spray@redhat.com>
Tue, 2 Sep 2014 13:06:25 +0000 (14:06 +0100)
Fixes: #9151
Signed-off-by: John Spray <john.spray@redhat.com>
src/mon/MDSMonitor.cc
src/mon/MDSMonitor.h

index fea198cf3c2d8ba69e3c5931f3cd0d4c7c1ae28b..c066e4904af8d3c5d2433c6084142db18ce1fff3 100644 (file)
@@ -40,7 +40,7 @@
 #define dout_subsys ceph_subsys_mon
 #undef dout_prefix
 #define dout_prefix _prefix(_dout, mon, mdsmap)
-static ostream& _prefix(std::ostream *_dout, Monitor *mon, MDSMap& mdsmap) {
+static ostream& _prefix(std::ostream *_dout, Monitor *mon, MDSMap const& mdsmap) {
   return *_dout << "mon." << mon->name << "@" << mon->rank
                << "(" << mon->get_state_name()
                << ").mds e" << mdsmap.get_epoch() << " ";
@@ -137,6 +137,19 @@ void MDSMonitor::encode_pending(MonitorDBStore::TransactionRef t)
   /* put everything in the transaction */
   put_version(t, pending_mdsmap.epoch, mdsmap_bl);
   put_last_committed(t, pending_mdsmap.epoch);
+
+  // Encode MDSHealth data
+  for (std::map<uint64_t, MDSHealth>::iterator i = pending_daemon_health.begin();
+      i != pending_daemon_health.end(); ++i) {
+    bufferlist bl;
+    i->second.encode(bl);
+    t->put(MDS_HEALTH_PREFIX, stringify(i->first), bl);
+  }
+  for (std::set<uint64_t>::iterator i = pending_daemon_health_rm.begin();
+      i != pending_daemon_health_rm.end(); ++i) {
+    t->erase(MDS_HEALTH_PREFIX, stringify(*i));
+  }
+  pending_daemon_health_rm.clear();
 }
 
 version_t MDSMonitor::get_trim_to()
@@ -386,6 +399,9 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m)
     return false;
   }
 
+  // Store health
+  pending_daemon_health[gid] = m->get_health();
+
   // boot?
   if (state == MDSMap::STATE_BOOT) {
     // zap previous instance of this name?
@@ -549,6 +565,25 @@ void MDSMonitor::get_health(list<pair<health_status_t, string> >& summary,
                            list<pair<health_status_t, string> > *detail) const
 {
   mdsmap.get_health(summary, detail);
+
+  // For each MDS GID...
+  for (std::map<uint64_t, MDSMap::mds_info_t>::const_iterator i = pending_mdsmap.mds_info.begin();
+      i != pending_mdsmap.mds_info.end(); ++i) {
+    // Decode MDSHealth
+    bufferlist bl;
+    mon->store->get(MDS_HEALTH_PREFIX, stringify(i->first), bl);
+    if (!bl.length()) {
+      derr << "Missing health data for MDS " << i->first << dendl;
+      continue;
+    }
+    MDSHealth health;
+    bufferlist::iterator bl_i = bl.begin();
+    health.decode(bl_i);
+
+    for (std::list<MDSHealthMetric>::iterator j = health.metrics.begin(); j != health.metrics.end(); ++j) {
+      summary.push_back(std::make_pair(j->sev, j->message));
+    }
+  }
 }
 
 void MDSMonitor::dump_info(Formatter *f)
@@ -1582,6 +1617,8 @@ void MDSMonitor::tick()
          propose_osdmap = true;
        }
        pending_mdsmap.mds_info.erase(gid);
+        pending_daemon_health.erase(gid);
+        pending_daemon_health_rm.insert(gid);
        last_beacon.erase(gid);
        do_propose = true;
       } else if (info.state == MDSMap::STATE_STANDBY_REPLAY) {
@@ -1589,6 +1626,8 @@ void MDSMonitor::tick()
                 << " " << ceph_mds_state_name(info.state)
                 << dendl;
        pending_mdsmap.mds_info.erase(gid);
+        pending_daemon_health.erase(gid);
+        pending_daemon_health_rm.insert(gid);
        last_beacon.erase(gid);
        do_propose = true;
       } else {
@@ -1599,6 +1638,8 @@ void MDSMonitor::tick()
                   << " " << ceph_mds_state_name(info.state)
                   << " (laggy)" << dendl;
          pending_mdsmap.mds_info.erase(gid);
+          pending_daemon_health.erase(gid);
+          pending_daemon_health_rm.insert(gid);
          do_propose = true;
        } else if (!info.laggy()) {
          dout(10) << " marking " << gid << " " << info.addr << " mds." << info.rank << "." << info.inc
@@ -1613,10 +1654,8 @@ void MDSMonitor::tick()
 
     if (propose_osdmap)
       request_proposal(mon->osdmon());
-
   }
 
-
   // have a standby take over?
   set<int> failed;
   pending_mdsmap.get_failed_mds_set(failed);
index b1c5a961709fd5abea011a7959861eef4a4fd916..0343c3247f892f6bcf4091067000044e89579de3 100644 (file)
@@ -36,6 +36,8 @@ class MMDSGetMap;
 class MMonCommand;
 class MMDSLoadTargets;
 
+#define MDS_HEALTH_PREFIX "mds_health"
+
 class MDSMonitor : public PaxosService {
  public:
   // mds maps
@@ -133,6 +135,10 @@ public:
   void check_subs();
   void check_sub(Subscription *sub);
 
+private:
+  // MDS daemon GID to latest health state from that GID
+  std::map<uint64_t, MDSHealth> pending_daemon_health;
+  std::set<uint64_t> pending_daemon_health_rm;
 };
 
 #endif