git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
MDSMap: improve health check 143/head
author Sage Weil <sage@inktank.com>
Sat, 23 Mar 2013 04:04:43 +0000 (21:04 -0700)
committer Sage Weil <sage@inktank.com>
Sat, 23 Mar 2013 04:04:43 +0000 (21:04 -0700)
Note if the cluster is degraded.  If so, indicate specifically which MDSs
are degraded and what state they are in.

Signed-off-by: Sage Weil <sage@inktank.com>
src/mds/MDSMap.cc

index 29b0b69ce91c5ffa800d5f1724abe65e54467e72..e0b25f4e0a3123a1e6ad333d70df964f745f8188 100644 (file)
@@ -246,7 +246,7 @@ void MDSMap::get_health(list<pair<health_status_t,string> >& summary,
        << " failed";
     summary.push_back(make_pair(HEALTH_ERR, oss.str()));
     if (detail) {
-      for (set<int>::iterator p = failed.begin(); p != failed.end(); ++p) {
+      for (set<int>::const_iterator p = failed.begin(); p != failed.end(); ++p) {
        std::ostringstream oss;
        oss << "mds." << *p << " has failed";
        detail->push_back(make_pair(HEALTH_ERR, oss.str()));
@@ -254,6 +254,30 @@ void MDSMap::get_health(list<pair<health_status_t,string> >& summary,
     }
   }
 
+  if (is_degraded()) {
+    summary.push_back(make_pair(HEALTH_WARN, "mds cluster is degraded"));
+    if (detail) {
+      detail->push_back(make_pair(HEALTH_WARN, "mds cluster is degraded"));
+      for (unsigned i=0; i< get_max_mds(); i++) {
+       if (!is_up(i))
+         continue;
+       uint64_t gid = up.find(i)->second;
+       map<uint64_t,mds_info_t>::const_iterator info = mds_info.find(gid);
+       stringstream ss;
+       if (is_resolve(i))
+         ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is resolving";
+       if (is_replay(i))
+         ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is replaying journal";
+       if (is_rejoin(i))
+         ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is rejoining";
+       if (is_reconnect(i))
+         ss << "mds." << info->second.name << " at " << info->second.addr << " rank " << i << " is reconnecting to clients";
+       if (ss.str().length())
+         detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+      }
+    }
+  }
+
   map<int32_t,uint64_t>::const_iterator u = up.begin();
   map<int32_t,uint64_t>::const_iterator u_end = up.end();
   map<uint64_t,mds_info_t>::const_iterator m_end = mds_info.end();
@@ -271,6 +295,7 @@ void MDSMap::get_health(list<pair<health_status_t,string> >& summary,
       }
     }
   }
+
   if (!laggy.empty()) {
     std::ostringstream oss;
     oss << "mds " << laggy