git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
MDSMonitor: show laggy MDSs at higher debug level 16278/head
author Patrick Donnelly <pdonnell@redhat.com>
Wed, 12 Jul 2017 02:36:32 +0000 (19:36 -0700)
committer Patrick Donnelly <pdonnell@redhat.com>
Wed, 12 Jul 2017 03:32:26 +0000 (20:32 -0700)
Also, print laggy daemons even if the OSDMap is not yet writeable.

This is mostly for operators wanting to see a more visible message that an MDS
has been replaced.

Related-to: http://tracker.ceph.com/issues/19706
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
src/mon/MDSMonitor.cc
src/mon/MDSMonitor.h

index 608e1aeedc3e8d43c19e5c29af4264d1f955b6b8..ac22848dc485bef9cc49efcab8a7daf0296335b5 100644 (file)
@@ -1906,21 +1906,14 @@ bool MDSMonitor::maybe_expand_cluster(std::shared_ptr<Filesystem> fs)
  * is available, fail this daemon (remove from map) and pass its
  * role to another daemon.
  */
-void MDSMonitor::maybe_replace_gid(mds_gid_t gid,
-    const beacon_info_t &beacon,
+void MDSMonitor::maybe_replace_gid(mds_gid_t gid, const MDSMap::mds_info_t& info,
     bool *mds_propose, bool *osd_propose)
 {
   assert(mds_propose != nullptr);
   assert(osd_propose != nullptr);
 
-  const MDSMap::mds_info_t info = pending_fsmap.get_info_gid(gid);
   const auto fscid = pending_fsmap.mds_roles.at(gid);
 
-  dout(10) << "no beacon from " << gid << " " << info.addr << " mds."
-    << info.rank << "." << info.inc
-    << " " << ceph_mds_state_name(info.state)
-    << " since " << beacon.stamp << dendl;
-
   // We will only take decisive action (replacing/removing a daemon)
   // if we have some indicating that some other daemon(s) are successfully
   // getting beacons through recently.
@@ -2144,32 +2137,36 @@ void MDSMonitor::tick()
     }
   }
 
-  // If the OSDMap is writeable, we can blacklist things, so we can
-  // try failing any laggy MDS daemons.  Consider each one for failure.
-  if (mon->osdmon()->is_writeable()) {
-    bool propose_osdmap = false;
-
-    map<mds_gid_t, beacon_info_t>::iterator p = last_beacon.begin();
-    while (p != last_beacon.end()) {
-      mds_gid_t gid = p->first;
-      auto beacon_info = p->second;
-      ++p;
-
-      if (!pending_fsmap.gid_exists(gid)) {
-       // clean it out
-       last_beacon.erase(gid);
-       continue;
-      }
+  bool propose_osdmap = false;
+  bool osdmap_writeable = mon->osdmon()->is_writeable();
+  auto p = last_beacon.begin();
+  while (p != last_beacon.end()) {
+    mds_gid_t gid = p->first;
+    auto beacon_info = p->second;
+    ++p;
 
-      if (beacon_info.stamp < cutoff) {
-        maybe_replace_gid(gid, beacon_info, &do_propose, &propose_osdmap);
-      }
+    if (!pending_fsmap.gid_exists(gid)) {
+      // clean it out
+      last_beacon.erase(gid);
+      continue;
     }
 
-    if (propose_osdmap) {
-      request_proposal(mon->osdmon());
+    if (beacon_info.stamp < cutoff) {
+      auto &info = pending_fsmap.get_info_gid(gid);
+      dout(1) << "no beacon from mds." << info.rank << "." << info.inc
+              << " (gid: " << gid << " addr: " << info.addr
+              << " state: " << ceph_mds_state_name(info.state) << ")"
+              << " since " << beacon_info.stamp << dendl;
+      // If the OSDMap is writeable, we can blacklist things, so we can
+      // try failing any laggy MDS daemons.  Consider each one for failure.
+      if (osdmap_writeable) {
+        maybe_replace_gid(gid, info, &do_propose, &propose_osdmap);
+      }
     }
   }
+  if (propose_osdmap) {
+    request_proposal(mon->osdmon());
+  }
 
   for (auto i : pending_fsmap.filesystems) {
     auto fs = i.second;
index 05946b51c3c81abae0379639b40f682301fd7365..65cb7551ac19d379d9d72f4ce302ad89fcaabd71 100644 (file)
@@ -129,7 +129,7 @@ class MDSMonitor : public PaxosService {
 
   bool maybe_promote_standby(std::shared_ptr<Filesystem> fs);
   bool maybe_expand_cluster(std::shared_ptr<Filesystem> fs);
-  void maybe_replace_gid(mds_gid_t gid, const beacon_info_t &beacon,
+  void maybe_replace_gid(mds_gid_t gid, const MDSMap::mds_info_t& info,
       bool *mds_propose, bool *osd_propose);
   void tick() override;     // check state, take actions