From: John Spray <john.spray@redhat.com>
Date: Tue, 3 Nov 2015 12:56:47 +0000 (+0000)
Subject: mds: refactor availability check
X-Git-Tag: v10.0.2~190^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a77bfd09eff6bc2bf159bee644fb41a737f42973;p=ceph.git

mds: refactor availability check

...to give a clean three-way state instead of
relying on the caller to check for stuck unavailability
first and then whether any MDS is active.

Signed-off-by: John Spray <john.spray@redhat.com>
---

diff --git a/src/client/Client.cc b/src/client/Client.cc
index 48af10e2fbb..961bf6932c3 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -5035,17 +5035,21 @@ int Client::mount(const std::string &mount_root, bool require_mds)
   ldout(cct, 2) << "mounted: have mdsmap " << mdsmap->get_epoch() << dendl;
   if (require_mds) {
     while (1) {
-      if (mdsmap->cluster_unavailable()) {
-        // If the cluster is stuck unavailable, error out
+      auto availability = mdsmap->is_cluster_available();
+      if (availability == MDSMap::STUCK_UNAVAILABLE) {
+        // Error out
         ldout(cct, 10) << "mds cluster unavailable: epoch=" << mdsmap->get_epoch() << dendl;
         return CEPH_FUSE_NO_MDS_UP;
-      } else if (mdsmap->get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) {
-        // If somebody is active, continue to mount
+      } else if (availability == MDSMap::AVAILABLE) {
+        // Continue to mount
         break;
-      } else {
+      } else if (availability == MDSMap::TRANSIENT_UNAVAILABLE) {
         // Else, wait.  MDSMonitor will update the map to bring
         // us to a conclusion eventually.
         wait_on_list(waiting_for_mdsmap);
+      } else {
+        // Unexpected value!
+        assert(0);
       }
     }
   }
diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc
index 3f1077e6ded..73c2df98bc0 100644
--- a/src/mds/MDSMap.cc
+++ b/src/mds/MDSMap.cc
@@ -639,20 +639,23 @@ void MDSMap::decode(bufferlist::iterator& p)
   DECODE_FINISH(p);
 }
 
-bool MDSMap::cluster_unavailable() const
+MDSMap::availability_t MDSMap::is_cluster_available() const
 {
   if (epoch == 0) {
-    return false;
+    // This is ambiguous between "mds map was never initialized on mons" and
+    // "we never got an mdsmap from the mons".  Treat it like the latter.
+    return TRANSIENT_UNAVAILABLE;
   }
 
+
   // If a rank is marked damage (unavailable until operator intervenes)
   if (damaged.size()) {
-    return true;
+    return STUCK_UNAVAILABLE;
   }
 
   // If no ranks are created (filesystem not initialized)
   if (in.empty()) {
-    return true;
+    return STUCK_UNAVAILABLE;
   }
 
   for (const auto rank : in) {
@@ -660,20 +663,30 @@ bool MDSMap::cluster_unavailable() const
     if (up.count(rank) != 0) {
       name = mds_info.at(up.at(rank)).name;
     }
-    const mds_rank_t replacement = find_replacement_for(rank, name, false);
-    const bool standby_avail = replacement != MDS_GID_NONE;
+    const mds_gid_t replacement = find_replacement_for(rank, name, false);
+    const bool standby_avail = (replacement != MDS_GID_NONE);
 
     // If the rank is unfilled, and there are no standbys, we're unavailable
     if (up.count(rank) == 0 && !standby_avail) {
-      return true;
+      return STUCK_UNAVAILABLE;
     } else if (up.count(rank) && mds_info.at(up.at(rank)).laggy() && !standby_avail) {
       // If the daemon is laggy and there are no standbys, we're unavailable.
       // It would be nice to give it some grace here, but to do so callers
       // would have to poll this time-wise, vs. just waiting for updates
       // to mdsmap, so it's not worth the complexity.
-      return true;
+      return STUCK_UNAVAILABLE;
     }
   }
 
-  return false;
+  if (get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) {
+    // Nobody looks stuck, so indicate to client they should go ahead
+    // and try mounting if anybody is active.  This may include e.g.
+    // one MDS failing over and another active: the client should
+    // proceed to start talking to the active one and let the
+    // transiently-unavailable guy catch up later.
+    return AVAILABLE;
+  } else {
+    // Nothing indicating we were stuck, but nobody active (yet)
+    return TRANSIENT_UNAVAILABLE;
+  }
 }
diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h
index b7b5d341416..16249b0cd9c 100644
--- a/src/mds/MDSMap.h
+++ b/src/mds/MDSMap.h
@@ -459,12 +459,30 @@ public:
   void get_health(list<pair<health_status_t,std::string> >& summary,
 		  list<pair<health_status_t,std::string> > *detail) const;
 
+  typedef enum
+  {
+    AVAILABLE = 0,
+    TRANSIENT_UNAVAILABLE = 1,
+    STUCK_UNAVAILABLE = 2
+
+  } availability_t;
+
   /**
-   * If any of the ranks are stuck unavailable, return true.  This is a
+   * Return indication of whether cluster is available.  This is a
    * heuristic for clients to see if they should bother waiting to talk to
    * MDSs, or whether they should error out at startup/mount.
+   *
+   * A TRANSIENT_UNAVAILABLE result indicates that the cluster is in a
+   * transition state like replaying, or is potentially about to fail over.
+   * Clients should wait for an updated map before making a final decision
+   * about whether the filesystem is mountable.
+   *
+   * A STUCK_UNAVAILABLE result indicates that we can't see a way that
+   * the cluster is about to recover on its own, so it'll probably require
+   * administrator intervention: clients should probably not bother trying
+   * to mount.
    */
-  bool cluster_unavailable() const;
+  availability_t is_cluster_available() const;
 
   // mds states
   bool is_down(mds_rank_t m) const { return up.count(m) == 0; }