   ldout(cct, 2) << "mounted: have mdsmap " << mdsmap->get_epoch() << dendl;
   if (require_mds) {
     while (1) {
-      if (mdsmap->cluster_unavailable()) {
-        // If the cluster is stuck unavailable, error out
+      auto availability = mdsmap->is_cluster_available();
+      if (availability == MDSMap::STUCK_UNAVAILABLE) {
+        // Error out
         ldout(cct, 10) << "mds cluster unavailable: epoch=" << mdsmap->get_epoch() << dendl;
         return CEPH_FUSE_NO_MDS_UP;
-      } else if (mdsmap->get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) {
-        // If somebody is active, continue to mount
+      } else if (availability == MDSMap::AVAILABLE) {
+        // Continue to mount
         break;
-      } else {
+      } else if (availability == MDSMap::TRANSIENT_UNAVAILABLE) {
         // Else, wait. MDSMonitor will update the map to bring
         // us to a conclusion eventually.
         wait_on_list(waiting_for_mdsmap);
+      } else {
+        // Unexpected value!
+        assert(0);
       }
     }
   }
   DECODE_FINISH(p);
 }
-bool MDSMap::cluster_unavailable() const
+MDSMap::availability_t MDSMap::is_cluster_available() const
 {
   if (epoch == 0) {
-    return false;
+    // This is ambiguous between "mds map was never initialized on mons" and
+    // "we never got an mdsmap from the mons". Treat it like the latter.
+    return TRANSIENT_UNAVAILABLE;
   }
+
   // If a rank is marked damaged (unavailable until operator intervenes)
   if (damaged.size()) {
-    return true;
+    return STUCK_UNAVAILABLE;
   }
   // If no ranks are created (filesystem not initialized)
   if (in.empty()) {
-    return true;
+    return STUCK_UNAVAILABLE;
   }
   for (const auto rank : in) {
     if (up.count(rank) != 0) {
       name = mds_info.at(up.at(rank)).name;
     }
-    const mds_rank_t replacement = find_replacement_for(rank, name, false);
-    const bool standby_avail = replacement != MDS_GID_NONE;
+    const mds_gid_t replacement = find_replacement_for(rank, name, false);
+    const bool standby_avail = (replacement != MDS_GID_NONE);
     // If the rank is unfilled, and there are no standbys, we're unavailable
     if (up.count(rank) == 0 && !standby_avail) {
-      return true;
+      return STUCK_UNAVAILABLE;
     } else if (up.count(rank) && mds_info.at(up.at(rank)).laggy() && !standby_avail) {
       // If the daemon is laggy and there are no standbys, we're unavailable.
       // It would be nice to give it some grace here, but to do so callers
       // would have to poll this time-wise, vs. just waiting for updates
       // to mdsmap, so it's not worth the complexity.
-      return true;
+      return STUCK_UNAVAILABLE;
     }
   }
-  return false;
+  if (get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) {
+    // Nobody looks stuck, so indicate to client they should go ahead
+    // and try mounting if anybody is active. This may include e.g.
+    // one MDS failing over and another active: the client should
+    // proceed to start talking to the active one and let the
+    // transiently-unavailable guy catch up later.
+    return AVAILABLE;
+  } else {
+    // Nothing indicating we were stuck, but nobody active (yet)
+    return TRANSIENT_UNAVAILABLE;
+  }
 }
   void get_health(list<pair<health_status_t,std::string> >& summary,
                   list<pair<health_status_t,std::string> > *detail) const;
+  typedef enum
+  {
+    AVAILABLE = 0,
+    TRANSIENT_UNAVAILABLE = 1,
+    STUCK_UNAVAILABLE = 2
+  } availability_t;
+
   /**
-   * If any of the ranks are stuck unavailable, return true. This is a
+   * Return an indication of whether the cluster is available. This is a
    * heuristic for clients to see if they should bother waiting to talk to
    * MDSs, or whether they should error out at startup/mount.
+   *
+   * A TRANSIENT_UNAVAILABLE result indicates that the cluster is in a
+   * transition state such as replaying, or is potentially about to fail
+   * over. Clients should wait for an updated map before making a final
+   * decision about whether the filesystem is mountable.
+   *
+   * A STUCK_UNAVAILABLE result indicates that we can't see a way for the
+   * cluster to recover on its own, so it'll probably require administrator
+   * intervention: clients should probably not bother trying to mount.
    */
-  bool cluster_unavailable() const;
+  availability_t is_cluster_available() const;
   // mds states
   bool is_down(mds_rank_t m) const { return up.count(m) == 0; }
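
For illustration only, and not part of the patch: a minimal sketch of how a caller on the mount path could act on the three availability_t states described in the comment above. The helper name mount_action_for and the errno-style return convention are hypothetical; it assumes the MDSMap declarations from this header plus <cerrno> and <cassert>.

#include <cerrno>
#include <cassert>
#include "mds/MDSMap.h"  // assumed include path for the header patched above

// Hypothetical helper: translate cluster availability into the action the
// doc comment prescribes for clients.
int mount_action_for(const MDSMap &mdsmap)
{
  switch (mdsmap.is_cluster_available()) {
  case MDSMap::AVAILABLE:
    // Somebody is active: go ahead and mount.
    return 0;
  case MDSMap::TRANSIENT_UNAVAILABLE:
    // Wait for the next mdsmap update (e.g. via waiting_for_mdsmap) and re-check.
    return -EAGAIN;
  case MDSMap::STUCK_UNAVAILABLE:
    // Probably needs operator intervention; error out, like the client's
    // CEPH_FUSE_NO_MDS_UP path above.
    return -ENOENT;
  }
  assert(0);  // unexpected value
  return -EIO;
}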