client: a better check for MDS availability

author John Spray <john.spray@redhat.com>

Tue, 13 Oct 2015 23:50:44 +0000 (00:50 +0100)

committer John Spray <john.spray@redhat.com>

Tue, 3 Nov 2015 12:34:25 +0000 (12:34 +0000)
author John Spray <john.spray@redhat.com>
Tue, 13 Oct 2015 23:50:44 +0000 (00:50 +0100)
committer John Spray <john.spray@redhat.com>
Tue, 3 Nov 2015 12:34:25 +0000 (12:34 +0000)
diff --git a/src/client/Client.cc b/src/client/Client.cc

index 3c1acf9c94d65b09a399ee8ebf7ca297e2ffd864..48af10e2fbbe2158069ed2036dc64341a72b921f 100644 (file)
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -5035,14 +5035,16 @@ int Client::mount(const std::string &mount_root, bool require_mds)
    ldout(cct, 2) << "mounted: have mdsmap " << mdsmap->get_epoch() << dendl;
    if (require_mds) {
      while (1) {
-      if (mdsmap->get_epoch() > 0) {
-        if (mdsmap->get_num_mds(CEPH_MDS_STATE_ACTIVE) == 0) {
-          ldout(cct, 10) << "no mds up: epoch=" << mdsmap->get_epoch() << dendl;
-          return CEPH_FUSE_NO_MDS_UP;
-        } else {
-          break;
-        }
+      if (mdsmap->cluster_unavailable()) {
+        // If the cluster is stuck unavailable, error out
+        ldout(cct, 10) << "mds cluster unavailable: epoch=" << mdsmap->get_epoch() << dendl;
+        return CEPH_FUSE_NO_MDS_UP;
+      } else if (mdsmap->get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) {
+        // If somebody is active, continue to mount
+        break;
        } else {
+        // Else, wait.  MDSMonitor will update the map to bring
+        // us to a conclusion eventually.
          wait_on_list(waiting_for_mdsmap);
        }
      }
diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc

index 796aad3f45f32ed5845c8ddfb98442dd79cf7776..a0f81a4552af80981a6dc27b307717c4f12b3d47 100644 (file)
--- a/src/mds/MDSMap.cc
+++ b/src/mds/MDSMap.cc
@@ -638,3 +638,41 @@ void MDSMap::decode(bufferlist::iterator& p)
    }
    DECODE_FINISH(p);
  }
+
+bool MDSMap::cluster_unavailable() const
+{
+  if (epoch == 0) {
+    return false;
+  }
+
+  // If a rank is marked damaged (unavailable until operator intervenes)
+  if (damaged.size()) {
+    return true;
+  }
+
+  // If no ranks are created (filesystem not initialized)
+  if (in.empty()) {
+    return true;
+  }
+
+  for (const auto rank : in) {
+    std::string name;
+    if (up.count(rank) != 0) {
+      name = mds_info.at(up.at(rank)).name;
+    }
+    const bool standby_avail = find_replacement_for(rank, name) != MDS_GID_NONE;
+
+    // If the rank is unfilled, and there are no standbys, we're unavailable
+    if (up.count(rank) == 0 && !standby_avail) {
+      return true;
+    } else if (up.count(rank) && mds_info.at(up.at(rank)).laggy() && !standby_avail) {
+      // If the daemon is laggy and there are no standbys, we're unavailable.
+      // It would be nice to give it some grace here, but to do so callers
+      // would have to poll this time-wise, vs. just waiting for updates
+      // to mdsmap, so it's not worth the complexity.
+      return true;
+    }
+  }
+
+  return false;
+}
diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h

index f4b369b590ed15613158058f0caf6195440007d2..c35fe3bdbe32998b2b97244dd8574e3a7be3d386 100644 (file)
--- a/src/mds/MDSMap.h
+++ b/src/mds/MDSMap.h
@@ -409,7 +409,7 @@ public:
      return NULL;
    }
  
-  mds_gid_t find_standby_for(mds_rank_t mds, std::string& name) {
+  mds_gid_t find_standby_for(mds_rank_t mds, std::string& name) const {
      std::map<mds_gid_t, mds_info_t>::const_iterator generic_standby
        = mds_info.end();
      for (std::map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin();
@@ -446,7 +446,7 @@ public:
      return MDS_GID_NONE;
    }
  
-  mds_gid_t find_replacement_for(mds_rank_t mds, std::string& name) {
+  mds_gid_t find_replacement_for(mds_rank_t mds, std::string& name) const {
      const mds_gid_t standby = find_standby_for(mds, name);
      if (standby)
        return standby;
@@ -457,6 +457,13 @@ public:
    void get_health(list<pair<health_status_t,std::string> >& summary,
                   list<pair<health_status_t,std::string> > *detail) const;
  
+  /**
+   * If any of the ranks are stuck unavailable, return true.  This is a
+   * heuristic for clients to see if they should bother waiting to talk to
+   * MDSs, or whether they should error out at startup/mount.
+   */
+  bool cluster_unavailable() const;
+
    // mds states
    bool is_down(mds_rank_t m) const { return up.count(m) == 0; }
    bool is_up(mds_rank_t m) const { return up.count(m); }
author	John Spray <john.spray@redhat.com>
	Tue, 13 Oct 2015 23:50:44 +0000 (00:50 +0100)
committer	John Spray <john.spray@redhat.com>
	Tue, 3 Nov 2015 12:34:25 +0000 (12:34 +0000)
src/client/Client.cc		patch \| blob \| history
src/mds/MDSMap.cc		patch \| blob \| history
src/mds/MDSMap.h		patch \| blob \| history