From: John Spray Date: Tue, 3 Nov 2015 12:56:47 +0000 (+0000) Subject: mds: refactor availability check X-Git-Tag: v10.0.2~190^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a77bfd09eff6bc2bf159bee644fb41a737f42973;p=ceph.git mds: refactor availability check ...to give a clean three-way state instead of relying on caller to check stuck availability first and then whether any are active. Signed-off-by: John Spray --- diff --git a/src/client/Client.cc b/src/client/Client.cc index 48af10e2fbb..961bf6932c3 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -5035,17 +5035,21 @@ int Client::mount(const std::string &mount_root, bool require_mds) ldout(cct, 2) << "mounted: have mdsmap " << mdsmap->get_epoch() << dendl; if (require_mds) { while (1) { - if (mdsmap->cluster_unavailable()) { - // If the cluster is stuck unavailable, error out + auto availability = mdsmap->is_cluster_available(); + if (availability == MDSMap::STUCK_UNAVAILABLE) { + // Error out ldout(cct, 10) << "mds cluster unavailable: epoch=" << mdsmap->get_epoch() << dendl; return CEPH_FUSE_NO_MDS_UP; - } else if (mdsmap->get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) { - // If somebody is active, continue to mount + } else if (availability == MDSMap::AVAILABLE) { + // Continue to mount break; - } else { + } else if (availability == MDSMap::TRANSIENT_UNAVAILABLE) { // Else, wait. MDSMonitor will update the map to bring // us to a conclusion eventually. wait_on_list(waiting_for_mdsmap); + } else { + // Unexpected value! 
+ assert(0); } } } diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc index 3f1077e6ded..73c2df98bc0 100644 --- a/src/mds/MDSMap.cc +++ b/src/mds/MDSMap.cc @@ -639,20 +639,23 @@ void MDSMap::decode(bufferlist::iterator& p) DECODE_FINISH(p); } -bool MDSMap::cluster_unavailable() const +MDSMap::availability_t MDSMap::is_cluster_available() const { if (epoch == 0) { - return false; + // This is ambiguous between "mds map was never initialized on mons" and + // "we never got an mdsmap from the mons". Treat it like the latter. + return TRANSIENT_UNAVAILABLE; } + // If a rank is marked damaged (unavailable until operator intervenes) if (damaged.size()) { - return true; + return STUCK_UNAVAILABLE; } // If no ranks are created (filesystem not initialized) if (in.empty()) { - return true; + return STUCK_UNAVAILABLE; } for (const auto rank : in) { @@ -660,20 +663,30 @@ bool MDSMap::cluster_unavailable() const if (up.count(rank) != 0) { name = mds_info.at(up.at(rank)).name; } - const mds_rank_t replacement = find_replacement_for(rank, name, false); - const bool standby_avail = replacement != MDS_GID_NONE; + const mds_gid_t replacement = find_replacement_for(rank, name, false); + const bool standby_avail = (replacement != MDS_GID_NONE); // If the rank is unfilled, and there are no standbys, we're unavailable if (up.count(rank) == 0 && !standby_avail) { - return true; + return STUCK_UNAVAILABLE; } else if (up.count(rank) && mds_info.at(up.at(rank)).laggy() && !standby_avail) { // If the daemon is laggy and there are no standbys, we're unavailable. // It would be nice to give it some grace here, but to do so callers // would have to poll this time-wise, vs. just waiting for updates // to mdsmap, so it's not worth the complexity. - return true; + return STUCK_UNAVAILABLE; } } - return false; + if (get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) { + // Nobody looks stuck, so indicate to client they should go ahead + // and try mounting if anybody is active. This may include e.g. 
+ // one MDS failing over and another active: the client should + // proceed to start talking to the active one and let the + // transiently-unavailable guy catch up later. + return AVAILABLE; + } else { + // Nothing indicating we were stuck, but nobody active (yet) + return TRANSIENT_UNAVAILABLE; + } } diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h index b7b5d341416..16249b0cd9c 100644 --- a/src/mds/MDSMap.h +++ b/src/mds/MDSMap.h @@ -459,12 +459,30 @@ public: void get_health(list >& summary, list > *detail) const; + typedef enum + { + AVAILABLE = 0, + TRANSIENT_UNAVAILABLE = 1, + STUCK_UNAVAILABLE = 2 + + } availability_t; + /** - * If any of the ranks are stuck unavailable, return true. This is a + * Return indication of whether cluster is available. This is a * heuristic for clients to see if they should bother waiting to talk to * MDSs, or whether they should error out at startup/mount. + * + * A TRANSIENT_UNAVAILABLE result indicates that the cluster is in a + * transition state like replaying, or is potentially about to fail over. + * Clients should wait for an updated map before making a final decision + * about whether the filesystem is mountable. + * + * A STUCK_UNAVAILABLE result indicates that we can't see a way that + * the cluster is about to recover on its own, so it'll probably require + * administrator intervention: clients should probably not bother trying + * to mount. */ - bool cluster_unavailable() const; + availability_t is_cluster_available() const; // mds states bool is_down(mds_rank_t m) const { return up.count(m) == 0; }