   ldout(cct, 2) << "mounted: have mdsmap " << mdsmap->get_epoch() << dendl;
   if (require_mds) {
     while (1) {
-      if (mdsmap->cluster_unavailable()) {
-        // If the cluster is stuck unavailable, error out
+      auto availability = mdsmap->is_cluster_available();
+      if (availability == MDSMap::STUCK_UNAVAILABLE) {
+        // Error out
         ldout(cct, 10) << "mds cluster unavailable: epoch=" << mdsmap->get_epoch() << dendl;
         return CEPH_FUSE_NO_MDS_UP;
-      } else if (mdsmap->get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) {
-        // If somebody is active, continue to mount
+      } else if (availability == MDSMap::AVAILABLE) {
+        // Continue to mount
         break;
-      } else {
+      } else if (availability == MDSMap::TRANSIENT_UNAVAILABLE) {
         // Else, wait. MDSMonitor will update the map to bring
         // us to a conclusion eventually.
         wait_on_list(waiting_for_mdsmap);
+      } else {
+        // Unexpected value!
+        assert(0);
       }
     }
   }
   DECODE_FINISH(p);
 }
-bool MDSMap::cluster_unavailable() const
+MDSMap::availability_t MDSMap::is_cluster_available() const
 {
   if (epoch == 0) {
-    return false;
+    // This is ambiguous between "mds map was never initialized on mons" and
+    // "we never got an mdsmap from the mons". Treat it like the latter.
+    return TRANSIENT_UNAVAILABLE;
   }
+
   // If a rank is marked damaged (unavailable until operator intervenes)
   if (damaged.size()) {
-    return true;
+    return STUCK_UNAVAILABLE;
   }
   // If no ranks are created (filesystem not initialized)
   if (in.empty()) {
-    return true;
+    return STUCK_UNAVAILABLE;
   }
   for (const auto rank : in) {
     if (up.count(rank) != 0) {
       name = mds_info.at(up.at(rank)).name;
     }
-    const mds_rank_t replacement = find_replacement_for(rank, name, false);
-    const bool standby_avail = replacement != MDS_GID_NONE;
+    const mds_gid_t replacement = find_replacement_for(rank, name, false);
+    const bool standby_avail = (replacement != MDS_GID_NONE);
     // If the rank is unfilled, and there are no standbys, we're unavailable
     if (up.count(rank) == 0 && !standby_avail) {
-      return true;
+      return STUCK_UNAVAILABLE;
     } else if (up.count(rank) && mds_info.at(up.at(rank)).laggy() && !standby_avail) {
       // If the daemon is laggy and there are no standbys, we're unavailable.
       // It would be nice to give it some grace here, but to do so callers
       // would have to poll this time-wise, vs. just waiting for updates
       // to mdsmap, so it's not worth the complexity.
-      return true;
+      return STUCK_UNAVAILABLE;
     }
   }
-  return false;
+  if (get_num_mds(CEPH_MDS_STATE_ACTIVE) > 0) {
+    // Nobody looks stuck, so indicate to client they should go ahead
+    // and try mounting if anybody is active. This may include e.g.
+    // one MDS failing over and another active: the client should
+    // proceed to start talking to the active one and let the
+    // transiently-unavailable guy catch up later.
+    return AVAILABLE;
+  } else {
+    // Nothing indicating we were stuck, but nobody active (yet)
+    return TRANSIENT_UNAVAILABLE;
+  }
 }
   void get_health(list<pair<health_status_t,std::string> >& summary,
                   list<pair<health_status_t,std::string> > *detail) const;
+  typedef enum
+  {
+    AVAILABLE = 0,
+    TRANSIENT_UNAVAILABLE = 1,
+    STUCK_UNAVAILABLE = 2
+  } availability_t;
+
   /**
-   * If any of the ranks are stuck unavailable, return true. This is a
+   * Return an indication of whether the cluster is available. This is a
    * heuristic for clients to see if they should bother waiting to talk to
    * MDSs, or whether they should error out at startup/mount.
+   *
+   * A TRANSIENT_UNAVAILABLE result indicates that the cluster is in a
+   * transition state such as replaying, or is potentially about to fail
+   * over. Clients should wait for an updated map before making a final
+   * decision about whether the filesystem is mountable.
+   *
+   * A STUCK_UNAVAILABLE result indicates that we can't see a way for the
+   * cluster to recover on its own, so it'll probably require administrator
+   * intervention: clients should probably not bother trying to mount.
    */
-  bool cluster_unavailable() const;
+  availability_t is_cluster_available() const;
   // mds states
   bool is_down(mds_rank_t m) const { return up.count(m) == 0; }
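
For illustration only, and not part of the patch: a minimal sketch of how a caller on the mount path could act on the three availability_t states described in the comment above. The helper name mount_action_for and the errno-style return convention are hypothetical; it assumes the MDSMap declarations from this header plus <cerrno> and <cassert>.

#include <cerrno>
#include <cassert>
#include "mds/MDSMap.h"  // assumed include path for the header patched above

// Hypothetical helper: translate cluster availability into the action the
// doc comment prescribes for clients.
int mount_action_for(const MDSMap &mdsmap)
{
  switch (mdsmap.is_cluster_available()) {
  case MDSMap::AVAILABLE:
    // Somebody is active: go ahead and mount.
    return 0;
  case MDSMap::TRANSIENT_UNAVAILABLE:
    // Wait for the next mdsmap update (e.g. via waiting_for_mdsmap) and re-check.
    return -EAGAIN;
  case MDSMap::STUCK_UNAVAILABLE:
    // Probably needs operator intervention; error out, like the client's
    // CEPH_FUSE_NO_MDS_UP path above.
    return -ENOENT;
  }
  assert(0);  // unexpected value
  return -EIO;
}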