(default 4s). The daemons may have crashed. The Ceph monitor will
automatically replace laggy daemons with standbys if any are available.
+Message: insufficient standby daemons available
+Description: One or more file systems are configured to have a certain number
+of standby daemons available (including daemons in standby-replay) but the
+cluster does not have enough standby daemons. The standby daemons not in replay
+count towards any file system (i.e. they may overlap). This warning can be
+configured by setting ``ceph fs set <fs> standby_count_wanted <count>``. Use
+zero for ``count`` to disable.
+
+
Daemon-reported health checks
=============================
wait ``mds_beacon_grace`` seconds (default 15 seconds) before marking
the daemon as *laggy*.
+Each file system may specify a number of standby daemons to be considered
+healthy. This number includes daemons in standby-replay waiting for a rank to
+fail (remember that a standby-replay daemon will not be assigned to take over a
+failure for another rank or a failure in another CephFS file system). The
+pool of standby daemons not in replay counts towards any file system count.
+Each file system may set the number of standby daemons wanted using:
+
+::
+
+ ceph fs set <fs name> standby_count_wanted <count>
+
+Setting ``count`` to 0 will disable the health check.
+
+
Configuring standby daemons
---------------------------
new_fs->mds_map.modified = ceph_clock_now();
new_fs->mds_map.session_timeout = g_conf->mds_session_timeout;
new_fs->mds_map.session_autoclose = g_conf->mds_session_autoclose;
+ new_fs->mds_map.standby_count_wanted = fs->mds_map.standby_count_wanted;
new_fs->mds_map.enabled = true;
// Persist the new FSMap
void FSMap::get_health(list<pair<health_status_t,string> >& summary,
list<pair<health_status_t,string> > *detail) const
{
- for (auto i : filesystems) {
- auto fs = i.second;
+ mds_rank_t standby_count_wanted = 0; // largest standby shortfall reported by any filesystem
+ for (const auto &i : filesystems) {
+ const auto &fs = i.second;
// TODO: move get_health up into here so that we can qualify
// all the messages with what filesystem they're talking about
fs->mds_map.get_health(summary, detail);
+
+ standby_count_wanted = std::max(standby_count_wanted, fs->mds_map.get_standby_count_wanted((mds_rank_t)standby_daemons.size())); // keep the maximum (not the sum) across filesystems
+ }
+
+ if (standby_count_wanted) { // non-zero: at least one filesystem wants more standbys than are available
+ std::ostringstream oss;
+ oss << "insufficient standby daemons available: have " << standby_daemons.size() << "; want " << standby_count_wanted << " more";
+ summary.push_back(make_pair(HEALTH_WARN, oss.str())); // this warning is added to summary only
+ }
+}
+
+bool FSMap::check_health(void) // returns true if any filesystem's MDSMap::check_health reported a change
+{
+ bool changed = false;
+ for (auto &i : filesystems) {
+ changed |= i.second->mds_map.check_health((mds_rank_t)standby_daemons.size()); // pass the size of standby_daemons to each map
}
+ return changed;
}
void FSMap::encode(bufferlist& bl, uint64_t features) const
void get_health(list<pair<health_status_t,std::string> >& summary,
list<pair<health_status_t,std::string> > *detail) const;
+ bool check_health(void);
+
/**
* Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are
* all self-consistent.
f->dump_bool("enabled", enabled);
f->dump_string("fs_name", fs_name);
f->dump_string("balancer", balancer);
+ f->dump_int("standby_count_wanted", std::max(0, standby_count_wanted));
}
void MDSMap::generate_test_instances(list<MDSMap*>& ls)
out << "metadata_pool\t" << metadata_pool << "\n";
out << "inline_data\t" << (inline_data_enabled ? "enabled" : "disabled") << "\n";
out << "balancer\t" << balancer << "\n";
+ out << "standby_count_wanted\t" << std::max(0, standby_count_wanted) << "\n";
multimap< pair<mds_rank_t, unsigned>, mds_gid_t > foo;
for (const auto &p : mds_info) {
::encode(cas_pool, bl);
// kclient ignores everything from here
- __u16 ev = 11;
+ __u16 ev = 12;
::encode(ev, bl);
::encode(compat, bl);
::encode(metadata_pool, bl);
::encode(fs_name, bl);
::encode(damaged, bl);
::encode(balancer, bl);
+ ::encode(standby_count_wanted, bl);
ENCODE_FINISH(bl);
}
}
if (ev >= 11) {
- ::decode(balancer, p);
+ ::decode(balancer, p);
}
+
+ if (ev >= 12) {
+ ::decode(standby_count_wanted, p);
+ }
+
DECODE_FINISH(p);
}
return state_valid;
}
+
+bool MDSMap::check_health(mds_rank_t standby_daemon_count) // returns true only when standby_count_wanted was modified
+{
+ std::set<mds_rank_t> standbys;
+ get_standby_replay_mds_set(standbys);
+ std::set<mds_rank_t> actives;
+ get_active_mds_set(actives);
+ mds_rank_t standbys_avail = (mds_rank_t)standbys.size()+standby_daemon_count; // this map's standby-replay set plus the caller-supplied standby count
+
+ /* If there are standby daemons available/replaying and
+ * standby_count_wanted is unset (default), then we set it to 1. This will
+ * happen during health checks by the mons. Also, during initial creation
+ * of the FS we will have no actives so we don't want to change the default
+ * yet.
+ */
+ if (standby_count_wanted == -1 && actives.size() > 0 && standbys_avail > 0) { // -1 is the "unset" value checked here
+ set_standby_count_wanted(1);
+ return true;
+ }
+ return false;
+}
*/
mds_rank_t max_mds; /* The maximum number of active MDSes. Also, the maximum rank. */
+ mds_rank_t standby_count_wanted;
string balancer; /* The name/version of the mantle balancer (i.e. the rados obj name) */
std::set<mds_rank_t> in; // currently defined cluster
cas_pool(-1),
metadata_pool(-1),
max_mds(0),
+ standby_count_wanted(-1),
ever_allowed_features(0),
explicitly_allowed_features(0),
inline_data_enabled(false),
mds_rank_t get_max_mds() const { return max_mds; }
void set_max_mds(mds_rank_t m) { max_mds = m; }
+ mds_rank_t get_standby_count_wanted(mds_rank_t standby_daemon_count) const { // returns how many more standbys are wanted than available (0 if satisfied)
+ assert(standby_daemon_count >= 0);
+ std::set<mds_rank_t> s;
+ get_standby_replay_mds_set(s);
+ mds_rank_t standbys_avail = (mds_rank_t)s.size()+standby_daemon_count; // standby-replay set size plus the caller-supplied standby count
+ mds_rank_t wanted = std::max(0, standby_count_wanted); // a negative stored value is treated as 0
+ return wanted > standbys_avail ? wanted - standbys_avail : 0; // shortfall, never negative
+ }
+ void set_standby_count_wanted(mds_rank_t n) { standby_count_wanted = n; }
+ bool check_health(mds_rank_t standby_daemon_count);
+
const std::string get_balancer() const { return balancer; }
void set_balancer(std::string val) { balancer.assign(val); }
void get_active_mds_set(std::set<mds_rank_t>& s) const {
get_mds_set(s, MDSMap::STATE_ACTIVE);
}
+ void get_standby_replay_mds_set(std::set<mds_rank_t>& s) const { // collects into s via get_mds_set with STATE_STANDBY_REPLAY
+ get_mds_set(s, MDSMap::STATE_STANDBY_REPLAY);
+ }
void get_failed_mds_set(std::set<mds_rank_t>& s) const {
s = failed;
}
});
ss << "marked " << (is_down ? "down" : "up");
+ } else if (var == "standby_count_wanted") {
+ if (interr.length()) {
+ ss << var << " requires an integer value";
+ return -EINVAL;
+ }
+ if (n < 0) {
+ ss << var << " must be non-negative";
+ return -ERANGE;
+ }
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [n](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.set_standby_count_wanted(n);
+ });
} else {
ss << "unknown variable " << var;
return -EINVAL;
if (!mon->is_leader()) return;
+ do_propose |= pending_fsmap.check_health();
+
// expand mds cluster (add new nodes to @in)?
for (auto i : pending_fsmap.filesystems) {
do_propose |= maybe_expand_cluster(i.second);
COMMAND("fs set " \
"name=fs_name,type=CephString " \
"name=var,type=CephChoices,strings=max_mds|max_file_size"
- "|allow_new_snaps|inline_data|cluster_down|allow_multimds|allow_dirfrags|balancer " \
+ "|allow_new_snaps|inline_data|cluster_down|allow_multimds|allow_dirfrags|balancer" \
+ "|standby_count_wanted " \
"name=val,type=CephString " \
"name=confirm,type=CephString,req=false", \
"set mds parameter <var> to <val>", "mds", "rw", "cli,rest")