std::ostream &ss) = 0;
};
+
+// Text written to the command's output stream when "mds fail" is refused:
+// has_health_warnings() matched MDS_HEALTH_TRIM or MDS_HEALTH_CACHE_OVERSIZED
+// and the caller did not pass the yes_i_really_mean_it confirmation.
+// NOTE(review): this looks like it is defined in a header; if so, "static"
+// gives each including translation unit its own copy -- confirm that is
+// intended.
+static constexpr auto errmsg_for_unhealthy_mds = \
+ "MDS has one of two health warnings which could extend recovery: "
+ "MDS_TRIM or MDS_CACHE_OVERSIZED. MDS failover is not recommended "
+ "since it might cause unexpected file system unavailability. If "
+ "you wish to proceed, pass --yes-i-really-mean-it";
+
+
#endif
}
}
+// Report whether any MDS daemon currently has one of the given health
+// warnings pending.
+//
+// Every entry of pending_daemon_health is scanned (the check is not limited
+// to a single daemon). The result is true as soon as a metric whose type
+// appears in "warnings" is found, and false when nothing matches, which
+// includes the case of an empty "warnings" list.
+bool MDSMonitor::has_health_warnings(vector<mds_metric_t> warnings)
+{
+  // A metric is of interest when its type (the kind of health warning it
+  // carries) is among the types requested by the caller.
+  const auto is_requested = [&warnings](const auto& metric) {
+    return std::find(warnings.begin(), warnings.end(), metric.type)
+      != warnings.end();
+  };
+
+  for (const auto& [daemon_gid, daemon_health] : pending_daemon_health) {
+    if (std::any_of(std::begin(daemon_health.metrics),
+                    std::end(daemon_health.metrics),
+                    is_requested)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
int MDSMonitor::filesystem_command(
FSMap &fsmap,
MonOpRequestRef op,
} else if (prefix == "mds fail") {
string who;
cmd_getval(cmdmap, "role_or_gid", who);
+ bool confirm = false;
+ cmd_getval(cmdmap, "yes_i_really_mean_it", confirm);
MDSMap::mds_info_t failed_info;
mds_gid_t gid = gid_from_arg(fsmap, who, ss);
return -EPERM;
}
+ if (!confirm &&
+ has_health_warnings({MDS_HEALTH_TRIM, MDS_HEALTH_CACHE_OVERSIZED})) {
+ ss << errmsg_for_unhealthy_mds;
+ return -EPERM;
+ }
+
r = fail_mds(fsmap, ss, who, &failed_info);
if (r < 0 && r == -EAGAIN) {
mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
#include <map>
#include <set>
+#include <vector>
#include "include/types.h"
#include "PaxosFSMap.h"
bool preprocess_query(MonOpRequestRef op) override; // true if processed.
bool prepare_update(MonOpRequestRef op) override;
bool should_propose(double& delay) override;
+ // Returns true if any entry of pending_daemon_health carries a health
+ // metric whose type is listed in "warnings"; false otherwise.
+ bool has_health_warnings(std::vector<mds_metric_t> warnings);
bool should_print_status() const {
auto& fs = get_fsmap();
"name=gid,type=CephInt,range=0 "
"name=state,type=CephInt,range=0|20",
"set mds state of <gid> to <numeric-state>", "mds", "rw", FLAG(HIDDEN))
-COMMAND("mds fail name=role_or_gid,type=CephString",
+COMMAND("mds fail name=role_or_gid,type=CephString "
+ "name=yes_i_really_mean_it,type=CephBool,req=false",
"Mark MDS failed: trigger a failover if a standby is available",
"mds", "rw")
COMMAND("mds repaired name=role,type=CephString",