OPTION(mon_mgr_digest_period, OPT_INT, 5) // How frequently (seconds) to send digests
OPTION(mon_mgr_beacon_grace, OPT_INT, 30) // How long (seconds) to wait before failing over
OPTION(mon_mgr_inactive_grace, OPT_INT, 60) // How long (seconds) before health WARN -> ERR
+OPTION(mon_mgr_mkfs_grace, OPT_INT, 60) // How long (seconds) after cluster creation before we complain about MGR_DOWN
OPTION(rgw_crypt_require_ssl, OPT_BOOL, true) // requests including encryption key headers must be sent over ssl
OPTION(rgw_crypt_default_encryption_key, OPT_STR, "") // base64 encoded key for encryption of rgw objects
OPTION(rgw_crypt_s3_kms_encryption_keys, OPT_STR, "") // extra keys that may be used for aws:kms
dout(4) << "active server: " << map.active_addr
<< "(" << map.active_gid << ")" << dendl;
+ ever_had_active_mgr = get_value("ever_had_active_mgr");
+
load_health();
if (map.available) {
pending_map.epoch++;
}
+health_status_t MgrMonitor::should_warn_about_mgr_down()
+{
+ utime_t now = ceph_clock_now();
+ // we warn if
+ // - we've ever had an active mgr, or
+ // - we have osds AND we've exceeded the grace period
+ // which means a new mon cluster can be HEALTH_OK indefinitely as long as
+ // no OSDs are ever created.
+ if (ever_had_active_mgr ||
+ (mon->osdmon()->osdmap.get_num_osds() > 0 &&
+ now > mon->monmap->created + g_conf->mon_mgr_mkfs_grace)) {
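+ // start at WARN; escalate to ERR once the mgr has been inactive longer than mon_mgr_inactive_grace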
+ health_status_t level = HEALTH_WARN;
+ if (first_seen_inactive != utime_t() &&
+ now - first_seen_inactive > g_conf->mon_mgr_inactive_grace) {
+ level = HEALTH_ERR;
+ }
+ return level;
+ }
+ return HEALTH_OK;
+}
+
void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t)
{
dout(10) << __func__ << " " << pending_map << dendl;
health_check_map_t next;
if (!pending_map.available) {
- health_status_t level = HEALTH_WARN;
- utime_t now = ceph_clock_now();
- if (first_seen_inactive != utime_t() &&
- now - first_seen_inactive > g_conf->mon_mgr_inactive_grace) {
- level = HEALTH_ERR;
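+ // OK here means we've never had an active mgr and the cluster is still within the mkfs grace (or has no OSDs)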
+ auto level = should_warn_about_mgr_down();
+ if (level != HEALTH_OK) {
+ next.add("MGR_DOWN", level, "no active mgr");
+ } else {
+ dout(10) << __func__ << " no health warning (never active and new cluster)"
+ << dendl;
}
- next.add("MGR_DOWN", level, "no active mgr");
+ } else {
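+ // remember persistently (in the mon store) that a mgr has been active at least once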
+ put_value(t, "ever_had_active_mgr", 1);
}
encode_health(next, t);
}
}
}
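+ // once a missing mgr becomes worth warning about, propose so encode_pending() publishes MGR_DOWN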
+ if (!pending_map.available &&
+ should_warn_about_mgr_down() != HEALTH_OK) {
+ dout(10) << " exceeded mon_mgr_mkfs_grace " << g_conf->mon_mgr_mkfs_grace
+ << " seconds" << dendl;
+ propose = true;
+ }
+
if (propose) {
propose_pending();
}
{
MgrMap map;
MgrMap pending_map;
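+ // true once this cluster has ever had an active mgr (loaded from the "ever_had_active_mgr" key)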
+ bool ever_had_active_mgr = false;
utime_t first_seen_inactive;
bool check_caps(MonOpRequestRef op, const uuid_d& fsid);
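+ // health level (OK/WARN/ERR) to report when there is no active mgr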
+ health_status_t should_warn_about_mgr_down();
+
public:
MgrMonitor(Monitor *mn, Paxos *p, const string& service_name)
: PaxosService(mn, p, service_name)