From fb0944e22acf6f8b6cefb59cc4c41dc48087bfd7 Mon Sep 17 00:00:00 2001 From: Joao Eduardo Luis Date: Wed, 30 Apr 2014 17:13:30 +0100 Subject: [PATCH] mon: OSDMonitor: HEALTH_WARN on 'mon osd down out interval == 0' A 'status' or 'health' request will return a HEALTH_WARN whenever the monitor handling the request has the option set to zero. Fixes: 7784 Signed-off-by: Joao Eduardo Luis (cherry picked from commit b2112d5087b449d3b019678cb266ff6fa897897e) --- PendingReleaseNotes | 5 +++++ src/common/config_opts.h | 1 + src/mon/OSDMonitor.cc | 23 +++++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 6f462c81f50..27bca5ba594 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -6,3 +6,8 @@ v0.67.8 non-plain format. This is consistent with the behavior for a pool which used to hold images, but contains none. Scripts relying on this behavior should be updated. + +- HEALTH_WARN on 'mon osd down out interval == 0'. Having this option set + to zero on the leader acts much like having the 'noout' flag set. This + warning will only be reported if the monitor getting the 'health' or + 'status' request has this option set to zero. 
diff --git a/src/common/config_opts.h b/src/common/config_opts.h index b41db5a1783..b4f3d9f7216 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -163,6 +163,7 @@ OPTION(mon_osd_nearfull_ratio, OPT_FLOAT, .85) // what % full makes an OSD near OPTION(mon_globalid_prealloc, OPT_INT, 100) // how many globalids to prealloc OPTION(mon_osd_report_timeout, OPT_INT, 900) // grace period before declaring unresponsive OSDs dead OPTION(mon_force_standby_active, OPT_BOOL, true) // should mons force standby-replay mds to be active +OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0' OPTION(mon_min_osdmap_epochs, OPT_INT, 500) OPTION(mon_max_pgmap_epochs, OPT_INT, 500) OPTION(mon_max_log_epochs, OPT_INT, 500) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 97a193ca623..2431fa84d12 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1975,6 +1975,29 @@ void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary, detail->push_back(make_pair(HEALTH_WARN, ss.str())); } + // Warn if 'mon_osd_down_out_interval' is set to zero. + // Having this option set to zero on the leader acts much like the + // 'noout' flag. It's hard to figure out what's going wrong with clusters + // without the 'noout' flag set but acting like that just the same, so + // we report a HEALTH_WARN in case this option is set to zero. + // This is an ugly hack to get the warning out, but until we find a way + // to spread global options throughout the mon cluster and have all mons + // using a base set of the same options, we need to work around this sort + // of things. + // There's also the obvious drawback that if this is set on a single + // monitor on a 3-monitor cluster, this warning will only be shown every + // third monitor connection. + if (g_conf->mon_warn_on_osd_down_out_interval_zero && + g_conf->mon_osd_down_out_interval == 0) { + ostringstream ss; + ss << "mon." 
<< mon->name << " has mon_osd_down_out_interval set to 0"; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + if (detail) { + ss << "; this has the same effect as the 'noout' flag"; + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + } + get_pools_health(summary, detail); } } -- 2.47.3