From c6e4fff5e3f5c5a45b3c4fb647548bd2a2dfea0c Mon Sep 17 00:00:00 2001 From: David Zafman Date: Tue, 14 Feb 2017 16:37:07 -0800 Subject: [PATCH] mon: Add warning if diff in OSD usage > config mon_warn_osd_usage_percent (10%) Signed-off-by: David Zafman (cherry picked from commit c8004e6558359fb542e45bb4b483a6c91afdc0b4) Conflicts: src/common/config_opts.h: some options in the original patch hunk are not supported in Jewel, manually add the new `mon_warn_osd_usage_percent' option. --- src/common/config_opts.h | 1 + src/mon/PGMonitor.cc | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 3967bf6633e87..4903ce2832583 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -273,6 +273,7 @@ OPTION(mon_crush_min_required_version, OPT_STR, "firefly") OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL, true) // warn if crush straw_calc_version==0 OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0' OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL, true) +OPTION(mon_warn_osd_usage_percent, OPT_FLOAT, .40) // warn if difference in usage percent between OSDs exceeds specified percent OPTION(mon_min_osdmap_epochs, OPT_INT, 500) OPTION(mon_max_pgmap_epochs, OPT_INT, 500) OPTION(mon_max_log_epochs, OPT_INT, 500) diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index 9ff5d48fb7829..08e9f9c7ae44e 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -2336,6 +2336,27 @@ void PGMonitor::get_health(list >& summary, } } + if (g_conf->mon_warn_osd_usage_percent) { + float max_osd_perc_avail = 0.0, min_osd_perc_avail = 1.0; + for (auto p = pg_map.osd_stat.begin(); p != pg_map.osd_stat.end(); ++p) { + // kb should never be 0, but avoid divide by zero in case of corruption + if (p->second.kb <= 0) + continue; + float perc_avail = ((float)(p->second.kb - p->second.kb_avail)) / ((float)p->second.kb); + if (perc_avail > max_osd_perc_avail) + max_osd_perc_avail = perc_avail; + if (perc_avail < min_osd_perc_avail) + min_osd_perc_avail = perc_avail; + } + if ((max_osd_perc_avail - min_osd_perc_avail) > g_conf->mon_warn_osd_usage_percent) { + ostringstream ss; + ss << "Difference in osd space utilization " << ((max_osd_perc_avail - min_osd_perc_avail) *100) << "% greater than " << (g_conf->mon_warn_osd_usage_percent * 100) << "%"; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + if (detail) + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + } + // recovery list sl; pg_map.overall_recovery_summary(NULL, &sl); -- 2.39.5