OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL, true) // warn if crush straw_calc_version==0
OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0'
OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL, true)
+OPTION(mon_warn_osd_usage_percent, OPT_FLOAT, .40) // warn if the gap between the most- and least-utilized OSD exceeds this value; expressed as a fraction (.40 == 40%), 0 disables the check
OPTION(mon_min_osdmap_epochs, OPT_INT, 500)
OPTION(mon_max_pgmap_epochs, OPT_INT, 500)
OPTION(mon_max_log_epochs, OPT_INT, 500)
}
}
+ if (g_conf->mon_warn_osd_usage_percent) { // health check on OSD utilization spread; a threshold of 0 skips it entirely
+ float max_osd_perc_avail = 0.0, min_osd_perc_avail = 1.0; // NOTE(review): despite "avail" in the names, these track the highest/lowest *used* fraction seen
+ for (auto p = pg_map.osd_stat.begin(); p != pg_map.osd_stat.end(); ++p) {
+ // kb should never be 0, but avoid divide by zero in case of corruption
+ if (p->second.kb <= 0)
+ continue;
+ float perc_avail = ((float)(p->second.kb - p->second.kb_avail)) / ((float)p->second.kb); // used fraction: (kb - kb_avail) / kb
+ if (perc_avail > max_osd_perc_avail)
+ max_osd_perc_avail = perc_avail;
+ if (perc_avail < min_osd_perc_avail)
+ min_osd_perc_avail = perc_avail;
+ } // if every entry was skipped, max stays 0.0 and min stays 1.0, so the difference below is -1.0
+ if ((max_osd_perc_avail - min_osd_perc_avail) > g_conf->mon_warn_osd_usage_percent) { // compared as fractions, not percentages
+ ostringstream ss;
+ ss << "Difference in osd space utilization " << ((max_osd_perc_avail - min_osd_perc_avail) *100) << "% greater than " << (g_conf->mon_warn_osd_usage_percent * 100) << "%"; // both values are scaled by 100 for display only
+ summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+ if (detail) // detail is null-checked before use; when present it receives the same message as summary
+ detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+ }
+ }
+
// recovery
list<string> sl;
pg_map.overall_recovery_summary(NULL, &sl);