git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: Add warning if diff in OSD usage > config mon_warn_osd_usage_percent (10%)
author David Zafman <dzafman@redhat.com>
Wed, 15 Feb 2017 00:37:07 +0000 (16:37 -0800)
committer Alexey Sheplyakov <asheplyakov@mirantis.com>
Mon, 19 Jun 2017 05:46:42 +0000 (09:46 +0400)
Signed-off-by: David Zafman <dzafman@redhat.com>
(cherry picked from commit c8004e6558359fb542e45bb4b483a6c91afdc0b4)

Conflicts:
src/common/config_opts.h: some options in the original patch hunk
  are not supported in Jewel, manually add the new `mon_warn_osd_usage_percent'
  option.

src/common/config_opts.h
src/mon/PGMonitor.cc

index 3967bf6633e872d187f71b1aeb0ba5b3304e644b..4903ce2832583297c1d47de9cc6b824ef03cf8c1 100644 (file)
@@ -273,6 +273,7 @@ OPTION(mon_crush_min_required_version, OPT_STR, "firefly")
 OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL, true) // warn if crush straw_calc_version==0
 OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0'
 OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL, true)
+OPTION(mon_warn_osd_usage_percent, OPT_FLOAT, .40) // warn if difference in usage percent between OSDs exceeds specified percent
 OPTION(mon_min_osdmap_epochs, OPT_INT, 500)
 OPTION(mon_max_pgmap_epochs, OPT_INT, 500)
 OPTION(mon_max_log_epochs, OPT_INT, 500)
index 9ff5d48fb7829d94131a141f30ed9dc711e3a61a..08e9f9c7ae44e13a47ee4f8a7ca7e5639cb24cad 100644 (file)
@@ -2336,6 +2336,27 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
     }
   }
 
+  if (g_conf->mon_warn_osd_usage_percent) {
+    float max_osd_perc_avail = 0.0, min_osd_perc_avail = 1.0;
+    for (auto p = pg_map.osd_stat.begin(); p != pg_map.osd_stat.end(); ++p) {
+      // kb should never be 0, but avoid divide by zero in case of corruption
+      if (p->second.kb <= 0)
+        continue;
+      float perc_avail = ((float)(p->second.kb - p->second.kb_avail)) / ((float)p->second.kb);
+      if (perc_avail > max_osd_perc_avail)
+        max_osd_perc_avail = perc_avail;
+      if (perc_avail < min_osd_perc_avail)
+        min_osd_perc_avail = perc_avail;
+    }
+    if ((max_osd_perc_avail - min_osd_perc_avail) > g_conf->mon_warn_osd_usage_percent) {
+      ostringstream ss;
+      ss << "Difference in osd space utilization " << ((max_osd_perc_avail - min_osd_perc_avail) *100) << "% greater than " << (g_conf->mon_warn_osd_usage_percent * 100) << "%";
+      summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+      if (detail)
+        detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+    }
+  }
+
   // recovery
   list<string> sl;
   pg_map.overall_recovery_summary(NULL, &sl);