git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: Add warning if diff in OSD usage > config mon_warn_osd_usage_percent (40%)
author David Zafman <dzafman@redhat.com>
Wed, 15 Feb 2017 00:37:07 +0000 (16:37 -0800)
committer Nathan Cutler <ncutler@suse.com>
Tue, 18 Jul 2017 20:12:57 +0000 (22:12 +0200)
Signed-off-by: David Zafman <dzafman@redhat.com>
(cherry picked from commit c8004e6558359fb542e45bb4b483a6c91afdc0b4)

src/common/config_opts.h
src/mon/PGMonitor.cc

index 3fea4206aaa2f5b2393de0bd5a86a7d6e3e98f8b..1c55505c47d80348c29a3ff59955f5f22d68b9b2 100644 (file)
@@ -305,6 +305,7 @@ OPTION(mon_crush_min_required_version, OPT_STR, "firefly")
 OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL, true) // warn if crush straw_calc_version==0
 OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0'
 OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL, true)
+OPTION(mon_warn_osd_usage_percent, OPT_FLOAT, .40) // warn if the gap between the most-used and least-used OSD exceeds this fraction of capacity (0.40 == 40%); 0 disables the check
 OPTION(mon_min_osdmap_epochs, OPT_INT, 500)
 OPTION(mon_max_pgmap_epochs, OPT_INT, 500)
 OPTION(mon_max_log_epochs, OPT_INT, 500)
index 8450b784a5ecea3d32498d64479737bf50b850db..d3761dd95cbf5683d833655e2c2c621fcc28592f 100644 (file)
@@ -1796,6 +1796,27 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
     }
   }
 
+  if (g_conf->mon_warn_osd_usage_percent) {
+    float max_osd_perc_avail = 0.0, min_osd_perc_avail = 1.0;
+    for (auto p = pg_map.osd_stat.begin(); p != pg_map.osd_stat.end(); ++p) {
+      // kb should never be 0, but avoid divide by zero in case of corruption
+      if (p->second.kb <= 0)
+        continue;
+      float perc_avail = ((float)(p->second.kb - p->second.kb_avail)) / ((float)p->second.kb);
+      if (perc_avail > max_osd_perc_avail)
+        max_osd_perc_avail = perc_avail;
+      if (perc_avail < min_osd_perc_avail)
+        min_osd_perc_avail = perc_avail;
+    }
+    if ((max_osd_perc_avail - min_osd_perc_avail) > g_conf->mon_warn_osd_usage_percent) {
+      ostringstream ss;
+      ss << "Difference in osd space utilization " << ((max_osd_perc_avail - min_osd_perc_avail) *100) << "% greater than " << (g_conf->mon_warn_osd_usage_percent * 100) << "%";
+      summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+      if (detail)
+        detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+    }
+  }
+
   // recovery
   list<string> sl;
   pg_map.overall_recovery_summary(NULL, &sl);