mon/PGMonitor: health warn if pool has relatively high objects/pg

author Sage Weil <sage@inktank.com>

Sat, 21 Sep 2013 06:08:53 +0000 (23:08 -0700)

committer Sage Weil <sage@inktank.com>

Mon, 23 Sep 2013 21:57:22 +0000 (14:57 -0700)
author Sage Weil <sage@inktank.com>
Sat, 21 Sep 2013 06:08:53 +0000 (23:08 -0700)
committer Sage Weil <sage@inktank.com>
Mon, 23 Sep 2013 21:57:22 +0000 (14:57 -0700)
diff --git a/src/common/config_opts.h b/src/common/config_opts.h

index 51212b5e4bf20bc03d0cc277beada25f91a5ad83..1f616e2c9700759923c8ae173e05b4533a0362e9 100644 (file)
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -159,6 +159,7 @@ OPTION(mon_accept_timeout, OPT_FLOAT, 10.0)    // on leader, if paxos update isn
  OPTION(mon_pg_create_interval, OPT_FLOAT, 30.0) // no more than every 30s
  OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info)
  OPTION(mon_pg_warn_min_per_osd, OPT_INT, 20)  // min # pgs per (in) osd before we warn the admin
+OPTION(mon_pg_warn_max_object_skew, OPT_FLOAT, 10.0) // max skew from average in objects per pg
  OPTION(mon_osd_full_ratio, OPT_FLOAT, .95) // what % full makes an OSD "full"
  OPTION(mon_osd_nearfull_ratio, OPT_FLOAT, .85) // what % full makes an OSD near full
  OPTION(mon_globalid_prealloc, OPT_INT, 100)   // how many globalids to prealloc
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc

index d1676b26b694374839e69d73d61a0fe4576d6c40..76cfde7de0eb81a5924e65a297539fb0b454a0c7 100644 (file)
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -1873,6 +1873,24 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
         if (detail)
           detail->push_back(make_pair(HEALTH_WARN, ss.str()));
        }
+      int average_objects_per_pg = pg_map.pg_sum.stats.sum.num_objects / pg_map.pg_stat.size();
+      if (average_objects_per_pg > 0) {
+       int objects_per_pg = p->second.stats.sum.num_objects / pi->get_pg_num();
+       float ratio = (float)objects_per_pg / (float)average_objects_per_pg;
+       if (g_conf->mon_pg_warn_max_object_skew > 0 &&
+           ratio > g_conf->mon_pg_warn_max_object_skew) {
+         ostringstream ss;
+         ss << "pool " << mon->osdmon()->osdmap.get_pool_name(p->first) << " has too few pgs";
+         summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+         if (detail) {
+           ostringstream ss;
+           ss << "pool " << mon->osdmon()->osdmap.get_pool_name(p->first) << " objects per pg ("
+              << objects_per_pg << ") is more than " << ratio << " times cluster average ("
+              << average_objects_per_pg << ")";
+           detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+         }
+       }
+      }
      }
    }
  }
author	Sage Weil <sage@inktank.com>
	Sat, 21 Sep 2013 06:08:53 +0000 (23:08 -0700)
committer	Sage Weil <sage@inktank.com>
	Mon, 23 Sep 2013 21:57:22 +0000 (14:57 -0700)
src/common/config_opts.h		patch \| blob \| history
src/mon/PGMonitor.cc		patch \| blob \| history