git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/OSDMap: health alert for non-power-of-two pg_num
author Sage Weil <sage@redhat.com>
Mon, 23 Sep 2019 19:40:48 +0000 (14:40 -0500)
committer Nathan Cutler <ncutler@suse.com>
Fri, 24 Jan 2020 12:36:03 +0000 (13:36 +0100)
Fixes: https://tracker.ceph.com/issues/41647
Signed-off-by: Sage Weil <sage@redhat.com>
(cherry picked from commit 6e46b1c0e50ad377d5db058e0452b0d956b3fad6)

Conflicts:
PendingReleaseNotes
- dropped this change, since it does not apply to nautilus
        src/osd/OSDMap.cc
- checks->add() takes an additional argument in master - dropped it

doc/rados/operations/health-checks.rst
src/common/options.cc
src/mon/OSDMonitor.cc
src/osd/OSDMap.cc
src/osd/OSDMap.h
src/tools/osdmaptool.cc

index 2ed2da4871a9543df69dff3316604efadfdf860f..3cbf9eae4afae64adb9ec7ad37becfadb9674b2d 100644 (file)
@@ -636,6 +636,23 @@ The PG count for existing pools can be increased or new pools can be created.
 Please refer to :ref:`choosing-number-of-placement-groups` for more
 information.
 
+POOL_PG_NUM_NOT_POWER_OF_TWO
+____________________________
+
+One or more pools has a ``pg_num`` value that is not a power of two.
+Although this is not strictly incorrect, it does lead to a less
+balanced distribution of data because some PGs have roughly twice as
+much data as others.
+
+This is easily corrected by setting the ``pg_num`` value for the
+affected pool(s) to a nearby power of two::
+
+  ceph osd pool set <pool-name> pg_num <value>
+
+This health warning can be disabled with::
+
+  ceph config set global mon_warn_on_pool_pg_num_not_power_of_two false
+
 POOL_TOO_FEW_PGS
 ________________
 
index 2bee6e90fd60c1a37a322463b0be73d27f2ee163..141db25016ca3165fd1c300d2103622320aca624 100644 (file)
@@ -1735,6 +1735,11 @@ std::vector<Option> get_global_options() {
     .add_service("mgr")
     .set_description("issue POOL_APP_NOT_ENABLED health warning if pool has not application enabled"),
 
+    Option("mon_warn_on_pool_pg_num_not_power_of_two", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(true)
+    .add_service("mon")
+    .set_description("issue POOL_PG_NUM_NOT_POWER_OF_TWO warning if pool has a non-power-of-two pg_num value"),
+
     Option("mon_warn_on_misplaced", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
     .set_default(false)
     .add_service("mgr")
index 4f8f4d421fa4e178e55add6d495f1b542f37a1d3..972a47a8f220cc8e17277d241c3cab19df1f761d 100644 (file)
@@ -1870,7 +1870,7 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t)
 
   // health
   health_check_map_t next;
-  tmp.check_health(&next);
+  tmp.check_health(g_ceph_context, &next);
   encode_health(next, t);
 }
 
index a97c1c4e1dea9fb20c3e5818a05ed630c372b6e0..889131210f753201de542b1372829081f993d442 100644 (file)
@@ -5413,7 +5413,8 @@ void print_osd_utilization(const OSDMap& osdmap,
   }
 }
 
-void OSDMap::check_health(health_check_map_t *checks) const
+void OSDMap::check_health(CephContext *cct,
+                         health_check_map_t *checks) const
 {
   int num_osds = get_num_osds();
 
@@ -5839,6 +5840,27 @@ void OSDMap::check_health(health_check_map_t *checks) const
       d.detail.swap(nearfull_detail);
     }
   }
+
+  // POOL_PG_NUM_NOT_POWER_OF_TWO
+  if (cct->_conf.get_val<bool>("mon_warn_on_pool_pg_num_not_power_of_two")) {
+    list<string> detail;
+    for (auto it : get_pools()) {
+      if (!isp2(it.second.get_pg_num_target())) {
+       ostringstream ss;
+       ss << "pool '" << get_pool_name(it.first)
+          << "' pg_num " << it.second.get_pg_num_target()
+          << " is not a power of two";
+       detail.push_back(ss.str());
+      }
+    }
+    if (!detail.empty()) {
+      ostringstream ss;
+      ss << detail.size() << " pool(s) have non-power-of-two pg_num";
+      auto& d = checks->add("POOL_PG_NUM_NOT_POWER_OF_TWO", HEALTH_WARN,
+                           ss.str());
+      d.detail.swap(detail);
+    }
+  }
 }
 
 int OSDMap::parse_osd_id_list(const vector<string>& ls, set<int> *out,
index a5414896f473ff3d8cd7228454b0e1d95997141b..64c5f88d8902559a2561714765afc73a1af52f1d 100644 (file)
@@ -1499,7 +1499,7 @@ public:
   static void generate_test_instances(list<OSDMap*>& o);
   bool check_new_blacklist_entries() const { return new_blacklist_entries; }
 
-  void check_health(health_check_map_t *checks) const;
+  void check_health(CephContext *cct, health_check_map_t *checks) const;
 
   int parse_osd_id_list(const vector<string>& ls,
                        set<int> *out,
index 160adce0ed7a428f8167c1d0f855fde76bd04eae..9d62f00db7dbd5cc146bf201a51bd82e50190934 100644 (file)
@@ -739,7 +739,7 @@ skip_upmap:
 
   if (health) {
     health_check_map_t checks;
-    osdmap.check_health(&checks);
+    osdmap.check_health(g_ceph_context, &checks);
     JSONFormatter jf(true);
     jf.dump_object("checks", checks);
     jf.flush(cout);