git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/DaemonServer: add option to bypass careful throttling for thrasher
author Sage Weil <sage@redhat.com>
Wed, 19 Sep 2018 22:04:38 +0000 (17:04 -0500)
committer Sage Weil <sage@redhat.com>
Sat, 20 Oct 2018 20:21:58 +0000 (15:21 -0500)
Signed-off-by: Sage Weil <sage@redhat.com>
qa/tasks/thrashosds.py
src/common/options.cc
src/mgr/DaemonServer.cc

index 7af32f0ff952f8e5de79fae4de217136c6c495f8..4f8ed5b608c7a163501b10b58a9e9df0cdbfe6ac 100644 (file)
@@ -127,6 +127,8 @@ def task(ctx, config):
     chance_thrash_pg_upmap: 1.0
     chance_thrash_pg_upmap_items: 1.0
 
+    aggressive_pg_num_changes: (true)  whether we should bypass the careful throttling of pg_num and pgp_num changes in mgr's adjust_pgs() controller
+
     example:
 
     tasks:
@@ -153,6 +155,7 @@ def task(ctx, config):
     config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0)
     # add default value for random_eio
     config['random_eio'] = config.get('random_eio', 0.0)
+    aggro = config.get('aggressive_pg_num_changes', True)
 
     log.info("config is {config}".format(config=str(config)))
 
@@ -190,6 +193,12 @@ def task(ctx, config):
         if config.get(f):
             cluster_manager.config[f] = config.get(f)
 
+    if aggro:
+        cluster_manager.raw_cluster_cmd(
+            'config', 'set', 'mgr',
+            'mgr_debug_aggressive_pg_num_changes',
+            'true')
+
     log.info('Beginning thrashosds...')
     thrash_proc = ceph_manager.Thrasher(
         cluster_manager,
@@ -204,3 +213,7 @@ def task(ctx, config):
         cluster_manager.wait_for_all_osds_up()
         cluster_manager.flush_all_pg_stats()
         cluster_manager.wait_for_recovery(config.get('timeout', 360))
+        if aggro:
+            cluster_manager.raw_cluster_cmd(
+                'config', 'rm', 'mgr',
+                'mgr_debug_aggressive_pg_num_changes')
index d7d892f802df25f10e6f78d4de2ec70cf86d5e4a..b8643b8bcba87ae55ac015f2354943961bea8c86 100644 (file)
@@ -4927,6 +4927,11 @@ std::vector<Option> get_global_options() {
     .set_default(1.0)
     .set_description("Time to wait during shutdown to deregister service with mgr"),
 
+    Option("mgr_debug_aggressive_pg_num_changes", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description("Bypass most throttling and safety checks in pg[p]_num controller")
+    .add_service("mgr"),
+
     Option("mon_mgr_digest_period", Option::TYPE_INT, Option::LEVEL_DEV)
     .set_default(5)
     .add_service("mon")
index 899692b1ca8e74da59ae344a65b445d69d094a5d..b65119c56c62398ffcd3306fc53fa5d6aacbd551 100644 (file)
@@ -2225,6 +2225,7 @@ void DaemonServer::adjust_pgs()
   dout(20) << dendl;
   unsigned max = std::max<int64_t>(1, g_conf()->mon_osd_max_creating_pgs);
   double max_misplaced = g_conf().get_val<double>("target_max_misplaced_ratio");
+  bool aggro = g_conf().get_val<bool>("mgr_debug_aggressive_pg_num_changes");
 
   map<string,unsigned> pg_num_to_set;
   map<string,unsigned> pgp_num_to_set;
@@ -2398,15 +2399,15 @@ void DaemonServer::adjust_pgs()
                         << " pgp_num " << p.get_pgp_num()
                         << " - increase blocked by pg_num " << p.get_pg_num()
                         << dendl;
-             } else if (inactive_pgs_ratio > 0 ||
-                 degraded_ratio > 0 ||
-                 unknown_pgs_ratio > 0) {
+             } else if (!aggro && (inactive_pgs_ratio > 0 ||
+                                   degraded_ratio > 0 ||
+                                   unknown_pgs_ratio > 0)) {
                dout(10) << "pool " << i.first
                         << " pgp_num_target " << p.get_pgp_num_target()
                         << " pgp_num " << p.get_pgp_num()
                         << " - inactive|degraded|unknown pgs, deferring pgp_num"
                         << " update" << dendl;
-             } else if (misplaced_ratio > max_misplaced) {
+             } else if (!aggro && (misplaced_ratio > max_misplaced)) {
                dout(10) << "pool " << i.first
                         << " pgp_num_target " << p.get_pgp_num_target()
                         << " pgp_num " << p.get_pgp_num()
@@ -2421,20 +2422,25 @@ void DaemonServer::adjust_pgs()
                // single adjustment that's more than half of the
                // max_misplaced, to somewhat limit the magnitude of
                // our potential error here.
-               double room =
-                 std::min<double>(max_misplaced - misplaced_ratio,
-                                  misplaced_ratio / 2.0);
-               unsigned estmax = std::max<unsigned>(
-                 (double)p.get_pg_num() * room, 1u);
-               int delta = target - p.get_pgp_num();
-               int next = p.get_pgp_num();
-               if (delta < 0) {
-                 next += std::max<int>(-estmax, delta);
+               int next;
+               if (aggro) {
+                 next = target;
                } else {
-                 next += std::min<int>(estmax, delta);
+                 double room =
+                   std::min<double>(max_misplaced - misplaced_ratio,
+                                    misplaced_ratio / 2.0);
+                 unsigned estmax = std::max<unsigned>(
+                   (double)p.get_pg_num() * room, 1u);
+                 int delta = target - p.get_pgp_num();
+                 next = p.get_pgp_num();
+                 if (delta < 0) {
+                   next += std::max<int>(-estmax, delta);
+                 } else {
+                   next += std::min<int>(estmax, delta);
+                 }
+                 dout(20) << " room " << room << " estmax " << estmax
+                          << " delta " << delta << " next " << next << dendl;
                }
-               dout(20) << " room " << room << " estmax " << estmax
-                        << " delta " << delta << " next " << next << dendl;
                dout(10) << "pool " << i.first
                         << " pgp_num_target " << p.get_pgp_num_target()
                         << " pgp_num " << p.get_pgp_num()