]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/balancer: add per pool concurrent optimization limit
author: xie xingguo <xie.xingguo@zte.com.cn>
Thu, 26 Sep 2019 07:32:04 +0000 (15:32 +0800)
committer: xie xingguo <xie.xingguo@zte.com.cn>
Sat, 28 Sep 2019 00:31:45 +0000 (08:31 +0800)
the main point is that in general we have physically isolated storage
pools in our products, so the cluster-wide target_max_misplaced_ratio
option would not make much sense in such a case.
Introducing a fine-grained, per-pool throttling mechanism instead makes
things much better.

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
src/pybind/mgr/balancer/module.py

index 7ec02e3b8596a21c03ec3ff1ee65a19cb8f8d0f9..9619b27a47b8948dbf40b9150c496567c00e2b54 100644 (file)
@@ -901,7 +901,6 @@ class Module(MgrModule):
                 detail = 'Unrecognized mode %s' % plan.mode
                 self.log.info(detail)
                 return -errno.EINVAL, detail
-        ##
 
     def do_upmap(self, plan):
         self.log.info('do_upmap')
@@ -941,12 +940,29 @@ class Module(MgrModule):
         # shuffle so all pools get equal (in)attention
         random.shuffle(classified_pools)
         for it in classified_pools:
-            did = ms.osdmap.calc_pg_upmaps(inc, max_deviation, left, it)
+            pool_dump = osdmap_dump.get('pools', [])
+            num_pg = 0
+            for p in pool_dump:
+                if p['pool_name'] in it:
+                    num_pg += p['pg_num']
+
+            # note that here we deliberately exclude any scrubbing pgs too
+            # since scrubbing activities have significant impacts on performance
+            pool_ids = list(p['pool'] for p in pool_dump if p['pool_name'] in it)
+            num_pg_active_clean = 0
+            pg_dump = self.get('pg_dump')
+            for p in pg_dump['pg_stats']:
+                pg_pool = p['pgid'].split('.')[0]
+                if len(pool_ids) and int(pg_pool) not in pool_ids:
+                    continue
+                if p['state'] == 'active+clean':
+                    num_pg_active_clean += 1
+
+            available = max_optimizations - (num_pg - num_pg_active_clean)
+            did = ms.osdmap.calc_pg_upmaps(inc, max_deviation, available, it)
+            self.log.info('prepared %d changes for pool(s) %s' % (did, it))
             total_did += did
-            left -= did
-            if left <= 0:
-                break
-        self.log.info('prepared %d/%d changes' % (total_did, max_optimizations))
+        self.log.info('prepared %d changes in total' % total_did)
         if total_did == 0:
             return -errno.EALREADY, 'Unable to find further optimization, ' \
                                     'or pool(s)\' pg_num is decreasing, ' \