From: xie xingguo Date: Thu, 26 Sep 2019 07:32:04 +0000 (+0800) Subject: mgr/balancer: add per pool concurrent optimization limit X-Git-Tag: v15.1.0~1366^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=86444dbfe5d8478530182116507d034d9e180f5e;p=ceph-ci.git mgr/balancer: add per pool concurrent optimization limit The main point is that in general we have physically isolated storage pools in our products, so the cluster-wide target_max_misplaced_ratio option would not make much sense in such a case. Introducing a fine-grained alternate throttling mechanism instead makes things much better now! Signed-off-by: xie xingguo --- diff --git a/src/pybind/mgr/balancer/module.py b/src/pybind/mgr/balancer/module.py index 7ec02e3b859..9619b27a47b 100644 --- a/src/pybind/mgr/balancer/module.py +++ b/src/pybind/mgr/balancer/module.py @@ -901,7 +901,6 @@ class Module(MgrModule): detail = 'Unrecognized mode %s' % plan.mode self.log.info(detail) return -errno.EINVAL, detail - ## def do_upmap(self, plan): self.log.info('do_upmap') @@ -941,12 +940,29 @@ class Module(MgrModule): # shuffle so all pools get equal (in)attention random.shuffle(classified_pools) for it in classified_pools: - did = ms.osdmap.calc_pg_upmaps(inc, max_deviation, left, it) + pool_dump = osdmap_dump.get('pools', []) + num_pg = 0 + for p in pool_dump: + if p['pool_name'] in it: + num_pg += p['pg_num'] + + # note that here we deliberately exclude any scrubbing pgs too + # since scrubbing activities have significant impacts on performance + pool_ids = list(p['pool'] for p in pool_dump if p['pool_name'] in it) + num_pg_active_clean = 0 + pg_dump = self.get('pg_dump') + for p in pg_dump['pg_stats']: + pg_pool = p['pgid'].split('.')[0] + if len(pool_ids) and int(pg_pool) not in pool_ids: + continue + if p['state'] == 'active+clean': + num_pg_active_clean += 1 + + available = max_optimizations - (num_pg - num_pg_active_clean) + did = ms.osdmap.calc_pg_upmaps(inc, max_deviation, available, it) +
self.log.info('prepared %d changes for pool(s) %s' % (did, it)) total_did += did - left -= did - if left <= 0: - break - self.log.info('prepared %d/%d changes' % (total_did, max_optimizations)) + self.log.info('prepared %d changes in total' % total_did) if total_did == 0: return -errno.EALREADY, 'Unable to find further optimization, ' \ 'or pool(s)\' pg_num is decreasing, ' \