From: xie xingguo Date: Mon, 16 Dec 2019 08:39:53 +0000 (+0800) Subject: mgr/balancer: eliminate usage of MS infrastructure for upmap mode X-Git-Tag: v15.1.0~395^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=778aae2d4e291f68f4946a052842b43df2a8a909;p=ceph.git mgr/balancer: eliminate usage of MS infrastructure for upmap mode This can avoid excessive RAM and CPU usage for large Ceph clusters when balancer and upmap is enabled. Signed-off-by: xie xingguo --- diff --git a/src/pybind/mgr/balancer/module.py b/src/pybind/mgr/balancer/module.py index 041938a7471b..9940eb2730df 100644 --- a/src/pybind/mgr/balancer/module.py +++ b/src/pybind/mgr/balancer/module.py @@ -48,16 +48,25 @@ class MappingState: return float(misplaced) / float(num) return 0.0 -class Plan: - def __init__(self, name, ms, pools): - self.mode = 'unknown' +class Plan(object): + def __init__(self, name, mode, osdmap, pools): self.name = name - self.initial = ms + self.mode = mode + self.osdmap = osdmap + self.osdmap_dump = osdmap.dump() self.pools = pools - self.osd_weights = {} self.compat_ws = {} - self.inc = ms.osdmap.new_incremental() + self.inc = osdmap.new_incremental() + self.pg_status = {} + +class MsPlan(Plan): + """ + Plan with a preloaded MappingState member. + """ + def __init__(self, name, mode, ms, pools): + super(MsPlan, self).__init__(name, mode, ms.osdmap, pools) + self.initial = ms def final_state(self): self.inc.set_osd_reweights(self.osd_weights) @@ -521,7 +530,18 @@ class Module(MgrModule): ms = MappingState(osdmap, self.get("pg_dump"), 'pool "%s"' % option) else: pools = plan.pools - ms = plan.final_state() + if plan.mode == 'upmap': + # Note that for upmap, to improve the efficiency, + # we use a basic version of Plan without keeping the obvious + # *redundant* MS member. + # Hence ms might not be accurate here since we are basically + # using an old snapshotted osdmap vs a fresh copy of pg_dump. + # It should not be a big deal though.. + ms = MappingState(plan.osdmap, + self.get("pg_dump"), + 'plan "%s"' % plan.name) + else: + ms = plan.final_state() else: ms = MappingState(self.get_osdmap(), self.get("pg_dump"), @@ -669,11 +689,20 @@ class Module(MgrModule): self.event.clear() def plan_create(self, name, osdmap, pools): - plan = Plan(name, - MappingState(osdmap, - self.get("pg_dump"), - 'plan %s initial' % name), - pools) + mode = self.get_module_option('mode') + if mode == 'upmap': + # drop unnecessary MS member for upmap mode. + # this way we could effectively eliminate the usage of a + # complete pg_dump, which can become horribly inefficient + # as pg_num grows.. + plan = Plan(name, mode, osdmap, pools) + else: + plan = MsPlan(name, + mode, + MappingState(osdmap, + self.get("pg_dump"), + 'plan %s initial' % name), + pools) return plan def plan_rm(self, name): @@ -893,7 +922,6 @@ class Module(MgrModule): def optimize(self, plan): self.log.info('Optimize plan %s' % plan.name) - plan.mode = self.get_module_option('mode') max_misplaced = self.get_ceph_option('target_max_misplaced_ratio') self.log.info('Mode %s, max misplaced %f' % (plan.mode, max_misplaced)) @@ -903,6 +931,7 @@ class Module(MgrModule): degraded = info.get('degraded_ratio', 0.0) inactive = info.get('inactive_pgs_ratio', 0.0) misplaced = info.get('misplaced_ratio', 0.0) + plan.pg_status = info self.log.debug('unknown %f degraded %f inactive %f misplaced %g', unknown, degraded, inactive, misplaced) if unknown > 0.0: @@ -940,12 +969,12 @@ class Module(MgrModule): self.log.info('do_upmap') max_optimizations = self.get_module_option('upmap_max_optimizations') max_deviation = self.get_module_option('upmap_max_deviation') + osdmap_dump = plan.osdmap_dump - ms = plan.initial if len(plan.pools): pools = plan.pools else: # all - pools = [str(i['pool_name']) for i in ms.osdmap_dump.get('pools',[])] + pools = [str(i['pool_name']) for i in osdmap_dump.get('pools',[])] if len(pools) == 0: detail = 'No pools available' self.log.info(detail) @@ -954,7 +983,6 @@ class Module(MgrModule): inc = plan.inc total_did = 0 left = max_optimizations - osdmap_dump = ms.osdmap_dump pools_with_pg_merge = [p['pool_name'] for p in osdmap_dump.get('pools', []) if p['pg_num'] > p['pg_num_target']] crush_rule_by_pool_name = dict((p['pool_name'], p['crush_rule']) for p in osdmap_dump.get('pools', [])) @@ -984,16 +1012,15 @@ class Module(MgrModule): # since scrubbing activities have significant impacts on performance pool_ids = list(p['pool'] for p in pool_dump if p['pool_name'] in it) num_pg_active_clean = 0 - pg_dump = ms.pg_dump - for p in pg_dump['pg_stats']: - pg_pool = p['pgid'].split('.')[0] - if len(pool_ids) and int(pg_pool) not in pool_ids: + for p in plan.pg_status.get('pgs_by_pool_state', []): + if len(pool_ids) and p['pool_id'] not in pool_ids: continue - if p['state'] == 'active+clean': - num_pg_active_clean += 1 - + for s in p['pg_state_counts']: + if s['state_name'] == 'active+clean': + num_pg_active_clean += s['count'] + break available = max_optimizations - (num_pg - num_pg_active_clean) - did = ms.osdmap.calc_pg_upmaps(inc, max_deviation, available, it) + did = plan.osdmap.calc_pg_upmaps(inc, max_deviation, available, it) self.log.info('prepared %d changes for pool(s) %s' % (did, it)) total_did += did self.log.info('prepared %d changes in total' % total_did)