From: xie xingguo <xie.xingguo@zte.com.cn>
Date: Mon, 16 Dec 2019 08:39:53 +0000 (+0800)
Subject: mgr/balancer: eliminate usage of MS infrastructure for upmap mode
X-Git-Tag: v15.1.0~395^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=778aae2d4e291f68f4946a052842b43df2a8a909;p=ceph.git

mgr/balancer: eliminate usage of MS infrastructure for upmap mode

This can avoid excessive RAM and CPU usage for large Ceph clusters
when balancer and upmap is enabled.

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
---

diff --git a/src/pybind/mgr/balancer/module.py b/src/pybind/mgr/balancer/module.py
index 041938a7471b..9940eb2730df 100644
--- a/src/pybind/mgr/balancer/module.py
+++ b/src/pybind/mgr/balancer/module.py
@@ -48,16 +48,25 @@ class MappingState:
             return float(misplaced) / float(num)
         return 0.0
 
-class Plan:
-    def __init__(self, name, ms, pools):
-        self.mode = 'unknown'
+class Plan(object):
+    def __init__(self, name, mode, osdmap, pools):
         self.name = name
-        self.initial = ms
+        self.mode = mode
+        self.osdmap = osdmap
+        self.osdmap_dump = osdmap.dump()
         self.pools = pools
-
         self.osd_weights = {}
         self.compat_ws = {}
-        self.inc = ms.osdmap.new_incremental()
+        self.inc = osdmap.new_incremental()
+        self.pg_status = {}
+
+class MsPlan(Plan):
+    """
+    Plan with a preloaded MappingState member.
+    """
+    def __init__(self, name, mode, ms, pools):
+        super(MsPlan, self).__init__(name, mode, ms.osdmap, pools)
+        self.initial = ms
 
     def final_state(self):
         self.inc.set_osd_reweights(self.osd_weights)
@@ -521,7 +530,18 @@ class Module(MgrModule):
                     ms = MappingState(osdmap, self.get("pg_dump"), 'pool "%s"' % option)
                 else:
                     pools = plan.pools
-                    ms = plan.final_state()
+                    if plan.mode == 'upmap':
+                        # Note that for upmap, to improve the efficiency,
+                        # we use a basic version of Plan without keeping the obvious
+                        # *redundant* MS member.
+                        # Hence ms might not be accurate here since we are basically
+                        # using an old snapshotted osdmap vs a fresh copy of pg_dump.
+                        # It should not be a big deal though..
+                        ms = MappingState(plan.osdmap,
+                                          self.get("pg_dump"),
+                                          'plan "%s"' % plan.name)
+                    else:
+                        ms = plan.final_state()
             else:
                 ms = MappingState(self.get_osdmap(),
                                   self.get("pg_dump"),
@@ -669,11 +689,20 @@ class Module(MgrModule):
             self.event.clear()
 
     def plan_create(self, name, osdmap, pools):
-        plan = Plan(name,
-                    MappingState(osdmap,
-                                 self.get("pg_dump"),
-                                 'plan %s initial' % name),
-                    pools)
+        mode = self.get_module_option('mode')
+        if mode == 'upmap':
+            # drop unnecessary MS member for upmap mode.
+            # this way we could effectively eliminate the usage of a
+            # complete pg_dump, which can become horribly inefficient
+            # as pg_num grows..
+            plan = Plan(name, mode, osdmap, pools)
+        else:
+            plan = MsPlan(name,
+                          mode,
+                          MappingState(osdmap,
+                                       self.get("pg_dump"),
+                                       'plan %s initial' % name),
+                          pools)
         return plan
 
     def plan_rm(self, name):
@@ -893,7 +922,6 @@ class Module(MgrModule):
 
     def optimize(self, plan):
         self.log.info('Optimize plan %s' % plan.name)
-        plan.mode = self.get_module_option('mode')
         max_misplaced = self.get_ceph_option('target_max_misplaced_ratio')
         self.log.info('Mode %s, max misplaced %f' %
                       (plan.mode, max_misplaced))
@@ -903,6 +931,7 @@ class Module(MgrModule):
         degraded = info.get('degraded_ratio', 0.0)
         inactive = info.get('inactive_pgs_ratio', 0.0)
         misplaced = info.get('misplaced_ratio', 0.0)
+        plan.pg_status = info
         self.log.debug('unknown %f degraded %f inactive %f misplaced %g',
                        unknown, degraded, inactive, misplaced)
         if unknown > 0.0:
@@ -940,12 +969,12 @@ class Module(MgrModule):
         self.log.info('do_upmap')
         max_optimizations = self.get_module_option('upmap_max_optimizations')
         max_deviation = self.get_module_option('upmap_max_deviation')
+        osdmap_dump = plan.osdmap_dump
 
-        ms = plan.initial
         if len(plan.pools):
             pools = plan.pools
         else: # all
-            pools = [str(i['pool_name']) for i in ms.osdmap_dump.get('pools',[])]
+            pools = [str(i['pool_name']) for i in osdmap_dump.get('pools',[])]
         if len(pools) == 0:
             detail = 'No pools available'
             self.log.info(detail)
@@ -954,7 +983,6 @@ class Module(MgrModule):
         inc = plan.inc
         total_did = 0
         left = max_optimizations
-        osdmap_dump = ms.osdmap_dump
         pools_with_pg_merge = [p['pool_name'] for p in osdmap_dump.get('pools', [])
                                if p['pg_num'] > p['pg_num_target']]
         crush_rule_by_pool_name = dict((p['pool_name'], p['crush_rule']) for p in osdmap_dump.get('pools', []))
@@ -984,16 +1012,15 @@ class Module(MgrModule):
             # since scrubbing activities have significant impacts on performance
             pool_ids = list(p['pool'] for p in pool_dump if p['pool_name'] in it)
             num_pg_active_clean = 0
-            pg_dump = ms.pg_dump
-            for p in pg_dump['pg_stats']:
-                pg_pool = p['pgid'].split('.')[0]
-                if len(pool_ids) and int(pg_pool) not in pool_ids:
+            for p in plan.pg_status.get('pgs_by_pool_state', []):
+                if len(pool_ids) and p['pool_id'] not in pool_ids:
                     continue
-                if p['state'] == 'active+clean':
-                    num_pg_active_clean += 1
-
+                for s in p['pg_state_counts']:
+                    if s['state_name'] == 'active+clean':
+                        num_pg_active_clean += s['count']
+                        break
             available = max_optimizations - (num_pg - num_pg_active_clean)
-            did = ms.osdmap.calc_pg_upmaps(inc, max_deviation, available, it)
+            did = plan.osdmap.calc_pg_upmaps(inc, max_deviation, available, it)
             self.log.info('prepared %d changes for pool(s) %s' % (did, it))
             total_did += did
         self.log.info('prepared %d changes in total' % total_did)