pybind/mgr/balancer: be more specific on creating an optimization plan

author xie xingguo <xie.xingguo@zte.com.cn>

Thu, 1 Feb 2018 07:40:56 +0000 (15:40 +0800)

committer xie xingguo <xie.xingguo@zte.com.cn>

Wed, 7 Feb 2018 13:26:33 +0000 (21:26 +0800)
author xie xingguo <xie.xingguo@zte.com.cn>
Thu, 1 Feb 2018 07:40:56 +0000 (15:40 +0800)
committer xie xingguo <xie.xingguo@zte.com.cn>
Wed, 7 Feb 2018 13:26:33 +0000 (21:26 +0800)
diff --git a/src/pybind/mgr/balancer/module.py b/src/pybind/mgr/balancer/module.py

index 9511cfaef5c0fa93d25b60aa9fd1092782414a55..9f1f1d07f873424ab047e2d76a3294b01c8293e1 100644 (file)
--- a/src/pybind/mgr/balancer/module.py
+++ b/src/pybind/mgr/balancer/module.py
@@ -320,8 +320,12 @@ class Module(MgrModule):
              if len(invalid_pool_names):
                  return (-errno.EINVAL, '', 'pools %s not found' % invalid_pool_names)
              plan = self.plan_create(command['plan'], osdmap, pools)
-            self.optimize(plan)
-            return (0, '', '')
+            r, detail = self.optimize(plan)
+            # remove plan if we are currently unable to find an optimization
+            # or distribution is already perfect
+            if r:
+                self.plan_rm(command['plan'])
+            return (r, '', detail)
          elif command['prefix'] == 'balancer rm':
              self.plan_rm(command['plan'])
              return (0, '', '')
@@ -376,7 +380,8 @@ class Module(MgrModule):
                  self.log.debug('Running')
                  name = 'auto_%s' % time.strftime(TIME_FORMAT, time.gmtime())
                  plan = self.plan_create(name, self.get_osdmap(), [])
-                if self.optimize(plan):
+                r, detail = self.optimize(plan)
+                if r == 0:
                      self.execute(plan)
                  self.plan_rm(name)
              self.log.debug('Sleeping for %d', sleep_interval)
@@ -615,25 +620,35 @@ class Module(MgrModule):
          self.log.debug('unknown %f degraded %f inactive %f misplaced %g',
                         unknown, degraded, inactive, misplaced)
          if unknown > 0.0:
-            self.log.info('Some PGs (%f) are unknown; waiting', unknown)
+            detail = 'Some PGs (%f) are unknown; try again later' % unknown
+            self.log.info(detail)
+            return -errno.EAGAIN, detail
          elif degraded > 0.0:
-            self.log.info('Some objects (%f) are degraded; waiting', degraded)
+            detail = 'Some objects (%f) are degraded; try again later' % degraded
+            self.log.info(detail)
+            return -errno.EAGAIN, detail
          elif inactive > 0.0:
-            self.log.info('Some PGs (%f) are inactive; waiting', inactive)
+            detail = 'Some PGs (%f) are inactive; try again later' % inactive
+            self.log.info(detail)
+            return -errno.EAGAIN, detail
          elif misplaced >= max_misplaced:
-            self.log.info('Too many objects (%f > %f) are misplaced; waiting',
-                          misplaced, max_misplaced)
+            detail = 'Too many objects (%f > %f) are misplaced; ' \
+                     'try again later' % (misplaced, max_misplaced)
+            self.log.info(detail)
+            return -errno.EAGAIN, detail
          else:
              if plan.mode == 'upmap':
                  return self.do_upmap(plan)
              elif plan.mode == 'crush-compat':
                  return self.do_crush_compat(plan)
              elif plan.mode == 'none':
+                detail = 'Please do "ceph balancer mode" to choose a valid mode first'
                  self.log.info('Idle')
+                return -errno.ENOEXEC, detail
              else:
-                self.log.info('Unrecognized mode %s' % plan.mode)
-        return False
-
+                detail = 'Unrecognized mode %s' % plan.mode
+                self.log.info(detail)
+                return -errno.EINVAL, detail
          ##
  
      def do_upmap(self, plan):
@@ -647,8 +662,9 @@ class Module(MgrModule):
          else: # all
              pools = [str(i['pool_name']) for i in ms.osdmap_dump.get('pools',[])]
          if len(pools) == 0:
-            self.log.info('no pools, nothing to do')
-            return False
+            detail = 'No pools available'
+            self.log.info(detail)
+            return -errno.ENOENT, detail
          # shuffle pool list so they all get equal (in)attention
          random.shuffle(pools)
          self.log.info('pools %s' % pools)
@@ -663,16 +679,16 @@ class Module(MgrModule):
              if left <= 0:
                  break
          self.log.info('prepared %d/%d changes' % (total_did, max_iterations))
-        return True
+        return 0, ''
  
      def do_crush_compat(self, plan):
          self.log.info('do_crush_compat')
          max_iterations = int(self.get_config('crush_compat_max_iterations', 25))
          if max_iterations < 1:
-            return False
+            return -errno.EINVAL, '"crush_compat_max_iterations" must be >= 1'
          step = float(self.get_config('crush_compat_step', .5))
          if step <= 0 or step >= 1.0:
-            return False
+            return -errno.EINVAL, '"crush_compat_step" must be in (0, 1)'
          max_misplaced = float(self.get_config('max_misplaced',
                                                default_max_misplaced))
          min_pg_per_osd = 2
@@ -682,8 +698,9 @@ class Module(MgrModule):
          crush = osdmap.get_crush()
          pe = self.calc_eval(ms, plan.pools)
          if pe.score == 0:
-            self.log.info('Distribution is already perfect')
-            return False
+            detail = 'Distribution is already perfect'
+            self.log.info(detail)
+            return -errno.EALREADY, detail
  
          # get current osd reweights
          orig_osd_weight = { a['osd']: a['weight']
@@ -694,7 +711,7 @@ class Module(MgrModule):
          # get current compat weight-set weights
          orig_ws = self.get_compat_weight_set_weights(ms)
          if not orig_ws:
-            return False
+            return -errno.EAGAIN, 'compat weight-set not available'
          orig_ws = { a: b for a, b in orig_ws.iteritems() if a >= 0 }
  
          # Make sure roots don't overlap their devices.  If so, we
@@ -710,9 +727,10 @@ class Module(MgrModule):
                      overlap[osd] = 1
                  visited[osd] = 1
          if len(overlap) > 0:
-            self.log.error('error: some osds belong to multiple subtrees: %s' %
-                         overlap)
-            return False
+            detail = 'Some osds belong to multiple subtrees: %s' % \
+                     overlap.keys()
+            self.log.error(detail)
+            return -errno.EOPNOTSUPP, detail
  
          key = 'pgs'  # pgs objects or bytes
  
@@ -834,12 +852,13 @@ class Module(MgrModule):
                  if w != orig_osd_weight[osd]:
                      self.log.debug('osd.%d reweight %f', osd, w)
                      plan.osd_weights[osd] = w
-            return True
+            return 0, ''
          else:
              self.log.info('Failed to find further optimization, score %f',
                            pe.score)
              plan.compat_ws = {}
-            return False
+            return -errno.EDOM, 'Unable to find further optimization, ' \
+                                'change balancer mode and retry might help'
  
      def get_compat_weight_set_weights(self, ms):
          if not CRUSHMap.have_default_choose_args(ms.crush_dump):
author	xie xingguo <xie.xingguo@zte.com.cn>
	Thu, 1 Feb 2018 07:40:56 +0000 (15:40 +0800)
committer	xie xingguo <xie.xingguo@zte.com.cn>
	Wed, 7 Feb 2018 13:26:33 +0000 (21:26 +0800)