]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/progress: no progress event on unmoved pgs
authorJohn Spray <john.spray@redhat.com>
Tue, 24 Jul 2018 22:40:41 +0000 (18:40 -0400)
committerJohn Spray <john.spray@redhat.com>
Tue, 11 Sep 2018 10:21:35 +0000 (11:21 +0100)
PGs may not be moved on osd out, if there is no suitable
location for them to move to.  In this situation
it doesn't make sense to have a progress event, as the
health warnings adequately communicate the situation.

Signed-off-by: John Spray <john.spray@redhat.com>
src/pybind/mgr/progress/module.py

index b998b1bc1d7b681d3837ff3bc2d8e5bb12207c4b..72f73d1fe9f95ccdcb618c1ab47c1e2de8ce11d8 100644 (file)
@@ -143,7 +143,6 @@ class PgRecoveryEvent(Event):
             self._original_bytes_recovered = {}
             for pg in self._pgs:
                 pg_str = str(pg)
-                log.debug(json.dumps(pg_to_state[pg_str], indent=2))
                 self._original_bytes_recovered[pg] = \
                     pg_to_state[pg_str]['stat_sum']['num_bytes_recovered']
 
@@ -260,17 +259,49 @@ class Module(MgrModule):
 
     def _osd_out(self, old_map, old_dump, new_map, osd_id):
         affected_pgs = []
+        unmoved_pgs = []
         for pool in old_dump['pools']:
             pool_id = pool['pool']
             for ps in range(0, pool['pg_num']):
+                up_acting = old_map.pg_to_up_acting_osds(pool['pool'], ps)
+
+                # Was this OSD affected by the OSD going out?
+                old_osds = set(up_acting['up']) | set(up_acting['acting'])
+                was_on_out_osd = osd_id in old_osds
+                if not was_on_out_osd:
+                    continue
+
                 self.log.debug("pool_id, ps = {0}, {1}".format(
                     pool_id, ps
                 ))
-                up_acting = old_map.pg_to_up_acting_osds(pool['pool'], ps)
+
                 self.log.debug(
                     "up_acting: {0}".format(json.dumps(up_acting, indent=2)))
-                if osd_id in up_acting['up'] or osd_id in up_acting['acting']:
+
+                new_up_acting = new_map.pg_to_up_acting_osds(pool['pool'], ps)
+                new_osds = set(new_up_acting['up']) | set(new_up_acting['acting'])
+
+                # Has this OSD been assigned a new location?
+                # (it might not be if there is no suitable place to move
+                #  after an OSD failure)
+                is_relocated = len(new_osds - old_osds) > 0
+
+                self.log.debug(
+                    "new_up_acting: {0}".format(json.dumps(new_up_acting,
+                                                           indent=2)))
+
+                if was_on_out_osd and is_relocated:
+                    # This PG is now in motion, track its progress
                     affected_pgs.append(PgId(pool_id, ps))
+                elif not is_relocated:
+                    # This PG didn't get a new location, we'll log it
+                    unmoved_pgs.append(PgId(pool_id, ps))
+
+        # In the case that we ignored some PGs, log the reason why (we may
+        # not end up creating a progress event)
+        if len(unmoved_pgs):
+            self.log.warn("{0} PGs were on osd.{1}, but didn't get new locations".format(
+                len(unmoved_pgs), osd_id))
 
         self.log.warn("{0} PGs affected by osd.{1} going out".format(
             len(affected_pgs), osd_id))