From: John Spray Date: Tue, 24 Jul 2018 22:40:41 +0000 (-0400) Subject: mgr/progress: no progress event on unmoved pgs X-Git-Tag: v14.0.1~337^2~3 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=dde10a0b6e5eacd93535ef107fb86b92fdcf54cd;p=ceph.git mgr/progress: no progress event on unmoved pgs PGs may not be moved on osd out, if there is no suitable location for them to move to. In this situation it doesn't make sense to have a progress event, as the health warnings adequately communicate the situation. Signed-off-by: John Spray --- diff --git a/src/pybind/mgr/progress/module.py b/src/pybind/mgr/progress/module.py index b998b1bc1d7b6..72f73d1fe9f95 100644 --- a/src/pybind/mgr/progress/module.py +++ b/src/pybind/mgr/progress/module.py @@ -143,7 +143,6 @@ class PgRecoveryEvent(Event): self._original_bytes_recovered = {} for pg in self._pgs: pg_str = str(pg) - log.debug(json.dumps(pg_to_state[pg_str], indent=2)) self._original_bytes_recovered[pg] = \ pg_to_state[pg_str]['stat_sum']['num_bytes_recovered'] @@ -260,17 +259,49 @@ class Module(MgrModule): def _osd_out(self, old_map, old_dump, new_map, osd_id): affected_pgs = [] + unmoved_pgs = [] for pool in old_dump['pools']: pool_id = pool['pool'] for ps in range(0, pool['pg_num']): + up_acting = old_map.pg_to_up_acting_osds(pool['pool'], ps) + + # Was this OSD affected by the OSD going out? + old_osds = set(up_acting['up']) | set(up_acting['acting']) + was_on_out_osd = osd_id in old_osds + if not was_on_out_osd: + continue + self.log.debug("pool_id, ps = {0}, {1}".format( pool_id, ps )) - up_acting = old_map.pg_to_up_acting_osds(pool['pool'], ps) + self.log.debug( "up_acting: {0}".format(json.dumps(up_acting, indent=2))) - if osd_id in up_acting['up'] or osd_id in up_acting['acting']: + + new_up_acting = new_map.pg_to_up_acting_osds(pool['pool'], ps) + new_osds = set(new_up_acting['up']) | set(new_up_acting['acting']) + + # Has this OSD been assigned a new location? + # (it might not be if there is no suitable place to move + # after an OSD failure) + is_relocated = len(new_osds - old_osds) > 0 + + self.log.debug( + "new_up_acting: {0}".format(json.dumps(new_up_acting, + indent=2))) + + if was_on_out_osd and is_relocated: + # This PG is now in motion, track its progress affected_pgs.append(PgId(pool_id, ps)) + elif not is_relocated: + # This PG didn't get a new location, we'll log it + unmoved_pgs.append(PgId(pool_id, ps)) + + # In the case that we ignored some PGs, log the reason why (we may + # not end up creating a progress event) + if len(unmoved_pgs): + self.log.warn("{0} PGs were on osd.{1}, but didn't get new locations".format( + len(unmoved_pgs), osd_id)) self.log.warn("{0} PGs affected by osd.{1} going out".format( len(affected_pgs), osd_id))