]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/progress: Add event when OSD marked in
authorKamoltat (Junior) Sirivadhna <ksirivad@redhat.com>
Tue, 11 Jun 2019 19:14:15 +0000 (15:14 -0400)
committerkamoltat <ksirivad@redhat.com>
Sat, 21 Nov 2020 06:45:18 +0000 (06:45 +0000)
Basically keeps track of affected PGs
similar to how funciton _osd_out works
but instead of tracking up+acting set
we only care about acting. Also made
a minor change on the function pg_update:
we consider pg to be complete when it
is active and clean and nothing else.
Removed the variable "unmoved" since
If the pg wasn't moved, we shouldn't
have an event for it in the first place.

(cherry picked from commit dd2c3f66a1dbd9582b7cd695efff66317b730c8a)
Signed-off-by: Kamoltat <ksirivad@redhat.com>
src/pybind/mgr/progress/module.py

index 9236001ebb8389fa8719709ed16b4f1dc24f632b..dd228b8222130a2c2b56e63e098961349174a969 100644 (file)
@@ -197,7 +197,6 @@ class PgRecoveryEvent(Event):
         # FIXME: O(pg_num) in python
         # FIXME: far more fields getting pythonized than we really care about
         pg_to_state = dict([(p['pgid'], p) for p in raw_pg_stats['pg_stats']])
-
         if self._original_bytes_recovered is None:
             self._original_bytes_recovered = {}
             missing_pgs = []
@@ -239,10 +238,7 @@ class PgRecoveryEvent(Event):
 
             states = state.split("+")
 
-            unmoved = bool(set(self._evacuate_osds) & (
-                set(info['up']) | set(info['acting'])))
-
-            if "active" in states and "clean" in states and not unmoved:
+            if "active" in states and "clean" in states:
                 complete.add(pg)
             else:
                 if info['stat_sum']['num_bytes'] == 0:
@@ -256,7 +252,6 @@ class PgRecoveryEvent(Event):
                         ratio = float(recovered -
                                       self._original_bytes_recovered[pg]) / \
                             total_bytes
-
                         # Since the recovered bytes (over time) could perhaps
                         # exceed the contents of the PG (moment in time), we
                         # must clamp this
@@ -266,7 +261,6 @@ class PgRecoveryEvent(Event):
                         # Dataless PGs (e.g. containing only OMAPs) count
                         # as half done.
                         ratio = 0.5
-
                     complete_accumulate += ratio
 
         self._pgs = list(set(self._pgs) ^ complete)
@@ -429,7 +423,64 @@ class Module(MgrModule):
         ev.pg_update(self.get("pg_stats"), self.get("pg_ready"), self.log)
         self._events[ev.id] = ev
 
-    def _osd_in(self, osd_id):
+    def _osd_in(self, old_map, old_dump, new_map, osd_id):
+        affected_pgs = []
+        unmoved_pgs = []
+        for pool in old_dump['pools']:
+            pool_id = pool['pool']
+            for ps in range(0, pool['pg_num']):
+                up_acting = old_map.pg_to_up_acting_osds(pool['pool'], ps)
+
+                # Was this OSD affected by the OSD coming in?
+                old_osds = set(up_acting['acting'])
+                was_on_out_osd = osd_id in old_osds
+                if not was_on_out_osd:
+                    continue
+
+                self.log.debug("pool_id, ps = {0}, {1}".format(
+                    pool_id, ps
+                ))
+
+                self.log.debug(
+                    "up_acting: {0}".format(json.dumps(up_acting, indent=2)))
+
+                new_up_acting = new_map.pg_to_up_acting_osds(pool['pool'], ps)
+                new_osds = set(new_up_acting['acting'])
+
+                # Has this OSD been assigned a new location?
+                # (it might not be if there is no suitable place to move
+                #  after an OSD is marked in)
+                is_relocated = len(old_osds - new_osds) > 0
+                self.log.debug(
+                    "new_up_acting: {0}".format(json.dumps(new_up_acting,
+                                                           indent=2)))
+
+                if was_on_out_osd and is_relocated:
+                    # This PG is now in motion, track its progress
+                    affected_pgs.append(PgId(pool_id, ps))
+                elif not is_relocated:
+                    # This PG didn't get a new location, we'll log it
+                    unmoved_pgs.append(PgId(pool_id, ps))
+
+        # In the case that we ignored some PGs, log the reason why (we may
+        # not end up creating a progress event)
+        if len(unmoved_pgs):
+            self.log.warn("{0} PGs were on osd.{1}, but didn't get new locations".format(
+                len(unmoved_pgs), osd_id))
+
+        self.log.warn("{0} PGs affected by osd.{1} coming in".format(
+            len(affected_pgs), osd_id))
+
+        if len(affected_pgs) > 0:
+            ev = PgRecoveryEvent(
+            "Rebalancing after osd.{0} marked in".format(osd_id),
+            refs=[("osd", osd_id)],
+            which_pgs=affected_pgs,
+            evacuate_osds=[osd_id]
+            )
+            ev.pg_update(self.get("pg_dump"), self.log)
+            self._events[ev.id] = ev
+
         for ev_id, ev in self._events.items():
             if isinstance(ev, PgRecoveryEvent) and osd_id in ev.evacuating_osds:
                 self.log.info("osd.{0} came back in, cancelling event".format(
@@ -457,7 +508,7 @@ class Module(MgrModule):
                     # individual recovery events on every adjustment
                     # in a gradual weight-in
                     self.log.warn("osd.{0} marked in".format(osd_id))
-                    self._osd_in(osd_id)
+                    self._osd_in(old_osdmap, old_dump, new_osdmap, osd_id)
 
     def notify(self, notify_type, notify_data):
         self._ready.wait()