git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
pybind/mgr/pg_autoscaler: fix progress event of the autoscaler
author Kamoltat Sirivadhna <ksirivad@redhat.com>
Tue, 9 Sep 2025 11:28:17 +0000 (11:28 +0000)
committer Kamoltat Sirivadhna <ksirivad@redhat.com>
Tue, 9 Sep 2025 18:38:31 +0000 (18:38 +0000)
Problem:
We were comparing the wrong values when determining whether the autoscaler progress event should be completed.
We compared pool_data["pg_num"] == pool_data["pg_num_target"], which will always return true since pool_data["pg_num_target"]
is not actually the target pg_num we are trying to scale towards, but rather the previously calculated value
that we are expected to have already reached based on previous values.

Solution:
Use the correct comparison instead: pool_data["pg_num"] == ev.pg_num_target.

Fixes: https://tracker.ceph.com/issues/72857
Signed-off-by: Kamoltat Sirivadhna <ksirivad@redhat.com>
src/pybind/mgr/pg_autoscaler/module.py

index 03d601d0d4444f3989d3bebafe5cfd119f397712..30d9e0e39997d62aaf7212319e634b4e958aec03 100644 (file)
@@ -707,16 +707,34 @@ class PgAutoscaler(MgrModule):
         for pool_id in list(self._event):
             ev = self._event[pool_id]
             pool_data = self._get_pool_by_id(pools, pool_id)
-            if (
-                pool_data is None
-                or pool_data["pg_num"] == pool_data["pg_num_target"]
-                or ev.pg_num == ev.pg_num_target
-            ):
-                # pool is gone or we've reached our target
+            if (pool_data is None):
+                # pool is gone
+                self.log.warning("pool %s missing; marking complete", pool_id)
                 self.remote('progress', 'complete', ev.ev_id)
                 del self._event[pool_id]
                 continue
-            ev.update(self, (ev.pg_num - pool_data['pg_num']) / (ev.pg_num - ev.pg_num_target))
+            current = pool_data['pg_num']
+            start = ev.pg_num
+            target = ev.pg_num_target
+            self.log.debug("pool %s; start_pg_num: %d; current_pg_num: %d; -> target_pg_num: %d;",
+                           pool_id, start, current, target)
+            if (current == target):
+                # we've reached our target
+                self.log.debug("pool %s reached target; marking complete", pool_id)
+                self.remote('progress', 'complete', ev.ev_id)
+                del self._event[pool_id]
+                continue
+
+            denominator = (start - target)
+            if denominator == 0:
+               # start == target, we complete the event since nothing to track
+               self.remote('progress', 'complete', ev.ev_id)
+               self._event.pop(pool_id, None)
+               continue
+
+            progress = (start - current) / denominator
+
+            ev.update(self, progress)
 
     def _maybe_adjust(self,
                       osdmap: OSDMap,