]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/progress: optimize global recovery module 37544/head
authorKamoltat <ksirivad@redhat.com>
Mon, 5 Oct 2020 09:38:35 +0000 (09:38 +0000)
committerKamoltat <ksirivad@redhat.com>
Wed, 22 Sep 2021 19:18:31 +0000 (19:18 +0000)
Instead of fetching `pg_stats` from the python
part of manager module, we filter out the pgs
that are in active + clean state in ActivePyModules.cc
then parse these pgs along with `reported_epoch` and
the `total_num_pgs` of the clusters to global recovery
module.

Signed-off-by: Kamoltat <ksirivad@redhat.com>
src/mgr/ActivePyModules.cc
src/pybind/mgr/progress/module.py

index e1179d4debb4aae63c69b2e88597dd1b0243a1db..27b1baf8d1edbad1d4749cd955b5cd5817505a13 100644 (file)
@@ -21,7 +21,7 @@
 
 #include "osd/OSDMap.h"
 #include "mon/MonMap.h"
-
+#include "osd/osd_types.h"
 #include "mgr/MgrContext.h"
 
 // For ::mgr_store_prefix
@@ -446,6 +446,29 @@ PyObject *ActivePyModules::get_python(const std::string &what)
     with_gil_t with_gil{no_gil};
     f.dump_bool("have_local_config_map", have_local_config_map);
     return f.get();
+  } else if (what == "active_clean_pgs"){
+    cluster_state.with_pgmap(
+        [&](const PGMap &pg_map) {
+      with_gil_t with_gil{no_gil};
+      f.open_array_section("pg_stats");
+      for (auto &i : pg_map.pg_stat) {
+        const auto state = i.second.state;
+       const auto pgid_raw = i.first;
+       const auto pgid = stringify(pgid_raw.m_pool) + "." + stringify(pgid_raw.m_seed);
+       const auto reported_epoch = i.second.reported_epoch;
+       if (state & PG_STATE_ACTIVE && state & PG_STATE_CLEAN) {
+         f.open_object_section("pg_stat");
+         f.dump_string("pgid", pgid);
+         f.dump_string("state", pg_state_string(state));
+         f.dump_unsigned("reported_epoch", reported_epoch);
+         f.close_section();
+       }
+      }
+      f.close_section();
+      const auto num_pg = pg_map.num_pg;
+      f.dump_unsigned("total_num_pgs", num_pg);
+    });
+    return f.get();
   } else {
     derr << "Python module requested unknown data '" << what << "'" << dendl;
     with_gil_t with_gil{no_gil};
index 1f6162e2c8bdd5dbb90981b4b3756f24e7aa1fbb..2ec69971c896aa7cb82e7a4c0b9da2aebf402e84 100644 (file)
@@ -194,33 +194,27 @@ class GlobalRecoveryEvent(Event):
         self._active_clean_num = active_clean_num
         self._refresh()
 
-    def global_event_update_progress(self, pg_dump, log):
-        # type: (Dict, logging.Logger) -> None
+    def global_event_update_progress(self, log):
+        # type: (logging.Logger) -> None
         "Update progress of Global Recovery Event"
-        pgs = pg_dump['pg_stats']
-        new_active_clean_num = 0
+        global _module
+        assert _module
         skipped_pgs = 0
-
-        for pg in pgs:
+        active_clean_pgs = _module.get("active_clean_pgs")
+        total_pg_num = active_clean_pgs["total_num_pgs"]
+        new_active_clean_pgs = active_clean_pgs["pg_stats"]
+        new_active_clean_num = len(new_active_clean_pgs)
+        for pg in new_active_clean_pgs:
             # Disregard PGs that are not being reported
             # if the states are active+clean. Since it is
             # possible that some pgs might not have any movement
             # even before the start of the event.
-
-            state = pg['state']
-
-            states = state.split("+")
             if pg['reported_epoch'] < self._start_epoch:
-                if "active" in states and "clean" in states:
-                    log.debug("Skipping pg {0} since reported_epoch {1} < start_epoch {2}"
-                              .format(pg['pgid'], pg['reported_epoch'], self._start_epoch))
-                    skipped_pgs += 1
+                log.debug("Skipping pg {0} since reported_epoch {1} < start_epoch {2}"
+                             .format(pg['pgid'], pg['reported_epoch'], self._start_epoch))
+                skipped_pgs += 1
                 continue
 
-            if "active" in states and "clean" in states:
-                new_active_clean_num += 1
-
-        total_pg_num = len(pgs)
         if self._active_clean_num != new_active_clean_num:
             # Have this case to know when need to update
             # the progress
@@ -229,6 +223,9 @@ class GlobalRecoveryEvent(Event):
                 self._progress = float(new_active_clean_num) / (total_pg_num - skipped_pgs)
             except ZeroDivisionError:
                 self._progress = 0.0
+        else:
+            # No need to update since there is no change
+            return
 
         log.debug("Updated progress to %s", self.summary())
         self._refresh()
@@ -296,7 +293,7 @@ class PgRecoveryEvent(Event):
         self._original_pg_count = len(self._pgs)
         self._original_bytes_recovered = None  # type: Optional[Dict[PgId, float]]
         self._progress = 0.0
-        # self._start_epoch = _module.get_osdmap().get_epoch()
+
         self._start_epoch = start_epoch
         self._refresh()
 
@@ -584,23 +581,14 @@ class Module(MgrModule):
                     self.log.warning("osd.{0} marked in".format(osd_id))
                     self._osd_in_out(old_osdmap, old_dump, new_osdmap, osd_id, "in")
 
-    def _pg_state_changed(self, pg_dump):
+    def _pg_state_changed(self):
 
         # This function both constructs and updates
         # the global recovery event if one of the
         # PGs is not at active+clean state
-
-        pgs = pg_dump['pg_stats']
-        total_pg_num = len(pgs)
-        active_clean_num = 0
-        for pg in pgs:
-            state = pg['state']
-            # TODO right here we can keep track of epoch as well
-            # and parse it to global_event_update_progress()
-            states = state.split("+")
-
-            if "active" in states and "clean" in states:
-                active_clean_num += 1
+        active_clean_pgs = self.get("active_clean_pgs")
+        total_pg_num = active_clean_pgs["total_num_pgs"]
+        active_clean_num = len(active_clean_pgs["pg_stats"])
         try:
             # There might be a case where there is no pg_num
             progress = float(active_clean_num) / total_pg_num
@@ -615,7 +603,7 @@ class Module(MgrModule):
                     add_to_ceph_s=True,
                     start_epoch=self.get_osdmap().get_epoch(),
                     active_clean_num=active_clean_num)
-            ev.global_event_update_progress(self.get('pg_stats'), self.log)
+            ev.global_event_update_progress(self.log)
             self._events[ev.id] = ev
 
     def _process_osdmap(self):
@@ -646,13 +634,13 @@ class Module(MgrModule):
                 self.maybe_complete(ev)
             elif isinstance(ev, GlobalRecoveryEvent):
                 global_event = True
-                ev.global_event_update_progress(data, self.log)
+                ev.global_event_update_progress(self.log)
                 self.maybe_complete(ev)
 
         if not global_event:
             # If there is no global event
             # we create one
-            self._pg_state_changed(data)
+            self._pg_state_changed()
 
     def maybe_complete(self, event):
         # type: (Event) -> None