From: Sage Weil Date: Fri, 3 Dec 2021 18:48:32 +0000 (-0500) Subject: mgr/progress: avoid inefficient dump of all pg stats X-Git-Tag: v17.1.0~226^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=f5973ccef415571c72560a04968592e8a6daf93a;p=ceph-ci.git mgr/progress: avoid inefficient dump of all pg stats We only use a handful of fields, and the pg dump includes a gazillion fields that we waste CPU copying to python-land. This tends to lead to long ClusterState::lock hold times, leading to long ms_dispatch delays and generally gumming up the works. Instead, create a new "pg_progress" item that dumps only the fields that mgr/progress needs. Fixes: https://tracker.ceph.com/issues/53475 Signed-off-by: Sage Weil --- diff --git a/src/mgr/ActivePyModules.cc b/src/mgr/ActivePyModules.cc index 04fbdc81040..5f050fe1d33 100644 --- a/src/mgr/ActivePyModules.cc +++ b/src/mgr/ActivePyModules.cc @@ -395,6 +395,14 @@ PyObject *ActivePyModules::get_python(const std::string &what) } else if (what == "pg_ready") { server.dump_pg_ready(&f); return f.get(); + } else if (what == "pg_progress") { + without_gil_t no_gil; + return cluster_state.with_pgmap([&](const PGMap &pg_map) { + no_gil.acquire_gil(); + pg_map.dump_pg_progress(&f); + server.dump_pg_ready(&f); + return f.get(); + }); } else if (what == "osd_stats") { without_gil_t no_gil; return cluster_state.with_pgmap([&](const PGMap &pg_map) { diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index ea90270a9ef..1c0623456da 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -1564,6 +1564,21 @@ void PGMap::dump_pg_stats(ceph::Formatter *f, bool brief) const f->close_section(); } +void PGMap::dump_pg_progress(ceph::Formatter *f) const +{ + f->open_object_section("pgs"); + for (auto& i : pg_stat) { + std::string n = stringify(i.first); + f->open_object_section(n.c_str()); + f->dump_int("num_bytes_recovered", i.second.stats.sum.num_bytes_recovered); + f->dump_int("num_bytes", i.second.stats.sum.num_bytes); + f->dump_unsigned("reported_epoch", i.second.reported_epoch); + f->dump_string("state", pg_state_string(i.second.state)); + f->close_section(); + } + f->close_section(); +} + void PGMap::dump_pool_stats(ceph::Formatter *f) const { f->open_array_section("pool_stats"); diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h index d55d2397c1b..a5e75ed5897 100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -442,6 +442,7 @@ public: void dump(ceph::Formatter *f, bool with_net = true) const; void dump_basic(ceph::Formatter *f) const; void dump_pg_stats(ceph::Formatter *f, bool brief) const; + void dump_pg_progress(ceph::Formatter *f) const; void dump_pool_stats(ceph::Formatter *f) const; void dump_osd_stats(ceph::Formatter *f, bool with_net = true) const; void dump_osd_ping_times(ceph::Formatter *f) const; diff --git a/src/pybind/mgr/progress/module.py b/src/pybind/mgr/progress/module.py index 2ec69971c89..5f9aa86f647 100644 --- a/src/pybind/mgr/progress/module.py +++ b/src/pybind/mgr/progress/module.py @@ -301,13 +301,13 @@ class PgRecoveryEvent(Event): def which_osds(self): return self. _which_osds - def pg_update(self, raw_pg_stats, pg_ready, log): - # type: (Dict, bool, Any) -> None + def pg_update(self, pg_progress: Dict, log: Any) -> None: # FIXME: O(pg_num) in python - # FIXME: far more fields getting pythonized than we really care about # Sanity check to see if there are any missing PGs and to assign # empty array and dictionary if there hasn't been any recovery - pg_to_state = dict((p['pgid'], p) for p in raw_pg_stats['pg_stats']) # type: Dict[str, Any] + pg_to_state: Dict[str, Any] = pg_progress["pgs"] + pg_ready: bool = pg_progress["pg_ready"] + if self._original_bytes_recovered is None: self._original_bytes_recovered = {} missing_pgs = [] @@ -315,7 +315,7 @@ class PgRecoveryEvent(Event): pg_str = str(pg) if pg_str in pg_to_state: self._original_bytes_recovered[pg] = \ - pg_to_state[pg_str]['stat_sum']['num_bytes_recovered'] + pg_to_state[pg_str]['num_bytes_recovered'] else: missing_pgs.append(pg) if pg_ready: @@ -352,13 +352,13 @@ class PgRecoveryEvent(Event): if "active" in states and "clean" in states: complete.add(pg) else: - if info['stat_sum']['num_bytes'] == 0: + if info['num_bytes'] == 0: # Empty PGs are considered 0% done until they are # in the correct state. pass else: - recovered = info['stat_sum']['num_bytes_recovered'] - total_bytes = info['stat_sum']['num_bytes'] + recovered = info['num_bytes_recovered'] + total_bytes = info['num_bytes'] if total_bytes > 0: ratio = float(recovered - self._original_bytes_recovered[pg]) / \ @@ -555,7 +555,7 @@ class Module(MgrModule): start_epoch=self.get_osdmap().get_epoch(), add_to_ceph_s=False ) - r_ev.pg_update(self.get("pg_stats"), self.get("pg_ready"), self.log) + r_ev.pg_update(self.get("pg_progress"), self.log) self._events[r_ev.id] = r_ev def _osdmap_changed(self, old_osdmap, new_osdmap): @@ -623,14 +623,13 @@ class Module(MgrModule): return global_event = False - data = self.get("pg_stats") - ready = self.get("pg_ready") + data = self.get("pg_progress") for ev_id in list(self._events): ev = self._events[ev_id] # Check for types of events # we have to update if isinstance(ev, PgRecoveryEvent): - ev.pg_update(data, ready, self.log) + ev.pg_update(data, self.log) self.maybe_complete(ev) elif isinstance(ev, GlobalRecoveryEvent): global_event = True diff --git a/src/pybind/mgr/progress/test_progress.py b/src/pybind/mgr/progress/test_progress.py index 9d39cbdd684..ef4e7e16618 100644 --- a/src/pybind/mgr/progress/test_progress.py +++ b/src/pybind/mgr/progress/test_progress.py @@ -20,66 +20,34 @@ class TestPgRecoveryEvent(object): self.test_event = module.PgRecoveryEvent(None, None, [module.PgId(1,i) for i in range(3)], [0], 30, False) def test_pg_update(self): - # Test for a completed event when the pg states show active+clear - pg_stats = { - "pg_stats":[ - { - "state": "active+clean", - "stat_sum": { - "num_bytes": 10, - "num_bytes_recovered": 10 - }, - "up": [ - 3, - 1 - ], - "acting": [ - 3, - 1 - ], - "pgid": "1.0", - "reported_epoch": 30 - }, - { - "state": "active+clean", - "stat_sum": { - "num_bytes": 10, - "num_bytes_recovered": 10 - }, - "up": [ - 3, - 1 - ], - "acting": [ - 3, - 1 - ], - "pgid": "1.1", - "reported_epoch": 30 - }, - { - "state": "active+clean", - "stat_sum": { - "num_bytes": 10, - "num_bytes_recovered": 10 - }, - "up": [ - 3, - 1 - ], - "acting": [ - 3, - 1 - ], - "pgid": "1.2", - "reported_epoch": 30 - } - ] + # Test for a completed event when the pg states show active+clean + pg_progress = { + "pgs": { + "1.0": { + "state": "active+clean", + "num_bytes": 10, + "num_bytes_recovered": 10, + "reported_epoch": 30, + }, + "1.1": { + "state": "active+clean", + "num_bytes": 10, + "num_bytes_recovered": 10, + "reported_epoch": 30, + }, + "1.2": { + "state": "active+clean", + "num_bytes": 10, + "num_bytes_recovered": 10, + "reported_epoch": 30, + }, + }, + "pg_ready": True, } - - self.test_event.pg_update(pg_stats, True, mock.Mock()) + self.test_event.pg_update(pg_progress, mock.Mock()) assert self.test_event._progress == 1.0 - + + class OSDMap: # This is an artificial class to help @@ -118,6 +86,7 @@ class OSDMap: def pg_to_up_acting_osds(self, pool_id, ps): return self._pg_to_up_acting_osds(pool_id, ps) + class TestModule(object): # Testing Module Class