git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/Mgr.cc: clear daemon health metrics instead of removing down/out osd from daemon state (58512/head)
author: Cory Snyder <csnyder@1111systems.com>
Fri, 19 Apr 2024 15:42:00 +0000 (15:42 +0000)
committer: Konstantin Shalygin <k0ste@k0ste.ru>
Wed, 24 Jul 2024 17:33:41 +0000 (00:33 +0700)
Reverts the change from https://github.com/ceph/ceph/pull/53993
and instead directly clears the daemon health metrics of OSDs that
are both down and out. The former approach of removing down/out OSDs
from the daemon state has undesirable consequences for stat output,
including the prometheus exporter.

Fixes: https://tracker.ceph.com/issues/66168
Signed-off-by: Cory Snyder <csnyder@1111systems.com>
(cherry picked from commit 282558cf40274366360bb3b1ec0fa102fbb592a6)

src/mgr/Mgr.cc

index b3d710f31abfae8680e8f76af6929f9f86cb48f3..91557d1a2206e55db2169abc7e3d2f9638388232 100644 (file)
@@ -525,7 +525,7 @@ void Mgr::handle_osd_map()
   cluster_state.with_osdmap_and_pgmap([this, &names_exist](const OSDMap &osd_map,
                                                           const PGMap &pg_map) {
     for (int osd_id = 0; osd_id < osd_map.get_max_osd(); ++osd_id) {
-      if (!osd_map.exists(osd_id) || (osd_map.is_out(osd_id) && osd_map.is_down(osd_id))) {
+      if (!osd_map.exists(osd_id)) {
         continue;
       }
 
@@ -537,9 +537,16 @@ void Mgr::handle_osd_map()
       if (daemon_state.is_updating(k)) {
         continue;
       }
+        
+      DaemonStatePtr daemon = daemon_state.get(k);
+        
+      if (daemon && osd_map.is_out(osd_id) && osd_map.is_down(osd_id)) {
+        std::lock_guard l(daemon->lock);
+        daemon->daemon_health_metrics.clear();
+      }
 
       bool update_meta = false;
-      if (daemon_state.exists(k)) {
+      if (daemon) {
         if (osd_map.get_up_from(osd_id) == osd_map.get_epoch()) {
           dout(4) << "Mgr::handle_osd_map: osd." << osd_id
                  << " joined cluster at " << "e" << osd_map.get_epoch()