]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: don't include agents in CEPHADM_FAILED_DAEMON 44158/head
author Adam King <adking@redhat.com>
Wed, 1 Dec 2021 09:01:28 +0000 (04:01 -0500)
committer Adam King <adking@redhat.com>
Thu, 16 Dec 2021 12:56:55 +0000 (07:56 -0500)
Agents already have their own, stricter health warning
(CEPHADM_AGENT_DOWN). There are very few cases where an agent
would show up in the failed daemon health check but not in the
agent down health check, and even if it did, it would be
temporary. Also, before this change, agents marked as down were
automatically reported as failed even if they did not meet the
typical criterion for a failed daemon (systemd status is in error).

Fixes: https://tracker.ceph.com/issues/53448
Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/module.py

index b8cd31931abcf70b23c4b4ab0eac7d3f817c6e16..348824f89c2a5ed017d13f15885ff33b43a9fcba 100644 (file)
@@ -754,9 +754,10 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
     def update_failed_daemon_health_check(self) -> None:
         failed_daemons = []
         for dd in self.cache.get_error_daemons():
-            failed_daemons.append('daemon %s on %s is in %s state' % (
-                dd.name(), dd.hostname, dd.status_desc
-            ))
+            if dd.daemon_type != 'agent':  # agents tracked by CEPHADM_AGENT_DOWN
+                failed_daemons.append('daemon %s on %s is in %s state' % (
+                    dd.name(), dd.hostname, dd.status_desc
+                ))
         self.remove_health_warning('CEPHADM_FAILED_DAEMON')
         if failed_daemons:
             self.set_health_warning('CEPHADM_FAILED_DAEMON', f'{len(failed_daemons)} failed cephadm daemon(s)', len(