From eaad13485b8a426d028d00c042e27a4db5f4e2a4 Mon Sep 17 00:00:00 2001
From: Adam King <adking@redhat.com>
Date: Wed, 2 Oct 2024 10:59:00 -0400
Subject: [PATCH] mgr/cephadm: set maintenance mode healthcheck when removing
 hosts

This is to handle the case where a host is removed from
cephadm management using the --force flag while the host
is in maintenance mode. Without this addition the health
warning about the host being in maintenance mode would
persist despite the host no longer being tracked by cephadm

Signed-off-by: Adam King <adking@redhat.com>
---
 src/pybind/mgr/cephadm/module.py             | 9 +++++----
 src/pybind/mgr/cephadm/tests/test_cephadm.py | 1 +
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index 3a112d6a9574..565067f32afc 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -1897,7 +1897,7 @@ Then run the following:
         self.inventory.add_host(spec)
         self.offline_hosts_remove(spec.hostname)
         if spec.status == 'maintenance':
-            self._set_maintenance_healthcheck()
+            self.set_maintenance_healthcheck()
         self.event.set()  # refresh stray health check
         self.log.info('Added host %s' % spec.hostname)
         return "Added host '{}' with addr '{}'".format(spec.hostname, spec.addr)
@@ -2068,6 +2068,7 @@ Then run the following:
         self.ssh.reset_con(host)
         # if host was in offline host list, we should remove it now.
         self.offline_hosts_remove(host)
+        self.set_maintenance_healthcheck()
         self.event.set()  # refresh stray health check
         self.log.info('Removed host %s' % host)
         return "Removed {} host '{}'".format('offline' if offline else '', host)
@@ -2182,7 +2183,7 @@ Then run the following:
         self.log.info(msg)
         return msg
 
-    def _set_maintenance_healthcheck(self) -> None:
+    def set_maintenance_healthcheck(self) -> None:
         """Raise/update or clear the maintenance health check as needed"""
 
         in_maintenance = self.inventory.get_host_with_state("maintenance")
@@ -2266,7 +2267,7 @@ Then run the following:
         self.inventory._inventory[hostname] = tgt_host
         self.inventory.save()
 
-        self._set_maintenance_healthcheck()
+        self.set_maintenance_healthcheck()
         return f'Daemons for Ceph cluster {self._cluster_fsid} stopped on host {hostname}. Host {hostname} moved to maintenance mode'
 
     @handle_orch_error
@@ -2317,7 +2318,7 @@ Then run the following:
         self.inventory._inventory[hostname] = tgt_host
         self.inventory.save()
 
-        self._set_maintenance_healthcheck()
+        self.set_maintenance_healthcheck()
 
         return f"Ceph cluster {self._cluster_fsid} on {hostname} has exited maintenance mode"
 
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index 975c125225dc..6a99123b7690 100644
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -2720,6 +2720,7 @@ Traceback (most recent call last):
             cephadm_module.cache.facts = facts
             assert cephadm_module._validate_tunedprofile_settings(spec) == expected_value
 
+    @mock.patch("cephadm.CephadmOrchestrator.set_maintenance_healthcheck", lambda _: None)
     @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
     def test_tuned_profiles_validation(self, cephadm_module):
         with with_host(cephadm_module, 'test'):
-- 
2.47.3