From eaad13485b8a426d028d00c042e27a4db5f4e2a4 Mon Sep 17 00:00:00 2001 From: Adam King Date: Wed, 2 Oct 2024 10:59:00 -0400 Subject: [PATCH] mgr/cephadm: set maintenance mode healthcheck when removing hosts This is to handle the case where a host is removed from cephadm management using the --force flag while the host is in maintenance mode. Without this addition, the health warning about the host being in maintenance mode would persist despite the host no longer being tracked by cephadm. Signed-off-by: Adam King --- src/pybind/mgr/cephadm/module.py | 9 +++++---- src/pybind/mgr/cephadm/tests/test_cephadm.py | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 3a112d6a9574..565067f32afc 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -1897,7 +1897,7 @@ Then run the following: self.inventory.add_host(spec) self.offline_hosts_remove(spec.hostname) if spec.status == 'maintenance': - self._set_maintenance_healthcheck() + self.set_maintenance_healthcheck() self.event.set() # refresh stray health check self.log.info('Added host %s' % spec.hostname) return "Added host '{}' with addr '{}'".format(spec.hostname, spec.addr) @@ -2068,6 +2068,7 @@ Then run the following: self.ssh.reset_con(host) # if host was in offline host list, we should remove it now.
self.offline_hosts_remove(host) + self.set_maintenance_healthcheck() self.event.set() # refresh stray health check self.log.info('Removed host %s' % host) return "Removed {} host '{}'".format('offline' if offline else '', host) @@ -2182,7 +2183,7 @@ Then run the following: self.log.info(msg) return msg - def _set_maintenance_healthcheck(self) -> None: + def set_maintenance_healthcheck(self) -> None: """Raise/update or clear the maintenance health check as needed""" in_maintenance = self.inventory.get_host_with_state("maintenance") @@ -2266,7 +2267,7 @@ Then run the following: self.inventory._inventory[hostname] = tgt_host self.inventory.save() - self._set_maintenance_healthcheck() + self.set_maintenance_healthcheck() return f'Daemons for Ceph cluster {self._cluster_fsid} stopped on host {hostname}. Host {hostname} moved to maintenance mode' @handle_orch_error @@ -2317,7 +2318,7 @@ Then run the following: self.inventory._inventory[hostname] = tgt_host self.inventory.save() - self._set_maintenance_healthcheck() + self.set_maintenance_healthcheck() return f"Ceph cluster {self._cluster_fsid} on {hostname} has exited maintenance mode" diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 975c125225dc..6a99123b7690 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -2720,6 +2720,7 @@ Traceback (most recent call last): cephadm_module.cache.facts = facts assert cephadm_module._validate_tunedprofile_settings(spec) == expected_value + @mock.patch("cephadm.CephadmOrchestrator.set_maintenance_healthcheck", lambda _: None) @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) def test_tuned_profiles_validation(self, cephadm_module): with with_host(cephadm_module, 'test'): -- 2.47.3