From: Adam King Date: Wed, 2 Oct 2024 14:59:00 +0000 (-0400) Subject: mgr/cephadm: set maintenance mode healthcheck when removing hosts X-Git-Tag: v20.0.0~726^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=eaad13485b8a426d028d00c042e27a4db5f4e2a4;p=ceph.git mgr/cephadm: set maintenance mode healthcheck when removing hosts This is to handle the case where a host is removed from cephadm management using the --force flag while the host is in maintenance mode. Without this addition, the health warning about the host being in maintenance mode would persist despite the host no longer being tracked by cephadm. Signed-off-by: Adam King --- diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 3a112d6a957..565067f32af 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -1897,7 +1897,7 @@ Then run the following: self.inventory.add_host(spec) self.offline_hosts_remove(spec.hostname) if spec.status == 'maintenance': - self._set_maintenance_healthcheck() + self.set_maintenance_healthcheck() self.event.set() # refresh stray health check self.log.info('Added host %s' % spec.hostname) return "Added host '{}' with addr '{}'".format(spec.hostname, spec.addr) @@ -2068,6 +2068,7 @@ Then run the following: self.ssh.reset_con(host) # if host was in offline host list, we should remove it now.
self.offline_hosts_remove(host) + self.set_maintenance_healthcheck() self.event.set() # refresh stray health check self.log.info('Removed host %s' % host) return "Removed {} host '{}'".format('offline' if offline else '', host) @@ -2182,7 +2183,7 @@ Then run the following: self.log.info(msg) return msg - def _set_maintenance_healthcheck(self) -> None: + def set_maintenance_healthcheck(self) -> None: """Raise/update or clear the maintenance health check as needed""" in_maintenance = self.inventory.get_host_with_state("maintenance") @@ -2266,7 +2267,7 @@ Then run the following: self.inventory._inventory[hostname] = tgt_host self.inventory.save() - self._set_maintenance_healthcheck() + self.set_maintenance_healthcheck() return f'Daemons for Ceph cluster {self._cluster_fsid} stopped on host {hostname}. Host {hostname} moved to maintenance mode' @handle_orch_error @@ -2317,7 +2318,7 @@ Then run the following: self.inventory._inventory[hostname] = tgt_host self.inventory.save() - self._set_maintenance_healthcheck() + self.set_maintenance_healthcheck() return f"Ceph cluster {self._cluster_fsid} on {hostname} has exited maintenance mode" diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 975c125225d..6a99123b769 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -2720,6 +2720,7 @@ Traceback (most recent call last): cephadm_module.cache.facts = facts assert cephadm_module._validate_tunedprofile_settings(spec) == expected_value + @mock.patch("cephadm.CephadmOrchestrator.set_maintenance_healthcheck", lambda _: None) @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) def test_tuned_profiles_validation(self, cephadm_module): with with_host(cephadm_module, 'test'):