From e7156a73c327c608f0eb6dec1c9fedde226b4a79 Mon Sep 17 00:00:00 2001
From: Adam King <adking@redhat.com>
Date: Mon, 5 Jun 2023 15:05:55 -0400
Subject: [PATCH] mgr/cephadm: add ability to zap OSDs' devices while draining
 host

Currently, when cephadm drains a host, it removes all OSDs on the host
but provides no option to zap the OSDs' devices afterwards. Given that
users draining a host are likely doing so to remove it from the
cluster, it makes sense that some users would want to clean up the
devices on the host that were being used for OSDs. Cephadm already
supports zapping devices outside of host draining, so it shouldn't
take much to add that functionality to the host drain as well.

Fixes: https://tracker.ceph.com/issues/61593
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit 85043ff4cee108c152f5aa8af267c85e353c475a)
---
 doc/cephadm/host-management.rst              |  8 ++++++++
 src/pybind/mgr/cephadm/module.py             |  4 ++--
 src/pybind/mgr/cephadm/tests/test_cephadm.py | 11 +++++++++++
 src/pybind/mgr/orchestrator/_interface.py    |  2 +-
 src/pybind/mgr/orchestrator/module.py        |  4 ++--
 5 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/doc/cephadm/host-management.rst b/doc/cephadm/host-management.rst
index 781e6378e15bc..4b964c5f455a7 100644
--- a/doc/cephadm/host-management.rst
+++ b/doc/cephadm/host-management.rst
@@ -123,6 +123,14 @@ All OSDs on the host will be scheduled to be removed. You can check the progress
 
 See :ref:`cephadm-osd-removal` for more details about OSD removal.
 
+The ``orch host drain`` command also supports a ``--zap-osd-devices``
+flag. Setting this flag while draining a host will cause cephadm to zap
+the devices of the OSDs it is removing as part of the drain process.
+
+.. prompt:: bash #
+
+   ceph orch host drain *<host>* --zap-osd-devices
+
 Use the following command to determine whether any daemons are still on the
 host:
 
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index aeca11f235f76..af5d466637b1d 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -3363,7 +3363,7 @@ Then run the following:
         return self.to_remove_osds.all_osds()
 
     @handle_orch_error
-    def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> str:
+    def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False, zap_osd_devices: bool = False) -> str:
         """
         Drain all daemons from a host.
         :param host: host name
@@ -3389,7 +3389,7 @@ Then run the following:
 
         daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_host(hostname)
         osds_to_remove = [d.daemon_id for d in daemons if d.daemon_type == 'osd']
-        self.remove_osds(osds_to_remove)
+        self.remove_osds(osds_to_remove, zap=zap_osd_devices)
 
         daemons_table = ""
         daemons_table += "{:<20} {:<15}\n".format("type", "id")
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index 6cc06cedcf47a..e40de481ff870 100644
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -2539,3 +2539,14 @@ Traceback (most recent call last):
         with pytest.raises(OrchestratorError, match=r'Command "very slow" timed out on host hostC \(non-default 999 second timeout\)'):
             with cephadm_module.async_timeout_handler('hostC', 'very slow', 999):
                 cephadm_module.wait_async(_timeout())
+
+    @mock.patch("cephadm.CephadmOrchestrator.remove_osds")
+    @mock.patch("cephadm.CephadmOrchestrator.add_host_label", lambda *a, **kw: None)
+    @mock.patch("cephadm.inventory.HostCache.get_daemons_by_host", lambda *a, **kw: [])
+    def test_host_drain_zap(self, _rm_osds, cephadm_module):
+        # pass force=True in these tests to bypass the _admin label check
+        cephadm_module.drain_host('host1', force=True, zap_osd_devices=False)
+        _rm_osds.assert_called_with([], zap=False)
+
+        cephadm_module.drain_host('host1', force=True, zap_osd_devices=True)
+        _rm_osds.assert_called_with([], zap=True)
diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py
index 581e925a0922a..e9a6c3f07cb2f 100644
--- a/src/pybind/mgr/orchestrator/_interface.py
+++ b/src/pybind/mgr/orchestrator/_interface.py
@@ -367,7 +367,7 @@ class Orchestrator(object):
         """
         raise NotImplementedError()
 
-    def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> OrchResult[str]:
+    def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False, zap_osd_devices: bool = False) -> OrchResult[str]:
         """
         drain all daemons from a host
 
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py
index a4ca7704f041a..24e7d8d4eab18 100644
--- a/src/pybind/mgr/orchestrator/module.py
+++ b/src/pybind/mgr/orchestrator/module.py
@@ -478,9 +478,9 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
         return HandleCommandResult(stdout=completion.result_str())
 
     @_cli_write_command('orch host drain')
-    def _drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> HandleCommandResult:
+    def _drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False, zap_osd_devices: bool = False) -> HandleCommandResult:
         """drain all daemons from a host"""
-        completion = self.drain_host(hostname, force, keep_conf_keyring)
+        completion = self.drain_host(hostname, force, keep_conf_keyring, zap_osd_devices)
         raise_if_exception(completion)
         return HandleCommandResult(stdout=completion.result_str())
 
-- 
2.39.5