From e7156a73c327c608f0eb6dec1c9fedde226b4a79 Mon Sep 17 00:00:00 2001
From: Adam King <adking@redhat.com>
Date: Mon, 5 Jun 2023 15:05:55 -0400
Subject: [PATCH] mgr/cephadm: add ability to zap OSDs' devices while draining
 host

Currently, when cephadm drains a host, it removes all OSDs on the host
but provides no option to zap the OSDs' devices afterwards. Given that
users draining a host are likely doing so to remove it from the
cluster, it makes sense that some users would want to clean up the
devices on the host that were being used for OSDs. Cephadm already
supports zapping devices outside of host draining, so it shouldn't
take much to add that functionality to the host drain as well.

Fixes: https://tracker.ceph.com/issues/61593
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit 85043ff4cee108c152f5aa8af267c85e353c475a)
---
 doc/cephadm/host-management.rst              |  8 ++++++++
 src/pybind/mgr/cephadm/module.py             |  4 ++--
 src/pybind/mgr/cephadm/tests/test_cephadm.py | 11 +++++++++++
 src/pybind/mgr/orchestrator/_interface.py    |  2 +-
 src/pybind/mgr/orchestrator/module.py        |  4 ++--
 5 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/doc/cephadm/host-management.rst b/doc/cephadm/host-management.rst
index 781e6378e15bc..4b964c5f455a7 100644
--- a/doc/cephadm/host-management.rst
+++ b/doc/cephadm/host-management.rst
@@ -123,6 +123,14 @@ All OSDs on the host will be scheduled to be removed. You can check the progress
 
 See :ref:`cephadm-osd-removal` for more details about OSD removal.
 
+The ``orch host drain`` command also supports a ``--zap-osd-devices``
+flag. Setting this flag while draining a host will cause cephadm to zap
+the devices of the OSDs it is removing as part of the drain process.
+
+.. prompt:: bash #
+
+   ceph orch host drain *<host>* --zap-osd-devices
+
 Use the following command to determine whether any daemons are still on the
 host:
 
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index aeca11f235f76..af5d466637b1d 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -3363,7 +3363,7 @@ Then run the following:
         return self.to_remove_osds.all_osds()
 
     @handle_orch_error
-    def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> str:
+    def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False, zap_osd_devices: bool = False) -> str:
         """
         Drain all daemons from a host.
         :param host: host name
@@ -3389,7 +3389,7 @@ Then run the following:
 
         daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_host(hostname)
         osds_to_remove = [d.daemon_id for d in daemons if d.daemon_type == 'osd']
-        self.remove_osds(osds_to_remove)
+        self.remove_osds(osds_to_remove, zap=zap_osd_devices)
 
         daemons_table = ""
         daemons_table += "{:<20} {:<15}\n".format("type", "id")
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index 6cc06cedcf47a..e40de481ff870 100644
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -2539,3 +2539,14 @@ Traceback (most recent call last):
         with pytest.raises(OrchestratorError, match=r'Command "very slow" timed out on host hostC \(non-default 999 second timeout\)'):
             with cephadm_module.async_timeout_handler('hostC', 'very slow', 999):
                 cephadm_module.wait_async(_timeout())
+
+    @mock.patch("cephadm.CephadmOrchestrator.remove_osds")
+    @mock.patch("cephadm.CephadmOrchestrator.add_host_label", lambda *a, **kw: None)
+    @mock.patch("cephadm.inventory.HostCache.get_daemons_by_host", lambda *a, **kw: [])
+    def test_host_drain_zap(self, _rm_osds, cephadm_module):
+        # pass force=True in these tests to bypass the _admin label check
+        cephadm_module.drain_host('host1', force=True, zap_osd_devices=False)
+        _rm_osds.assert_called_with([], zap=False)
+
+        cephadm_module.drain_host('host1', force=True, zap_osd_devices=True)
+        _rm_osds.assert_called_with([], zap=True)
diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py
index 581e925a0922a..e9a6c3f07cb2f 100644
--- a/src/pybind/mgr/orchestrator/_interface.py
+++ b/src/pybind/mgr/orchestrator/_interface.py
@@ -367,7 +367,7 @@ class Orchestrator(object):
         """
         raise NotImplementedError()
 
-    def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> OrchResult[str]:
+    def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False, zap_osd_devices: bool = False) -> OrchResult[str]:
         """
         drain all daemons from a host
 
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py
index a4ca7704f041a..24e7d8d4eab18 100644
--- a/src/pybind/mgr/orchestrator/module.py
+++ b/src/pybind/mgr/orchestrator/module.py
@@ -478,9 +478,9 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
         return HandleCommandResult(stdout=completion.result_str())
 
     @_cli_write_command('orch host drain')
-    def _drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False) -> HandleCommandResult:
+    def _drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False, zap_osd_devices: bool = False) -> HandleCommandResult:
         """drain all daemons from a host"""
-        completion = self.drain_host(hostname, force, keep_conf_keyring)
+        completion = self.drain_host(hostname, force, keep_conf_keyring, zap_osd_devices)
         raise_if_exception(completion)
         return HandleCommandResult(stdout=completion.result_str())
 
-- 
2.39.5