git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: add --rm-crush-entry flag to host removal
author: Adam King <adking@redhat.com>
Fri, 29 Sep 2023 18:39:10 +0000 (14:39 -0400)
committer: Adam King <adking@redhat.com>
Fri, 3 Nov 2023 17:19:03 +0000 (13:19 -0400)
This will tell cephadm to try to remove the
crush bucket for the host at the end of the host
removal process. If this fails, we still consider the
host as having been successfully removed from
cephadm's POV, but the user will get back an error
message telling them we failed to remove the
host from the crush map.

Fixes: https://tracker.ceph.com/issues/63031
Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/module.py
src/pybind/mgr/test_orchestrator/module.py

index ac6e39c469ed14ba97f514eca55f9cc786175cf7..a83256d0bb7d23fc4788cf671bc3c0ad3f24ea12 100644 (file)
@@ -39,7 +39,13 @@ from cephadm.http_server import CephadmHttpServer
 from cephadm.agent import CephadmAgentHelpers
 
 
-from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType
+from mgr_module import (
+    MgrModule,
+    HandleCommandResult,
+    Option,
+    NotifyType,
+    MonCommandFailed,
+)
 from mgr_util import build_url
 import orchestrator
 from orchestrator.module import to_format, Format
@@ -1630,7 +1636,7 @@ Then run the following:
         return self._add_host(spec)
 
     @handle_orch_error
-    def remove_host(self, host: str, force: bool = False, offline: bool = False) -> str:
+    def remove_host(self, host: str, force: bool = False, offline: bool = False, rm_crush_entry: bool = False) -> str:
         """
         Remove a host from orchestrator management.
 
@@ -1710,6 +1716,17 @@ Then run the following:
             }
             run_cmd(cmd_args)
 
+        if rm_crush_entry:
+            try:
+                self.check_mon_command({
+                    'prefix': 'osd crush remove',
+                    'name': host,
+                })
+            except MonCommandFailed as e:
+                self.log.error(f'Couldn\'t remove host {host} from CRUSH map: {str(e)}')
+                return (f'Cephadm failed removing host {host}\n'
+                        f'Failed to remove host {host} from the CRUSH map: {str(e)}')
+
         self.inventory.rm_host(host)
         self.cache.rm_host(host)
         self.ssh.reset_con(host)
index 2c7776280028c15506f0a891f1ebafbd75b6bf41..5bde317d19e67dcd7fcae2177bf62a609527b700 100644 (file)
@@ -359,7 +359,7 @@ class Orchestrator(object):
         """
         raise NotImplementedError()
 
-    def remove_host(self, host: str, force: bool, offline: bool) -> OrchResult[str]:
+    def remove_host(self, host: str, force: bool, offline: bool, rm_crush_entry: bool) -> OrchResult[str]:
         """
         Remove a host from the orchestrator inventory.
 
index de4777e0defa48ff63b34f044054a41989e736e1..d6f36e81b718b355b9cb70f5c490ad2f979e3265 100644 (file)
@@ -488,9 +488,9 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
         return self._apply_misc([s], False, Format.plain)
 
     @_cli_write_command('orch host rm')
-    def _remove_host(self, hostname: str, force: bool = False, offline: bool = False) -> HandleCommandResult:
+    def _remove_host(self, hostname: str, force: bool = False, offline: bool = False, rm_crush_entry: bool = False) -> HandleCommandResult:
         """Remove a host"""
-        completion = self.remove_host(hostname, force, offline)
+        completion = self.remove_host(hostname, force, offline, rm_crush_entry)
         raise_if_exception(completion)
         return HandleCommandResult(stdout=completion.result_str())
 
index d89c23bf1593c679890c77bc7fac13338be72a29..a0721250c7f90fb0089bca7ef68d317ebd031b9d 100644 (file)
@@ -284,7 +284,7 @@ class TestOrchestrator(MgrModule, orchestrator.Orchestrator):
         return ''
 
     @handle_orch_error
-    def remove_host(self, host, force: bool, offline: bool):
+    def remove_host(self, host, force: bool, offline: bool, rm_crush_entry: bool):
         assert isinstance(host, str)
         return 'done'