]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: add --rm-crush-entry flag to host removal
authorAdam King <adking@redhat.com>
Fri, 29 Sep 2023 18:39:10 +0000 (14:39 -0400)
committerAdam King <adking@redhat.com>
Wed, 13 Mar 2024 02:42:22 +0000 (22:42 -0400)
This will tell cephadm to try and remove the
crush bucket for the host at the end of the host
removal process. If this fails, we still consider the
host as having been successfully remove from
cephadm's POV, but the user will get back an error
message telling them we failed to remove the
host from the crush map

Fixes: https://tracker.ceph.com/issues/63031
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit fa0f62aa57755c45c713367620dc834530276b25)

Conflicts:
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/orchestrator/_interface.py

src/pybind/mgr/cephadm/module.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/module.py
src/pybind/mgr/test_orchestrator/module.py

index 978088360789f982a9d2b86509841e81816bb8ee..ae97f5b631fcfef42ee5a4166d413f8e9d7efdda 100644 (file)
@@ -41,7 +41,13 @@ from cephadm.http_server import CephadmHttpServer
 from cephadm.agent import CephadmAgentHelpers
 
 
-from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType
+from mgr_module import (
+    MgrModule,
+    HandleCommandResult,
+    Option,
+    NotifyType,
+    MonCommandFailed,
+)
 from mgr_util import build_url
 import orchestrator
 from orchestrator.module import to_format, Format
@@ -1723,7 +1729,7 @@ Then run the following:
         return self.node_proxy_cache.common(category, hostname=hostname)
 
     @handle_orch_error
-    def remove_host(self, host: str, force: bool = False, offline: bool = False) -> str:
+    def remove_host(self, host: str, force: bool = False, offline: bool = False, rm_crush_entry: bool = False) -> str:
         """
         Remove a host from orchestrator management.
 
@@ -1803,6 +1809,17 @@ Then run the following:
             }
             run_cmd(cmd_args)
 
+        if rm_crush_entry:
+            try:
+                self.check_mon_command({
+                    'prefix': 'osd crush remove',
+                    'name': host,
+                })
+            except MonCommandFailed as e:
+                self.log.error(f'Couldn\'t remove host {host} from CRUSH map: {str(e)}')
+                return (f'Cephadm failed removing host {host}\n'
+                        f'Failed to remove host {host} from the CRUSH map: {str(e)}')
+
         self.inventory.rm_host(host)
         self.cache.rm_host(host)
         self.ssh.reset_con(host)
index e5ee5035133e3d43048163a6a3e209e4a2573869..042572ec19418065c3a84aa3b9bc0ea4a5f4ea63 100644 (file)
@@ -435,7 +435,7 @@ class Orchestrator(object):
         """
         raise NotImplementedError()
 
-    def remove_host(self, host: str, force: bool, offline: bool) -> OrchResult[str]:
+    def remove_host(self, host: str, force: bool, offline: bool, rm_crush_entry: bool) -> OrchResult[str]:
         """
         Remove a host from the orchestrator inventory.
 
index a965d1b71af8458888c25603fb10507818ea7882..e69df3e89e56af5dbfc2bd2de4804e2cf39d9cb5 100644 (file)
@@ -647,9 +647,9 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
         return HandleCommandResult(stdout=completion.result_str())
 
     @_cli_write_command('orch host rm')
-    def _remove_host(self, hostname: str, force: bool = False, offline: bool = False) -> HandleCommandResult:
+    def _remove_host(self, hostname: str, force: bool = False, offline: bool = False, rm_crush_entry: bool = False) -> HandleCommandResult:
         """Remove a host"""
-        completion = self.remove_host(hostname, force, offline)
+        completion = self.remove_host(hostname, force, offline, rm_crush_entry)
         raise_if_exception(completion)
         return HandleCommandResult(stdout=completion.result_str())
 
index d89c23bf1593c679890c77bc7fac13338be72a29..a0721250c7f90fb0089bca7ef68d317ebd031b9d 100644 (file)
@@ -284,7 +284,7 @@ class TestOrchestrator(MgrModule, orchestrator.Orchestrator):
         return ''
 
     @handle_orch_error
-    def remove_host(self, host, force: bool, offline: bool):
+    def remove_host(self, host, force: bool, offline: bool, rm_crush_entry: bool):
         assert isinstance(host, str)
         return 'done'