git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: add --rm-crush-entry flag to host removal
author Adam King <adking@redhat.com>
Fri, 29 Sep 2023 18:39:10 +0000 (14:39 -0400)
committer Adam King <adking@redhat.com>
Wed, 13 Mar 2024 14:36:29 +0000 (10:36 -0400)
This will tell cephadm to try to remove the
crush bucket for the host at the end of the host
removal process. If this fails, we still consider the
host as having been successfully removed from
cephadm's POV, but the user will get back an error
message telling them we failed to remove the
host from the crush map.

Fixes: https://tracker.ceph.com/issues/63031
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit fa0f62aa57755c45c713367620dc834530276b25)

Conflicts:
src/pybind/mgr/cephadm/module.py

src/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/module.py
src/pybind/mgr/test_orchestrator/module.py

index 641483d8504f2a6485dedfbdded8a9025861209f..f7ad6e3159d39213c0fb09fd13e9a9d478432e50 100644 (file)
@@ -300,6 +300,7 @@ class TestCephAdm(object):
     @mock.patch('cephadm.migrate_sysctl_dir')
     @mock.patch('cephadm.check_unit', lambda *args, **kwargs: (None, 'running', None))
     @mock.patch('cephadm.get_unit_name', lambda *args, **kwargs: 'mon-unit-name')
+    @mock.patch('cephadm.extract_uid_gid', lambda *args, **kwargs: (167, 167))
     @mock.patch('cephadm.get_deployment_container')
     def test_mon_crush_location(self, _get_deployment_container, _migrate_sysctl, _make_var_run, _get_parm, _deploy_daemon, _file_lock, _logger):
         """
index 7c495357a1f4891f22d7aa69562d0d3b59c9b6fc..ce7b0789bc55f15253d2ea974ebb96a8291bbcea 100644 (file)
@@ -36,7 +36,13 @@ from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
 from cephadm.agent import CherryPyThread, CephadmAgentHelpers
 
 
-from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType
+from mgr_module import (
+    MgrModule,
+    HandleCommandResult,
+    Option,
+    NotifyType,
+    MonCommandFailed,
+)
 import orchestrator
 from orchestrator.module import to_format, Format
 
@@ -1544,7 +1550,7 @@ Then run the following:
         return self._add_host(spec)
 
     @handle_orch_error
-    def remove_host(self, host: str, force: bool = False, offline: bool = False) -> str:
+    def remove_host(self, host: str, force: bool = False, offline: bool = False, rm_crush_entry: bool = False) -> str:
         """
         Remove a host from orchestrator management.
 
@@ -1623,6 +1629,17 @@ Then run the following:
             }
             run_cmd(cmd_args)
 
+        if rm_crush_entry:
+            try:
+                self.check_mon_command({
+                    'prefix': 'osd crush remove',
+                    'name': host,
+                })
+            except MonCommandFailed as e:
+                self.log.error(f'Couldn\'t remove host {host} from CRUSH map: {str(e)}')
+                return (f'Cephadm failed removing host {host}\n'
+                        f'Failed to remove host {host} from the CRUSH map: {str(e)}')
+
         self.inventory.rm_host(host)
         self.cache.rm_host(host)
         self.ssh.reset_con(host)
index a58143807d1807b3d98c5b9ddbd47ef46dffb0bb..97d55730029044ba0d382625cbf85ca51a340405 100644 (file)
@@ -347,7 +347,7 @@ class Orchestrator(object):
         """
         raise NotImplementedError()
 
-    def remove_host(self, host: str, force: bool, offline: bool) -> OrchResult[str]:
+    def remove_host(self, host: str, force: bool, offline: bool, rm_crush_entry: bool) -> OrchResult[str]:
         """
         Remove a host from the orchestrator inventory.
 
index 208d27a82a1bfd9800686aa2083d283aca7d1b4c..3e459876d9e9e20fbb34e12f079e2408ac662e87 100644 (file)
@@ -444,9 +444,9 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
         return self._apply_misc([s], False, Format.plain)
 
     @_cli_write_command('orch host rm')
-    def _remove_host(self, hostname: str, force: bool = False, offline: bool = False) -> HandleCommandResult:
+    def _remove_host(self, hostname: str, force: bool = False, offline: bool = False, rm_crush_entry: bool = False) -> HandleCommandResult:
         """Remove a host"""
-        completion = self.remove_host(hostname, force, offline)
+        completion = self.remove_host(hostname, force, offline, rm_crush_entry)
         raise_if_exception(completion)
         return HandleCommandResult(stdout=completion.result_str())
 
index d89c23bf1593c679890c77bc7fac13338be72a29..a0721250c7f90fb0089bca7ef68d317ebd031b9d 100644 (file)
@@ -284,7 +284,7 @@ class TestOrchestrator(MgrModule, orchestrator.Orchestrator):
         return ''
 
     @handle_orch_error
-    def remove_host(self, host, force: bool, offline: bool):
+    def remove_host(self, host, force: bool, offline: bool, rm_crush_entry: bool):
         assert isinstance(host, str)
         return 'done'