From: Adam King Date: Fri, 29 Sep 2023 18:39:10 +0000 (-0400) Subject: mgr/cephadm: add --rm-crush-entry flag to host removal X-Git-Tag: v17.2.8~466^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=389154a8f582ed02cd0bcb9b60b7f1bda3f2ae4f;p=ceph.git mgr/cephadm: add --rm-crush-entry flag to host removal This will tell cephadm to try and remove the crush bucket for the host at the end of the host removal process. If this fails, we still consider the host as having been successfully removed from cephadm's POV, but the user will get back an error message telling them we failed to remove the host from the crush map. Fixes: https://tracker.ceph.com/issues/63031 Signed-off-by: Adam King (cherry picked from commit fa0f62aa57755c45c713367620dc834530276b25) Conflicts: src/pybind/mgr/cephadm/module.py --- diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py index 641483d8504f..f7ad6e3159d3 100644 --- a/src/cephadm/tests/test_cephadm.py +++ b/src/cephadm/tests/test_cephadm.py @@ -300,6 +300,7 @@ class TestCephAdm(object): @mock.patch('cephadm.migrate_sysctl_dir') @mock.patch('cephadm.check_unit', lambda *args, **kwargs: (None, 'running', None)) @mock.patch('cephadm.get_unit_name', lambda *args, **kwargs: 'mon-unit-name') + @mock.patch('cephadm.extract_uid_gid', lambda *args, **kwargs: (167, 167)) @mock.patch('cephadm.get_deployment_container') def test_mon_crush_location(self, _get_deployment_container, _migrate_sysctl, _make_var_run, _get_parm, _deploy_daemon, _file_lock, _logger): """ diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 7c495357a1f4..ce7b0789bc55 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -36,7 +36,13 @@ from cephadm.services.cephadmservice import CephadmDaemonDeploySpec from cephadm.agent import CherryPyThread, CephadmAgentHelpers -from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType +from 
mgr_module import ( + MgrModule, + HandleCommandResult, + Option, + NotifyType, + MonCommandFailed, +) import orchestrator from orchestrator.module import to_format, Format @@ -1544,7 +1550,7 @@ Then run the following: return self._add_host(spec) @handle_orch_error - def remove_host(self, host: str, force: bool = False, offline: bool = False) -> str: + def remove_host(self, host: str, force: bool = False, offline: bool = False, rm_crush_entry: bool = False) -> str: """ Remove a host from orchestrator management. @@ -1623,6 +1629,17 @@ Then run the following: } run_cmd(cmd_args) + if rm_crush_entry: + try: + self.check_mon_command({ + 'prefix': 'osd crush remove', + 'name': host, + }) + except MonCommandFailed as e: + self.log.error(f'Couldn\'t remove host {host} from CRUSH map: {str(e)}') + return (f'Cephadm failed removing host {host}\n' + f'Failed to remove host {host} from the CRUSH map: {str(e)}') + self.inventory.rm_host(host) self.cache.rm_host(host) self.ssh.reset_con(host) diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index a58143807d18..97d557300290 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -347,7 +347,7 @@ class Orchestrator(object): """ raise NotImplementedError() - def remove_host(self, host: str, force: bool, offline: bool) -> OrchResult[str]: + def remove_host(self, host: str, force: bool, offline: bool, rm_crush_entry: bool) -> OrchResult[str]: """ Remove a host from the orchestrator inventory. 
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index 208d27a82a1b..3e459876d9e9 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -444,9 +444,9 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule, return self._apply_misc([s], False, Format.plain) @_cli_write_command('orch host rm') - def _remove_host(self, hostname: str, force: bool = False, offline: bool = False) -> HandleCommandResult: + def _remove_host(self, hostname: str, force: bool = False, offline: bool = False, rm_crush_entry: bool = False) -> HandleCommandResult: """Remove a host""" - completion = self.remove_host(hostname, force, offline) + completion = self.remove_host(hostname, force, offline, rm_crush_entry) raise_if_exception(completion) return HandleCommandResult(stdout=completion.result_str()) diff --git a/src/pybind/mgr/test_orchestrator/module.py b/src/pybind/mgr/test_orchestrator/module.py index d89c23bf1593..a0721250c7f9 100644 --- a/src/pybind/mgr/test_orchestrator/module.py +++ b/src/pybind/mgr/test_orchestrator/module.py @@ -284,7 +284,7 @@ class TestOrchestrator(MgrModule, orchestrator.Orchestrator): return '' @handle_orch_error - def remove_host(self, host, force: bool, offline: bool): + def remove_host(self, host, force: bool, offline: bool, rm_crush_entry: bool): assert isinstance(host, str) return 'done'