From d888fccad66c575ba0fdea004d2e102cabb5cefe Mon Sep 17 00:00:00 2001 From: Adam King Date: Fri, 29 Sep 2023 14:39:10 -0400 Subject: [PATCH] mgr/cephadm: add --rm-crush-entry flag to host removal This will tell cephadm to try and remove the crush bucket for the host at the end of the host removal process. If this fails, we still consider the host as having been successfully removed from cephadm's POV, but the user will get back an error message telling them we failed to remove the host from the crush map. Fixes: https://tracker.ceph.com/issues/63031 Signed-off-by: Adam King (cherry picked from commit fa0f62aa57755c45c713367620dc834530276b25) Conflicts: src/pybind/mgr/cephadm/module.py src/pybind/mgr/orchestrator/_interface.py --- src/pybind/mgr/cephadm/module.py | 21 +++++++++++++++++++-- src/pybind/mgr/orchestrator/_interface.py | 2 +- src/pybind/mgr/orchestrator/module.py | 4 ++-- src/pybind/mgr/test_orchestrator/module.py | 2 +- 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 978088360789f..ae97f5b631fcf 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -41,7 +41,13 @@ from cephadm.http_server import CephadmHttpServer from cephadm.agent import CephadmAgentHelpers -from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType +from mgr_module import ( + MgrModule, + HandleCommandResult, + Option, + NotifyType, + MonCommandFailed, +) from mgr_util import build_url import orchestrator from orchestrator.module import to_format, Format @@ -1723,7 +1729,7 @@ Then run the following: return self.node_proxy_cache.common(category, hostname=hostname) @handle_orch_error - def remove_host(self, host: str, force: bool = False, offline: bool = False) -> str: + def remove_host(self, host: str, force: bool = False, offline: bool = False, rm_crush_entry: bool = False) -> str: """ Remove a host from orchestrator management. 
@@ -1803,6 +1809,17 @@ Then run the following: } run_cmd(cmd_args) + if rm_crush_entry: + try: + self.check_mon_command({ + 'prefix': 'osd crush remove', + 'name': host, + }) + except MonCommandFailed as e: + self.log.error(f'Couldn\'t remove host {host} from CRUSH map: {str(e)}') + return (f'Cephadm failed removing host {host}\n' + f'Failed to remove host {host} from the CRUSH map: {str(e)}') + self.inventory.rm_host(host) self.cache.rm_host(host) self.ssh.reset_con(host) diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index e5ee5035133e3..042572ec19418 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -435,7 +435,7 @@ class Orchestrator(object): """ raise NotImplementedError() - def remove_host(self, host: str, force: bool, offline: bool) -> OrchResult[str]: + def remove_host(self, host: str, force: bool, offline: bool, rm_crush_entry: bool) -> OrchResult[str]: """ Remove a host from the orchestrator inventory. 
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index a965d1b71af84..e69df3e89e56a 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -647,9 +647,9 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule, return HandleCommandResult(stdout=completion.result_str()) @_cli_write_command('orch host rm') - def _remove_host(self, hostname: str, force: bool = False, offline: bool = False) -> HandleCommandResult: + def _remove_host(self, hostname: str, force: bool = False, offline: bool = False, rm_crush_entry: bool = False) -> HandleCommandResult: """Remove a host""" - completion = self.remove_host(hostname, force, offline) + completion = self.remove_host(hostname, force, offline, rm_crush_entry) raise_if_exception(completion) return HandleCommandResult(stdout=completion.result_str()) diff --git a/src/pybind/mgr/test_orchestrator/module.py b/src/pybind/mgr/test_orchestrator/module.py index d89c23bf1593c..a0721250c7f90 100644 --- a/src/pybind/mgr/test_orchestrator/module.py +++ b/src/pybind/mgr/test_orchestrator/module.py @@ -284,7 +284,7 @@ class TestOrchestrator(MgrModule, orchestrator.Orchestrator): return '' @handle_orch_error - def remove_host(self, host, force: bool, offline: bool): + def remove_host(self, host, force: bool, offline: bool, rm_crush_entry: bool): assert isinstance(host, str) return 'done' -- 2.39.5