This will tell cephadm to try to remove the
crush bucket for the host at the end of the host
removal process. If this fails, we still consider the
host as having been successfully removed from
cephadm's POV, but the user will get back an error
message telling them we failed to remove the
host from the crush map.
Fixes: https://tracker.ceph.com/issues/63031
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit
fa0f62aa57755c45c713367620dc834530276b25)
Conflicts:
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/orchestrator/_interface.py
from cephadm.agent import CephadmAgentHelpers
-from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType
+from mgr_module import (
+ MgrModule,
+ HandleCommandResult,
+ Option,
+ NotifyType,
+ MonCommandFailed,
+)
from mgr_util import build_url
import orchestrator
from orchestrator.module import to_format, Format
return self.node_proxy_cache.common(category, hostname=hostname)
@handle_orch_error
- def remove_host(self, host: str, force: bool = False, offline: bool = False) -> str:
+ def remove_host(self, host: str, force: bool = False, offline: bool = False, rm_crush_entry: bool = False) -> str:
"""
Remove a host from orchestrator management.
}
run_cmd(cmd_args)
+ if rm_crush_entry:
+ try:
+ self.check_mon_command({
+ 'prefix': 'osd crush remove',
+ 'name': host,
+ })
+ except MonCommandFailed as e:
+ self.log.error(f'Couldn\'t remove host {host} from CRUSH map: {str(e)}')
+ return (f'Cephadm failed removing host {host}\n'
+ f'Failed to remove host {host} from the CRUSH map: {str(e)}')
+
self.inventory.rm_host(host)
self.cache.rm_host(host)
self.ssh.reset_con(host)
"""
raise NotImplementedError()
- def remove_host(self, host: str, force: bool, offline: bool) -> OrchResult[str]:
+ def remove_host(self, host: str, force: bool, offline: bool, rm_crush_entry: bool) -> OrchResult[str]:
"""
Remove a host from the orchestrator inventory.
return HandleCommandResult(stdout=completion.result_str())
@_cli_write_command('orch host rm')
- def _remove_host(self, hostname: str, force: bool = False, offline: bool = False) -> HandleCommandResult:
+ def _remove_host(self, hostname: str, force: bool = False, offline: bool = False, rm_crush_entry: bool = False) -> HandleCommandResult:
"""Remove a host"""
- completion = self.remove_host(hostname, force, offline)
+ completion = self.remove_host(hostname, force, offline, rm_crush_entry)
raise_if_exception(completion)
return HandleCommandResult(stdout=completion.result_str())
return ''
@handle_orch_error
- def remove_host(self, host, force: bool, offline: bool):
+ def remove_host(self, host, force: bool, offline: bool, rm_crush_entry: bool):
assert isinstance(host, str)
return 'done'