.. prompt:: bash #
- ceph orch host maintenance enter <hostname> [--force]
+ ceph orch host maintenance enter <hostname> [--force] [--yes-i-really-mean-it]
ceph orch host maintenance exit <hostname>
-Where the force flag when entering maintenance allows the user to bypass warnings (but not alerts)
+The ``--force`` flag allows the user to bypass warnings (but not alerts). The ``--yes-i-really-mean-it``
+flag bypasses all safety checks and will attempt to force the host into maintenance mode no
+matter what.
+
+.. warning:: Using the ``--yes-i-really-mean-it`` flag to force the host to enter maintenance
+ mode can potentially cause loss of data availability, the mon quorum to break down due
+ to too few running monitors, mgr module commands (such as ``ceph orch ...`` commands)
+ to become unresponsive, and a number of other possible issues. Please only use this
+ flag if you're absolutely certain you know what you're doing.
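+
+For example, to force a host into maintenance even when the safety checks fail
+(``host1`` below is a placeholder hostname; note that ``--yes-i-really-mean-it``
+must be passed together with ``--force``):
+
+.. prompt:: bash #
+
+ ceph orch host maintenance enter host1 --force --yes-i-really-mean-it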
See also :ref:`cephadm-fqdn`
@handle_orch_error
@host_exists()
- def enter_host_maintenance(self, hostname: str, force: bool = False) -> str:
+ def enter_host_maintenance(self, hostname: str, force: bool = False, yes_i_really_mean_it: bool = False) -> str:
""" Attempt to place a cluster host in maintenance
Placing a host into maintenance disables the cluster's ceph target in systemd
:raises OrchestratorError: Hostname is invalid, host is already in maintenance
"""
- if len(self.cache.get_hosts()) == 1:
+ if yes_i_really_mean_it and not force:
+ raise OrchestratorError("--force must be passed with --yes-i-really-mean-it")
+
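+ # --yes-i-really-mean-it overrides even the single-node and active-upgrade guards below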
+ if len(self.cache.get_hosts()) == 1 and not yes_i_really_mean_it:
raise OrchestratorError("Maintenance feature is not supported on single node clusters")
# if upgrade is active, deny
- if self.upgrade.upgrade_state:
+ if self.upgrade.upgrade_state and not yes_i_really_mean_it:
raise OrchestratorError(
f"Unable to place {hostname} in maintenance with upgrade active/paused")
# daemons on this host, so check the daemons can be stopped
# and if so, place the host into maintenance by disabling the target
rc, msg = self._host_ok_to_stop(hostname, force)
- if rc:
+ if rc and not yes_i_really_mean_it:
raise OrchestratorError(
msg + '\nNote: Warnings can be bypassed with the --force flag', errno=rc)
["enter"],
error_ok=True))
returned_msg = _err[0].split('\n')[-1]
- if returned_msg.startswith('failed') or returned_msg.startswith('ERROR'):
+ if (returned_msg.startswith('failed') or returned_msg.startswith('ERROR')) and not yes_i_really_mean_it:
raise OrchestratorError(
f"Failed to place {hostname} into maintenance for cluster {self._cluster_fsid}")
'who': [crush_node],
'format': 'json'
})
- if rc:
+ if rc and not yes_i_really_mean_it:
self.log.warning(
f"maintenance mode request for {hostname} failed to SET the noout group (rc={rc})")
raise OrchestratorError(
f"Unable to set the osds on {hostname} to noout (rc={rc})")
- else:
+ elif not rc:
self.log.info(
f"maintenance mode request for {hostname} has SET the noout group")
assert not cephadm_module.inventory._inventory[hostname]['status']
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_enter_i_really_mean_it(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ err_str = 'some kind of error'
+ _run_cephadm.side_effect = async_side_effect(
+ ([''], ['something\nfailed - disable the target'], 0))
+ _host_ok.return_value = 1, err_str
+ _get_daemon_types.return_value = ['mon']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname))
+
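+ # with no flags, the mocked _host_ok_to_stop error should block entering maintenance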
+ with pytest.raises(OrchestratorError, match=err_str):
+ cephadm_module.enter_host_maintenance(hostname)
+ assert not cephadm_module.inventory._inventory[hostname]['status']
+
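+ # --force alone is not enough, since the mocked check still reports an error rc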
+ with pytest.raises(OrchestratorError, match=err_str):
+ cephadm_module.enter_host_maintenance(hostname, force=True)
+ assert not cephadm_module.inventory._inventory[hostname]['status']
+
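+ # with --yes-i-really-mean-it (and the required --force) all checks are bypassed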
+ retval = cephadm_module.enter_host_maintenance(hostname, force=True, yes_i_really_mean_it=True)
+ assert retval.result_str().startswith('Daemons for Ceph cluster')
+ assert not retval.exception_str
+ assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance'
+
@mock.patch("cephadm.serve.CephadmServe._run_cephadm")
@mock.patch("cephadm.module.HostCache.get_daemon_types")
@mock.patch("cephadm.module.HostCache.get_hosts")
"""
raise NotImplementedError()
- def enter_host_maintenance(self, hostname: str, force: bool = False) -> OrchResult:
+ def enter_host_maintenance(self, hostname: str, force: bool = False, yes_i_really_mean_it: bool = False) -> OrchResult:
"""
Place a host in maintenance, stopping daemons and disabling its systemd target
"""
return HandleCommandResult(stdout=completion.result_str())
@_cli_write_command('orch host maintenance enter')
- def _host_maintenance_enter(self, hostname: str, force: bool = False) -> HandleCommandResult:
+ def _host_maintenance_enter(self, hostname: str, force: bool = False, yes_i_really_mean_it: bool = False) -> HandleCommandResult:
"""
Prepare a host for maintenance by shutting down and disabling all Ceph daemons (cephadm only)
"""
- completion = self.enter_host_maintenance(hostname, force=force)
+ completion = self.enter_host_maintenance(hostname, force=force, yes_i_really_mean_it=yes_i_really_mean_it)
raise_if_exception(completion)
return HandleCommandResult(stdout=completion.result_str())