notifications: List[str] = []
error_notifications: List[str] = []
- okay: bool = True
+ initial_rc = 0
for daemon_type, daemon_ids in daemon_map.items():
r = service_registry.get_service(daemon_type_to_service(
daemon_type)).ok_to_stop(daemon_ids, force=force)
if r.retval:
- okay = False
+ initial_rc = r.retval if initial_rc == 0 else initial_rc
+
# collect error notifications so user can see every daemon causing host
# to not be okay to stop
error_notifications.append(r.stderr)
# if extra notifications to print for user, add them to notifications list
notifications.append(r.stdout)
- if not okay:
+ if initial_rc:
# at least one daemon is not okay to stop
- return 1, '\n'.join(error_notifications)
+ return initial_rc, '\n'.join(error_notifications)
if notifications:
return 0, (f'It is presumed safe to stop host {hostname}. '
@handle_orch_error
def host_ok_to_stop(self, hostname: str) -> str:
if hostname not in self.cache.get_hosts():
- raise OrchestratorError(f'Cannot find host "{hostname}"')
+ raise OrchestratorError(f'Cannot find host "{hostname}"', errno=errno.EINVAL)
rc, msg = self._host_ok_to_stop(hostname)
if rc:
:raises OrchestratorError: Hostname is invalid, host is already in maintenance
"""
+
if yes_i_really_mean_it and not force:
- raise OrchestratorError("--force must be passed with --yes-i-really-mean-it")
+ raise OrchestratorError("--force must be passed with --yes-i-really-mean-it", errno=errno.EINVAL)
if len(self.cache.get_hosts()) == 1 and not yes_i_really_mean_it:
- raise OrchestratorError("Maintenance feature is not supported on single node clusters")
+ raise OrchestratorError("Maintenance feature is not supported on single node clusters", errno=errno.ENOTSUP)
# if upgrade is active, deny
if self.upgrade.upgrade_state and not yes_i_really_mean_it:
- raise OrchestratorError(
- f"Unable to place {hostname} in maintenance with upgrade active/paused")
+ raise OrchestratorError(f"Unable to place {hostname} in maintenance with upgrade active/paused", errno=errno.EAGAIN)
tgt_host = self.inventory._inventory[hostname]
if tgt_host.get("status", "").lower() == "maintenance":
- raise OrchestratorError(f"Host {hostname} is already in maintenance")
+ raise OrchestratorError(f"Host {hostname} is already in maintenance", errno=errno.EALREADY)
host_daemons = self.cache.get_daemon_types(hostname)
self.log.debug("daemons on host {}".format(','.join(host_daemons)))
error_ok=True))
returned_msg = _err[0].split('\n')[-1]
if (returned_msg.startswith('failed') or returned_msg.startswith('ERROR')) and not yes_i_really_mean_it:
- raise OrchestratorError(
- f"Failed to place {hostname} into maintenance for cluster {self._cluster_fsid}")
-
+ raise OrchestratorError(f"Failed to place {hostname} into maintenance for cluster {self._cluster_fsid}", errno=errno.EPERM)
if "osd" in host_daemons:
crush_node = hostname if '.' not in hostname else hostname.split('.')[0]
rc, out, err = self.mon_command({
self.log.warning(
f"maintenance mode request for {hostname} failed to SET the noout group (rc={rc})")
raise OrchestratorError(
- f"Unable to set the osds on {hostname} to noout (rc={rc})")
+ f"Unable to set the osds on {hostname} to noout (rc={rc})", errno=errno.EIO)
elif not rc:
self.log.info(
f"maintenance mode request for {hostname} has SET the noout group")
"""
tgt_host = self.inventory._inventory[hostname]
if tgt_host['status'] != "maintenance":
- raise OrchestratorError(f"Host {hostname} is not in maintenance mode")
+ raise OrchestratorError(f"Host {hostname} is not in maintenance mode", errno=errno.EINVAL)
# Given we do not regularly check maintenance mode hosts for being offline,
# we have no idea at this point whether the host is online or not.
if host_offline and not offline:
raise OrchestratorValidationError(
- f'{hostname} is offline, please use --offline and --force to take this host out of maintenance mode')
+ f'{hostname} is offline, please use --offline and --force to take this host out of maintenance mode', errno=errno.ENOTSUP)
if not host_offline and offline:
raise OrchestratorValidationError(
- f'{hostname} is online, please take host out of maintenance mode without --offline.')
+ f'{hostname} is online, please take host out of maintenance mode without --offline.', errno=errno.EAGAIN)
if offline and not force:
- raise OrchestratorValidationError("Taking an offline host out of maintenance mode requires --force")
+ raise OrchestratorValidationError("Taking an offline host out of maintenance mode requires --force", errno=errno.EAGAIN)
# no point trying these parts if we know the host is offline
if not host_offline:
f"Failed to exit maintenance state for host {hostname}, cluster {self._cluster_fsid}")
if not force:
raise OrchestratorError(
- f"Failed to exit maintenance state for host {hostname}, cluster {self._cluster_fsid}")
+ f"Failed to exit maintenance state for host {hostname}, cluster {self._cluster_fsid}", errno=errno.EPERM)
if "osd" in self.cache.get_daemon_types(hostname):
crush_node = hostname if '.' not in hostname else hostname.split('.')[0]
self.log.warning(
f"exit maintenance request failed to UNSET the noout group for {hostname}, (rc={rc})")
if not force:
- raise OrchestratorError(f"Unable to set the osds on {hostname} to noout (rc={rc})")
+ raise OrchestratorError(f"Unable to set the osds on {hostname} to noout (rc={rc})", errno=errno.EIO)
else:
self.log.info(
f"exit maintenance request has UNSET for the noout group on host {hostname}")
if name in dm:
args.append((name, host))
if not args:
- raise OrchestratorError('Unable to find daemon(s) %s' % (names))
+ raise OrchestratorError('Unable to find daemon(s) %s' % (names), errno=errno.EINVAL)
self.log.info('Remove daemons %s' % ' '.join([a[0] for a in args]))
return self._remove_daemons(args)
errno: int = -errno.EINVAL,
event_kind_subject: Optional[Tuple[str, str]] = None) -> None:
super(Exception, self).__init__(msg)
- self.errno = errno
+ self.errno = abs(errno)
# See OrchestratorEvent.subject
self.event_subject = event_kind_subject
return func(*args, **kwargs)
except (OrchestratorError, SpecValidationError) as e:
# Do not print Traceback for expected errors.
- return HandleCommandResult(e.errno, stderr=str(e))
+ return HandleCommandResult(retval=e.errno, stderr=str(e))
except ImportError as e:
- return HandleCommandResult(-errno.ENOENT, stderr=str(e))
+ return HandleCommandResult(retval=-errno.ENOENT, stderr=str(e))
except NotImplementedError:
msg = 'This Orchestrator does not support `{}`'.format(prefix)
- return HandleCommandResult(-errno.ENOENT, stderr=msg)
+ return HandleCommandResult(retval=-errno.ENOENT, stderr=msg)
# misuse lambda to copy `wrapper`
wrapper_copy = lambda *l_args, **l_kwargs: wrapper(*l_args, **l_kwargs) # noqa: E731
return c.result
+def completion_to_result(c: OrchResult[T]) -> HandleCommandResult:
+ """
+ Converts an OrchResult to a HandleCommandResult,
+ preserving output and error codes.
+ """
+ if c.serialized_exception is None:
+ assert c.result is not None, "OrchResult should either have result or an exception"
+ return HandleCommandResult(stdout=c.result_str())
+
+ try:
+ e = pickle.loads(c.serialized_exception)
+ except (KeyError, AttributeError):
+ return HandleCommandResult(stderr=c.exception_str, retval=errno.EIO)
+ if isinstance(e, OrchestratorError):
+ return HandleCommandResult(stderr=str(e), retval=-e.errno)
+
+ raise e
+
+
def _hide_in_features(f: FuncT) -> FuncT:
f._hide_in_features = True # type: ignore
return f
"""
raise NotImplementedError()
- def host_ok_to_stop(self, hostname: str) -> OrchResult:
+ def host_ok_to_stop(self, hostname: str) -> OrchResult[str]:
"""
Check if the specified host can be safely stopped without reducing availability
"""
raise NotImplementedError()
- def enter_host_maintenance(self, hostname: str, force: bool = False, yes_i_really_mean_it: bool = False) -> OrchResult:
+ def enter_host_maintenance(self, hostname: str, force: bool = False, yes_i_really_mean_it: bool = False) -> OrchResult[str]:
"""
Place a host in maintenance, stopping daemons and disabling it's systemd target
"""
raise NotImplementedError()
- def exit_host_maintenance(self, hostname: str, force: bool = False, offline: bool = False) -> OrchResult:
+ def exit_host_maintenance(self, hostname: str, force: bool = False, offline: bool = False) -> OrchResult[str]:
"""
Return a host from maintenance, restarting the clusters systemd target
"""
_cli_write_command,
json_to_generic_spec,
raise_if_exception,
+ completion_to_result,
)
def _host_ok_to_stop(self, hostname: str) -> HandleCommandResult:
"""Check if the specified host can be safely stopped without reducing availability"""""
completion = self.host_ok_to_stop(hostname)
- raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
+ return completion_to_result(completion)
@_cli_write_command('orch host maintenance enter')
def _host_maintenance_enter(self, hostname: str, force: bool = False, yes_i_really_mean_it: bool = False) -> HandleCommandResult:
Prepare a host for maintenance by shutting down and disabling all Ceph daemons (cephadm only)
"""
completion = self.enter_host_maintenance(hostname, force=force, yes_i_really_mean_it=yes_i_really_mean_it)
- raise_if_exception(completion)
-
- return HandleCommandResult(stdout=completion.result_str())
+ return completion_to_result(completion)
@_cli_write_command('orch host maintenance exit')
def _host_maintenance_exit(self, hostname: str, force: bool = False, offline: bool = False) -> HandleCommandResult:
Return a host from maintenance, restarting all Ceph daemons (cephadm only)
"""
completion = self.exit_host_maintenance(hostname, force, offline)
- raise_if_exception(completion)
-
- return HandleCommandResult(stdout=completion.result_str())
+ return completion_to_result(completion)
@_cli_write_command('orch host rescan')
def _host_rescan(self, hostname: str, with_summary: bool = False) -> HandleCommandResult:
"""Remove specific daemon(s)"""
for name in names:
if '.' not in name:
- raise OrchestratorError('%s is not a valid daemon name' % name)
+ return HandleCommandResult(stderr=f"{name} is not a valid daemon name", retval=-errno.EINVAL)
(daemon_type) = name.split('.')[0]
if not force and daemon_type in ['osd', 'mon', 'prometheus']:
- raise OrchestratorError(
- 'must pass --force to REMOVE daemon with potentially PRECIOUS DATA for %s' % name)
+ return HandleCommandResult(stderr=f"must pass --force to REMOVE daemon with potentially PRECIOUS DATA for {name}", retval=-errno.EPERM)
completion = self.remove_daemons(names)
- raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
+ return completion_to_result(completion)
@_cli_write_command('orch rm')
def _service_rm(self,
m = OrchestratorCli('orchestrator', 0, 0)
r = m._handle_command(None, cmd)
assert r == HandleCommandResult(
- retval=-2, stdout='', stderr='No orchestrator configured (try `ceph orch set backend`)')
+ retval=2, stdout='', stderr='No orchestrator configured (try `ceph orch set backend`)')
r = OrchResult([ServiceDescription(spec=ServiceSpec(service_type='osd'), running=123)])