From: Sebastian Wagner Date: Fri, 24 Jan 2020 12:08:02 +0000 (+0100) Subject: mgr/orchestrator_cli: rename to mgr/orchestrator X-Git-Tag: v15.1.1~379^2~11 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f2c54722860bce81c293d3c10b32f774cc449a36;p=ceph.git mgr/orchestrator_cli: rename to mgr/orchestrator * Move `mgr/orchestrator.py` to `orchestrator/_interface.py` Signed-off-by: Sebastian Wagner --- diff --git a/qa/tasks/mgr/test_module_selftest.py b/qa/tasks/mgr/test_module_selftest.py index 966a92a69773..969d31a7dc8e 100644 --- a/qa/tasks/mgr/test_module_selftest.py +++ b/qa/tasks/mgr/test_module_selftest.py @@ -79,8 +79,8 @@ class TestModuleSelftest(MgrTestCase): def test_crash(self): self._selftest_plugin("crash") - def test_orchestrator_cli(self): - self._selftest_plugin("orchestrator_cli") + def test_orchestrator(self): + self._selftest_plugin("orchestrator") def test_selftest_config_update(self): diff --git a/qa/tasks/mgr/test_orchestrator_cli.py b/qa/tasks/mgr/test_orchestrator_cli.py index 8faa40eb6d99..4c465a783197 100644 --- a/qa/tasks/mgr/test_orchestrator_cli.py +++ b/qa/tasks/mgr/test_orchestrator_cli.py @@ -35,7 +35,7 @@ class TestOrchestratorCli(MgrTestCase): def setUp(self): super(TestOrchestratorCli, self).setUp() - self._load_module("orchestrator_cli") + self._load_module("orchestrator") self._load_module("test_orchestrator") self._orch_cmd("set", "backend", "test_orchestrator") diff --git a/src/pybind/mgr/orchestrator.py b/src/pybind/mgr/orchestrator.py deleted file mode 100644 index 18dc0a1da937..000000000000 --- a/src/pybind/mgr/orchestrator.py +++ /dev/null @@ -1,1736 +0,0 @@ - -""" -ceph-mgr orchestrator interface - -Please see the ceph-mgr module developer's guide for more information. -""" -import copy -import functools -import logging -import pickle -import sys -import time -from collections import namedtuple -from functools import wraps -import uuid -import string -import random -import datetime -import copy -import re -import six -import errno - -from ceph.deployment import inventory - -from mgr_module import MgrModule, PersistentStoreDict, CLICommand, HandleCommandResult -from mgr_util import format_bytes - -try: - from ceph.deployment.drive_group import DriveGroupSpec - from typing import TypeVar, Generic, List, Optional, Union, Tuple, Iterator, Callable, Any, \ - Type, Sequence -except ImportError: - pass - -logger = logging.getLogger(__name__) - - -class HostPlacementSpec(namedtuple('HostPlacementSpec', ['hostname', 'network', 'name'])): - def __str__(self): - res = '' - res += self.hostname - if self.network: - res += ':' + self.network - if self.name: - res += '=' + self.name - return res - - -def parse_host_placement_specs(host, require_network=True): - # type: (str, Optional[bool]) -> HostPlacementSpec - """ - Split host into host, network, and (optional) daemon name parts. The network - part can be an IP, CIDR, or ceph addrvec like '[v2:1.2.3.4:3300,v1:1.2.3.4:6789]'. 
- e.g., - "myhost" - "myhost=name" - "myhost:1.2.3.4" - "myhost:1.2.3.4=name" - "myhost:1.2.3.0/24" - "myhost:1.2.3.0/24=name" - "myhost:[v2:1.2.3.4:3000]=name" - "myhost:[v2:1.2.3.4:3000,v1:1.2.3.4:6789]=name" - """ - # Matches from start to : or = or until end of string - host_re = r'^(.*?)(:|=|$)' - # Matches from : to = or until end of string - ip_re = r':(.*?)(=|$)' - # Matches from = to end of string - name_re = r'=(.*?)$' - - # assign defaults - host_spec = HostPlacementSpec('', '', '') - - match_host = re.search(host_re, host) - if match_host: - host_spec = host_spec._replace(hostname=match_host.group(1)) - - name_match = re.search(name_re, host) - if name_match: - host_spec = host_spec._replace(name=name_match.group(1)) - - ip_match = re.search(ip_re, host) - if ip_match: - host_spec = host_spec._replace(network=ip_match.group(1)) - - if not require_network: - return host_spec - - from ipaddress import ip_network, ip_address - networks = list() # type: List[str] - network = host_spec.network - # in case we have [v2:1.2.3.4:3000,v1:1.2.3.4:6478] - if ',' in network: - networks = [x for x in network.split(',')] - else: - networks.append(network) - for network in networks: - # only if we have versioned network configs - if network.startswith('v') or network.startswith('[v'): - network = network.split(':')[1] - try: - # if subnets are defined, also verify the validity - if '/' in network: - ip_network(six.text_type(network)) - else: - ip_address(six.text_type(network)) - except ValueError as e: - # logging? - raise e - - return host_spec - - -class OrchestratorError(Exception): - """ - General orchestrator specific error. - - Used for deployment, configuration or user errors. - - It's not intended for programming errors or orchestrator internal errors. - """ - - -class NoOrchestrator(OrchestratorError): - """ - No orchestrator in configured. - """ - def __init__(self, msg="No orchestrator configured (try `ceph orch set backend`)"): - super(NoOrchestrator, self).__init__(msg) - - -class OrchestratorValidationError(OrchestratorError): - """ - Raised when an orchestrator doesn't support a specific feature. - """ - - -def handle_exception(prefix, cmd_args, desc, perm, func): - @wraps(func) - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - except (OrchestratorError, ImportError) as e: - # Do not print Traceback for expected errors. - return HandleCommandResult(-errno.ENOENT, stderr=str(e)) - except NotImplementedError: - msg = 'This Orchestrator does not support `{}`'.format(prefix) - return HandleCommandResult(-errno.ENOENT, stderr=msg) - - return CLICommand(prefix, cmd_args, desc, perm)(wrapper) - - -def _cli_command(perm): - def inner_cli_command(prefix, cmd_args="", desc=""): - return lambda func: handle_exception(prefix, cmd_args, desc, perm, func) - return inner_cli_command - - -_cli_read_command = _cli_command('r') -_cli_write_command = _cli_command('rw') - - -def _no_result(): - return object() - - -class _Promise(object): - """ - A completion may need multiple promises to be fulfilled. `_Promise` is one - step. - - Typically ``Orchestrator`` implementations inherit from this class to - build their own way of finishing a step to fulfil a future. - - They are not exposed in the orchestrator interface and can be seen as a - helper to build orchestrator modules. 
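    A minimal chain, shown as an illustrative sketch that assumes purely
    synchronous callbacks: ``_finalize`` drives the evaluation and each
    ``on_complete`` receives the value produced by the previous step::

        >>> p = _Promise(on_complete=lambda x: x + 1).then(lambda x: x * 2)
        >>> p._first_promise._finalize(1)
        >>> p._value
        4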
- """ - INITIALIZED = 1 # We have a parent completion and a next completion - RUNNING = 2 - FINISHED = 3 # we have a final result - - NO_RESULT = _no_result() # type: None - ASYNC_RESULT = object() - - def __init__(self, - _first_promise=None, # type: Optional["_Promise"] - value=NO_RESULT, # type: Optional[Any] - on_complete=None, # type: Optional[Callable] - name=None, # type: Optional[str] - ): - self._on_complete_ = on_complete - self._name = name - self._next_promise = None # type: Optional[_Promise] - - self._state = self.INITIALIZED - self._exception = None # type: Optional[Exception] - - # Value of this _Promise. may be an intermediate result. - self._value = value - - # _Promise is not a continuation monad, as `_result` is of type - # T instead of (T -> r) -> r. Therefore we need to store the first promise here. - self._first_promise = _first_promise or self # type: '_Promise' - - @property - def _exception(self): - # type: () -> Optional[Exception] - return getattr(self, '_exception_', None) - - @_exception.setter - def _exception(self, e): - self._exception_ = e - self._serialized_exception_ = pickle.dumps(e) if e is not None else None - - @property - def _serialized_exception(self): - # type: () -> Optional[bytes] - return getattr(self, '_serialized_exception_', None) - - - - @property - def _on_complete(self): - # type: () -> Optional[Callable] - # https://github.com/python/mypy/issues/4125 - return self._on_complete_ - - @_on_complete.setter - def _on_complete(self, val): - # type: (Optional[Callable]) -> None - self._on_complete_ = val - - - def __repr__(self): - name = self._name or getattr(self._on_complete, '__name__', '??') if self._on_complete else 'None' - val = repr(self._value) if self._value is not self.NO_RESULT else 'NA' - return '{}(_s={}, val={}, _on_c={}, id={}, name={}, pr={}, _next={})'.format( - self.__class__, self._state, val, self._on_complete, id(self), name, getattr(next, '_progress_reference', 'NA'), repr(self._next_promise) - ) - - def pretty_print_1(self): - if self._name: - name = self._name - elif self._on_complete is None: - name = 'lambda x: x' - elif hasattr(self._on_complete, '__name__'): - name = getattr(self._on_complete, '__name__') - else: - name = self._on_complete.__class__.__name__ - val = repr(self._value) if self._value not in (self.NO_RESULT, self.ASYNC_RESULT) else '...' - prefix = { - self.INITIALIZED: ' ', - self.RUNNING: ' >>>', - self.FINISHED: '(done)' - }[self._state] - return '{} {}({}),'.format(prefix, name, val) - - def then(self, on_complete): - # type: (Any, Callable) -> Any - """ - Call ``on_complete`` as soon as this promise is finalized. - """ - assert self._state in (self.INITIALIZED, self.RUNNING) - if self._on_complete is not None: - assert self._next_promise is None - self._set_next_promise(self.__class__( - _first_promise=self._first_promise, - on_complete=on_complete - )) - return self._next_promise - - else: - self._on_complete = on_complete - self._set_next_promise(self.__class__(_first_promise=self._first_promise)) - return self._next_promise - - def _set_next_promise(self, next): - # type: (_Promise) -> None - assert self is not next - assert self._state in (self.INITIALIZED, self.RUNNING) - - self._next_promise = next - assert self._next_promise is not None - for p in iter(self._next_promise): - p._first_promise = self._first_promise - - def _finalize(self, value=NO_RESULT): - """ - Sets this promise to complete. - - Orchestrators may choose to use this helper function. - - :param value: new value. 
- """ - if self._state not in (self.INITIALIZED, self.RUNNING): - raise ValueError('finalize: {} already finished. {}'.format(repr(self), value)) - - self._state = self.RUNNING - - if value is not self.NO_RESULT: - self._value = value - assert self._value is not self.NO_RESULT, repr(self) - - if self._on_complete: - try: - next_result = self._on_complete(self._value) - except Exception as e: - self.fail(e) - return - else: - next_result = self._value - - if isinstance(next_result, _Promise): - # hack: _Promise is not a continuation monad. - next_result = next_result._first_promise # type: ignore - assert next_result not in self, repr(self._first_promise) + repr(next_result) - assert self not in next_result - next_result._append_promise(self._next_promise) - self._set_next_promise(next_result) - assert self._next_promise - if self._next_promise._value is self.NO_RESULT: - self._next_promise._value = self._value - self.propagate_to_next() - elif next_result is not self.ASYNC_RESULT: - # simple map. simply forward - if self._next_promise: - self._next_promise._value = next_result - else: - # Hack: next_result is of type U, _value is of type T - self._value = next_result # type: ignore - self.propagate_to_next() - else: - # asynchronous promise - pass - - - def propagate_to_next(self): - self._state = self.FINISHED - logger.debug('finalized {}'.format(repr(self))) - if self._next_promise: - self._next_promise._finalize() - - def fail(self, e): - # type: (Exception) -> None - """ - Sets the whole completion to be faild with this exception and end the - evaluation. - """ - if self._state == self.FINISHED: - raise ValueError( - 'Invalid State: called fail, but Completion is already finished: {}'.format(str(e))) - assert self._state in (self.INITIALIZED, self.RUNNING) - logger.exception('_Promise failed') - self._exception = e - self._value = 'exception' - if self._next_promise: - self._next_promise.fail(e) - self._state = self.FINISHED - - def __contains__(self, item): - return any(item is p for p in iter(self._first_promise)) - - def __iter__(self): - yield self - elem = self._next_promise - while elem is not None: - yield elem - elem = elem._next_promise - - def _append_promise(self, other): - if other is not None: - assert self not in other - assert other not in self - self._last_promise()._set_next_promise(other) - - def _last_promise(self): - # type: () -> _Promise - return list(iter(self))[-1] - - -class ProgressReference(object): - def __init__(self, - message, # type: str - mgr, - completion=None # type: Optional[Callable[[], Completion]] - ): - """ - ProgressReference can be used within Completions:: - - +---------------+ +---------------------------------+ - | | then | | - | My Completion | +--> | on_complete=ProgressReference() | - | | | | - +---------------+ +---------------------------------+ - - See :func:`Completion.with_progress` for an easy way to create - a progress reference - - """ - super(ProgressReference, self).__init__() - self.progress_id = str(uuid.uuid4()) - self.message = message - self.mgr = mgr - - #: The completion can already have a result, before the write - #: operation is effective. progress == 1 means, the services are - #: created / removed. - self.completion = completion # type: Optional[Callable[[], Completion]] - - #: if a orchestrator module can provide a more detailed - #: progress information, it needs to also call ``progress.update()``. 
- self.progress = 0.0 - - self._completion_has_result = False - self.mgr.all_progress_references.append(self) - - def __str__(self): - """ - ``__str__()`` is used for determining the message for progress events. - """ - return self.message or super(ProgressReference, self).__str__() - - def __call__(self, arg): - self._completion_has_result = True - self.progress = 1.0 - return arg - - @property - def progress(self): - return self._progress - - @progress.setter - def progress(self, progress): - assert progress <= 1.0 - self._progress = progress - try: - if self.effective: - self.mgr.remote("progress", "complete", self.progress_id) - self.mgr.all_progress_references = [p for p in self.mgr.all_progress_references if p is not self] - else: - self.mgr.remote("progress", "update", self.progress_id, self.message, - progress, - [("origin", "orchestrator")]) - except ImportError: - # If the progress module is disabled that's fine, - # they just won't see the output. - pass - - @property - def effective(self): - return self.progress == 1 and self._completion_has_result - - def update(self): - def progress_run(progress): - self.progress = progress - if self.completion: - c = self.completion().then(progress_run) - self.mgr.process([c._first_promise]) - else: - self.progress = 1 - - def fail(self): - self._completion_has_result = True - self.progress = 1 - - -class Completion(_Promise): - """ - Combines multiple promises into one overall operation. - - Completions are composable by being able to - call one completion from another completion. I.e. making them re-usable - using Promises E.g.:: - - >>> return Orchestrator().get_hosts().then(self._create_osd) - - where ``get_hosts`` returns a Completion of list of hosts and - ``_create_osd`` takes a list of hosts. - - The concept behind this is to store the computation steps - explicit and then explicitly evaluate the chain: - - >>> p = Completion(on_complete=lambda x: x*2).then(on_complete=lambda x: str(x)) - ... p.finalize(2) - ... assert p.result = "4" - - or graphically:: - - +---------------+ +-----------------+ - | | then | | - | lambda x: x*x | +--> | lambda x: str(x)| - | | | | - +---------------+ +-----------------+ - - """ - def __init__(self, - _first_promise=None, # type: Optional["Completion"] - value=_Promise.NO_RESULT, # type: Any - on_complete=None, # type: Optional[Callable] - name=None, # type: Optional[str] - ): - super(Completion, self).__init__(_first_promise, value, on_complete, name) - - @property - def _progress_reference(self): - # type: () -> Optional[ProgressReference] - if hasattr(self._on_complete, 'progress_id'): - return self._on_complete # type: ignore - return None - - @property - def progress_reference(self): - # type: () -> Optional[ProgressReference] - """ - ProgressReference. Marks this completion - as a write completeion. - """ - - references = [c._progress_reference for c in iter(self) if c._progress_reference is not None] - if references: - assert len(references) == 1 - return references[0] - return None - - @classmethod - def with_progress(cls, # type: Any - message, # type: str - mgr, - _first_promise=None, # type: Optional["Completion"] - value=_Promise.NO_RESULT, # type: Any - on_complete=None, # type: Optional[Callable] - calc_percent=None # type: Optional[Callable[[], Any]] - ): - # type: (...) 
-> Any - - c = cls( - _first_promise=_first_promise, - value=value, - on_complete=on_complete - ).add_progress(message, mgr, calc_percent) - - return c._first_promise - - def add_progress(self, - message, # type: str - mgr, - calc_percent=None # type: Optional[Callable[[], Any]] - ): - return self.then( - on_complete=ProgressReference( - message=message, - mgr=mgr, - completion=calc_percent - ) - ) - - def fail(self, e): - super(Completion, self).fail(e) - if self._progress_reference: - self._progress_reference.fail() - - def finalize(self, result=_Promise.NO_RESULT): - if self._first_promise._state == self.INITIALIZED: - self._first_promise._finalize(result) - - @property - def result(self): - """ - The result of the operation that we were waited - for. Only valid after calling Orchestrator.process() on this - completion. - """ - last = self._last_promise() - assert last._state == _Promise.FINISHED - return last._value - - def result_str(self): - """Force a string.""" - if self.result is None: - return '' - if isinstance(self.result, list): - return '\n'.join(str(x) for x in self.result) - return str(self.result) - - @property - def exception(self): - # type: () -> Optional[Exception] - return self._last_promise()._exception - - @property - def serialized_exception(self): - # type: () -> Optional[bytes] - return self._last_promise()._serialized_exception - - @property - def has_result(self): - # type: () -> bool - """ - Has the operation already a result? - - For Write operations, it can already have a - result, if the orchestrator's configuration is - persistently written. Typically this would - indicate that an update had been written to - a manifest, but that the update had not - necessarily been pushed out to the cluster. - - :return: - """ - return self._last_promise()._state == _Promise.FINISHED - - @property - def is_errored(self): - # type: () -> bool - """ - Has the completion failed. Default implementation looks for - self.exception. Can be overwritten. - """ - return self.exception is not None - - @property - def needs_result(self): - # type: () -> bool - """ - Could the external operation be deemed as complete, - or should we wait? - We must wait for a read operation only if it is not complete. - """ - return not self.is_errored and not self.has_result - - @property - def is_finished(self): - # type: () -> bool - """ - Could the external operation be deemed as complete, - or should we wait? - We must wait for a read operation only if it is not complete. - """ - return self.is_errored or (self.has_result) - - def pretty_print(self): - - reprs = '\n'.join(p.pretty_print_1() for p in iter(self._first_promise)) - return """<{}>[\n{}\n]""".format(self.__class__.__name__, reprs) - - -def pretty_print(completions): - # type: (Sequence[Completion]) -> str - return ', '.join(c.pretty_print() for c in completions) - - -def raise_if_exception(c): - # type: (Completion) -> None - """ - :raises OrchestratorError: Some user error or a config error. - :raises Exception: Some internal error - """ - if c.serialized_exception is not None: - try: - e = pickle.loads(c.serialized_exception) - except (KeyError, AttributeError): - raise Exception('{}: {}'.format(type(c.exception), c.exception)) - raise e - - -class TrivialReadCompletion(Completion): - """ - This is the trivial completion simply wrapping a result. 
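    For example (illustrative)::

        >>> c = TrivialReadCompletion(["node1", "node2"])
        >>> c.has_result
        True
        >>> c.result
        ['node1', 'node2']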
- """ - def __init__(self, result): - super(TrivialReadCompletion, self).__init__() - if result: - self.finalize(result) - - -def _hide_in_features(f): - f._hide_in_features = True - return f - - -class Orchestrator(object): - """ - Calls in this class may do long running remote operations, with time - periods ranging from network latencies to package install latencies and large - internet downloads. For that reason, all are asynchronous, and return - ``Completion`` objects. - - Methods should only return the completion and not directly execute - anything, like network calls. Otherwise the purpose of - those completions is defeated. - - Implementations are not required to start work on an operation until - the caller waits on the relevant Completion objects. Callers making - multiple updates should not wait on Completions until they're done - sending operations: this enables implementations to batch up a series - of updates when wait() is called on a set of Completion objects. - - Implementations are encouraged to keep reasonably fresh caches of - the status of the system: it is better to serve a stale-but-recent - result read of e.g. device inventory than it is to keep the caller waiting - while you scan hosts every time. - """ - - @_hide_in_features - def is_orchestrator_module(self): - """ - Enable other modules to interrogate this module to discover - whether it's usable as an orchestrator module. - - Subclasses do not need to override this. - """ - return True - - @_hide_in_features - def available(self): - # type: () -> Tuple[bool, str] - """ - Report whether we can talk to the orchestrator. This is the - place to give the user a meaningful message if the orchestrator - isn't running or can't be contacted. - - This method may be called frequently (e.g. every page load - to conditionally display a warning banner), so make sure it's - not too expensive. It's okay to give a slightly stale status - (e.g. based on a periodic background ping of the orchestrator) - if that's necessary to make this method fast. - - .. note:: - `True` doesn't mean that the desired functionality - is actually available in the orchestrator. I.e. this - won't work as expected:: - - >>> if OrchestratorClientMixin().available()[0]: # wrong. - ... OrchestratorClientMixin().get_hosts() - - :return: two-tuple of boolean, string - """ - raise NotImplementedError() - - @_hide_in_features - def process(self, completions): - # type: (List[Completion]) -> None - """ - Given a list of Completion instances, process any which are - incomplete. - - Callers should inspect the detail of each completion to identify - partial completion/progress information, and present that information - to the user. - - This method should not block, as this would make it slow to query - a status, while other long running operations are in progress. - """ - raise NotImplementedError() - - @_hide_in_features - def get_feature_set(self): - """Describes which methods this orchestrator implements - - .. note:: - `True` doesn't mean that the desired functionality - is actually possible in the orchestrator. I.e. this - won't work as expected:: - - >>> api = OrchestratorClientMixin() - ... if api.get_feature_set()['get_hosts']['available']: # wrong. - ... api.get_hosts() - - It's better to ask for forgiveness instead:: - - >>> try: - ... OrchestratorClientMixin().get_hosts() - ... except (OrchestratorError, NotImplementedError): - ... ... 
- - :returns: Dict of API method names to ``{'available': True or False}`` - """ - module = self.__class__ - features = {a: {'available': getattr(Orchestrator, a, None) != getattr(module, a)} - for a in Orchestrator.__dict__ - if not a.startswith('_') and not getattr(getattr(Orchestrator, a), '_hide_in_features', False) - } - return features - - @_hide_in_features - def cancel_completions(self): - # type: () -> None - """ - Cancels ongoing completions. Unstuck the mgr. - """ - raise NotImplementedError() - - def add_host(self, HostSpec): - # type: (HostSpec) -> Completion - """ - Add a host to the orchestrator inventory. - - :param host: hostname - """ - raise NotImplementedError() - - def remove_host(self, host): - # type: (str) -> Completion - """ - Remove a host from the orchestrator inventory. - - :param host: hostname - """ - raise NotImplementedError() - - def update_host_addr(self, host, addr): - # type: (str, str) -> Completion - """ - Update a host's address - - :param host: hostname - :param addr: address (dns name or IP) - """ - raise NotImplementedError() - - def get_hosts(self): - # type: () -> Completion - """ - Report the hosts in the cluster. - - The default implementation is extra slow. - - :return: list of InventoryNodes - """ - return self.get_inventory() - - def add_host_label(self, host, label): - # type: (str, str) -> Completion - """ - Add a host label - """ - raise NotImplementedError() - - def remove_host_label(self, host, label): - # type: (str, str) -> Completion - """ - Remove a host label - """ - raise NotImplementedError() - - def get_inventory(self, node_filter=None, refresh=False): - # type: (Optional[InventoryFilter], bool) -> Completion - """ - Returns something that was created by `ceph-volume inventory`. - - :return: list of InventoryNode - """ - raise NotImplementedError() - - def describe_service(self, service_type=None, service_id=None, node_name=None, refresh=False): - # type: (Optional[str], Optional[str], Optional[str], bool) -> Completion - """ - Describe a service (of any kind) that is already configured in - the orchestrator. For example, when viewing an OSD in the dashboard - we might like to also display information about the orchestrator's - view of the service (like the kubernetes pod ID). - - When viewing a CephFS filesystem in the dashboard, we would use this - to display the pods being currently run for MDS daemons. - - :return: list of ServiceDescription objects. - """ - raise NotImplementedError() - - def list_daemons(self, daemon_type=None, daemon_id=None, host=None, refresh=False): - # type: (Optional[str], Optional[str], Optional[str], bool) -> Completion - """ - Describe a daemon (of any kind) that is already configured in - the orchestrator. - - :return: list of DaemonDescription objects. - """ - raise NotImplementedError() - - def remove_daemons(self, names, force): - # type: (List[str], bool) -> Completion - """ - Remove specific daemon(s). - - :return: None - """ - raise NotImplementedError() - - def remove_service(self, service_type, service_name=None): - # type: (str, Optional[str]) -> Completion - """ - Remove a service (a collection of daemons). - - :return: None - """ - raise NotImplementedError() - - def service_action(self, action, service_type, service_name): - # type: (str, str, str) -> Completion - """ - Perform an action (start/stop/reload) on a service (i.e., all daemons - providing the logical service). - - :param action: one of "start", "stop", "restart", "redeploy", "reconfig" - :param service_type: e.g. 
"mds", "rgw", ... - :param service_name: name of logical service ("cephfs", "us-east", ...) - :rtype: Completion - """ - #assert action in ["start", "stop", "reload, "restart", "redeploy"] - raise NotImplementedError() - - def daemon_action(self, action, daemon_type, daemon_id): - # type: (str, str, str) -> Completion - """ - Perform an action (start/stop/reload) on a daemon. - - :param action: one of "start", "stop", "restart", "redeploy", "reconfig" - :param name: name of daemon - :rtype: Completion - """ - #assert action in ["start", "stop", "reload, "restart", "redeploy"] - raise NotImplementedError() - - def create_osds(self, drive_groups): - # type: (List[DriveGroupSpec]) -> Completion - """ - Create one or more OSDs within a single Drive Group. - - The principal argument here is the drive_group member - of OsdSpec: other fields are advisory/extensible for any - finer-grained OSD feature enablement (choice of backing store, - compression/encryption, etc). - - :param drive_groups: a list of DriveGroupSpec - :param all_hosts: TODO, this is required because the orchestrator methods are not composable - Probably this parameter can be easily removed because each orchestrator can use - the "get_inventory" method and the "drive_group.host_pattern" attribute - to obtain the list of hosts where to apply the operation - """ - raise NotImplementedError() - - def blink_device_light(self, ident_fault, on, locations): - # type: (str, bool, List[DeviceLightLoc]) -> Completion - """ - Instructs the orchestrator to enable or disable either the ident or the fault LED. - - :param ident_fault: either ``"ident"`` or ``"fault"`` - :param on: ``True`` = on. - :param locations: See :class:`orchestrator.DeviceLightLoc` - """ - raise NotImplementedError() - - def add_mon(self, spec): - # type: (ServiceSpec) -> Completion - """Create mon daemon(s)""" - raise NotImplementedError() - - def apply_mon(self, spec): - # type: (ServiceSpec) -> Completion - """Update mon cluster""" - raise NotImplementedError() - - def add_mgr(self, spec): - # type: (ServiceSpec) -> Completion - """Create mgr daemon(s)""" - raise NotImplementedError() - - def apply_mgr(self, spec): - # type: (ServiceSpec) -> Completion - """Update mgr cluster""" - raise NotImplementedError() - - def add_mds(self, spec): - # type: (ServiceSpec) -> Completion - """Create MDS daemon(s)""" - raise NotImplementedError() - - def apply_mds(self, spec): - # type: (ServiceSpec) -> Completion - """Update MDS cluster""" - raise NotImplementedError() - - def add_rbd_mirror(self, spec): - # type: (ServiceSpec) -> Completion - """Create rbd-mirror daemon(s)""" - raise NotImplementedError() - - def apply_rbd_mirror(self, spec): - # type: (ServiceSpec) -> Completion - """Update rbd-mirror cluster""" - raise NotImplementedError() - - def add_nfs(self, spec): - # type: (NFSServiceSpec) -> Completion - """Create NFS daemon(s)""" - raise NotImplementedError() - - def apply_nfs(self, spec): - # type: (NFSServiceSpec) -> Completion - """Update NFS cluster""" - raise NotImplementedError() - - def add_rgw(self, spec): - # type: (RGWSpec) -> Completion - """Create RGW daemon(s)""" - raise NotImplementedError() - - def apply_rgw(self, spec): - # type: (RGWSpec) -> Completion - """Update RGW cluster""" - raise NotImplementedError() - - def add_prometheus(self, spec): - # type: (ServiceSpec) -> Completion - """Create new prometheus daemon""" - raise NotImplementedError() - - def apply_prometheus(self, spec): - # type: (ServiceSpec) -> Completion - """Update prometheus cluster""" 
- raise NotImplementedError() - - def upgrade_check(self, image, version): - # type: (Optional[str], Optional[str]) -> Completion - raise NotImplementedError() - - def upgrade_start(self, image, version): - # type: (Optional[str], Optional[str]) -> Completion - raise NotImplementedError() - - def upgrade_pause(self): - # type: () -> Completion - raise NotImplementedError() - - def upgrade_resume(self): - # type: () -> Completion - raise NotImplementedError() - - def upgrade_stop(self): - # type: () -> Completion - raise NotImplementedError() - - def upgrade_status(self): - # type: () -> Completion - """ - If an upgrade is currently underway, report on where - we are in the process, or if some error has occurred. - - :return: UpgradeStatusSpec instance - """ - raise NotImplementedError() - - @_hide_in_features - def upgrade_available(self): - # type: () -> Completion - """ - Report on what versions are available to upgrade to - - :return: List of strings - """ - raise NotImplementedError() - -class HostSpec(object): - def __init__(self, hostname, addr=None, labels=None): - # type: (str, Optional[str], Optional[List[str]]) -> None - self.hostname = hostname # the hostname on the host - self.addr = addr or hostname # DNS name or IP address to reach it - self.labels = labels or [] # initial label(s), if any - -class UpgradeStatusSpec(object): - # Orchestrator's report on what's going on with any ongoing upgrade - def __init__(self): - self.in_progress = False # Is an upgrade underway? - self.target_image = None - self.services_complete = [] # Which daemon types are fully updated? - self.message = "" # Freeform description - - -class PlacementSpec(object): - """ - For APIs that need to specify a node subset - """ - def __init__(self, label=None, hosts=None, count=None): - # type: (Optional[str], Optional[List], Optional[int]) -> None - self.label = label - self.hosts = [] # type: List[HostPlacementSpec] - if hosts: - if all([isinstance(host, HostPlacementSpec) for host in hosts]): - self.hosts = hosts - else: - self.hosts = [parse_host_placement_specs(x, require_network=False) for x in hosts if x] - - - self.count = count # type: Optional[int] - - def set_hosts(self, hosts): - # To backpopulate the .hosts attribute when using labels or count - # in the orchestrator backend. - self.hosts = hosts - - @classmethod - def from_dict(cls, data): - _cls = cls(**data) - _cls.validate() - return _cls - - def validate(self): - if self.hosts and self.label: - # TODO: a less generic Exception - raise Exception('Node and label are mutually exclusive') - if self.count is not None and self.count <= 0: - raise Exception("num/count must be > 1") - - -def handle_type_error(method): - @wraps(method) - def inner(cls, *args, **kwargs): - try: - return method(cls, *args, **kwargs) - except TypeError as e: - error_msg = '{}: {}'.format(cls.__name__, e) - raise OrchestratorValidationError(error_msg) - return inner - - -class DaemonDescription(object): - """ - For responding to queries about the status of a particular daemon, - stateful or stateless. - - This is not about health or performance monitoring of daemons: it's - about letting the orchestrator tell Ceph whether and where a - daemon is scheduled in the cluster. When an orchestrator tells - Ceph "it's running on node123", that's not a promise that the process - is literally up this second, it's a description of where the orchestrator - has decided the daemon should run. 
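    For example (illustrative)::

        >>> d = DaemonDescription(daemon_type='mgr', daemon_id='x', nodename='node123')
        >>> d.name()
        'mgr.x'

    ``to_json()`` and ``from_json()`` round-trip the same fields.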
- """ - - def __init__(self, - daemon_type=None, - daemon_id=None, - nodename=None, - container_id=None, - container_image_id=None, - container_image_name=None, - version=None, - status=None, - status_desc=None): - # Node is at the same granularity as InventoryNode - self.nodename = nodename - - # Not everyone runs in containers, but enough people do to - # justify having the container_id (runtime id) and container_image - # (image name) - self.container_id = container_id # runtime id - self.container_image_id = container_image_id # image hash - self.container_image_name = container_image_name # image friendly name - - # The type of service (osd, mon, mgr, etc.) - self.daemon_type = daemon_type - - # The orchestrator will have picked some names for daemons, - # typically either based on hostnames or on pod names. - # This is the in mds., the ID that will appear - # in the FSMap/ServiceMap. - self.daemon_id = daemon_id - - # Service version that was deployed - self.version = version - - # Service status: -1 error, 0 stopped, 1 running - self.status = status - - # Service status description when status == -1. - self.status_desc = status_desc - - # datetime when this info was last refreshed - self.last_refresh = None # type: Optional[datetime.datetime] - - def name(self): - return '%s.%s' % (self.daemon_type, self.daemon_id) - - def __repr__(self): - return "({type}.{id})".format(type=self.daemon_type, - id=self.daemon_id) - - def to_json(self): - out = { - 'nodename': self.nodename, - 'container_id': self.container_id, - 'container_image_id': self.container_image_id, - 'container_image_name': self.container_image_name, - 'daemon_id': self.daemon_id, - 'daemon_type': self.daemon_type, - 'version': self.version, - 'status': self.status, - 'status_desc': self.status_desc, - } - return {k: v for (k, v) in out.items() if v is not None} - - @classmethod - @handle_type_error - def from_json(cls, data): - return cls(**data) - -class ServiceDescription(object): - """ - For responding to queries about the status of a particular service, - stateful or stateless. - - This is not about health or performance monitoring of services: it's - about letting the orchestrator tell Ceph whether and where a - service is scheduled in the cluster. When an orchestrator tells - Ceph "it's running on node123", that's not a promise that the process - is literally up this second, it's a description of where the orchestrator - has decided the service should run. - """ - - def __init__(self, nodename=None, - container_id=None, container_image_id=None, - container_image_name=None, - service=None, service_instance=None, - service_type=None, version=None, rados_config_location=None, - service_url=None, status=None, status_desc=None): - # Node is at the same granularity as InventoryNode - self.nodename = nodename # type: Optional[str] - - # Not everyone runs in containers, but enough people do to - # justify having the container_id (runtime id) and container_image - # (image name) - self.container_id = container_id # runtime id - self.container_image_id = container_image_id # image hash - self.container_image_name = container_image_name # image friendly name - - # Some services can be deployed in groups. For example, mds's can - # have an active and standby daemons, and nfs-ganesha can run daemons - # in parallel. This tag refers to a group of daemons as a whole. 
- # - # For instance, a cluster of mds' all service the same fs, and they - # will all have the same service value (which may be the - # Filesystem name in the FSMap). - # - # Single-instance services should leave this set to None - self.service = service - - # The orchestrator will have picked some names for daemons, - # typically either based on hostnames or on pod names. - # This is the in mds., the ID that will appear - # in the FSMap/ServiceMap. - self.service_instance = service_instance - - # The type of service (osd, mon, mgr, etc.) - self.service_type = service_type - - # Service version that was deployed - self.version = version - - # Location of the service configuration when stored in rados - # object. Format: "rados:///[]" - self.rados_config_location = rados_config_location - - # If the service exposes REST-like API, this attribute should hold - # the URL. - self.service_url = service_url - - # Service status: -1 error, 0 stopped, 1 running - self.status = status - - # Service status description when status == -1. - self.status_desc = status_desc - - # datetime when this info was last refreshed - self.last_refresh = None # type: Optional[datetime.datetime] - - def name(self): - if self.service_instance: - return '%s.%s' % (self.service_type, self.service_instance) - return self.service_type - - def __repr__(self): - return "({n_name}:{s_type})".format(n_name=self.nodename, - s_type=self.name()) - - def to_json(self): - out = { - 'nodename': self.nodename, - 'container_id': self.container_id, - 'service': self.service, - 'service_instance': self.service_instance, - 'service_type': self.service_type, - 'version': self.version, - 'rados_config_location': self.rados_config_location, - 'service_url': self.service_url, - 'status': self.status, - 'status_desc': self.status_desc, - } - return {k: v for (k, v) in out.items() if v is not None} - - @classmethod - @handle_type_error - def from_json(cls, data): - return cls(**data) - - -class ServiceSpec(object): - """ - Details of service creation. - - Request to the orchestrator for a cluster of daemons - such as MDS, RGW, iscsi gateway, MONs, MGRs, Prometheus - - This structure is supposed to be enough information to - start the services. - - """ - - def __init__(self, name=None, placement=None): - # type: (Optional[str], Optional[PlacementSpec]) -> None - self.placement = PlacementSpec() if placement is None else placement # type: PlacementSpec - - #: Give this set of stateless services a name: typically it would - #: be the name of a CephFS filesystem, RGW zone, etc. Must be unique - #: within one ceph cluster. Note: Not all clusters have a name - self.name = name # type: Optional[str] - - if self.placement is not None and self.placement.count is not None: - #: Count of service instances. Deprecated. - self.count = self.placement.count # type: int - else: - self.count = 1 - - def validate_add(self): - if not self.name: - raise OrchestratorValidationError('Cannot add Service: Name required') - - -class NFSServiceSpec(ServiceSpec): - def __init__(self, name, pool=None, namespace=None, placement=None): - super(NFSServiceSpec, self).__init__(name, placement) - - #: RADOS pool where NFS client recovery data is stored. - self.pool = pool - - #: RADOS namespace where NFS client recovery data is stored in the pool. 
- self.namespace = namespace - - def validate_add(self): - super(NFSServiceSpec, self).validate_add() - - if not self.pool: - raise OrchestratorValidationError('Cannot add NFS: No Pool specified') - - -class RGWSpec(ServiceSpec): - """ - Settings to configure a (multisite) Ceph RGW - - """ - def __init__(self, - rgw_realm, # type: str - rgw_zone, # type: str - placement=None, - hosts=None, # type: Optional[List[str]] - rgw_multisite=None, # type: Optional[bool] - rgw_zonemaster=None, # type: Optional[bool] - rgw_zonesecondary=None, # type: Optional[bool] - rgw_multisite_proto=None, # type: Optional[str] - rgw_frontend_port=None, # type: Optional[int] - rgw_zonegroup=None, # type: Optional[str] - rgw_zone_user=None, # type: Optional[str] - system_access_key=None, # type: Optional[str] - system_secret_key=None, # type: Optional[str] - count=None # type: Optional[int] - ): - # Regarding default values. Ansible has a `set_rgwspec_defaults` that sets - # default values that makes sense for Ansible. Rook has default values implemented - # in Rook itself. Thus we don't set any defaults here in this class. - - super(RGWSpec, self).__init__(name=rgw_realm + '.' + rgw_zone, - placement=placement) - - #: List of hosts where RGWs should run. Not for Rook. - if hosts: - self.placement = PlacementSpec(hosts=hosts) - - #: is multisite - self.rgw_multisite = rgw_multisite - self.rgw_zonemaster = rgw_zonemaster - self.rgw_zonesecondary = rgw_zonesecondary - self.rgw_multisite_proto = rgw_multisite_proto - self.rgw_frontend_port = rgw_frontend_port - - self.rgw_realm = rgw_realm - self.rgw_zone = rgw_zone - self.rgw_zonegroup = rgw_zonegroup - self.rgw_zone_user = rgw_zone_user - - self.system_access_key = system_access_key - self.system_secret_key = system_secret_key - - @property - def rgw_multisite_endpoint_addr(self): - """Returns the first host. Not supported for Rook.""" - return self.placement.hosts[0] - - @property - def rgw_multisite_endpoints_list(self): - return ",".join(["{}://{}:{}".format(self.rgw_multisite_proto, - host, - self.rgw_frontend_port) for host in self.placement.hosts]) - - def genkey(self, nchars): - """ Returns a random string of nchars - - :nchars : Length of the returned string - """ - # TODO Python 3: use Secrets module instead. - - return ''.join(random.choice(string.ascii_uppercase + - string.ascii_lowercase + - string.digits) for _ in range(nchars)) - - @classmethod - def from_json(cls, json_rgw_spec): - # type: (dict) -> RGWSpec - """ - Initialize 'RGWSpec' object data from a json structure - :param json_rgw_spec: A valid dict with a the RGW settings - """ - # TODO: also add PlacementSpec(**json_rgw_spec['placement']) - args = {k:v for k, v in json_rgw_spec.items()} - return RGWSpec(**args) - - -class InventoryFilter(object): - """ - When fetching inventory, use this filter to avoid unnecessarily - scanning the whole estate. - - Typical use: filter by node when presenting UI workflow for configuring - a particular server. - filter by label when not all of estate is Ceph servers, - and we want to only learn about the Ceph servers. - filter by label when we are interested particularly - in e.g. OSD servers. 
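    For example, to restrict a query to two named hosts (illustrative; the
    ``orchestrator`` variable stands for whichever backend handle is in use)::

        >>> f = InventoryFilter(nodes=['node1', 'node2'])
        >>> orchestrator.get_inventory(node_filter=f, refresh=True)  # doctest: +SKIP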
- - """ - def __init__(self, labels=None, nodes=None): - # type: (Optional[List[str]], Optional[List[str]]) -> None - - #: Optional: get info about nodes matching labels - self.labels = labels - - #: Optional: get info about certain named nodes only - self.nodes = nodes - - -class InventoryNode(object): - """ - When fetching inventory, all Devices are groups inside of an - InventoryNode. - """ - def __init__(self, name, devices=None, labels=None, addr=None): - # type: (str, Optional[inventory.Devices], Optional[List[str]], Optional[str]) -> None - if devices is None: - devices = inventory.Devices([]) - if labels is None: - labels = [] - assert isinstance(devices, inventory.Devices) - - self.name = name # unique within cluster. For example a hostname. - self.addr = addr or name - self.devices = devices - self.labels = labels - - def to_json(self): - return { - 'name': self.name, - 'addr': self.addr, - 'devices': self.devices.to_json(), - 'labels': self.labels, - } - - @classmethod - def from_json(cls, data): - try: - _data = copy.deepcopy(data) - name = _data.pop('name') - addr = _data.pop('addr', None) or name - devices = inventory.Devices.from_json(_data.pop('devices')) - if _data: - error_msg = 'Unknown key(s) in Inventory: {}'.format(','.join(_data.keys())) - raise OrchestratorValidationError(error_msg) - labels = _data.get('labels', list()) - return cls(name, devices, labels, addr) - except KeyError as e: - error_msg = '{} is required for {}'.format(e, cls.__name__) - raise OrchestratorValidationError(error_msg) - except TypeError as e: - raise OrchestratorValidationError('Failed to read inventory: {}'.format(e)) - - - @classmethod - def from_nested_items(cls, hosts): - devs = inventory.Devices.from_json - return [cls(item[0], devs(item[1].data)) for item in hosts] - - def __repr__(self): - return "({name})".format(name=self.name) - - @staticmethod - def get_host_names(nodes): - # type: (List[InventoryNode]) -> List[str] - return [node.name for node in nodes] - - def __eq__(self, other): - return self.name == other.name and self.devices == other.devices - - -class DeviceLightLoc(namedtuple('DeviceLightLoc', ['host', 'dev', 'path'])): - """ - Describes a specific device on a specific host. Used for enabling or disabling LEDs - on devices. - - hostname as in :func:`orchestrator.Orchestrator.get_hosts` - - device_id: e.g. ``ABC1234DEF567-1R1234_ABC8DE0Q``. - See ``ceph osd metadata | jq '.[].device_ids'`` - """ - __slots__ = () - - -def _mk_orch_methods(cls): - # Needs to be defined outside of for. - # Otherwise meth is always bound to last key - def shim(method_name): - def inner(self, *args, **kwargs): - completion = self._oremote(method_name, args, kwargs) - return completion - return inner - - for meth in Orchestrator.__dict__: - if not meth.startswith('_') and meth not in ['is_orchestrator_module']: - setattr(cls, meth, shim(meth)) - return cls - - -@_mk_orch_methods -class OrchestratorClientMixin(Orchestrator): - """ - A module that inherents from `OrchestratorClientMixin` can directly call - all :class:`Orchestrator` methods without manually calling remote. - - Every interface method from ``Orchestrator`` is converted into a stub method that internally - calls :func:`OrchestratorClientMixin._oremote` - - >>> class MyModule(OrchestratorClientMixin): - ... def func(self): - ... completion = self.add_host('somehost') # calls `_oremote()` - ... self._orchestrator_wait([completion]) - ... self.log.debug(completion.result) - - .. 
note:: Orchestrator implementations should not inherit from `OrchestratorClientMixin`. - Reason is, that OrchestratorClientMixin magically redirects all methods to the - "real" implementation of the orchestrator. - - - >>> import mgr_module - >>> class MyImplentation(mgr_module.MgrModule, Orchestrator): - ... def __init__(self, ...): - ... self.orch_client = OrchestratorClientMixin() - ... self.orch_client.set_mgr(self.mgr)) - """ - - def set_mgr(self, mgr): - # type: (MgrModule) -> None - """ - Useable in the Dashbord that uses a global ``mgr`` - """ - - self.__mgr = mgr # Make sure we're not overwriting any other `mgr` properties - - def __get_mgr(self): - try: - return self.__mgr - except AttributeError: - return self - - def _oremote(self, meth, args, kwargs): - """ - Helper for invoking `remote` on whichever orchestrator is enabled - - :raises RuntimeError: If the remote method failed. - :raises OrchestratorError: orchestrator failed to perform - :raises ImportError: no `orchestrator_cli` module or backend not found. - """ - mgr = self.__get_mgr() - - try: - o = mgr._select_orchestrator() - except AttributeError: - o = mgr.remote('orchestrator_cli', '_select_orchestrator') - - if o is None: - raise NoOrchestrator() - - mgr.log.debug("_oremote {} -> {}.{}(*{}, **{})".format(mgr.module_name, o, meth, args, kwargs)) - return mgr.remote(o, meth, *args, **kwargs) - - def _orchestrator_wait(self, completions): - # type: (List[Completion]) -> None - """ - Wait for completions to complete (reads) or - become persistent (writes). - - Waits for writes to be *persistent* but not *effective*. - - :param completions: List of Completions - :raises NoOrchestrator: - :raises RuntimeError: something went wrong while calling the process method. - :raises ImportError: no `orchestrator_cli` module or backend not found. 
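        A typical call sequence from a client module (illustrative)::

            >>> completions = [self.get_hosts()]
            ... self._orchestrator_wait(completions)
            ... raise_if_exception(completions[0])
            ... hosts = completions[0].result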
- """ - while any(not c.has_result for c in completions): - self.process(completions) - self.__get_mgr().log.info("Operations pending: %s", - sum(1 for c in completions if not c.has_result)) - if any(c.needs_result for c in completions): - time.sleep(1) - else: - break - - -class OutdatableData(object): - DATEFMT = '%Y-%m-%d %H:%M:%S.%f' - - def __init__(self, data=None, last_refresh=None): - # type: (Optional[dict], Optional[datetime.datetime]) -> None - self._data = data - if data is not None and last_refresh is None: - self.last_refresh = datetime.datetime.utcnow() # type: Optional[datetime.datetime] - else: - self.last_refresh = last_refresh - - def json(self): - if self.last_refresh is not None: - timestr = self.last_refresh.strftime(self.DATEFMT) # type: Optional[str] - else: - timestr = None - - return { - "data": self._data, - "last_refresh": timestr, - } - - @property - def data(self): - return self._data - - # @data.setter - # No setter, as it doesn't work as expected: It's not saved in store automatically - - @classmethod - def time_from_string(cls, timestr): - if timestr is None: - return None - # drop the 'Z' timezone indication, it's always UTC - timestr = timestr.rstrip('Z') - return datetime.datetime.strptime(timestr, cls.DATEFMT) - - @classmethod - def from_json(cls, data): - return cls(data['data'], cls.time_from_string(data['last_refresh'])) - - def outdated(self, timeout=None): - if timeout is None: - timeout = 600 - if self.last_refresh is None: - return True - cutoff = datetime.datetime.utcnow() - datetime.timedelta( - seconds=timeout) - return self.last_refresh < cutoff - - def __repr__(self): - return 'OutdatableData(data={}, last_refresh={})'.format(self._data, self.last_refresh) - - -class OutdatableDictMixin(object): - """ - Toolbox for implementing a cache. As every orchestrator has - different needs, we cannot implement any logic here. - """ - - def __getitem__(self, item): - # type: (str) -> OutdatableData - return OutdatableData.from_json(super(OutdatableDictMixin, self).__getitem__(item)) # type: ignore - - def __setitem__(self, key, value): - # type: (str, OutdatableData) -> None - val = None if value is None else value.json() - super(OutdatableDictMixin, self).__setitem__(key, val) # type: ignore - - def items(self): - ## type: () -> Iterator[Tuple[str, OutdatableData]] - for item in super(OutdatableDictMixin, self).items(): # type: ignore - k, v = item - yield k, OutdatableData.from_json(v) - - def items_filtered(self, keys=None): - if keys: - return [(host, self[host]) for host in keys] - else: - return list(self.items()) - - def any_outdated(self, timeout=None): - items = self.items() - if not list(items): - return True - return any([i[1].outdated(timeout) for i in items]) - - def remove_outdated(self): - outdated = [item[0] for item in self.items() if item[1].outdated()] - for o in outdated: - del self[o] # type: ignore - - def invalidate(self, key): - self[key] = OutdatableData(self[key].data, - datetime.datetime.fromtimestamp(0)) - - -class OutdatablePersistentDict(OutdatableDictMixin, PersistentStoreDict): - pass - - -class OutdatableDict(OutdatableDictMixin, dict): - pass diff --git a/src/pybind/mgr/orchestrator/README.md b/src/pybind/mgr/orchestrator/README.md new file mode 100644 index 000000000000..d70e88c11587 --- /dev/null +++ b/src/pybind/mgr/orchestrator/README.md @@ -0,0 +1,14 @@ +# Orchestrator CLI + +See also [orchestrator cli doc](https://docs.ceph.com/docs/master/mgr/orchestrator_cli/). 
+ +## Running the Teuthology tests + +To run the API tests against a real Ceph cluster, we leverage the Teuthology +framework and the `test_orchestrator` backend. + +``source`` the script and run the tests manually:: + + $ pushd ../dashboard ; source ./run-backend-api-tests.sh ; popd + $ run_teuthology_tests tasks.mgr.test_orchestrator_cli + $ cleanup_teuthology diff --git a/src/pybind/mgr/orchestrator/__init__.py b/src/pybind/mgr/orchestrator/__init__.py new file mode 100644 index 000000000000..946ddb058676 --- /dev/null +++ b/src/pybind/mgr/orchestrator/__init__.py @@ -0,0 +1,16 @@ +from __future__ import absolute_import + +from .module import OrchestratorCli + +# usage: E.g. `from orchestrator import StatelessServiceSpec` +from ._interface import \ + Completion, TrivialReadCompletion, raise_if_exception, ProgressReference, pretty_print, _Promise, \ + CLICommand, _cli_write_command, _cli_read_command, \ + Orchestrator, OrchestratorClientMixin, \ + OrchestratorValidationError, OrchestratorError, NoOrchestrator, \ + ServiceSpec, NFSServiceSpec, RGWSpec, HostPlacementSpec, \ + ServiceDescription, InventoryFilter, PlacementSpec, HostSpec, \ + DaemonDescription, \ + InventoryNode, DeviceLightLoc, \ + OutdatableData, OutdatablePersistentDict, \ + UpgradeStatusSpec diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py new file mode 100644 index 000000000000..576b21dec8e8 --- /dev/null +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -0,0 +1,1736 @@ + +""" +ceph-mgr orchestrator interface + +Please see the ceph-mgr module developer's guide for more information. +""" +import copy +import functools +import logging +import pickle +import sys +import time +from collections import namedtuple +from functools import wraps +import uuid +import string +import random +import datetime +import copy +import re +import six +import errno + +from ceph.deployment import inventory + +from mgr_module import MgrModule, PersistentStoreDict, CLICommand, HandleCommandResult +from mgr_util import format_bytes + +try: + from ceph.deployment.drive_group import DriveGroupSpec + from typing import TypeVar, Generic, List, Optional, Union, Tuple, Iterator, Callable, Any, \ + Type, Sequence +except ImportError: + pass + +logger = logging.getLogger(__name__) + + +class HostPlacementSpec(namedtuple('HostPlacementSpec', ['hostname', 'network', 'name'])): + def __str__(self): + res = '' + res += self.hostname + if self.network: + res += ':' + self.network + if self.name: + res += '=' + self.name + return res + + @classmethod + def parse(cls, host, require_network=True): + # type: (str, bool) -> HostPlacementSpec + """ + Split host into host, network, and (optional) daemon name parts. The network + part can be an IP, CIDR, or ceph addrvec like '[v2:1.2.3.4:3300,v1:1.2.3.4:6789]'. 
+ e.g., + "myhost" + "myhost=name" + "myhost:1.2.3.4" + "myhost:1.2.3.4=name" + "myhost:1.2.3.0/24" + "myhost:1.2.3.0/24=name" + "myhost:[v2:1.2.3.4:3000]=name" + "myhost:[v2:1.2.3.4:3000,v1:1.2.3.4:6789]=name" + """ + # Matches from start to : or = or until end of string + host_re = r'^(.*?)(:|=|$)' + # Matches from : to = or until end of string + ip_re = r':(.*?)(=|$)' + # Matches from = to end of string + name_re = r'=(.*?)$' + + # assign defaults + host_spec = cls('', '', '') + + match_host = re.search(host_re, host) + if match_host: + host_spec = host_spec._replace(hostname=match_host.group(1)) + + name_match = re.search(name_re, host) + if name_match: + host_spec = host_spec._replace(name=name_match.group(1)) + + ip_match = re.search(ip_re, host) + if ip_match: + host_spec = host_spec._replace(network=ip_match.group(1)) + + if not require_network: + return host_spec + + from ipaddress import ip_network, ip_address + networks = list() # type: List[str] + network = host_spec.network + # in case we have [v2:1.2.3.4:3000,v1:1.2.3.4:6478] + if ',' in network: + networks = [x for x in network.split(',')] + else: + networks.append(network) + for network in networks: + # only if we have versioned network configs + if network.startswith('v') or network.startswith('[v'): + network = network.split(':')[1] + try: + # if subnets are defined, also verify the validity + if '/' in network: + ip_network(six.text_type(network)) + else: + ip_address(six.text_type(network)) + except ValueError as e: + # logging? + raise e + + return host_spec + + +class OrchestratorError(Exception): + """ + General orchestrator specific error. + + Used for deployment, configuration or user errors. + + It's not intended for programming errors or orchestrator internal errors. + """ + + +class NoOrchestrator(OrchestratorError): + """ + No orchestrator in configured. + """ + def __init__(self, msg="No orchestrator configured (try `ceph orch set backend`)"): + super(NoOrchestrator, self).__init__(msg) + + +class OrchestratorValidationError(OrchestratorError): + """ + Raised when an orchestrator doesn't support a specific feature. + """ + + +def handle_exception(prefix, cmd_args, desc, perm, func): + @wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except (OrchestratorError, ImportError) as e: + # Do not print Traceback for expected errors. + return HandleCommandResult(-errno.ENOENT, stderr=str(e)) + except NotImplementedError: + msg = 'This Orchestrator does not support `{}`'.format(prefix) + return HandleCommandResult(-errno.ENOENT, stderr=msg) + + return CLICommand(prefix, cmd_args, desc, perm)(wrapper) + + +def _cli_command(perm): + def inner_cli_command(prefix, cmd_args="", desc=""): + return lambda func: handle_exception(prefix, cmd_args, desc, perm, func) + return inner_cli_command + + +_cli_read_command = _cli_command('r') +_cli_write_command = _cli_command('rw') + + +def _no_result(): + return object() + + +class _Promise(object): + """ + A completion may need multiple promises to be fulfilled. `_Promise` is one + step. + + Typically ``Orchestrator`` implementations inherit from this class to + build their own way of finishing a step to fulfil a future. + + They are not exposed in the orchestrator interface and can be seen as a + helper to build orchestrator modules. 
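    Error handling, as an illustrative sketch: an exception raised inside an
    ``on_complete`` callback fails the whole chain via ``fail()``::

        >>> p = _Promise(on_complete=lambda x: 1 / x).then(lambda x: x + 1)
        >>> p._first_promise._finalize(0)
        >>> isinstance(p._exception, ZeroDivisionError)
        True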
+ """ + INITIALIZED = 1 # We have a parent completion and a next completion + RUNNING = 2 + FINISHED = 3 # we have a final result + + NO_RESULT = _no_result() # type: None + ASYNC_RESULT = object() + + def __init__(self, + _first_promise=None, # type: Optional["_Promise"] + value=NO_RESULT, # type: Optional[Any] + on_complete=None, # type: Optional[Callable] + name=None, # type: Optional[str] + ): + self._on_complete_ = on_complete + self._name = name + self._next_promise = None # type: Optional[_Promise] + + self._state = self.INITIALIZED + self._exception = None # type: Optional[Exception] + + # Value of this _Promise. may be an intermediate result. + self._value = value + + # _Promise is not a continuation monad, as `_result` is of type + # T instead of (T -> r) -> r. Therefore we need to store the first promise here. + self._first_promise = _first_promise or self # type: '_Promise' + + @property + def _exception(self): + # type: () -> Optional[Exception] + return getattr(self, '_exception_', None) + + @_exception.setter + def _exception(self, e): + self._exception_ = e + self._serialized_exception_ = pickle.dumps(e) if e is not None else None + + @property + def _serialized_exception(self): + # type: () -> Optional[bytes] + return getattr(self, '_serialized_exception_', None) + + + + @property + def _on_complete(self): + # type: () -> Optional[Callable] + # https://github.com/python/mypy/issues/4125 + return self._on_complete_ + + @_on_complete.setter + def _on_complete(self, val): + # type: (Optional[Callable]) -> None + self._on_complete_ = val + + + def __repr__(self): + name = self._name or getattr(self._on_complete, '__name__', '??') if self._on_complete else 'None' + val = repr(self._value) if self._value is not self.NO_RESULT else 'NA' + return '{}(_s={}, val={}, _on_c={}, id={}, name={}, pr={}, _next={})'.format( + self.__class__, self._state, val, self._on_complete, id(self), name, getattr(next, '_progress_reference', 'NA'), repr(self._next_promise) + ) + + def pretty_print_1(self): + if self._name: + name = self._name + elif self._on_complete is None: + name = 'lambda x: x' + elif hasattr(self._on_complete, '__name__'): + name = getattr(self._on_complete, '__name__') + else: + name = self._on_complete.__class__.__name__ + val = repr(self._value) if self._value not in (self.NO_RESULT, self.ASYNC_RESULT) else '...' + prefix = { + self.INITIALIZED: ' ', + self.RUNNING: ' >>>', + self.FINISHED: '(done)' + }[self._state] + return '{} {}({}),'.format(prefix, name, val) + + def then(self, on_complete): + # type: (Any, Callable) -> Any + """ + Call ``on_complete`` as soon as this promise is finalized. + """ + assert self._state in (self.INITIALIZED, self.RUNNING) + if self._on_complete is not None: + assert self._next_promise is None + self._set_next_promise(self.__class__( + _first_promise=self._first_promise, + on_complete=on_complete + )) + return self._next_promise + + else: + self._on_complete = on_complete + self._set_next_promise(self.__class__(_first_promise=self._first_promise)) + return self._next_promise + + def _set_next_promise(self, next): + # type: (_Promise) -> None + assert self is not next + assert self._state in (self.INITIALIZED, self.RUNNING) + + self._next_promise = next + assert self._next_promise is not None + for p in iter(self._next_promise): + p._first_promise = self._first_promise + + def _finalize(self, value=NO_RESULT): + """ + Sets this promise to complete. + + Orchestrators may choose to use this helper function. + + :param value: new value. 
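+
+        A hypothetical chain (names are illustrative)::
+
+            >>> first = _Promise(on_complete=lambda x: x * 2)
+            >>> last = first.then(lambda x: x + 1)
+            >>> first._finalize(3)   # runs both steps; ``last`` now holds 7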
+ """ + if self._state not in (self.INITIALIZED, self.RUNNING): + raise ValueError('finalize: {} already finished. {}'.format(repr(self), value)) + + self._state = self.RUNNING + + if value is not self.NO_RESULT: + self._value = value + assert self._value is not self.NO_RESULT, repr(self) + + if self._on_complete: + try: + next_result = self._on_complete(self._value) + except Exception as e: + self.fail(e) + return + else: + next_result = self._value + + if isinstance(next_result, _Promise): + # hack: _Promise is not a continuation monad. + next_result = next_result._first_promise # type: ignore + assert next_result not in self, repr(self._first_promise) + repr(next_result) + assert self not in next_result + next_result._append_promise(self._next_promise) + self._set_next_promise(next_result) + assert self._next_promise + if self._next_promise._value is self.NO_RESULT: + self._next_promise._value = self._value + self.propagate_to_next() + elif next_result is not self.ASYNC_RESULT: + # simple map. simply forward + if self._next_promise: + self._next_promise._value = next_result + else: + # Hack: next_result is of type U, _value is of type T + self._value = next_result # type: ignore + self.propagate_to_next() + else: + # asynchronous promise + pass + + + def propagate_to_next(self): + self._state = self.FINISHED + logger.debug('finalized {}'.format(repr(self))) + if self._next_promise: + self._next_promise._finalize() + + def fail(self, e): + # type: (Exception) -> None + """ + Sets the whole completion to be faild with this exception and end the + evaluation. + """ + if self._state == self.FINISHED: + raise ValueError( + 'Invalid State: called fail, but Completion is already finished: {}'.format(str(e))) + assert self._state in (self.INITIALIZED, self.RUNNING) + logger.exception('_Promise failed') + self._exception = e + self._value = 'exception' + if self._next_promise: + self._next_promise.fail(e) + self._state = self.FINISHED + + def __contains__(self, item): + return any(item is p for p in iter(self._first_promise)) + + def __iter__(self): + yield self + elem = self._next_promise + while elem is not None: + yield elem + elem = elem._next_promise + + def _append_promise(self, other): + if other is not None: + assert self not in other + assert other not in self + self._last_promise()._set_next_promise(other) + + def _last_promise(self): + # type: () -> _Promise + return list(iter(self))[-1] + + +class ProgressReference(object): + def __init__(self, + message, # type: str + mgr, + completion=None # type: Optional[Callable[[], Completion]] + ): + """ + ProgressReference can be used within Completions:: + + +---------------+ +---------------------------------+ + | | then | | + | My Completion | +--> | on_complete=ProgressReference() | + | | | | + +---------------+ +---------------------------------+ + + See :func:`Completion.with_progress` for an easy way to create + a progress reference + + """ + super(ProgressReference, self).__init__() + self.progress_id = str(uuid.uuid4()) + self.message = message + self.mgr = mgr + + #: The completion can already have a result, before the write + #: operation is effective. progress == 1 means, the services are + #: created / removed. + self.completion = completion # type: Optional[Callable[[], Completion]] + + #: if a orchestrator module can provide a more detailed + #: progress information, it needs to also call ``progress.update()``. 
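+        #: Progress is a float in ``[0.0, 1.0]``; reaching ``1.0`` together
+        #: with a stored result marks the reference as *effective*.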
+ self.progress = 0.0 + + self._completion_has_result = False + self.mgr.all_progress_references.append(self) + + def __str__(self): + """ + ``__str__()`` is used for determining the message for progress events. + """ + return self.message or super(ProgressReference, self).__str__() + + def __call__(self, arg): + self._completion_has_result = True + self.progress = 1.0 + return arg + + @property + def progress(self): + return self._progress + + @progress.setter + def progress(self, progress): + assert progress <= 1.0 + self._progress = progress + try: + if self.effective: + self.mgr.remote("progress", "complete", self.progress_id) + self.mgr.all_progress_references = [p for p in self.mgr.all_progress_references if p is not self] + else: + self.mgr.remote("progress", "update", self.progress_id, self.message, + progress, + [("origin", "orchestrator")]) + except ImportError: + # If the progress module is disabled that's fine, + # they just won't see the output. + pass + + @property + def effective(self): + return self.progress == 1 and self._completion_has_result + + def update(self): + def progress_run(progress): + self.progress = progress + if self.completion: + c = self.completion().then(progress_run) + self.mgr.process([c._first_promise]) + else: + self.progress = 1 + + def fail(self): + self._completion_has_result = True + self.progress = 1 + + +class Completion(_Promise): + """ + Combines multiple promises into one overall operation. + + Completions are composable by being able to + call one completion from another completion. I.e. making them re-usable + using Promises E.g.:: + + >>> return Orchestrator().get_hosts().then(self._create_osd) + + where ``get_hosts`` returns a Completion of list of hosts and + ``_create_osd`` takes a list of hosts. + + The concept behind this is to store the computation steps + explicit and then explicitly evaluate the chain: + + >>> p = Completion(on_complete=lambda x: x*2).then(on_complete=lambda x: str(x)) + ... p.finalize(2) + ... assert p.result = "4" + + or graphically:: + + +---------------+ +-----------------+ + | | then | | + | lambda x: x*x | +--> | lambda x: str(x)| + | | | | + +---------------+ +-----------------+ + + """ + def __init__(self, + _first_promise=None, # type: Optional["Completion"] + value=_Promise.NO_RESULT, # type: Any + on_complete=None, # type: Optional[Callable] + name=None, # type: Optional[str] + ): + super(Completion, self).__init__(_first_promise, value, on_complete, name) + + @property + def _progress_reference(self): + # type: () -> Optional[ProgressReference] + if hasattr(self._on_complete, 'progress_id'): + return self._on_complete # type: ignore + return None + + @property + def progress_reference(self): + # type: () -> Optional[ProgressReference] + """ + ProgressReference. Marks this completion + as a write completeion. + """ + + references = [c._progress_reference for c in iter(self) if c._progress_reference is not None] + if references: + assert len(references) == 1 + return references[0] + return None + + @classmethod + def with_progress(cls, # type: Any + message, # type: str + mgr, + _first_promise=None, # type: Optional["Completion"] + value=_Promise.NO_RESULT, # type: Any + on_complete=None, # type: Optional[Callable] + calc_percent=None # type: Optional[Callable[[], Any]] + ): + # type: (...) 
-> Any + + c = cls( + _first_promise=_first_promise, + value=value, + on_complete=on_complete + ).add_progress(message, mgr, calc_percent) + + return c._first_promise + + def add_progress(self, + message, # type: str + mgr, + calc_percent=None # type: Optional[Callable[[], Any]] + ): + return self.then( + on_complete=ProgressReference( + message=message, + mgr=mgr, + completion=calc_percent + ) + ) + + def fail(self, e): + super(Completion, self).fail(e) + if self._progress_reference: + self._progress_reference.fail() + + def finalize(self, result=_Promise.NO_RESULT): + if self._first_promise._state == self.INITIALIZED: + self._first_promise._finalize(result) + + @property + def result(self): + """ + The result of the operation that we were waited + for. Only valid after calling Orchestrator.process() on this + completion. + """ + last = self._last_promise() + assert last._state == _Promise.FINISHED + return last._value + + def result_str(self): + """Force a string.""" + if self.result is None: + return '' + if isinstance(self.result, list): + return '\n'.join(str(x) for x in self.result) + return str(self.result) + + @property + def exception(self): + # type: () -> Optional[Exception] + return self._last_promise()._exception + + @property + def serialized_exception(self): + # type: () -> Optional[bytes] + return self._last_promise()._serialized_exception + + @property + def has_result(self): + # type: () -> bool + """ + Has the operation already a result? + + For Write operations, it can already have a + result, if the orchestrator's configuration is + persistently written. Typically this would + indicate that an update had been written to + a manifest, but that the update had not + necessarily been pushed out to the cluster. + + :return: + """ + return self._last_promise()._state == _Promise.FINISHED + + @property + def is_errored(self): + # type: () -> bool + """ + Has the completion failed. Default implementation looks for + self.exception. Can be overwritten. + """ + return self.exception is not None + + @property + def needs_result(self): + # type: () -> bool + """ + Could the external operation be deemed as complete, + or should we wait? + We must wait for a read operation only if it is not complete. + """ + return not self.is_errored and not self.has_result + + @property + def is_finished(self): + # type: () -> bool + """ + Could the external operation be deemed as complete, + or should we wait? + We must wait for a read operation only if it is not complete. + """ + return self.is_errored or (self.has_result) + + def pretty_print(self): + + reprs = '\n'.join(p.pretty_print_1() for p in iter(self._first_promise)) + return """<{}>[\n{}\n]""".format(self.__class__.__name__, reprs) + + +def pretty_print(completions): + # type: (Sequence[Completion]) -> str + return ', '.join(c.pretty_print() for c in completions) + + +def raise_if_exception(c): + # type: (Completion) -> None + """ + :raises OrchestratorError: Some user error or a config error. + :raises Exception: Some internal error + """ + if c.serialized_exception is not None: + try: + e = pickle.loads(c.serialized_exception) + except (KeyError, AttributeError): + raise Exception('{}: {}'.format(type(c.exception), c.exception)) + raise e + + +class TrivialReadCompletion(Completion): + """ + This is the trivial completion simply wrapping a result. 
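+
+    Illustrative use (the wrapped value is just an example)::
+
+        >>> c = TrivialReadCompletion(['host1', 'host2'])
+        >>> c.result
+        ['host1', 'host2']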
+ """ + def __init__(self, result): + super(TrivialReadCompletion, self).__init__() + if result: + self.finalize(result) + + +def _hide_in_features(f): + f._hide_in_features = True + return f + + +class Orchestrator(object): + """ + Calls in this class may do long running remote operations, with time + periods ranging from network latencies to package install latencies and large + internet downloads. For that reason, all are asynchronous, and return + ``Completion`` objects. + + Methods should only return the completion and not directly execute + anything, like network calls. Otherwise the purpose of + those completions is defeated. + + Implementations are not required to start work on an operation until + the caller waits on the relevant Completion objects. Callers making + multiple updates should not wait on Completions until they're done + sending operations: this enables implementations to batch up a series + of updates when wait() is called on a set of Completion objects. + + Implementations are encouraged to keep reasonably fresh caches of + the status of the system: it is better to serve a stale-but-recent + result read of e.g. device inventory than it is to keep the caller waiting + while you scan hosts every time. + """ + + @_hide_in_features + def is_orchestrator_module(self): + """ + Enable other modules to interrogate this module to discover + whether it's usable as an orchestrator module. + + Subclasses do not need to override this. + """ + return True + + @_hide_in_features + def available(self): + # type: () -> Tuple[bool, str] + """ + Report whether we can talk to the orchestrator. This is the + place to give the user a meaningful message if the orchestrator + isn't running or can't be contacted. + + This method may be called frequently (e.g. every page load + to conditionally display a warning banner), so make sure it's + not too expensive. It's okay to give a slightly stale status + (e.g. based on a periodic background ping of the orchestrator) + if that's necessary to make this method fast. + + .. note:: + `True` doesn't mean that the desired functionality + is actually available in the orchestrator. I.e. this + won't work as expected:: + + >>> if OrchestratorClientMixin().available()[0]: # wrong. + ... OrchestratorClientMixin().get_hosts() + + :return: two-tuple of boolean, string + """ + raise NotImplementedError() + + @_hide_in_features + def process(self, completions): + # type: (List[Completion]) -> None + """ + Given a list of Completion instances, process any which are + incomplete. + + Callers should inspect the detail of each completion to identify + partial completion/progress information, and present that information + to the user. + + This method should not block, as this would make it slow to query + a status, while other long running operations are in progress. + """ + raise NotImplementedError() + + @_hide_in_features + def get_feature_set(self): + """Describes which methods this orchestrator implements + + .. note:: + `True` doesn't mean that the desired functionality + is actually possible in the orchestrator. I.e. this + won't work as expected:: + + >>> api = OrchestratorClientMixin() + ... if api.get_feature_set()['get_hosts']['available']: # wrong. + ... api.get_hosts() + + It's better to ask for forgiveness instead:: + + >>> try: + ... OrchestratorClientMixin().get_hosts() + ... except (OrchestratorError, NotImplementedError): + ... ... 
+ + :returns: Dict of API method names to ``{'available': True or False}`` + """ + module = self.__class__ + features = {a: {'available': getattr(Orchestrator, a, None) != getattr(module, a)} + for a in Orchestrator.__dict__ + if not a.startswith('_') and not getattr(getattr(Orchestrator, a), '_hide_in_features', False) + } + return features + + @_hide_in_features + def cancel_completions(self): + # type: () -> None + """ + Cancels ongoing completions. Unstuck the mgr. + """ + raise NotImplementedError() + + def add_host(self, HostSpec): + # type: (HostSpec) -> Completion + """ + Add a host to the orchestrator inventory. + + :param host: hostname + """ + raise NotImplementedError() + + def remove_host(self, host): + # type: (str) -> Completion + """ + Remove a host from the orchestrator inventory. + + :param host: hostname + """ + raise NotImplementedError() + + def update_host_addr(self, host, addr): + # type: (str, str) -> Completion + """ + Update a host's address + + :param host: hostname + :param addr: address (dns name or IP) + """ + raise NotImplementedError() + + def get_hosts(self): + # type: () -> Completion + """ + Report the hosts in the cluster. + + The default implementation is extra slow. + + :return: list of InventoryNodes + """ + return self.get_inventory() + + def add_host_label(self, host, label): + # type: (str, str) -> Completion + """ + Add a host label + """ + raise NotImplementedError() + + def remove_host_label(self, host, label): + # type: (str, str) -> Completion + """ + Remove a host label + """ + raise NotImplementedError() + + def get_inventory(self, node_filter=None, refresh=False): + # type: (Optional[InventoryFilter], bool) -> Completion + """ + Returns something that was created by `ceph-volume inventory`. + + :return: list of InventoryNode + """ + raise NotImplementedError() + + def describe_service(self, service_type=None, service_id=None, node_name=None, refresh=False): + # type: (Optional[str], Optional[str], Optional[str], bool) -> Completion + """ + Describe a service (of any kind) that is already configured in + the orchestrator. For example, when viewing an OSD in the dashboard + we might like to also display information about the orchestrator's + view of the service (like the kubernetes pod ID). + + When viewing a CephFS filesystem in the dashboard, we would use this + to display the pods being currently run for MDS daemons. + + :return: list of ServiceDescription objects. + """ + raise NotImplementedError() + + def list_daemons(self, daemon_type=None, daemon_id=None, host=None, refresh=False): + # type: (Optional[str], Optional[str], Optional[str], bool) -> Completion + """ + Describe a daemon (of any kind) that is already configured in + the orchestrator. + + :return: list of DaemonDescription objects. + """ + raise NotImplementedError() + + def remove_daemons(self, names, force): + # type: (List[str], bool) -> Completion + """ + Remove specific daemon(s). + + :return: None + """ + raise NotImplementedError() + + def remove_service(self, service_type, service_name=None): + # type: (str, Optional[str]) -> Completion + """ + Remove a service (a collection of daemons). + + :return: None + """ + raise NotImplementedError() + + def service_action(self, action, service_type, service_name): + # type: (str, str, str) -> Completion + """ + Perform an action (start/stop/reload) on a service (i.e., all daemons + providing the logical service). + + :param action: one of "start", "stop", "restart", "redeploy", "reconfig" + :param service_type: e.g. 
"mds", "rgw", ... + :param service_name: name of logical service ("cephfs", "us-east", ...) + :rtype: Completion + """ + #assert action in ["start", "stop", "reload, "restart", "redeploy"] + raise NotImplementedError() + + def daemon_action(self, action, daemon_type, daemon_id): + # type: (str, str, str) -> Completion + """ + Perform an action (start/stop/reload) on a daemon. + + :param action: one of "start", "stop", "restart", "redeploy", "reconfig" + :param name: name of daemon + :rtype: Completion + """ + #assert action in ["start", "stop", "reload, "restart", "redeploy"] + raise NotImplementedError() + + def create_osds(self, drive_groups): + # type: (List[DriveGroupSpec]) -> Completion + """ + Create one or more OSDs within a single Drive Group. + + The principal argument here is the drive_group member + of OsdSpec: other fields are advisory/extensible for any + finer-grained OSD feature enablement (choice of backing store, + compression/encryption, etc). + + :param drive_groups: a list of DriveGroupSpec + :param all_hosts: TODO, this is required because the orchestrator methods are not composable + Probably this parameter can be easily removed because each orchestrator can use + the "get_inventory" method and the "drive_group.host_pattern" attribute + to obtain the list of hosts where to apply the operation + """ + raise NotImplementedError() + + def blink_device_light(self, ident_fault, on, locations): + # type: (str, bool, List[DeviceLightLoc]) -> Completion + """ + Instructs the orchestrator to enable or disable either the ident or the fault LED. + + :param ident_fault: either ``"ident"`` or ``"fault"`` + :param on: ``True`` = on. + :param locations: See :class:`orchestrator.DeviceLightLoc` + """ + raise NotImplementedError() + + def add_mon(self, spec): + # type: (ServiceSpec) -> Completion + """Create mon daemon(s)""" + raise NotImplementedError() + + def apply_mon(self, spec): + # type: (ServiceSpec) -> Completion + """Update mon cluster""" + raise NotImplementedError() + + def add_mgr(self, spec): + # type: (ServiceSpec) -> Completion + """Create mgr daemon(s)""" + raise NotImplementedError() + + def apply_mgr(self, spec): + # type: (ServiceSpec) -> Completion + """Update mgr cluster""" + raise NotImplementedError() + + def add_mds(self, spec): + # type: (ServiceSpec) -> Completion + """Create MDS daemon(s)""" + raise NotImplementedError() + + def apply_mds(self, spec): + # type: (ServiceSpec) -> Completion + """Update MDS cluster""" + raise NotImplementedError() + + def add_rbd_mirror(self, spec): + # type: (ServiceSpec) -> Completion + """Create rbd-mirror daemon(s)""" + raise NotImplementedError() + + def apply_rbd_mirror(self, spec): + # type: (ServiceSpec) -> Completion + """Update rbd-mirror cluster""" + raise NotImplementedError() + + def add_nfs(self, spec): + # type: (NFSServiceSpec) -> Completion + """Create NFS daemon(s)""" + raise NotImplementedError() + + def apply_nfs(self, spec): + # type: (NFSServiceSpec) -> Completion + """Update NFS cluster""" + raise NotImplementedError() + + def add_rgw(self, spec): + # type: (RGWSpec) -> Completion + """Create RGW daemon(s)""" + raise NotImplementedError() + + def apply_rgw(self, spec): + # type: (RGWSpec) -> Completion + """Update RGW cluster""" + raise NotImplementedError() + + def add_prometheus(self, spec): + # type: (ServiceSpec) -> Completion + """Create new prometheus daemon""" + raise NotImplementedError() + + def apply_prometheus(self, spec): + # type: (ServiceSpec) -> Completion + """Update prometheus cluster""" 
+ raise NotImplementedError() + + def upgrade_check(self, image, version): + # type: (Optional[str], Optional[str]) -> Completion + raise NotImplementedError() + + def upgrade_start(self, image, version): + # type: (Optional[str], Optional[str]) -> Completion + raise NotImplementedError() + + def upgrade_pause(self): + # type: () -> Completion + raise NotImplementedError() + + def upgrade_resume(self): + # type: () -> Completion + raise NotImplementedError() + + def upgrade_stop(self): + # type: () -> Completion + raise NotImplementedError() + + def upgrade_status(self): + # type: () -> Completion + """ + If an upgrade is currently underway, report on where + we are in the process, or if some error has occurred. + + :return: UpgradeStatusSpec instance + """ + raise NotImplementedError() + + @_hide_in_features + def upgrade_available(self): + # type: () -> Completion + """ + Report on what versions are available to upgrade to + + :return: List of strings + """ + raise NotImplementedError() + +class HostSpec(object): + def __init__(self, hostname, addr=None, labels=None): + # type: (str, Optional[str], Optional[List[str]]) -> None + self.hostname = hostname # the hostname on the host + self.addr = addr or hostname # DNS name or IP address to reach it + self.labels = labels or [] # initial label(s), if any + +class UpgradeStatusSpec(object): + # Orchestrator's report on what's going on with any ongoing upgrade + def __init__(self): + self.in_progress = False # Is an upgrade underway? + self.target_image = None + self.services_complete = [] # Which daemon types are fully updated? + self.message = "" # Freeform description + + +class PlacementSpec(object): + """ + For APIs that need to specify a node subset + """ + def __init__(self, label=None, hosts=None, count=None): + # type: (Optional[str], Optional[List], Optional[int]) -> None + self.label = label + self.hosts = [] # type: List[HostPlacementSpec] + if hosts: + if all([isinstance(host, HostPlacementSpec) for host in hosts]): + self.hosts = hosts + else: + self.hosts = [parse_host_placement_specs(x, require_network=False) for x in hosts if x] + + + self.count = count # type: Optional[int] + + def set_hosts(self, hosts): + # To backpopulate the .hosts attribute when using labels or count + # in the orchestrator backend. + self.hosts = hosts + + @classmethod + def from_dict(cls, data): + _cls = cls(**data) + _cls.validate() + return _cls + + def validate(self): + if self.hosts and self.label: + # TODO: a less generic Exception + raise Exception('Node and label are mutually exclusive') + if self.count is not None and self.count <= 0: + raise Exception("num/count must be > 1") + + +def handle_type_error(method): + @wraps(method) + def inner(cls, *args, **kwargs): + try: + return method(cls, *args, **kwargs) + except TypeError as e: + error_msg = '{}: {}'.format(cls.__name__, e) + raise OrchestratorValidationError(error_msg) + return inner + + +class DaemonDescription(object): + """ + For responding to queries about the status of a particular daemon, + stateful or stateless. + + This is not about health or performance monitoring of daemons: it's + about letting the orchestrator tell Ceph whether and where a + daemon is scheduled in the cluster. When an orchestrator tells + Ceph "it's running on node123", that's not a promise that the process + is literally up this second, it's a description of where the orchestrator + has decided the daemon should run. 
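+
+    A hypothetical round-trip through the JSON helpers (names are examples
+    only)::
+
+        >>> d = DaemonDescription(daemon_type='mgr', daemon_id='x')
+        >>> DaemonDescription.from_json(d.to_json()).name()
+        'mgr.x'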
+ """ + + def __init__(self, + daemon_type=None, + daemon_id=None, + nodename=None, + container_id=None, + container_image_id=None, + container_image_name=None, + version=None, + status=None, + status_desc=None): + # Node is at the same granularity as InventoryNode + self.nodename = nodename + + # Not everyone runs in containers, but enough people do to + # justify having the container_id (runtime id) and container_image + # (image name) + self.container_id = container_id # runtime id + self.container_image_id = container_image_id # image hash + self.container_image_name = container_image_name # image friendly name + + # The type of service (osd, mon, mgr, etc.) + self.daemon_type = daemon_type + + # The orchestrator will have picked some names for daemons, + # typically either based on hostnames or on pod names. + # This is the in mds., the ID that will appear + # in the FSMap/ServiceMap. + self.daemon_id = daemon_id + + # Service version that was deployed + self.version = version + + # Service status: -1 error, 0 stopped, 1 running + self.status = status + + # Service status description when status == -1. + self.status_desc = status_desc + + # datetime when this info was last refreshed + self.last_refresh = None # type: Optional[datetime.datetime] + + def name(self): + return '%s.%s' % (self.daemon_type, self.daemon_id) + + def __repr__(self): + return "({type}.{id})".format(type=self.daemon_type, + id=self.daemon_id) + + def to_json(self): + out = { + 'nodename': self.nodename, + 'container_id': self.container_id, + 'container_image_id': self.container_image_id, + 'container_image_name': self.container_image_name, + 'daemon_id': self.daemon_id, + 'daemon_type': self.daemon_type, + 'version': self.version, + 'status': self.status, + 'status_desc': self.status_desc, + } + return {k: v for (k, v) in out.items() if v is not None} + + @classmethod + @handle_type_error + def from_json(cls, data): + return cls(**data) + +class ServiceDescription(object): + """ + For responding to queries about the status of a particular service, + stateful or stateless. + + This is not about health or performance monitoring of services: it's + about letting the orchestrator tell Ceph whether and where a + service is scheduled in the cluster. When an orchestrator tells + Ceph "it's running on node123", that's not a promise that the process + is literally up this second, it's a description of where the orchestrator + has decided the service should run. + """ + + def __init__(self, nodename=None, + container_id=None, container_image_id=None, + container_image_name=None, + service=None, service_instance=None, + service_type=None, version=None, rados_config_location=None, + service_url=None, status=None, status_desc=None): + # Node is at the same granularity as InventoryNode + self.nodename = nodename # type: Optional[str] + + # Not everyone runs in containers, but enough people do to + # justify having the container_id (runtime id) and container_image + # (image name) + self.container_id = container_id # runtime id + self.container_image_id = container_image_id # image hash + self.container_image_name = container_image_name # image friendly name + + # Some services can be deployed in groups. For example, mds's can + # have an active and standby daemons, and nfs-ganesha can run daemons + # in parallel. This tag refers to a group of daemons as a whole. 
+ # + # For instance, a cluster of mds' all service the same fs, and they + # will all have the same service value (which may be the + # Filesystem name in the FSMap). + # + # Single-instance services should leave this set to None + self.service = service + + # The orchestrator will have picked some names for daemons, + # typically either based on hostnames or on pod names. + # This is the in mds., the ID that will appear + # in the FSMap/ServiceMap. + self.service_instance = service_instance + + # The type of service (osd, mon, mgr, etc.) + self.service_type = service_type + + # Service version that was deployed + self.version = version + + # Location of the service configuration when stored in rados + # object. Format: "rados:///[]" + self.rados_config_location = rados_config_location + + # If the service exposes REST-like API, this attribute should hold + # the URL. + self.service_url = service_url + + # Service status: -1 error, 0 stopped, 1 running + self.status = status + + # Service status description when status == -1. + self.status_desc = status_desc + + # datetime when this info was last refreshed + self.last_refresh = None # type: Optional[datetime.datetime] + + def name(self): + if self.service_instance: + return '%s.%s' % (self.service_type, self.service_instance) + return self.service_type + + def __repr__(self): + return "({n_name}:{s_type})".format(n_name=self.nodename, + s_type=self.name()) + + def to_json(self): + out = { + 'nodename': self.nodename, + 'container_id': self.container_id, + 'service': self.service, + 'service_instance': self.service_instance, + 'service_type': self.service_type, + 'version': self.version, + 'rados_config_location': self.rados_config_location, + 'service_url': self.service_url, + 'status': self.status, + 'status_desc': self.status_desc, + } + return {k: v for (k, v) in out.items() if v is not None} + + @classmethod + @handle_type_error + def from_json(cls, data): + return cls(**data) + + +class ServiceSpec(object): + """ + Details of service creation. + + Request to the orchestrator for a cluster of daemons + such as MDS, RGW, iscsi gateway, MONs, MGRs, Prometheus + + This structure is supposed to be enough information to + start the services. + + """ + + def __init__(self, name=None, placement=None): + # type: (Optional[str], Optional[PlacementSpec]) -> None + self.placement = PlacementSpec() if placement is None else placement # type: PlacementSpec + + #: Give this set of stateless services a name: typically it would + #: be the name of a CephFS filesystem, RGW zone, etc. Must be unique + #: within one ceph cluster. Note: Not all clusters have a name + self.name = name # type: Optional[str] + + if self.placement is not None and self.placement.count is not None: + #: Count of service instances. Deprecated. + self.count = self.placement.count # type: int + else: + self.count = 1 + + def validate_add(self): + if not self.name: + raise OrchestratorValidationError('Cannot add Service: Name required') + + +class NFSServiceSpec(ServiceSpec): + def __init__(self, name, pool=None, namespace=None, placement=None): + super(NFSServiceSpec, self).__init__(name, placement) + + #: RADOS pool where NFS client recovery data is stored. + self.pool = pool + + #: RADOS namespace where NFS client recovery data is stored in the pool. 
+ self.namespace = namespace + + def validate_add(self): + super(NFSServiceSpec, self).validate_add() + + if not self.pool: + raise OrchestratorValidationError('Cannot add NFS: No Pool specified') + + +class RGWSpec(ServiceSpec): + """ + Settings to configure a (multisite) Ceph RGW + + """ + def __init__(self, + rgw_realm, # type: str + rgw_zone, # type: str + placement=None, + hosts=None, # type: Optional[List[str]] + rgw_multisite=None, # type: Optional[bool] + rgw_zonemaster=None, # type: Optional[bool] + rgw_zonesecondary=None, # type: Optional[bool] + rgw_multisite_proto=None, # type: Optional[str] + rgw_frontend_port=None, # type: Optional[int] + rgw_zonegroup=None, # type: Optional[str] + rgw_zone_user=None, # type: Optional[str] + system_access_key=None, # type: Optional[str] + system_secret_key=None, # type: Optional[str] + count=None # type: Optional[int] + ): + # Regarding default values. Ansible has a `set_rgwspec_defaults` that sets + # default values that makes sense for Ansible. Rook has default values implemented + # in Rook itself. Thus we don't set any defaults here in this class. + + super(RGWSpec, self).__init__(name=rgw_realm + '.' + rgw_zone, + placement=placement) + + #: List of hosts where RGWs should run. Not for Rook. + if hosts: + self.placement = PlacementSpec(hosts=hosts) + + #: is multisite + self.rgw_multisite = rgw_multisite + self.rgw_zonemaster = rgw_zonemaster + self.rgw_zonesecondary = rgw_zonesecondary + self.rgw_multisite_proto = rgw_multisite_proto + self.rgw_frontend_port = rgw_frontend_port + + self.rgw_realm = rgw_realm + self.rgw_zone = rgw_zone + self.rgw_zonegroup = rgw_zonegroup + self.rgw_zone_user = rgw_zone_user + + self.system_access_key = system_access_key + self.system_secret_key = system_secret_key + + @property + def rgw_multisite_endpoint_addr(self): + """Returns the first host. Not supported for Rook.""" + return self.placement.hosts[0] + + @property + def rgw_multisite_endpoints_list(self): + return ",".join(["{}://{}:{}".format(self.rgw_multisite_proto, + host, + self.rgw_frontend_port) for host in self.placement.hosts]) + + def genkey(self, nchars): + """ Returns a random string of nchars + + :nchars : Length of the returned string + """ + # TODO Python 3: use Secrets module instead. + + return ''.join(random.choice(string.ascii_uppercase + + string.ascii_lowercase + + string.digits) for _ in range(nchars)) + + @classmethod + def from_json(cls, json_rgw_spec): + # type: (dict) -> RGWSpec + """ + Initialize 'RGWSpec' object data from a json structure + :param json_rgw_spec: A valid dict with a the RGW settings + """ + # TODO: also add PlacementSpec(**json_rgw_spec['placement']) + args = {k:v for k, v in json_rgw_spec.items()} + return RGWSpec(**args) + + +class InventoryFilter(object): + """ + When fetching inventory, use this filter to avoid unnecessarily + scanning the whole estate. + + Typical use: filter by node when presenting UI workflow for configuring + a particular server. + filter by label when not all of estate is Ceph servers, + and we want to only learn about the Ceph servers. + filter by label when we are interested particularly + in e.g. OSD servers. 
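+
+    For example, restricting an inventory query to two named nodes
+    (hypothetical host names)::
+
+        >>> nf = InventoryFilter(nodes=['node1', 'node2'])
+        >>> # then e.g.: orchestrator.get_inventory(node_filter=nf)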
+ + """ + def __init__(self, labels=None, nodes=None): + # type: (Optional[List[str]], Optional[List[str]]) -> None + + #: Optional: get info about nodes matching labels + self.labels = labels + + #: Optional: get info about certain named nodes only + self.nodes = nodes + + +class InventoryNode(object): + """ + When fetching inventory, all Devices are groups inside of an + InventoryNode. + """ + def __init__(self, name, devices=None, labels=None, addr=None): + # type: (str, Optional[inventory.Devices], Optional[List[str]], Optional[str]) -> None + if devices is None: + devices = inventory.Devices([]) + if labels is None: + labels = [] + assert isinstance(devices, inventory.Devices) + + self.name = name # unique within cluster. For example a hostname. + self.addr = addr or name + self.devices = devices + self.labels = labels + + def to_json(self): + return { + 'name': self.name, + 'addr': self.addr, + 'devices': self.devices.to_json(), + 'labels': self.labels, + } + + @classmethod + def from_json(cls, data): + try: + _data = copy.deepcopy(data) + name = _data.pop('name') + addr = _data.pop('addr', None) or name + devices = inventory.Devices.from_json(_data.pop('devices')) + if _data: + error_msg = 'Unknown key(s) in Inventory: {}'.format(','.join(_data.keys())) + raise OrchestratorValidationError(error_msg) + labels = _data.get('labels', list()) + return cls(name, devices, labels, addr) + except KeyError as e: + error_msg = '{} is required for {}'.format(e, cls.__name__) + raise OrchestratorValidationError(error_msg) + except TypeError as e: + raise OrchestratorValidationError('Failed to read inventory: {}'.format(e)) + + + @classmethod + def from_nested_items(cls, hosts): + devs = inventory.Devices.from_json + return [cls(item[0], devs(item[1].data)) for item in hosts] + + def __repr__(self): + return "({name})".format(name=self.name) + + @staticmethod + def get_host_names(nodes): + # type: (List[InventoryNode]) -> List[str] + return [node.name for node in nodes] + + def __eq__(self, other): + return self.name == other.name and self.devices == other.devices + + +class DeviceLightLoc(namedtuple('DeviceLightLoc', ['host', 'dev', 'path'])): + """ + Describes a specific device on a specific host. Used for enabling or disabling LEDs + on devices. + + hostname as in :func:`orchestrator.Orchestrator.get_hosts` + + device_id: e.g. ``ABC1234DEF567-1R1234_ABC8DE0Q``. + See ``ceph osd metadata | jq '.[].device_ids'`` + """ + __slots__ = () + + +def _mk_orch_methods(cls): + # Needs to be defined outside of for. + # Otherwise meth is always bound to last key + def shim(method_name): + def inner(self, *args, **kwargs): + completion = self._oremote(method_name, args, kwargs) + return completion + return inner + + for meth in Orchestrator.__dict__: + if not meth.startswith('_') and meth not in ['is_orchestrator_module']: + setattr(cls, meth, shim(meth)) + return cls + + +@_mk_orch_methods +class OrchestratorClientMixin(Orchestrator): + """ + A module that inherents from `OrchestratorClientMixin` can directly call + all :class:`Orchestrator` methods without manually calling remote. + + Every interface method from ``Orchestrator`` is converted into a stub method that internally + calls :func:`OrchestratorClientMixin._oremote` + + >>> class MyModule(OrchestratorClientMixin): + ... def func(self): + ... completion = self.add_host('somehost') # calls `_oremote()` + ... self._orchestrator_wait([completion]) + ... self.log.debug(completion.result) + + .. 
note:: Orchestrator implementations should not inherit from `OrchestratorClientMixin`. + Reason is, that OrchestratorClientMixin magically redirects all methods to the + "real" implementation of the orchestrator. + + + >>> import mgr_module + >>> class MyImplentation(mgr_module.MgrModule, Orchestrator): + ... def __init__(self, ...): + ... self.orch_client = OrchestratorClientMixin() + ... self.orch_client.set_mgr(self.mgr)) + """ + + def set_mgr(self, mgr): + # type: (MgrModule) -> None + """ + Useable in the Dashbord that uses a global ``mgr`` + """ + + self.__mgr = mgr # Make sure we're not overwriting any other `mgr` properties + + def __get_mgr(self): + try: + return self.__mgr + except AttributeError: + return self + + def _oremote(self, meth, args, kwargs): + """ + Helper for invoking `remote` on whichever orchestrator is enabled + + :raises RuntimeError: If the remote method failed. + :raises OrchestratorError: orchestrator failed to perform + :raises ImportError: no `orchestrator` module or backend not found. + """ + mgr = self.__get_mgr() + + try: + o = mgr._select_orchestrator() + except AttributeError: + o = mgr.remote('orchestrator', '_select_orchestrator') + + if o is None: + raise NoOrchestrator() + + mgr.log.debug("_oremote {} -> {}.{}(*{}, **{})".format(mgr.module_name, o, meth, args, kwargs)) + return mgr.remote(o, meth, *args, **kwargs) + + def _orchestrator_wait(self, completions): + # type: (List[Completion]) -> None + """ + Wait for completions to complete (reads) or + become persistent (writes). + + Waits for writes to be *persistent* but not *effective*. + + :param completions: List of Completions + :raises NoOrchestrator: + :raises RuntimeError: something went wrong while calling the process method. + :raises ImportError: no `orchestrator` module or backend not found. 
+ """ + while any(not c.has_result for c in completions): + self.process(completions) + self.__get_mgr().log.info("Operations pending: %s", + sum(1 for c in completions if not c.has_result)) + if any(c.needs_result for c in completions): + time.sleep(1) + else: + break + + +class OutdatableData(object): + DATEFMT = '%Y-%m-%d %H:%M:%S.%f' + + def __init__(self, data=None, last_refresh=None): + # type: (Optional[dict], Optional[datetime.datetime]) -> None + self._data = data + if data is not None and last_refresh is None: + self.last_refresh = datetime.datetime.utcnow() # type: Optional[datetime.datetime] + else: + self.last_refresh = last_refresh + + def json(self): + if self.last_refresh is not None: + timestr = self.last_refresh.strftime(self.DATEFMT) # type: Optional[str] + else: + timestr = None + + return { + "data": self._data, + "last_refresh": timestr, + } + + @property + def data(self): + return self._data + + # @data.setter + # No setter, as it doesn't work as expected: It's not saved in store automatically + + @classmethod + def time_from_string(cls, timestr): + if timestr is None: + return None + # drop the 'Z' timezone indication, it's always UTC + timestr = timestr.rstrip('Z') + return datetime.datetime.strptime(timestr, cls.DATEFMT) + + @classmethod + def from_json(cls, data): + return cls(data['data'], cls.time_from_string(data['last_refresh'])) + + def outdated(self, timeout=None): + if timeout is None: + timeout = 600 + if self.last_refresh is None: + return True + cutoff = datetime.datetime.utcnow() - datetime.timedelta( + seconds=timeout) + return self.last_refresh < cutoff + + def __repr__(self): + return 'OutdatableData(data={}, last_refresh={})'.format(self._data, self.last_refresh) + + +class OutdatableDictMixin(object): + """ + Toolbox for implementing a cache. As every orchestrator has + different needs, we cannot implement any logic here. 
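+
+    A rough sketch using the plain-dict variant defined below (key and payload
+    are made up)::
+
+        >>> cache = OutdatableDict()
+        >>> cache['node1'] = OutdatableData({'devices': []})
+        >>> cache['node1'].outdated(timeout=600)
+        False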
+ """ + + def __getitem__(self, item): + # type: (str) -> OutdatableData + return OutdatableData.from_json(super(OutdatableDictMixin, self).__getitem__(item)) # type: ignore + + def __setitem__(self, key, value): + # type: (str, OutdatableData) -> None + val = None if value is None else value.json() + super(OutdatableDictMixin, self).__setitem__(key, val) # type: ignore + + def items(self): + ## type: () -> Iterator[Tuple[str, OutdatableData]] + for item in super(OutdatableDictMixin, self).items(): # type: ignore + k, v = item + yield k, OutdatableData.from_json(v) + + def items_filtered(self, keys=None): + if keys: + return [(host, self[host]) for host in keys] + else: + return list(self.items()) + + def any_outdated(self, timeout=None): + items = self.items() + if not list(items): + return True + return any([i[1].outdated(timeout) for i in items]) + + def remove_outdated(self): + outdated = [item[0] for item in self.items() if item[1].outdated()] + for o in outdated: + del self[o] # type: ignore + + def invalidate(self, key): + self[key] = OutdatableData(self[key].data, + datetime.datetime.fromtimestamp(0)) + + +class OutdatablePersistentDict(OutdatableDictMixin, PersistentStoreDict): + pass + + +class OutdatableDict(OutdatableDictMixin, dict): + pass diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py new file mode 100644 index 000000000000..277743fb7a08 --- /dev/null +++ b/src/pybind/mgr/orchestrator/module.py @@ -0,0 +1,866 @@ +import datetime +import errno +import json +import yaml + +from ceph.deployment.inventory import Device +from prettytable import PrettyTable + +from mgr_util import format_bytes, to_pretty_timedelta + +try: + from typing import List, Set, Optional +except ImportError: + pass # just for type checking. 
+ + +from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection, \ + DriveGroupSpecs +from mgr_module import MgrModule, HandleCommandResult + +from ._interface import OrchestratorClientMixin, DeviceLightLoc, _cli_read_command, \ + raise_if_exception, _cli_write_command, TrivialReadCompletion, OrchestratorError, \ + NoOrchestrator, ServiceSpec, PlacementSpec, OrchestratorValidationError, NFSServiceSpec, \ + RGWSpec, InventoryFilter, InventoryNode, HostPlacementSpec, HostSpec + + +class OrchestratorCli(OrchestratorClientMixin, MgrModule): + MODULE_OPTIONS = [ + { + 'name': 'orchestrator', + 'type': 'str', + 'default': None, + 'desc': 'Orchestrator backend', + 'enum_allowed': ['cephadm', 'rook', + 'test_orchestrator'], + 'runtime': True, + }, + ] + NATIVE_OPTIONS = [] # type: List[dict] + + def __init__(self, *args, **kwargs): + super(OrchestratorCli, self).__init__(*args, **kwargs) + self.ident = set() # type: Set[str] + self.fault = set() # type: Set[str] + self._load() + self._refresh_health() + + def _load(self): + active = self.get_store('active_devices') + if active: + decoded = json.loads(active) + self.ident = set(decoded.get('ident', [])) + self.fault = set(decoded.get('fault', [])) + self.log.debug('ident {}, fault {}'.format(self.ident, self.fault)) + + def _save(self): + encoded = json.dumps({ + 'ident': list(self.ident), + 'fault': list(self.fault), + }) + self.set_store('active_devices', encoded) + + def _refresh_health(self): + h = {} + if self.ident: + h['DEVICE_IDENT_ON'] = { + 'severity': 'warning', + 'summary': '%d devices have ident light turned on' % len( + self.ident), + 'detail': ['{} ident light enabled'.format(d) for d in self.ident] + } + if self.fault: + h['DEVICE_FAULT_ON'] = { + 'severity': 'warning', + 'summary': '%d devices have fault light turned on' % len( + self.fault), + 'detail': ['{} fault light enabled'.format(d) for d in self.ident] + } + self.set_health_checks(h) + + def _get_device_locations(self, dev_id): + # type: (str) -> List[DeviceLightLoc] + locs = [d['location'] for d in self.get('devices')['devices'] if d['devid'] == dev_id] + return [DeviceLightLoc(**l) for l in sum(locs, [])] + + @_cli_read_command( + prefix='device ls-lights', + desc='List currently active device indicator lights') + def _device_ls(self): + return HandleCommandResult( + stdout=json.dumps({ + 'ident': list(self.ident), + 'fault': list(self.fault) + }, indent=4, sort_keys=True)) + + def light_on(self, fault_ident, devid): + # type: (str, str) -> HandleCommandResult + assert fault_ident in ("fault", "ident") + locs = self._get_device_locations(devid) + if locs is None: + return HandleCommandResult(stderr='device {} not found'.format(devid), + retval=-errno.ENOENT) + + getattr(self, fault_ident).add(devid) + self._save() + self._refresh_health() + completion = self.blink_device_light(fault_ident, True, locs) + self._orchestrator_wait([completion]) + return HandleCommandResult(stdout=str(completion.result)) + + def light_off(self, fault_ident, devid, force): + # type: (str, str, bool) -> HandleCommandResult + assert fault_ident in ("fault", "ident") + locs = self._get_device_locations(devid) + if locs is None: + return HandleCommandResult(stderr='device {} not found'.format(devid), + retval=-errno.ENOENT) + + try: + completion = self.blink_device_light(fault_ident, False, locs) + self._orchestrator_wait([completion]) + + if devid in getattr(self, fault_ident): + getattr(self, fault_ident).remove(devid) + self._save() + self._refresh_health() + return 
HandleCommandResult(stdout=str(completion.result)) + + except: + # There are several reasons the try: block might fail: + # 1. the device no longer exist + # 2. the device is no longer known to Ceph + # 3. the host is not reachable + if force and devid in getattr(self, fault_ident): + getattr(self, fault_ident).remove(devid) + self._save() + self._refresh_health() + raise + + @_cli_write_command( + prefix='device light', + cmd_args='name=enable,type=CephChoices,strings=on|off ' + 'name=devid,type=CephString ' + 'name=light_type,type=CephChoices,strings=ident|fault,req=false ' + 'name=force,type=CephBool,req=false', + desc='Enable or disable the device light. Default type is `ident`\n' + 'Usage: device light (on|off) [ident|fault] [--force]') + def _device_light(self, enable, devid, light_type=None, force=False): + # type: (str, str, Optional[str], bool) -> HandleCommandResult + light_type = light_type or 'ident' + on = enable == 'on' + if on: + return self.light_on(light_type, devid) + else: + return self.light_off(light_type, devid, force) + + def _select_orchestrator(self): + return self.get_module_option("orchestrator") + + @_cli_write_command( + 'orch host add', + 'name=host,type=CephString,req=true ' + 'name=addr,type=CephString,req=false ' + 'name=labels,type=CephString,n=N,req=false', + 'Add a host') + def _add_host(self, host, addr=None, labels=None): + s = HostSpec(hostname=host, addr=addr, labels=labels) + completion = self.add_host(s) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch host rm', + "name=host,type=CephString,req=true", + 'Remove a host') + def _remove_host(self, host): + completion = self.remove_host(host) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch host set-addr', + 'name=host,type=CephString ' + 'name=addr,type=CephString', + 'Update a host address') + def _update_set_addr(self, host, addr): + completion = self.update_host_addr(host, addr) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_read_command( + 'orch host ls', + 'name=format,type=CephChoices,strings=json|plain,req=false', + 'List hosts') + def _get_hosts(self, format='plain'): + completion = self.get_hosts() + self._orchestrator_wait([completion]) + raise_if_exception(completion) + if format == 'json': + hosts = [dict(host=node.name, labels=node.labels) + for node in completion.result] + output = json.dumps(hosts, sort_keys=True) + else: + table = PrettyTable( + ['HOST', 'ADDR', 'LABELS'], + border=False) + table.align = 'l' + table.left_padding_width = 0 + table.right_padding_width = 1 + for node in completion.result: + table.add_row((node.name, node.addr, ' '.join(node.labels))) + output = table.get_string() + return HandleCommandResult(stdout=output) + + @_cli_write_command( + 'orch host label add', + 'name=host,type=CephString ' + 'name=label,type=CephString', + 'Add a host label') + def _host_label_add(self, host, label): + completion = self.add_host_label(host, label) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch host label rm', + 'name=host,type=CephString ' + 'name=label,type=CephString', + 'Add a host label') + def _host_label_rm(self, host, 
label): + completion = self.remove_host_label(host, label) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_read_command( + 'orch device ls', + "name=host,type=CephString,n=N,req=false " + "name=format,type=CephChoices,strings=json|plain,req=false " + "name=refresh,type=CephBool,req=false", + 'List devices on a node') + def _list_devices(self, host=None, format='plain', refresh=False): + # type: (Optional[List[str]], str, bool) -> HandleCommandResult + """ + Provide information about storage devices present in cluster hosts + + Note: this does not have to be completely synchronous. Slightly out of + date hardware inventory is fine as long as hardware ultimately appears + in the output of this command. + """ + nf = InventoryFilter(nodes=host) if host else None + + completion = self.get_inventory(node_filter=nf, refresh=refresh) + + self._orchestrator_wait([completion]) + raise_if_exception(completion) + + if format == 'json': + data = [n.to_json() for n in completion.result] + return HandleCommandResult(stdout=json.dumps(data)) + else: + out = [] + + table = PrettyTable( + ['HOST', 'PATH', 'TYPE', 'SIZE', 'DEVICE', 'AVAIL', + 'REJECT REASONS'], + border=False) + table.align = 'l' + table._align['SIZE'] = 'r' + table.left_padding_width = 0 + table.right_padding_width = 1 + for host_ in completion.result: # type: InventoryNode + for d in host_.devices.devices: # type: Device + table.add_row( + ( + host_.name, + d.path, + d.human_readable_type, + format_bytes(d.sys_api.get('size', 0), 5), + d.device_id, + d.available, + ', '.join(d.rejected_reasons) + ) + ) + out.append(table.get_string()) + return HandleCommandResult(stdout='\n'.join(out)) + + @_cli_read_command( + 'orch ps', + "name=host,type=CephString,req=false " + "name=daemon_type,type=CephChoices,strings=mon|mgr|osd|mds|iscsi|nfs|rgw|rbd-mirror,req=false " + "name=daemon_id,type=CephString,req=false " + "name=format,type=CephChoices,strings=json|plain,req=false " + "name=refresh,type=CephBool,req=false", + 'List daemons known to orchestrator') + def _list_daemons(self, host=None, daemon_type=None, daemon_id=None, format='plain', refresh=False): + completion = self.list_daemons(daemon_type, + daemon_id=daemon_id, + host=host, + refresh=refresh) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + daemons = completion.result + + def ukn(s): + return '' if s is None else s + # Sort the list for display + daemons.sort(key=lambda s: (ukn(s.daemon_type), ukn(s.nodename), ukn(s.daemon_id))) + + if len(daemons) == 0: + return HandleCommandResult(stdout="No daemons reported") + elif format == 'json': + data = [s.to_json() for s in daemons] + return HandleCommandResult(stdout=json.dumps(data)) + else: + now = datetime.datetime.utcnow() + table = PrettyTable( + ['NAME', 'HOST', 'STATUS', 'REFRESHED', + 'VERSION', 'IMAGE NAME', 'IMAGE ID', 'CONTAINER ID'], + border=False) + table.align = 'l' + table.left_padding_width = 0 + table.right_padding_width = 1 + for s in sorted(daemons, key=lambda s: s.name()): + status = { + -1: 'error', + 0: 'stopped', + 1: 'running', + None: '' + }[s.status] + + if s.last_refresh: + age = to_pretty_timedelta(now - s.last_refresh) + ' ago' + else: + age = '-' + table.add_row(( + s.name(), + ukn(s.nodename), + status, + age, + ukn(s.version), + ukn(s.container_image_name), + ukn(s.container_image_id)[0:12], + ukn(s.container_id)[0:12])) + + return HandleCommandResult(stdout=table.get_string()) + + 
@_cli_write_command( + 'orch osd create', + "name=svc_arg,type=CephString,req=false", + 'Create an OSD service. Either --svc_arg=host:drives or -i ') + def _create_osd(self, svc_arg=None, inbuf=None): + # type: (Optional[str], Optional[str]) -> HandleCommandResult + """Create one or more OSDs""" + + usage = """ +Usage: + ceph orch osd create -i + ceph orch osd create host:device1,device2,... +""" + + if inbuf: + try: + dgs = DriveGroupSpecs(yaml.load(inbuf)) + drive_groups = dgs.drive_groups + except ValueError as e: + msg = 'Failed to read JSON input: {}'.format(str(e)) + usage + return HandleCommandResult(-errno.EINVAL, stderr=msg) + + elif svc_arg: + try: + node_name, block_device = svc_arg.split(":") + block_devices = block_device.split(',') + except (TypeError, KeyError, ValueError): + msg = "Invalid host:device spec: '{}'".format(svc_arg) + usage + return HandleCommandResult(-errno.EINVAL, stderr=msg) + + devs = DeviceSelection(paths=block_devices) + drive_groups = [DriveGroupSpec(node_name, data_devices=devs)] + else: + return HandleCommandResult(-errno.EINVAL, stderr=usage) + + completion = self.create_osds(drive_groups) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch daemon add mon', + "name=num,type=CephInt,req=false " + "name=hosts,type=CephString,n=N,req=false " + "name=label,type=CephString,req=false", + 'Start monitor daemon(s)') + def _daemon_add_mon(self, num=None, hosts=[], label=None): + if not num and not hosts and not label: + # Improve Error message. Point to parse_host_spec examples + raise OrchestratorValidationError("Mons need a placement spec. (num, host, network, name(opt))") + placement = PlacementSpec(label=label, count=num, hosts=hosts) + placement.validate() + + spec = ServiceSpec(placement=placement) + + completion = self.add_mon(spec) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch daemon add mgr', + "name=num,type=CephInt,req=false " + "name=hosts,type=CephString,n=N,req=false", + 'Start rbd-mirror daemon(s)') + def _daemon_add_mgr(self, num=None, hosts=None): + spec = ServiceSpec( + placement=PlacementSpec(hosts=hosts, count=num)) + completion = self.add_mgr(spec) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch daemon add rbd-mirror', + "name=num,type=CephInt,req=false " + "name=hosts,type=CephString,n=N,req=false", + 'Start rbd-mirror daemon(s)') + def _rbd_mirror_add(self, num=None, hosts=None): + spec = ServiceSpec( + None, + placement=PlacementSpec(hosts=hosts, count=num)) + completion = self.add_rbd_mirror(spec) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch daemon add mds', + "name=fs_name,type=CephString " + "name=num,type=CephInt,req=false " + "name=hosts,type=CephString,n=N,req=false", + 'Start MDS daemon(s)') + def _mds_add(self, fs_name, num=None, hosts=None): + spec = ServiceSpec( + fs_name, + placement=PlacementSpec(hosts=hosts, count=num)) + completion = self.add_mds(spec) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch daemon add rgw', + 
'name=realm_name,type=CephString ' + 'name=zone_name,type=CephString ' + 'name=num,type=CephInt,req=false ' + "name=hosts,type=CephString,n=N,req=false", + 'Start RGW daemon(s)') + def _rgw_add(self, realm_name, zone_name, num=1, hosts=None, inbuf=None): + usage = """ +Usage: + ceph orch rgw add -i + ceph orch rgw add + """ + if inbuf: + try: + rgw_spec = RGWSpec.from_json(json.loads(inbuf)) + except ValueError as e: + msg = 'Failed to read JSON input: {}'.format(str(e)) + usage + return HandleCommandResult(-errno.EINVAL, stderr=msg) + rgw_spec = RGWSpec( + rgw_realm=realm_name, + rgw_zone=zone_name, + placement=PlacementSpec(hosts=hosts, count=num)) + + completion = self.add_rgw(rgw_spec) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch daemon add nfs', + "name=svc_arg,type=CephString " + "name=pool,type=CephString " + "name=namespace,type=CephString,req=false " + 'name=num,type=CephInt,req=false ' + 'name=hosts,type=CephString,n=N,req=false ' + 'name=label,type=CephString,req=false', + 'Start NFS daemon(s)') + def _nfs_add(self, svc_arg, pool, namespace=None, num=None, label=None, hosts=[]): + spec = NFSServiceSpec( + svc_arg, + pool=pool, + namespace=namespace, + placement=PlacementSpec(label=label, hosts=hosts, count=num), + ) + spec.validate_add() + completion = self.add_nfs(spec) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch daemon add prometheus', + 'name=num,type=CephInt,req=false ' + 'name=hosts,type=CephString,n=N,req=false ' + 'name=label,type=CephString,req=false', + 'Add prometheus daemon(s)') + def _daemon_add_prometheus(self, num=None, label=None, hosts=[]): + # type: (Optional[int], Optional[str], List[str]) -> HandleCommandResult + spec = ServiceSpec( + placement=PlacementSpec(label=label, hosts=hosts, count=num), + ) + completion = self.add_prometheus(spec) + self._orchestrator_wait([completion]) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch', + "name=action,type=CephChoices,strings=start|stop|restart|redeploy|reconfig " + "name=svc_name,type=CephString", + 'Start, stop, restart, redeploy, or reconfig an entire service (i.e. all daemons)') + def _service_action(self, action, svc_name): + if '.' in svc_name: + (service_type, service_id) = svc_name.split('.', 1) + else: + service_type = svc_name; + service_id = None + completion = self.service_action(action, service_type, service_id) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch daemon', + "name=action,type=CephChoices,strings=start|stop|restart|redeploy|reconfig " + "name=name,type=CephString", + 'Start, stop, restart, redeploy, or reconfig a specific daemon') + def _daemon_action(self, action, name): + if '.' 
not in name: + raise OrchestratorError('%s is not a valid daemon name' % name) + (daemon_type, daemon_id) = name.split('.', 1) + completion = self.daemon_action(action, daemon_type, daemon_id) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch daemon rm', + "name=names,type=CephString,n=N " + 'name=force,type=CephBool,req=false', + 'Remove specific daemon(s)') + def _daemon_rm(self, names, force=False): + for name in names: + if '.' not in name: + raise OrchestratorError('%s is not a valid daemon name' % name) + completion = self.remove_daemons(names, force) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch rm', + "name=name,type=CephString", + 'Remove a service') + def _service_rm(self, name): + if '.' in name: + (service_type, service_name) = name.split('.') + else: + service_type = name; + service_name = None + if name in ['mon', 'mgr']: + raise OrchestratorError('The mon and mgr services cannot be removed') + completion = self.remove_service(service_type, service_name) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch apply mgr', + "name=num,type=CephInt,req=false " + "name=hosts,type=CephString,n=N,req=false " + "name=label,type=CephString,req=false", + 'Update the size or placement of managers') + def _apply_mgr(self, num=None, hosts=[], label=None): + placement = PlacementSpec( + label=label, count=num, hosts=hosts) + placement.validate() + + spec = ServiceSpec(placement=placement) + + completion = self.apply_mgr(spec) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch apply mon', + "name=num,type=CephInt,req=false " + "name=hosts,type=CephString,n=N,req=false " + "name=label,type=CephString,req=false", + 'Update the number of monitor instances') + def _apply_mon(self, num=None, hosts=[], label=None): + if not num and not hosts and not label: + # Improve Error message. Point to parse_host_spec examples + raise OrchestratorValidationError("Mons need a placement spec. 
(num, host, network, name(opt))") + placement = PlacementSpec(label=label, count=num, hosts=hosts) + placement.validate() + + spec = ServiceSpec(placement=placement) + + completion = self.apply_mon(spec) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch apply mds', + "name=fs_name,type=CephString " + "name=num,type=CephInt,req=false " + "name=hosts,type=CephString,n=N,req=false " + "name=label,type=CephString,req=false", + 'Update the number of MDS instances for the given fs_name') + def _apply_mds(self, fs_name, num=None, label=None, hosts=[]): + placement = PlacementSpec(label=label, count=num, hosts=hosts) + placement.validate() + + spec = ServiceSpec( + fs_name, + placement=placement) + + completion = self.apply_mds(spec) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch apply rbd-mirror', + "name=num,type=CephInt,req=false " + "name=hosts,type=CephString,n=N,req=false " + "name=label,type=CephString,req=false", + 'Update the number of rbd-mirror instances') + def _apply_rbd_mirror(self, num, label=None, hosts=[]): + spec = ServiceSpec( + placement=PlacementSpec(hosts=hosts, count=num, label=label)) + completion = self.apply_rbd_mirror(spec) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch apply rgw', + 'name=realm_name,type=CephString ' + 'name=zone_name,type=CephString ' + 'name=num,type=CephInt,req=false ' + 'name=hosts,type=CephString,n=N,req=false ' + 'name=label,type=CephString,req=false', + 'Update the number of RGW instances for the given zone') + def _apply_rgw(self, zone_name, realm_name, num=None, label=None, hosts=[]): + spec = RGWSpec( + rgw_realm=realm_name, + rgw_zone=zone_name, + placement=PlacementSpec(hosts=hosts, label=label, count=num)) + completion = self.apply_rgw(spec) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch apply nfs', + "name=svc_id,type=CephString " + 'name=num,type=CephInt,req=false ' + 'name=hosts,type=CephString,n=N,req=false ' + 'name=label,type=CephString,req=false', + 'Scale an NFS service') + def _apply_nfs(self, svc_id, num=None, label=None, hosts=[]): + # type: (str, Optional[int], Optional[str], List[str]) -> HandleCommandResult + spec = NFSServiceSpec( + svc_id, + placement=PlacementSpec(label=label, hosts=hosts, count=num), + ) + completion = self.apply_nfs(spec) + self._orchestrator_wait([completion]) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch apply prometheus', + 'name=num,type=CephInt,req=false ' + 'name=hosts,type=CephString,n=N,req=false ' + 'name=label,type=CephString,req=false', + 'Scale prometheus service') + def _apply_prometheus(self, num=None, label=None, hosts=[]): + # type: (Optional[int], Optional[str], List[str]) -> HandleCommandResult + spec = ServiceSpec( + placement=PlacementSpec(label=label, hosts=hosts, count=num), + ) + completion = self.apply_prometheus(spec) + self._orchestrator_wait([completion]) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'orch set backend', + "name=module_name,type=CephString,req=true", + 'Select orchestrator module backend') + def 
_set_backend(self, module_name): + """ + We implement a setter command instead of just having the user + modify the setting directly, so that we can validate they're setting + it to a module that really exists and is enabled. + + There isn't a mechanism for ensuring they don't *disable* the module + later, but this is better than nothing. + """ + mgr_map = self.get("mgr_map") + + if module_name is None or module_name == "": + self.set_module_option("orchestrator", None) + return HandleCommandResult() + + for module in mgr_map['available_modules']: + if module['name'] != module_name: + continue + + if not module['can_run']: + continue + + enabled = module['name'] in mgr_map['modules'] + if not enabled: + return HandleCommandResult(-errno.EINVAL, + stderr="Module '{module_name}' is not enabled. \n Run " + "`ceph mgr module enable {module_name}` " + "to enable.".format(module_name=module_name)) + + try: + is_orchestrator = self.remote(module_name, + "is_orchestrator_module") + except NameError: + is_orchestrator = False + + if not is_orchestrator: + return HandleCommandResult(-errno.EINVAL, + stderr="'{0}' is not an orchestrator module".format(module_name)) + + self.set_module_option("orchestrator", module_name) + + return HandleCommandResult() + + return HandleCommandResult(-errno.EINVAL, stderr="Module '{0}' not found".format(module_name)) + + @_cli_write_command( + 'orch cancel', + desc='cancels ongoing operations') + def _cancel(self): + """ + ProgressReferences might get stuck. Let's unstuck them. + """ + self.cancel_completions() + return HandleCommandResult() + + @_cli_read_command( + 'orch status', + desc='Report configured backend and its status') + def _status(self): + o = self._select_orchestrator() + if o is None: + raise NoOrchestrator() + + avail, why = self.available() + if avail is None: + # The module does not report its availability + return HandleCommandResult(stdout="Backend: {0}".format(o)) + else: + return HandleCommandResult(stdout="Backend: {0}\nAvailable: {1}{2}".format( + o, avail, + " ({0})".format(why) if not avail else "" + )) + + def self_test(self): + old_orch = self._select_orchestrator() + self._set_backend('') + assert self._select_orchestrator() is None + self._set_backend(old_orch) + + e1 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "ZeroDivisionError") + try: + raise_if_exception(e1) + assert False + except ZeroDivisionError as e: + assert e.args == ('hello', 'world') + + e2 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "OrchestratorError") + try: + raise_if_exception(e2) + assert False + except OrchestratorError as e: + assert e.args == ('hello', 'world') + + c = TrivialReadCompletion(result=True) + assert c.has_result + + @_cli_write_command( + 'upgrade check', + 'name=image,type=CephString,req=false ' + 'name=ceph_version,type=CephString,req=false', + desc='Check service versions vs available and target containers') + def _upgrade_check(self, image=None, ceph_version=None): + completion = self.upgrade_check(image=image, version=ceph_version) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'upgrade status', + desc='Check service versions vs available and target containers') + def _upgrade_status(self): + completion = self.upgrade_status() + self._orchestrator_wait([completion]) + raise_if_exception(completion) + r = { + 'target_image': completion.result.target_image, + 'in_progress': 
completion.result.in_progress, + 'services_complete': completion.result.services_complete, + 'message': completion.result.message, + } + out = json.dumps(r, indent=4) + return HandleCommandResult(stdout=out) + + @_cli_write_command( + 'upgrade start', + 'name=image,type=CephString,req=false ' + 'name=ceph_version,type=CephString,req=false', + desc='Initiate upgrade') + def _upgrade_start(self, image=None, ceph_version=None): + completion = self.upgrade_start(image, ceph_version) + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'upgrade pause', + desc='Pause an in-progress upgrade') + def _upgrade_pause(self): + completion = self.upgrade_pause() + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'upgrade resume', + desc='Resume paused upgrade') + def _upgrade_resume(self): + completion = self.upgrade_resume() + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + + @_cli_write_command( + 'upgrade stop', + desc='Stop an in-progress upgrade') + def _upgrade_stop(self): + completion = self.upgrade_stop() + self._orchestrator_wait([completion]) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) diff --git a/src/pybind/mgr/orchestrator_cli/.gitignore b/src/pybind/mgr/orchestrator_cli/.gitignore deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/src/pybind/mgr/orchestrator_cli/README.md b/src/pybind/mgr/orchestrator_cli/README.md deleted file mode 100644 index d70e88c11587..000000000000 --- a/src/pybind/mgr/orchestrator_cli/README.md +++ /dev/null @@ -1,14 +0,0 @@ -# Orchestrator CLI - -See also [orchestrator cli doc](https://docs.ceph.com/docs/master/mgr/orchestrator_cli/). - -## Running the Teuthology tests - -To run the API tests against a real Ceph cluster, we leverage the Teuthology -framework and the `test_orchestrator` backend. - -``source`` the script and run the tests manually:: - - $ pushd ../dashboard ; source ./run-backend-api-tests.sh ; popd - $ run_teuthology_tests tasks.mgr.test_orchestrator_cli - $ cleanup_teuthology diff --git a/src/pybind/mgr/orchestrator_cli/__init__.py b/src/pybind/mgr/orchestrator_cli/__init__.py deleted file mode 100644 index ef27d74a3795..000000000000 --- a/src/pybind/mgr/orchestrator_cli/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from __future__ import absolute_import - -from .module import OrchestratorCli diff --git a/src/pybind/mgr/orchestrator_cli/module.py b/src/pybind/mgr/orchestrator_cli/module.py deleted file mode 100644 index 204a6638b989..000000000000 --- a/src/pybind/mgr/orchestrator_cli/module.py +++ /dev/null @@ -1,864 +0,0 @@ -import datetime -import errno -import json -import yaml -from functools import wraps - -from ceph.deployment.inventory import Device -from prettytable import PrettyTable - -from mgr_util import format_bytes, to_pretty_timedelta - -try: - from typing import List, Set, Optional -except ImportError: - pass # just for type checking. 
- - -from ceph.deployment.drive_group import DriveGroupSpec, DriveGroupValidationError, \ - DeviceSelection, DriveGroupSpecs -from mgr_module import MgrModule, CLICommand, HandleCommandResult - -import orchestrator - - -class OrchestratorCli(orchestrator.OrchestratorClientMixin, MgrModule): - MODULE_OPTIONS = [ - { - 'name': 'orchestrator', - 'type': 'str', - 'default': None, - 'desc': 'Orchestrator backend', - 'enum_allowed': ['cephadm', 'rook', - 'test_orchestrator'], - 'runtime': True, - }, - ] - NATIVE_OPTIONS = [] # type: List[dict] - - def __init__(self, *args, **kwargs): - super(OrchestratorCli, self).__init__(*args, **kwargs) - self.ident = set() # type: Set[str] - self.fault = set() # type: Set[str] - self._load() - self._refresh_health() - - def _load(self): - active = self.get_store('active_devices') - if active: - decoded = json.loads(active) - self.ident = set(decoded.get('ident', [])) - self.fault = set(decoded.get('fault', [])) - self.log.debug('ident {}, fault {}'.format(self.ident, self.fault)) - - def _save(self): - encoded = json.dumps({ - 'ident': list(self.ident), - 'fault': list(self.fault), - }) - self.set_store('active_devices', encoded) - - def _refresh_health(self): - h = {} - if self.ident: - h['DEVICE_IDENT_ON'] = { - 'severity': 'warning', - 'summary': '%d devices have ident light turned on' % len( - self.ident), - 'detail': ['{} ident light enabled'.format(d) for d in self.ident] - } - if self.fault: - h['DEVICE_FAULT_ON'] = { - 'severity': 'warning', - 'summary': '%d devices have fault light turned on' % len( - self.fault), - 'detail': ['{} fault light enabled'.format(d) for d in self.ident] - } - self.set_health_checks(h) - - def _get_device_locations(self, dev_id): - # type: (str) -> List[orchestrator.DeviceLightLoc] - locs = [d['location'] for d in self.get('devices')['devices'] if d['devid'] == dev_id] - return [orchestrator.DeviceLightLoc(**l) for l in sum(locs, [])] - - @orchestrator._cli_read_command( - prefix='device ls-lights', - desc='List currently active device indicator lights') - def _device_ls(self): - return HandleCommandResult( - stdout=json.dumps({ - 'ident': list(self.ident), - 'fault': list(self.fault) - }, indent=4, sort_keys=True)) - - def light_on(self, fault_ident, devid): - # type: (str, str) -> HandleCommandResult - assert fault_ident in ("fault", "ident") - locs = self._get_device_locations(devid) - if locs is None: - return HandleCommandResult(stderr='device {} not found'.format(devid), - retval=-errno.ENOENT) - - getattr(self, fault_ident).add(devid) - self._save() - self._refresh_health() - completion = self.blink_device_light(fault_ident, True, locs) - self._orchestrator_wait([completion]) - return HandleCommandResult(stdout=str(completion.result)) - - def light_off(self, fault_ident, devid, force): - # type: (str, str, bool) -> HandleCommandResult - assert fault_ident in ("fault", "ident") - locs = self._get_device_locations(devid) - if locs is None: - return HandleCommandResult(stderr='device {} not found'.format(devid), - retval=-errno.ENOENT) - - try: - completion = self.blink_device_light(fault_ident, False, locs) - self._orchestrator_wait([completion]) - - if devid in getattr(self, fault_ident): - getattr(self, fault_ident).remove(devid) - self._save() - self._refresh_health() - return HandleCommandResult(stdout=str(completion.result)) - - except: - # There are several reasons the try: block might fail: - # 1. the device no longer exist - # 2. the device is no longer known to Ceph - # 3. 
the host is not reachable - if force and devid in getattr(self, fault_ident): - getattr(self, fault_ident).remove(devid) - self._save() - self._refresh_health() - raise - - @orchestrator._cli_write_command( - prefix='device light', - cmd_args='name=enable,type=CephChoices,strings=on|off ' - 'name=devid,type=CephString ' - 'name=light_type,type=CephChoices,strings=ident|fault,req=false ' - 'name=force,type=CephBool,req=false', - desc='Enable or disable the device light. Default type is `ident`\n' - 'Usage: device light (on|off) [ident|fault] [--force]') - def _device_light(self, enable, devid, light_type=None, force=False): - # type: (str, str, Optional[str], bool) -> HandleCommandResult - light_type = light_type or 'ident' - on = enable == 'on' - if on: - return self.light_on(light_type, devid) - else: - return self.light_off(light_type, devid, force) - - def _select_orchestrator(self): - return self.get_module_option("orchestrator") - - @orchestrator._cli_write_command( - 'orch host add', - 'name=host,type=CephString,req=true ' - 'name=addr,type=CephString,req=false ' - 'name=labels,type=CephString,n=N,req=false', - 'Add a host') - def _add_host(self, host, addr=None, labels=None): - s = orchestrator.HostSpec(hostname=host, addr=addr, labels=labels) - completion = self.add_host(s) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch host rm', - "name=host,type=CephString,req=true", - 'Remove a host') - def _remove_host(self, host): - completion = self.remove_host(host) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch host set-addr', - 'name=host,type=CephString ' - 'name=addr,type=CephString', - 'Update a host address') - def _update_set_addr(self, host, addr): - completion = self.update_host_addr(host, addr) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_read_command( - 'orch host ls', - 'name=format,type=CephChoices,strings=json|plain,req=false', - 'List hosts') - def _get_hosts(self, format='plain'): - completion = self.get_hosts() - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - if format == 'json': - hosts = [dict(host=node.name, labels=node.labels) - for node in completion.result] - output = json.dumps(hosts, sort_keys=True) - else: - table = PrettyTable( - ['HOST', 'ADDR', 'LABELS'], - border=False) - table.align = 'l' - table.left_padding_width = 0 - table.right_padding_width = 1 - for node in completion.result: - table.add_row((node.name, node.addr, ' '.join(node.labels))) - output = table.get_string() - return HandleCommandResult(stdout=output) - - @orchestrator._cli_write_command( - 'orch host label add', - 'name=host,type=CephString ' - 'name=label,type=CephString', - 'Add a host label') - def _host_label_add(self, host, label): - completion = self.add_host_label(host, label) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch host label rm', - 'name=host,type=CephString ' - 'name=label,type=CephString', - 'Add a host label') - def _host_label_rm(self, host, label): - completion = 
self.remove_host_label(host, label) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_read_command( - 'orch device ls', - "name=host,type=CephString,n=N,req=false " - "name=format,type=CephChoices,strings=json|plain,req=false " - "name=refresh,type=CephBool,req=false", - 'List devices on a node') - def _list_devices(self, host=None, format='plain', refresh=False): - # type: (Optional[List[str]], str, bool) -> HandleCommandResult - """ - Provide information about storage devices present in cluster hosts - - Note: this does not have to be completely synchronous. Slightly out of - date hardware inventory is fine as long as hardware ultimately appears - in the output of this command. - """ - nf = orchestrator.InventoryFilter(nodes=host) if host else None - - completion = self.get_inventory(node_filter=nf, refresh=refresh) - - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - - if format == 'json': - data = [n.to_json() for n in completion.result] - return HandleCommandResult(stdout=json.dumps(data)) - else: - out = [] - - table = PrettyTable( - ['HOST', 'PATH', 'TYPE', 'SIZE', 'DEVICE', 'AVAIL', - 'REJECT REASONS'], - border=False) - table.align = 'l' - table._align['SIZE'] = 'r' - table.left_padding_width = 0 - table.right_padding_width = 1 - for host_ in completion.result: # type: orchestrator.InventoryNode - for d in host_.devices.devices: # type: Device - table.add_row( - ( - host_.name, - d.path, - d.human_readable_type, - format_bytes(d.sys_api.get('size', 0), 5), - d.device_id, - d.available, - ', '.join(d.rejected_reasons) - ) - ) - out.append(table.get_string()) - return HandleCommandResult(stdout='\n'.join(out)) - - @orchestrator._cli_read_command( - 'orch ps', - "name=host,type=CephString,req=false " - "name=daemon_type,type=CephChoices,strings=mon|mgr|osd|mds|iscsi|nfs|rgw|rbd-mirror,req=false " - "name=daemon_id,type=CephString,req=false " - "name=format,type=CephChoices,strings=json|plain,req=false " - "name=refresh,type=CephBool,req=false", - 'List daemons known to orchestrator') - def _list_daemons(self, host=None, daemon_type=None, daemon_id=None, format='plain', refresh=False): - completion = self.list_daemons(daemon_type, - daemon_id=daemon_id, - host=host, - refresh=refresh) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - daemons = completion.result - - def ukn(s): - return '' if s is None else s - # Sort the list for display - daemons.sort(key=lambda s: (ukn(s.daemon_type), ukn(s.nodename), ukn(s.daemon_id))) - - if len(daemons) == 0: - return HandleCommandResult(stdout="No daemons reported") - elif format == 'json': - data = [s.to_json() for s in daemons] - return HandleCommandResult(stdout=json.dumps(data)) - else: - now = datetime.datetime.utcnow() - table = PrettyTable( - ['NAME', 'HOST', 'STATUS', 'REFRESHED', - 'VERSION', 'IMAGE NAME', 'IMAGE ID', 'CONTAINER ID'], - border=False) - table.align = 'l' - table.left_padding_width = 0 - table.right_padding_width = 1 - for s in sorted(daemons, key=lambda s: s.name()): - status = { - -1: 'error', - 0: 'stopped', - 1: 'running', - None: '' - }[s.status] - - if s.last_refresh: - age = to_pretty_timedelta(now - s.last_refresh) + ' ago' - else: - age = '-' - table.add_row(( - s.name(), - ukn(s.nodename), - status, - age, - ukn(s.version), - ukn(s.container_image_name), - ukn(s.container_image_id)[0:12], - ukn(s.container_id)[0:12])) - 
- return HandleCommandResult(stdout=table.get_string()) - - @orchestrator._cli_write_command( - 'orch osd create', - "name=svc_arg,type=CephString,req=false", - 'Create an OSD service. Either --svc_arg=host:drives or -i ') - def _create_osd(self, svc_arg=None, inbuf=None): - # type: (Optional[str], Optional[str]) -> HandleCommandResult - """Create one or more OSDs""" - - usage = """ -Usage: - ceph orch osd create -i - ceph orch osd create host:device1,device2,... -""" - - if inbuf: - try: - dgs = DriveGroupSpecs(yaml.load(inbuf)) - drive_groups = dgs.drive_groups - except ValueError as e: - msg = 'Failed to read JSON input: {}'.format(str(e)) + usage - return HandleCommandResult(-errno.EINVAL, stderr=msg) - - elif svc_arg: - try: - node_name, block_device = svc_arg.split(":") - block_devices = block_device.split(',') - except (TypeError, KeyError, ValueError): - msg = "Invalid host:device spec: '{}'".format(svc_arg) + usage - return HandleCommandResult(-errno.EINVAL, stderr=msg) - - devs = DeviceSelection(paths=block_devices) - drive_groups = [DriveGroupSpec(node_name, data_devices=devs)] - else: - return HandleCommandResult(-errno.EINVAL, stderr=usage) - - completion = self.create_osds(drive_groups) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch daemon add mon', - "name=num,type=CephInt,req=false " - "name=hosts,type=CephString,n=N,req=false " - "name=label,type=CephString,req=false", - 'Start monitor daemon(s)') - def _daemon_add_mon(self, num=None, hosts=[], label=None): - if not num and not hosts and not label: - # Improve Error message. Point to parse_host_spec examples - raise orchestrator.OrchestratorValidationError("Mons need a placement spec. 
(num, host, network, name(opt))") - placement = orchestrator.PlacementSpec(label=label, count=num, hosts=hosts) - placement.validate() - - spec = orchestrator.ServiceSpec(placement=placement) - - completion = self.add_mon(spec) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch daemon add mgr', - "name=num,type=CephInt,req=false " - "name=hosts,type=CephString,n=N,req=false", - 'Start rbd-mirror daemon(s)') - def _daemon_add_mgr(self, num=None, hosts=None): - spec = orchestrator.ServiceSpec( - placement=orchestrator.PlacementSpec(hosts=hosts, count=num)) - completion = self.add_mgr(spec) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch daemon add rbd-mirror', - "name=num,type=CephInt,req=false " - "name=hosts,type=CephString,n=N,req=false", - 'Start rbd-mirror daemon(s)') - def _rbd_mirror_add(self, num=None, hosts=None): - spec = orchestrator.ServiceSpec( - None, - placement=orchestrator.PlacementSpec(hosts=hosts, count=num)) - completion = self.add_rbd_mirror(spec) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch daemon add mds', - "name=fs_name,type=CephString " - "name=num,type=CephInt,req=false " - "name=hosts,type=CephString,n=N,req=false", - 'Start MDS daemon(s)') - def _mds_add(self, fs_name, num=None, hosts=None): - spec = orchestrator.ServiceSpec( - fs_name, - placement=orchestrator.PlacementSpec(hosts=hosts, count=num)) - completion = self.add_mds(spec) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch daemon add rgw', - 'name=realm_name,type=CephString ' - 'name=zone_name,type=CephString ' - 'name=num,type=CephInt,req=false ' - "name=hosts,type=CephString,n=N,req=false", - 'Start RGW daemon(s)') - def _rgw_add(self, realm_name, zone_name, num=1, hosts=None, inbuf=None): - usage = """ -Usage: - ceph orch rgw add -i - ceph orch rgw add - """ - if inbuf: - try: - rgw_spec = orchestrator.RGWSpec.from_json(json.loads(inbuf)) - except ValueError as e: - msg = 'Failed to read JSON input: {}'.format(str(e)) + usage - return HandleCommandResult(-errno.EINVAL, stderr=msg) - rgw_spec = orchestrator.RGWSpec( - rgw_realm=realm_name, - rgw_zone=zone_name, - placement=orchestrator.PlacementSpec(hosts=hosts, count=num)) - - completion = self.add_rgw(rgw_spec) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch daemon add nfs', - "name=svc_arg,type=CephString " - "name=pool,type=CephString " - "name=namespace,type=CephString,req=false " - 'name=num,type=CephInt,req=false ' - 'name=hosts,type=CephString,n=N,req=false ' - 'name=label,type=CephString,req=false', - 'Start NFS daemon(s)') - def _nfs_add(self, svc_arg, pool, namespace=None, num=None, label=None, hosts=[]): - spec = orchestrator.NFSServiceSpec( - svc_arg, - pool=pool, - namespace=namespace, - placement=orchestrator.PlacementSpec(label=label, hosts=hosts, count=num), - ) - spec.validate_add() - completion = self.add_nfs(spec) - 
self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch daemon add prometheus', - 'name=num,type=CephInt,req=false ' - 'name=hosts,type=CephString,n=N,req=false ' - 'name=label,type=CephString,req=false', - 'Add prometheus daemon(s)') - def _daemon_add_prometheus(self, num=None, label=None, hosts=[]): - # type: (Optional[int], Optional[str], List[str]) -> HandleCommandResult - spec = orchestrator.ServiceSpec( - placement=orchestrator.PlacementSpec(label=label, hosts=hosts, count=num), - ) - completion = self.add_prometheus(spec) - self._orchestrator_wait([completion]) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch', - "name=action,type=CephChoices,strings=start|stop|restart|redeploy|reconfig " - "name=svc_name,type=CephString", - 'Start, stop, restart, redeploy, or reconfig an entire service (i.e. all daemons)') - def _service_action(self, action, svc_name): - if '.' in svc_name: - (service_type, service_id) = svc_name.split('.', 1) - else: - service_type = svc_name; - service_id = None - completion = self.service_action(action, service_type, service_id) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch daemon', - "name=action,type=CephChoices,strings=start|stop|restart|redeploy|reconfig " - "name=name,type=CephString", - 'Start, stop, restart, redeploy, or reconfig a specific daemon') - def _daemon_action(self, action, name): - if '.' not in name: - raise orchestrator.OrchestratorError('%s is not a valid daemon name' % name) - (daemon_type, daemon_id) = name.split('.', 1) - completion = self.daemon_action(action, daemon_type, daemon_id) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch daemon rm', - "name=names,type=CephString,n=N " - 'name=force,type=CephBool,req=false', - 'Remove specific daemon(s)') - def _daemon_rm(self, names, force=False): - for name in names: - if '.' not in name: - raise orchestrator.OrchestratorError('%s is not a valid daemon name' % name) - completion = self.remove_daemons(names, force) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch rm', - "name=name,type=CephString", - 'Remove a service') - def _service_rm(self, name): - if '.' 
in name: - (service_type, service_name) = name.split('.') - else: - service_type = name; - service_name = None - if name in ['mon', 'mgr']: - raise orchestrator.OrchestratorError('The mon and mgr services cannot be removed') - completion = self.remove_service(service_type, service_name) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch apply mgr', - "name=num,type=CephInt,req=false " - "name=hosts,type=CephString,n=N,req=false " - "name=label,type=CephString,req=false", - 'Update the size or placement of managers') - def _apply_mgr(self, num=None, hosts=[], label=None): - placement = orchestrator.PlacementSpec( - label=label, count=num, hosts=hosts) - placement.validate() - - spec = orchestrator.ServiceSpec(placement=placement) - - completion = self.apply_mgr(spec) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch apply mon', - "name=num,type=CephInt,req=false " - "name=hosts,type=CephString,n=N,req=false " - "name=label,type=CephString,req=false", - 'Update the number of monitor instances') - def _apply_mon(self, num=None, hosts=[], label=None): - if not num and not hosts and not label: - # Improve Error message. Point to parse_host_spec examples - raise orchestrator.OrchestratorValidationError("Mons need a placement spec. (num, host, network, name(opt))") - placement = orchestrator.PlacementSpec(label=label, count=num, hosts=hosts) - placement.validate() - - spec = orchestrator.ServiceSpec(placement=placement) - - completion = self.apply_mon(spec) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch apply mds', - "name=fs_name,type=CephString " - "name=num,type=CephInt,req=false " - "name=hosts,type=CephString,n=N,req=false " - "name=label,type=CephString,req=false", - 'Update the number of MDS instances for the given fs_name') - def _apply_mds(self, fs_name, num=None, label=None, hosts=[]): - placement = orchestrator.PlacementSpec(label=label, count=num, hosts=hosts) - placement.validate() - - spec = orchestrator.ServiceSpec( - fs_name, - placement=placement) - - completion = self.apply_mds(spec) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch apply rbd-mirror', - "name=num,type=CephInt,req=false " - "name=hosts,type=CephString,n=N,req=false " - "name=label,type=CephString,req=false", - 'Update the number of rbd-mirror instances') - def _apply_rbd_mirror(self, num, label=None, hosts=[]): - spec = orchestrator.ServiceSpec( - placement=orchestrator.PlacementSpec(hosts=hosts, count=num, label=label)) - completion = self.apply_rbd_mirror(spec) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch apply rgw', - 'name=realm_name,type=CephString ' - 'name=zone_name,type=CephString ' - 'name=num,type=CephInt,req=false ' - 'name=hosts,type=CephString,n=N,req=false ' - 'name=label,type=CephString,req=false', - 'Update the number of RGW instances for the given zone') - def _apply_rgw(self, zone_name, 
realm_name, num=None, label=None, hosts=[]): - spec = orchestrator.RGWSpec( - rgw_realm=realm_name, - rgw_zone=zone_name, - placement=orchestrator.PlacementSpec(hosts=hosts, label=label, count=num)) - completion = self.apply_rgw(spec) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch apply nfs', - "name=svc_id,type=CephString " - 'name=num,type=CephInt,req=false ' - 'name=hosts,type=CephString,n=N,req=false ' - 'name=label,type=CephString,req=false', - 'Scale an NFS service') - def _apply_nfs(self, svc_id, num=None, label=None, hosts=[]): - # type: (str, Optional[int], Optional[str], List[str]) -> HandleCommandResult - spec = orchestrator.NFSServiceSpec( - svc_id, - placement=orchestrator.PlacementSpec(label=label, hosts=hosts, count=num), - ) - completion = self.apply_nfs(spec) - self._orchestrator_wait([completion]) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch apply prometheus', - 'name=num,type=CephInt,req=false ' - 'name=hosts,type=CephString,n=N,req=false ' - 'name=label,type=CephString,req=false', - 'Scale prometheus service') - def _apply_prometheus(self, num=None, label=None, hosts=[]): - # type: (Optional[int], Optional[str], List[str]) -> HandleCommandResult - spec = orchestrator.ServiceSpec( - placement=orchestrator.PlacementSpec(label=label, hosts=hosts, count=num), - ) - completion = self.apply_prometheus(spec) - self._orchestrator_wait([completion]) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'orch set backend', - "name=module_name,type=CephString,req=true", - 'Select orchestrator module backend') - def _set_backend(self, module_name): - """ - We implement a setter command instead of just having the user - modify the setting directly, so that we can validate they're setting - it to a module that really exists and is enabled. - - There isn't a mechanism for ensuring they don't *disable* the module - later, but this is better than nothing. - """ - mgr_map = self.get("mgr_map") - - if module_name is None or module_name == "": - self.set_module_option("orchestrator", None) - return HandleCommandResult() - - for module in mgr_map['available_modules']: - if module['name'] != module_name: - continue - - if not module['can_run']: - continue - - enabled = module['name'] in mgr_map['modules'] - if not enabled: - return HandleCommandResult(-errno.EINVAL, - stderr="Module '{module_name}' is not enabled. \n Run " - "`ceph mgr module enable {module_name}` " - "to enable.".format(module_name=module_name)) - - try: - is_orchestrator = self.remote(module_name, - "is_orchestrator_module") - except NameError: - is_orchestrator = False - - if not is_orchestrator: - return HandleCommandResult(-errno.EINVAL, - stderr="'{0}' is not an orchestrator module".format(module_name)) - - self.set_module_option("orchestrator", module_name) - - return HandleCommandResult() - - return HandleCommandResult(-errno.EINVAL, stderr="Module '{0}' not found".format(module_name)) - - @orchestrator._cli_write_command( - 'orch cancel', - desc='cancels ongoing operations') - def _cancel(self): - """ - ProgressReferences might get stuck. Let's unstuck them. 
- """ - self.cancel_completions() - return HandleCommandResult() - - @orchestrator._cli_read_command( - 'orch status', - desc='Report configured backend and its status') - def _status(self): - o = self._select_orchestrator() - if o is None: - raise orchestrator.NoOrchestrator() - - avail, why = self.available() - if avail is None: - # The module does not report its availability - return HandleCommandResult(stdout="Backend: {0}".format(o)) - else: - return HandleCommandResult(stdout="Backend: {0}\nAvailable: {1}{2}".format( - o, avail, - " ({0})".format(why) if not avail else "" - )) - - def self_test(self): - old_orch = self._select_orchestrator() - self._set_backend('') - assert self._select_orchestrator() is None - self._set_backend(old_orch) - - e1 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "ZeroDivisionError") - try: - orchestrator.raise_if_exception(e1) - assert False - except ZeroDivisionError as e: - assert e.args == ('hello', 'world') - - e2 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "OrchestratorError") - try: - orchestrator.raise_if_exception(e2) - assert False - except orchestrator.OrchestratorError as e: - assert e.args == ('hello', 'world') - - c = orchestrator.TrivialReadCompletion(result=True) - assert c.has_result - - @orchestrator._cli_write_command( - 'upgrade check', - 'name=image,type=CephString,req=false ' - 'name=ceph_version,type=CephString,req=false', - desc='Check service versions vs available and target containers') - def _upgrade_check(self, image=None, ceph_version=None): - completion = self.upgrade_check(image=image, version=ceph_version) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'upgrade status', - desc='Check service versions vs available and target containers') - def _upgrade_status(self): - completion = self.upgrade_status() - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - r = { - 'target_image': completion.result.target_image, - 'in_progress': completion.result.in_progress, - 'services_complete': completion.result.services_complete, - 'message': completion.result.message, - } - out = json.dumps(r, indent=4) - return HandleCommandResult(stdout=out) - - @orchestrator._cli_write_command( - 'upgrade start', - 'name=image,type=CephString,req=false ' - 'name=ceph_version,type=CephString,req=false', - desc='Initiate upgrade') - def _upgrade_start(self, image=None, ceph_version=None): - completion = self.upgrade_start(image, ceph_version) - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'upgrade pause', - desc='Pause an in-progress upgrade') - def _upgrade_pause(self): - completion = self.upgrade_pause() - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'upgrade resume', - desc='Resume paused upgrade') - def _upgrade_resume(self): - completion = self.upgrade_resume() - self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) - - @orchestrator._cli_write_command( - 'upgrade stop', - desc='Stop an in-progress upgrade') - def _upgrade_stop(self): - completion = self.upgrade_stop() 
- self._orchestrator_wait([completion]) - orchestrator.raise_if_exception(completion) - return HandleCommandResult(stdout=completion.result_str()) diff --git a/src/pybind/mgr/orchestrator_cli/tox.ini b/src/pybind/mgr/orchestrator_cli/tox.ini deleted file mode 100644 index 60a6902c5f25..000000000000 --- a/src/pybind/mgr/orchestrator_cli/tox.ini +++ /dev/null @@ -1,14 +0,0 @@ -[tox] -envlist = py3 -skipsdist = true -toxworkdir = {env:CEPH_BUILD_DIR}/orchestrator_cli -minversion = 2.5 - -[testenv] -deps = -rrequirements.txt -setenv= - UNITTEST = true - py3: PYTHONPATH = {toxinidir}/../../../../build/lib/cython_modules/lib.3 - -commands= - {envbindir}/py.test . diff --git a/src/pybind/mgr/tox.ini b/src/pybind/mgr/tox.ini index d9ea1ef5fd05..d0c2fdf597a7 100644 --- a/src/pybind/mgr/tox.ini +++ b/src/pybind/mgr/tox.ini @@ -16,8 +16,7 @@ commands = mypy --config-file=../../mypy.ini \ cephadm/module.py \ mgr_module.py \ mgr_util.py \ - orchestrator.py \ - orchestrator_cli/module.py \ + orchestrator/__init__.py \ progress/module.py \ rook/module.py \ test_orchestrator/module.py
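
The rename leaves the user-facing command set untouched: the commands defined above are still reached through the `ceph orch ...` prefixes registered by the module. A few invocations implied by the command specs in this patch, using the `test_orchestrator` backend from the QA tests and a placeholder host name `node1`:

    # point the CLI at a backend and verify it is reachable
    ceph orch set backend test_orchestrator
    ceph orch status

    # host and inventory management
    ceph orch host add node1
    ceph orch host ls
    ceph orch device ls

    # daemon overview
    ceph orch ps

Only `orch set backend` touches the `orchestrator` module option; every other command is forwarded to whichever backend module is currently selected.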