def test_crash(self):
self._selftest_plugin("crash")
- def test_orchestrator_cli(self):
- self._selftest_plugin("orchestrator_cli")
+ def test_orchestrator(self):
+ self._selftest_plugin("orchestrator")
def test_selftest_config_update(self):
def setUp(self):
super(TestOrchestratorCli, self).setUp()
- self._load_module("orchestrator_cli")
+ self._load_module("orchestrator")
self._load_module("test_orchestrator")
self._orch_cmd("set", "backend", "test_orchestrator")
+++ /dev/null
-
-"""
-ceph-mgr orchestrator interface
-
-Please see the ceph-mgr module developer's guide for more information.
-"""
-import copy
-import functools
-import logging
-import pickle
-import sys
-import time
-from collections import namedtuple
-from functools import wraps
-import uuid
-import string
-import random
-import datetime
-import copy
-import re
-import six
-import errno
-
-from ceph.deployment import inventory
-
-from mgr_module import MgrModule, PersistentStoreDict, CLICommand, HandleCommandResult
-from mgr_util import format_bytes
-
-try:
- from ceph.deployment.drive_group import DriveGroupSpec
- from typing import TypeVar, Generic, List, Optional, Union, Tuple, Iterator, Callable, Any, \
- Type, Sequence
-except ImportError:
- pass
-
-logger = logging.getLogger(__name__)
-
-
-class HostPlacementSpec(namedtuple('HostPlacementSpec', ['hostname', 'network', 'name'])):
- def __str__(self):
- res = ''
- res += self.hostname
- if self.network:
- res += ':' + self.network
- if self.name:
- res += '=' + self.name
- return res
-
-
-def parse_host_placement_specs(host, require_network=True):
- # type: (str, Optional[bool]) -> HostPlacementSpec
- """
- Split host into host, network, and (optional) daemon name parts. The network
- part can be an IP, CIDR, or ceph addrvec like '[v2:1.2.3.4:3300,v1:1.2.3.4:6789]'.
- e.g.,
- "myhost"
- "myhost=name"
- "myhost:1.2.3.4"
- "myhost:1.2.3.4=name"
- "myhost:1.2.3.0/24"
- "myhost:1.2.3.0/24=name"
- "myhost:[v2:1.2.3.4:3000]=name"
- "myhost:[v2:1.2.3.4:3000,v1:1.2.3.4:6789]=name"
- """
- # Matches from start to : or = or until end of string
- host_re = r'^(.*?)(:|=|$)'
- # Matches from : to = or until end of string
- ip_re = r':(.*?)(=|$)'
- # Matches from = to end of string
- name_re = r'=(.*?)$'
-
- # assign defaults
- host_spec = HostPlacementSpec('', '', '')
-
- match_host = re.search(host_re, host)
- if match_host:
- host_spec = host_spec._replace(hostname=match_host.group(1))
-
- name_match = re.search(name_re, host)
- if name_match:
- host_spec = host_spec._replace(name=name_match.group(1))
-
- ip_match = re.search(ip_re, host)
- if ip_match:
- host_spec = host_spec._replace(network=ip_match.group(1))
-
- if not require_network:
- return host_spec
-
- from ipaddress import ip_network, ip_address
- networks = list() # type: List[str]
- network = host_spec.network
- # in case we have [v2:1.2.3.4:3000,v1:1.2.3.4:6478]
- if ',' in network:
- networks = [x for x in network.split(',')]
- else:
- networks.append(network)
- for network in networks:
- # only if we have versioned network configs
- if network.startswith('v') or network.startswith('[v'):
- network = network.split(':')[1]
- try:
- # if subnets are defined, also verify the validity
- if '/' in network:
- ip_network(six.text_type(network))
- else:
- ip_address(six.text_type(network))
- except ValueError as e:
- # logging?
- raise e
-
- return host_spec
-
-
-class OrchestratorError(Exception):
- """
- General orchestrator specific error.
-
- Used for deployment, configuration or user errors.
-
- It's not intended for programming errors or orchestrator internal errors.
- """
-
-
-class NoOrchestrator(OrchestratorError):
- """
- No orchestrator in configured.
- """
- def __init__(self, msg="No orchestrator configured (try `ceph orch set backend`)"):
- super(NoOrchestrator, self).__init__(msg)
-
-
-class OrchestratorValidationError(OrchestratorError):
- """
- Raised when an orchestrator doesn't support a specific feature.
- """
-
-
-def handle_exception(prefix, cmd_args, desc, perm, func):
- @wraps(func)
- def wrapper(*args, **kwargs):
- try:
- return func(*args, **kwargs)
- except (OrchestratorError, ImportError) as e:
- # Do not print Traceback for expected errors.
- return HandleCommandResult(-errno.ENOENT, stderr=str(e))
- except NotImplementedError:
- msg = 'This Orchestrator does not support `{}`'.format(prefix)
- return HandleCommandResult(-errno.ENOENT, stderr=msg)
-
- return CLICommand(prefix, cmd_args, desc, perm)(wrapper)
-
-
-def _cli_command(perm):
- def inner_cli_command(prefix, cmd_args="", desc=""):
- return lambda func: handle_exception(prefix, cmd_args, desc, perm, func)
- return inner_cli_command
-
-
-_cli_read_command = _cli_command('r')
-_cli_write_command = _cli_command('rw')
-
-
-def _no_result():
- return object()
-
-
-class _Promise(object):
- """
- A completion may need multiple promises to be fulfilled. `_Promise` is one
- step.
-
- Typically ``Orchestrator`` implementations inherit from this class to
- build their own way of finishing a step to fulfil a future.
-
- They are not exposed in the orchestrator interface and can be seen as a
- helper to build orchestrator modules.
- """
- INITIALIZED = 1 # We have a parent completion and a next completion
- RUNNING = 2
- FINISHED = 3 # we have a final result
-
- NO_RESULT = _no_result() # type: None
- ASYNC_RESULT = object()
-
- def __init__(self,
- _first_promise=None, # type: Optional["_Promise"]
- value=NO_RESULT, # type: Optional[Any]
- on_complete=None, # type: Optional[Callable]
- name=None, # type: Optional[str]
- ):
- self._on_complete_ = on_complete
- self._name = name
- self._next_promise = None # type: Optional[_Promise]
-
- self._state = self.INITIALIZED
- self._exception = None # type: Optional[Exception]
-
- # Value of this _Promise. may be an intermediate result.
- self._value = value
-
- # _Promise is not a continuation monad, as `_result` is of type
- # T instead of (T -> r) -> r. Therefore we need to store the first promise here.
- self._first_promise = _first_promise or self # type: '_Promise'
-
- @property
- def _exception(self):
- # type: () -> Optional[Exception]
- return getattr(self, '_exception_', None)
-
- @_exception.setter
- def _exception(self, e):
- self._exception_ = e
- self._serialized_exception_ = pickle.dumps(e) if e is not None else None
-
- @property
- def _serialized_exception(self):
- # type: () -> Optional[bytes]
- return getattr(self, '_serialized_exception_', None)
-
-
-
- @property
- def _on_complete(self):
- # type: () -> Optional[Callable]
- # https://github.com/python/mypy/issues/4125
- return self._on_complete_
-
- @_on_complete.setter
- def _on_complete(self, val):
- # type: (Optional[Callable]) -> None
- self._on_complete_ = val
-
-
- def __repr__(self):
- name = self._name or getattr(self._on_complete, '__name__', '??') if self._on_complete else 'None'
- val = repr(self._value) if self._value is not self.NO_RESULT else 'NA'
- return '{}(_s={}, val={}, _on_c={}, id={}, name={}, pr={}, _next={})'.format(
- self.__class__, self._state, val, self._on_complete, id(self), name, getattr(next, '_progress_reference', 'NA'), repr(self._next_promise)
- )
-
- def pretty_print_1(self):
- if self._name:
- name = self._name
- elif self._on_complete is None:
- name = 'lambda x: x'
- elif hasattr(self._on_complete, '__name__'):
- name = getattr(self._on_complete, '__name__')
- else:
- name = self._on_complete.__class__.__name__
- val = repr(self._value) if self._value not in (self.NO_RESULT, self.ASYNC_RESULT) else '...'
- prefix = {
- self.INITIALIZED: ' ',
- self.RUNNING: ' >>>',
- self.FINISHED: '(done)'
- }[self._state]
- return '{} {}({}),'.format(prefix, name, val)
-
- def then(self, on_complete):
- # type: (Any, Callable) -> Any
- """
- Call ``on_complete`` as soon as this promise is finalized.
- """
- assert self._state in (self.INITIALIZED, self.RUNNING)
- if self._on_complete is not None:
- assert self._next_promise is None
- self._set_next_promise(self.__class__(
- _first_promise=self._first_promise,
- on_complete=on_complete
- ))
- return self._next_promise
-
- else:
- self._on_complete = on_complete
- self._set_next_promise(self.__class__(_first_promise=self._first_promise))
- return self._next_promise
-
- def _set_next_promise(self, next):
- # type: (_Promise) -> None
- assert self is not next
- assert self._state in (self.INITIALIZED, self.RUNNING)
-
- self._next_promise = next
- assert self._next_promise is not None
- for p in iter(self._next_promise):
- p._first_promise = self._first_promise
-
- def _finalize(self, value=NO_RESULT):
- """
- Sets this promise to complete.
-
- Orchestrators may choose to use this helper function.
-
- :param value: new value.
- """
- if self._state not in (self.INITIALIZED, self.RUNNING):
- raise ValueError('finalize: {} already finished. {}'.format(repr(self), value))
-
- self._state = self.RUNNING
-
- if value is not self.NO_RESULT:
- self._value = value
- assert self._value is not self.NO_RESULT, repr(self)
-
- if self._on_complete:
- try:
- next_result = self._on_complete(self._value)
- except Exception as e:
- self.fail(e)
- return
- else:
- next_result = self._value
-
- if isinstance(next_result, _Promise):
- # hack: _Promise is not a continuation monad.
- next_result = next_result._first_promise # type: ignore
- assert next_result not in self, repr(self._first_promise) + repr(next_result)
- assert self not in next_result
- next_result._append_promise(self._next_promise)
- self._set_next_promise(next_result)
- assert self._next_promise
- if self._next_promise._value is self.NO_RESULT:
- self._next_promise._value = self._value
- self.propagate_to_next()
- elif next_result is not self.ASYNC_RESULT:
- # simple map. simply forward
- if self._next_promise:
- self._next_promise._value = next_result
- else:
- # Hack: next_result is of type U, _value is of type T
- self._value = next_result # type: ignore
- self.propagate_to_next()
- else:
- # asynchronous promise
- pass
-
-
- def propagate_to_next(self):
- self._state = self.FINISHED
- logger.debug('finalized {}'.format(repr(self)))
- if self._next_promise:
- self._next_promise._finalize()
-
- def fail(self, e):
- # type: (Exception) -> None
- """
- Sets the whole completion to be faild with this exception and end the
- evaluation.
- """
- if self._state == self.FINISHED:
- raise ValueError(
- 'Invalid State: called fail, but Completion is already finished: {}'.format(str(e)))
- assert self._state in (self.INITIALIZED, self.RUNNING)
- logger.exception('_Promise failed')
- self._exception = e
- self._value = 'exception'
- if self._next_promise:
- self._next_promise.fail(e)
- self._state = self.FINISHED
-
- def __contains__(self, item):
- return any(item is p for p in iter(self._first_promise))
-
- def __iter__(self):
- yield self
- elem = self._next_promise
- while elem is not None:
- yield elem
- elem = elem._next_promise
-
- def _append_promise(self, other):
- if other is not None:
- assert self not in other
- assert other not in self
- self._last_promise()._set_next_promise(other)
-
- def _last_promise(self):
- # type: () -> _Promise
- return list(iter(self))[-1]
-
-
-class ProgressReference(object):
- def __init__(self,
- message, # type: str
- mgr,
- completion=None # type: Optional[Callable[[], Completion]]
- ):
- """
- ProgressReference can be used within Completions::
-
- +---------------+ +---------------------------------+
- | | then | |
- | My Completion | +--> | on_complete=ProgressReference() |
- | | | |
- +---------------+ +---------------------------------+
-
- See :func:`Completion.with_progress` for an easy way to create
- a progress reference
-
- """
- super(ProgressReference, self).__init__()
- self.progress_id = str(uuid.uuid4())
- self.message = message
- self.mgr = mgr
-
- #: The completion can already have a result, before the write
- #: operation is effective. progress == 1 means, the services are
- #: created / removed.
- self.completion = completion # type: Optional[Callable[[], Completion]]
-
- #: if a orchestrator module can provide a more detailed
- #: progress information, it needs to also call ``progress.update()``.
- self.progress = 0.0
-
- self._completion_has_result = False
- self.mgr.all_progress_references.append(self)
-
- def __str__(self):
- """
- ``__str__()`` is used for determining the message for progress events.
- """
- return self.message or super(ProgressReference, self).__str__()
-
- def __call__(self, arg):
- self._completion_has_result = True
- self.progress = 1.0
- return arg
-
- @property
- def progress(self):
- return self._progress
-
- @progress.setter
- def progress(self, progress):
- assert progress <= 1.0
- self._progress = progress
- try:
- if self.effective:
- self.mgr.remote("progress", "complete", self.progress_id)
- self.mgr.all_progress_references = [p for p in self.mgr.all_progress_references if p is not self]
- else:
- self.mgr.remote("progress", "update", self.progress_id, self.message,
- progress,
- [("origin", "orchestrator")])
- except ImportError:
- # If the progress module is disabled that's fine,
- # they just won't see the output.
- pass
-
- @property
- def effective(self):
- return self.progress == 1 and self._completion_has_result
-
- def update(self):
- def progress_run(progress):
- self.progress = progress
- if self.completion:
- c = self.completion().then(progress_run)
- self.mgr.process([c._first_promise])
- else:
- self.progress = 1
-
- def fail(self):
- self._completion_has_result = True
- self.progress = 1
-
-
-class Completion(_Promise):
- """
- Combines multiple promises into one overall operation.
-
- Completions are composable by being able to
- call one completion from another completion. I.e. making them re-usable
- using Promises E.g.::
-
- >>> return Orchestrator().get_hosts().then(self._create_osd)
-
- where ``get_hosts`` returns a Completion of list of hosts and
- ``_create_osd`` takes a list of hosts.
-
- The concept behind this is to store the computation steps
- explicit and then explicitly evaluate the chain:
-
- >>> p = Completion(on_complete=lambda x: x*2).then(on_complete=lambda x: str(x))
- ... p.finalize(2)
- ... assert p.result = "4"
-
- or graphically::
-
- +---------------+ +-----------------+
- | | then | |
- | lambda x: x*x | +--> | lambda x: str(x)|
- | | | |
- +---------------+ +-----------------+
-
- """
- def __init__(self,
- _first_promise=None, # type: Optional["Completion"]
- value=_Promise.NO_RESULT, # type: Any
- on_complete=None, # type: Optional[Callable]
- name=None, # type: Optional[str]
- ):
- super(Completion, self).__init__(_first_promise, value, on_complete, name)
-
- @property
- def _progress_reference(self):
- # type: () -> Optional[ProgressReference]
- if hasattr(self._on_complete, 'progress_id'):
- return self._on_complete # type: ignore
- return None
-
- @property
- def progress_reference(self):
- # type: () -> Optional[ProgressReference]
- """
- ProgressReference. Marks this completion
- as a write completeion.
- """
-
- references = [c._progress_reference for c in iter(self) if c._progress_reference is not None]
- if references:
- assert len(references) == 1
- return references[0]
- return None
-
- @classmethod
- def with_progress(cls, # type: Any
- message, # type: str
- mgr,
- _first_promise=None, # type: Optional["Completion"]
- value=_Promise.NO_RESULT, # type: Any
- on_complete=None, # type: Optional[Callable]
- calc_percent=None # type: Optional[Callable[[], Any]]
- ):
- # type: (...) -> Any
-
- c = cls(
- _first_promise=_first_promise,
- value=value,
- on_complete=on_complete
- ).add_progress(message, mgr, calc_percent)
-
- return c._first_promise
-
- def add_progress(self,
- message, # type: str
- mgr,
- calc_percent=None # type: Optional[Callable[[], Any]]
- ):
- return self.then(
- on_complete=ProgressReference(
- message=message,
- mgr=mgr,
- completion=calc_percent
- )
- )
-
- def fail(self, e):
- super(Completion, self).fail(e)
- if self._progress_reference:
- self._progress_reference.fail()
-
- def finalize(self, result=_Promise.NO_RESULT):
- if self._first_promise._state == self.INITIALIZED:
- self._first_promise._finalize(result)
-
- @property
- def result(self):
- """
- The result of the operation that we were waited
- for. Only valid after calling Orchestrator.process() on this
- completion.
- """
- last = self._last_promise()
- assert last._state == _Promise.FINISHED
- return last._value
-
- def result_str(self):
- """Force a string."""
- if self.result is None:
- return ''
- if isinstance(self.result, list):
- return '\n'.join(str(x) for x in self.result)
- return str(self.result)
-
- @property
- def exception(self):
- # type: () -> Optional[Exception]
- return self._last_promise()._exception
-
- @property
- def serialized_exception(self):
- # type: () -> Optional[bytes]
- return self._last_promise()._serialized_exception
-
- @property
- def has_result(self):
- # type: () -> bool
- """
- Has the operation already a result?
-
- For Write operations, it can already have a
- result, if the orchestrator's configuration is
- persistently written. Typically this would
- indicate that an update had been written to
- a manifest, but that the update had not
- necessarily been pushed out to the cluster.
-
- :return:
- """
- return self._last_promise()._state == _Promise.FINISHED
-
- @property
- def is_errored(self):
- # type: () -> bool
- """
- Has the completion failed. Default implementation looks for
- self.exception. Can be overwritten.
- """
- return self.exception is not None
-
- @property
- def needs_result(self):
- # type: () -> bool
- """
- Could the external operation be deemed as complete,
- or should we wait?
- We must wait for a read operation only if it is not complete.
- """
- return not self.is_errored and not self.has_result
-
- @property
- def is_finished(self):
- # type: () -> bool
- """
- Could the external operation be deemed as complete,
- or should we wait?
- We must wait for a read operation only if it is not complete.
- """
- return self.is_errored or (self.has_result)
-
- def pretty_print(self):
-
- reprs = '\n'.join(p.pretty_print_1() for p in iter(self._first_promise))
- return """<{}>[\n{}\n]""".format(self.__class__.__name__, reprs)
-
-
-def pretty_print(completions):
- # type: (Sequence[Completion]) -> str
- return ', '.join(c.pretty_print() for c in completions)
-
-
-def raise_if_exception(c):
- # type: (Completion) -> None
- """
- :raises OrchestratorError: Some user error or a config error.
- :raises Exception: Some internal error
- """
- if c.serialized_exception is not None:
- try:
- e = pickle.loads(c.serialized_exception)
- except (KeyError, AttributeError):
- raise Exception('{}: {}'.format(type(c.exception), c.exception))
- raise e
-
-
-class TrivialReadCompletion(Completion):
- """
- This is the trivial completion simply wrapping a result.
- """
- def __init__(self, result):
- super(TrivialReadCompletion, self).__init__()
- if result:
- self.finalize(result)
-
-
-def _hide_in_features(f):
- f._hide_in_features = True
- return f
-
-
-class Orchestrator(object):
- """
- Calls in this class may do long running remote operations, with time
- periods ranging from network latencies to package install latencies and large
- internet downloads. For that reason, all are asynchronous, and return
- ``Completion`` objects.
-
- Methods should only return the completion and not directly execute
- anything, like network calls. Otherwise the purpose of
- those completions is defeated.
-
- Implementations are not required to start work on an operation until
- the caller waits on the relevant Completion objects. Callers making
- multiple updates should not wait on Completions until they're done
- sending operations: this enables implementations to batch up a series
- of updates when wait() is called on a set of Completion objects.
-
- Implementations are encouraged to keep reasonably fresh caches of
- the status of the system: it is better to serve a stale-but-recent
- result read of e.g. device inventory than it is to keep the caller waiting
- while you scan hosts every time.
- """
-
- @_hide_in_features
- def is_orchestrator_module(self):
- """
- Enable other modules to interrogate this module to discover
- whether it's usable as an orchestrator module.
-
- Subclasses do not need to override this.
- """
- return True
-
- @_hide_in_features
- def available(self):
- # type: () -> Tuple[bool, str]
- """
- Report whether we can talk to the orchestrator. This is the
- place to give the user a meaningful message if the orchestrator
- isn't running or can't be contacted.
-
- This method may be called frequently (e.g. every page load
- to conditionally display a warning banner), so make sure it's
- not too expensive. It's okay to give a slightly stale status
- (e.g. based on a periodic background ping of the orchestrator)
- if that's necessary to make this method fast.
-
- .. note::
- `True` doesn't mean that the desired functionality
- is actually available in the orchestrator. I.e. this
- won't work as expected::
-
- >>> if OrchestratorClientMixin().available()[0]: # wrong.
- ... OrchestratorClientMixin().get_hosts()
-
- :return: two-tuple of boolean, string
- """
- raise NotImplementedError()
-
- @_hide_in_features
- def process(self, completions):
- # type: (List[Completion]) -> None
- """
- Given a list of Completion instances, process any which are
- incomplete.
-
- Callers should inspect the detail of each completion to identify
- partial completion/progress information, and present that information
- to the user.
-
- This method should not block, as this would make it slow to query
- a status, while other long running operations are in progress.
- """
- raise NotImplementedError()
-
- @_hide_in_features
- def get_feature_set(self):
- """Describes which methods this orchestrator implements
-
- .. note::
- `True` doesn't mean that the desired functionality
- is actually possible in the orchestrator. I.e. this
- won't work as expected::
-
- >>> api = OrchestratorClientMixin()
- ... if api.get_feature_set()['get_hosts']['available']: # wrong.
- ... api.get_hosts()
-
- It's better to ask for forgiveness instead::
-
- >>> try:
- ... OrchestratorClientMixin().get_hosts()
- ... except (OrchestratorError, NotImplementedError):
- ... ...
-
- :returns: Dict of API method names to ``{'available': True or False}``
- """
- module = self.__class__
- features = {a: {'available': getattr(Orchestrator, a, None) != getattr(module, a)}
- for a in Orchestrator.__dict__
- if not a.startswith('_') and not getattr(getattr(Orchestrator, a), '_hide_in_features', False)
- }
- return features
-
- @_hide_in_features
- def cancel_completions(self):
- # type: () -> None
- """
- Cancels ongoing completions. Unstuck the mgr.
- """
- raise NotImplementedError()
-
- def add_host(self, HostSpec):
- # type: (HostSpec) -> Completion
- """
- Add a host to the orchestrator inventory.
-
- :param host: hostname
- """
- raise NotImplementedError()
-
- def remove_host(self, host):
- # type: (str) -> Completion
- """
- Remove a host from the orchestrator inventory.
-
- :param host: hostname
- """
- raise NotImplementedError()
-
- def update_host_addr(self, host, addr):
- # type: (str, str) -> Completion
- """
- Update a host's address
-
- :param host: hostname
- :param addr: address (dns name or IP)
- """
- raise NotImplementedError()
-
- def get_hosts(self):
- # type: () -> Completion
- """
- Report the hosts in the cluster.
-
- The default implementation is extra slow.
-
- :return: list of InventoryNodes
- """
- return self.get_inventory()
-
- def add_host_label(self, host, label):
- # type: (str, str) -> Completion
- """
- Add a host label
- """
- raise NotImplementedError()
-
- def remove_host_label(self, host, label):
- # type: (str, str) -> Completion
- """
- Remove a host label
- """
- raise NotImplementedError()
-
- def get_inventory(self, node_filter=None, refresh=False):
- # type: (Optional[InventoryFilter], bool) -> Completion
- """
- Returns something that was created by `ceph-volume inventory`.
-
- :return: list of InventoryNode
- """
- raise NotImplementedError()
-
- def describe_service(self, service_type=None, service_id=None, node_name=None, refresh=False):
- # type: (Optional[str], Optional[str], Optional[str], bool) -> Completion
- """
- Describe a service (of any kind) that is already configured in
- the orchestrator. For example, when viewing an OSD in the dashboard
- we might like to also display information about the orchestrator's
- view of the service (like the kubernetes pod ID).
-
- When viewing a CephFS filesystem in the dashboard, we would use this
- to display the pods being currently run for MDS daemons.
-
- :return: list of ServiceDescription objects.
- """
- raise NotImplementedError()
-
- def list_daemons(self, daemon_type=None, daemon_id=None, host=None, refresh=False):
- # type: (Optional[str], Optional[str], Optional[str], bool) -> Completion
- """
- Describe a daemon (of any kind) that is already configured in
- the orchestrator.
-
- :return: list of DaemonDescription objects.
- """
- raise NotImplementedError()
-
- def remove_daemons(self, names, force):
- # type: (List[str], bool) -> Completion
- """
- Remove specific daemon(s).
-
- :return: None
- """
- raise NotImplementedError()
-
- def remove_service(self, service_type, service_name=None):
- # type: (str, Optional[str]) -> Completion
- """
- Remove a service (a collection of daemons).
-
- :return: None
- """
- raise NotImplementedError()
-
- def service_action(self, action, service_type, service_name):
- # type: (str, str, str) -> Completion
- """
- Perform an action (start/stop/reload) on a service (i.e., all daemons
- providing the logical service).
-
- :param action: one of "start", "stop", "restart", "redeploy", "reconfig"
- :param service_type: e.g. "mds", "rgw", ...
- :param service_name: name of logical service ("cephfs", "us-east", ...)
- :rtype: Completion
- """
- #assert action in ["start", "stop", "reload, "restart", "redeploy"]
- raise NotImplementedError()
-
- def daemon_action(self, action, daemon_type, daemon_id):
- # type: (str, str, str) -> Completion
- """
- Perform an action (start/stop/reload) on a daemon.
-
- :param action: one of "start", "stop", "restart", "redeploy", "reconfig"
- :param name: name of daemon
- :rtype: Completion
- """
- #assert action in ["start", "stop", "reload, "restart", "redeploy"]
- raise NotImplementedError()
-
- def create_osds(self, drive_groups):
- # type: (List[DriveGroupSpec]) -> Completion
- """
- Create one or more OSDs within a single Drive Group.
-
- The principal argument here is the drive_group member
- of OsdSpec: other fields are advisory/extensible for any
- finer-grained OSD feature enablement (choice of backing store,
- compression/encryption, etc).
-
- :param drive_groups: a list of DriveGroupSpec
- :param all_hosts: TODO, this is required because the orchestrator methods are not composable
- Probably this parameter can be easily removed because each orchestrator can use
- the "get_inventory" method and the "drive_group.host_pattern" attribute
- to obtain the list of hosts where to apply the operation
- """
- raise NotImplementedError()
-
- def blink_device_light(self, ident_fault, on, locations):
- # type: (str, bool, List[DeviceLightLoc]) -> Completion
- """
- Instructs the orchestrator to enable or disable either the ident or the fault LED.
-
- :param ident_fault: either ``"ident"`` or ``"fault"``
- :param on: ``True`` = on.
- :param locations: See :class:`orchestrator.DeviceLightLoc`
- """
- raise NotImplementedError()
-
- def add_mon(self, spec):
- # type: (ServiceSpec) -> Completion
- """Create mon daemon(s)"""
- raise NotImplementedError()
-
- def apply_mon(self, spec):
- # type: (ServiceSpec) -> Completion
- """Update mon cluster"""
- raise NotImplementedError()
-
- def add_mgr(self, spec):
- # type: (ServiceSpec) -> Completion
- """Create mgr daemon(s)"""
- raise NotImplementedError()
-
- def apply_mgr(self, spec):
- # type: (ServiceSpec) -> Completion
- """Update mgr cluster"""
- raise NotImplementedError()
-
- def add_mds(self, spec):
- # type: (ServiceSpec) -> Completion
- """Create MDS daemon(s)"""
- raise NotImplementedError()
-
- def apply_mds(self, spec):
- # type: (ServiceSpec) -> Completion
- """Update MDS cluster"""
- raise NotImplementedError()
-
- def add_rbd_mirror(self, spec):
- # type: (ServiceSpec) -> Completion
- """Create rbd-mirror daemon(s)"""
- raise NotImplementedError()
-
- def apply_rbd_mirror(self, spec):
- # type: (ServiceSpec) -> Completion
- """Update rbd-mirror cluster"""
- raise NotImplementedError()
-
- def add_nfs(self, spec):
- # type: (NFSServiceSpec) -> Completion
- """Create NFS daemon(s)"""
- raise NotImplementedError()
-
- def apply_nfs(self, spec):
- # type: (NFSServiceSpec) -> Completion
- """Update NFS cluster"""
- raise NotImplementedError()
-
- def add_rgw(self, spec):
- # type: (RGWSpec) -> Completion
- """Create RGW daemon(s)"""
- raise NotImplementedError()
-
- def apply_rgw(self, spec):
- # type: (RGWSpec) -> Completion
- """Update RGW cluster"""
- raise NotImplementedError()
-
- def add_prometheus(self, spec):
- # type: (ServiceSpec) -> Completion
- """Create new prometheus daemon"""
- raise NotImplementedError()
-
- def apply_prometheus(self, spec):
- # type: (ServiceSpec) -> Completion
- """Update prometheus cluster"""
- raise NotImplementedError()
-
- def upgrade_check(self, image, version):
- # type: (Optional[str], Optional[str]) -> Completion
- raise NotImplementedError()
-
- def upgrade_start(self, image, version):
- # type: (Optional[str], Optional[str]) -> Completion
- raise NotImplementedError()
-
- def upgrade_pause(self):
- # type: () -> Completion
- raise NotImplementedError()
-
- def upgrade_resume(self):
- # type: () -> Completion
- raise NotImplementedError()
-
- def upgrade_stop(self):
- # type: () -> Completion
- raise NotImplementedError()
-
- def upgrade_status(self):
- # type: () -> Completion
- """
- If an upgrade is currently underway, report on where
- we are in the process, or if some error has occurred.
-
- :return: UpgradeStatusSpec instance
- """
- raise NotImplementedError()
-
- @_hide_in_features
- def upgrade_available(self):
- # type: () -> Completion
- """
- Report on what versions are available to upgrade to
-
- :return: List of strings
- """
- raise NotImplementedError()
-
-class HostSpec(object):
- def __init__(self, hostname, addr=None, labels=None):
- # type: (str, Optional[str], Optional[List[str]]) -> None
- self.hostname = hostname # the hostname on the host
- self.addr = addr or hostname # DNS name or IP address to reach it
- self.labels = labels or [] # initial label(s), if any
-
-class UpgradeStatusSpec(object):
- # Orchestrator's report on what's going on with any ongoing upgrade
- def __init__(self):
- self.in_progress = False # Is an upgrade underway?
- self.target_image = None
- self.services_complete = [] # Which daemon types are fully updated?
- self.message = "" # Freeform description
-
-
-class PlacementSpec(object):
- """
- For APIs that need to specify a node subset
- """
- def __init__(self, label=None, hosts=None, count=None):
- # type: (Optional[str], Optional[List], Optional[int]) -> None
- self.label = label
- self.hosts = [] # type: List[HostPlacementSpec]
- if hosts:
- if all([isinstance(host, HostPlacementSpec) for host in hosts]):
- self.hosts = hosts
- else:
- self.hosts = [parse_host_placement_specs(x, require_network=False) for x in hosts if x]
-
-
- self.count = count # type: Optional[int]
-
- def set_hosts(self, hosts):
- # To backpopulate the .hosts attribute when using labels or count
- # in the orchestrator backend.
- self.hosts = hosts
-
- @classmethod
- def from_dict(cls, data):
- _cls = cls(**data)
- _cls.validate()
- return _cls
-
- def validate(self):
- if self.hosts and self.label:
- # TODO: a less generic Exception
- raise Exception('Node and label are mutually exclusive')
- if self.count is not None and self.count <= 0:
- raise Exception("num/count must be > 1")
-
-
-def handle_type_error(method):
- @wraps(method)
- def inner(cls, *args, **kwargs):
- try:
- return method(cls, *args, **kwargs)
- except TypeError as e:
- error_msg = '{}: {}'.format(cls.__name__, e)
- raise OrchestratorValidationError(error_msg)
- return inner
-
-
-class DaemonDescription(object):
- """
- For responding to queries about the status of a particular daemon,
- stateful or stateless.
-
- This is not about health or performance monitoring of daemons: it's
- about letting the orchestrator tell Ceph whether and where a
- daemon is scheduled in the cluster. When an orchestrator tells
- Ceph "it's running on node123", that's not a promise that the process
- is literally up this second, it's a description of where the orchestrator
- has decided the daemon should run.
- """
-
- def __init__(self,
- daemon_type=None,
- daemon_id=None,
- nodename=None,
- container_id=None,
- container_image_id=None,
- container_image_name=None,
- version=None,
- status=None,
- status_desc=None):
- # Node is at the same granularity as InventoryNode
- self.nodename = nodename
-
- # Not everyone runs in containers, but enough people do to
- # justify having the container_id (runtime id) and container_image
- # (image name)
- self.container_id = container_id # runtime id
- self.container_image_id = container_image_id # image hash
- self.container_image_name = container_image_name # image friendly name
-
- # The type of service (osd, mon, mgr, etc.)
- self.daemon_type = daemon_type
-
- # The orchestrator will have picked some names for daemons,
- # typically either based on hostnames or on pod names.
- # This is the <foo> in mds.<foo>, the ID that will appear
- # in the FSMap/ServiceMap.
- self.daemon_id = daemon_id
-
- # Service version that was deployed
- self.version = version
-
- # Service status: -1 error, 0 stopped, 1 running
- self.status = status
-
- # Service status description when status == -1.
- self.status_desc = status_desc
-
- # datetime when this info was last refreshed
- self.last_refresh = None # type: Optional[datetime.datetime]
-
- def name(self):
- return '%s.%s' % (self.daemon_type, self.daemon_id)
-
- def __repr__(self):
- return "<DaemonDescription>({type}.{id})".format(type=self.daemon_type,
- id=self.daemon_id)
-
- def to_json(self):
- out = {
- 'nodename': self.nodename,
- 'container_id': self.container_id,
- 'container_image_id': self.container_image_id,
- 'container_image_name': self.container_image_name,
- 'daemon_id': self.daemon_id,
- 'daemon_type': self.daemon_type,
- 'version': self.version,
- 'status': self.status,
- 'status_desc': self.status_desc,
- }
- return {k: v for (k, v) in out.items() if v is not None}
-
- @classmethod
- @handle_type_error
- def from_json(cls, data):
- return cls(**data)
-
-class ServiceDescription(object):
- """
- For responding to queries about the status of a particular service,
- stateful or stateless.
-
- This is not about health or performance monitoring of services: it's
- about letting the orchestrator tell Ceph whether and where a
- service is scheduled in the cluster. When an orchestrator tells
- Ceph "it's running on node123", that's not a promise that the process
- is literally up this second, it's a description of where the orchestrator
- has decided the service should run.
- """
-
- def __init__(self, nodename=None,
- container_id=None, container_image_id=None,
- container_image_name=None,
- service=None, service_instance=None,
- service_type=None, version=None, rados_config_location=None,
- service_url=None, status=None, status_desc=None):
- # Node is at the same granularity as InventoryNode
- self.nodename = nodename # type: Optional[str]
-
- # Not everyone runs in containers, but enough people do to
- # justify having the container_id (runtime id) and container_image
- # (image name)
- self.container_id = container_id # runtime id
- self.container_image_id = container_image_id # image hash
- self.container_image_name = container_image_name # image friendly name
-
- # Some services can be deployed in groups. For example, mds's can
- # have an active and standby daemons, and nfs-ganesha can run daemons
- # in parallel. This tag refers to a group of daemons as a whole.
- #
- # For instance, a cluster of mds' all service the same fs, and they
- # will all have the same service value (which may be the
- # Filesystem name in the FSMap).
- #
- # Single-instance services should leave this set to None
- self.service = service
-
- # The orchestrator will have picked some names for daemons,
- # typically either based on hostnames or on pod names.
- # This is the <foo> in mds.<foo>, the ID that will appear
- # in the FSMap/ServiceMap.
- self.service_instance = service_instance
-
- # The type of service (osd, mon, mgr, etc.)
- self.service_type = service_type
-
- # Service version that was deployed
- self.version = version
-
- # Location of the service configuration when stored in rados
- # object. Format: "rados://<pool>/[<namespace/>]<object>"
- self.rados_config_location = rados_config_location
-
- # If the service exposes REST-like API, this attribute should hold
- # the URL.
- self.service_url = service_url
-
- # Service status: -1 error, 0 stopped, 1 running
- self.status = status
-
- # Service status description when status == -1.
- self.status_desc = status_desc
-
- # datetime when this info was last refreshed
- self.last_refresh = None # type: Optional[datetime.datetime]
-
- def name(self):
- if self.service_instance:
- return '%s.%s' % (self.service_type, self.service_instance)
- return self.service_type
-
- def __repr__(self):
- return "<ServiceDescription>({n_name}:{s_type})".format(n_name=self.nodename,
- s_type=self.name())
-
- def to_json(self):
- out = {
- 'nodename': self.nodename,
- 'container_id': self.container_id,
- 'service': self.service,
- 'service_instance': self.service_instance,
- 'service_type': self.service_type,
- 'version': self.version,
- 'rados_config_location': self.rados_config_location,
- 'service_url': self.service_url,
- 'status': self.status,
- 'status_desc': self.status_desc,
- }
- return {k: v for (k, v) in out.items() if v is not None}
-
- @classmethod
- @handle_type_error
- def from_json(cls, data):
- return cls(**data)
-
-
-class ServiceSpec(object):
- """
- Details of service creation.
-
- Request to the orchestrator for a cluster of daemons
- such as MDS, RGW, iscsi gateway, MONs, MGRs, Prometheus
-
- This structure is supposed to be enough information to
- start the services.
-
- """
-
- def __init__(self, name=None, placement=None):
- # type: (Optional[str], Optional[PlacementSpec]) -> None
- self.placement = PlacementSpec() if placement is None else placement # type: PlacementSpec
-
- #: Give this set of stateless services a name: typically it would
- #: be the name of a CephFS filesystem, RGW zone, etc. Must be unique
- #: within one ceph cluster. Note: Not all clusters have a name
- self.name = name # type: Optional[str]
-
- if self.placement is not None and self.placement.count is not None:
- #: Count of service instances. Deprecated.
- self.count = self.placement.count # type: int
- else:
- self.count = 1
-
- def validate_add(self):
- if not self.name:
- raise OrchestratorValidationError('Cannot add Service: Name required')
-
-
-class NFSServiceSpec(ServiceSpec):
- def __init__(self, name, pool=None, namespace=None, placement=None):
- super(NFSServiceSpec, self).__init__(name, placement)
-
- #: RADOS pool where NFS client recovery data is stored.
- self.pool = pool
-
- #: RADOS namespace where NFS client recovery data is stored in the pool.
- self.namespace = namespace
-
- def validate_add(self):
- super(NFSServiceSpec, self).validate_add()
-
- if not self.pool:
- raise OrchestratorValidationError('Cannot add NFS: No Pool specified')
-
-
-class RGWSpec(ServiceSpec):
- """
- Settings to configure a (multisite) Ceph RGW
-
- """
- def __init__(self,
- rgw_realm, # type: str
- rgw_zone, # type: str
- placement=None,
- hosts=None, # type: Optional[List[str]]
- rgw_multisite=None, # type: Optional[bool]
- rgw_zonemaster=None, # type: Optional[bool]
- rgw_zonesecondary=None, # type: Optional[bool]
- rgw_multisite_proto=None, # type: Optional[str]
- rgw_frontend_port=None, # type: Optional[int]
- rgw_zonegroup=None, # type: Optional[str]
- rgw_zone_user=None, # type: Optional[str]
- system_access_key=None, # type: Optional[str]
- system_secret_key=None, # type: Optional[str]
- count=None # type: Optional[int]
- ):
- # Regarding default values. Ansible has a `set_rgwspec_defaults` that sets
- # default values that makes sense for Ansible. Rook has default values implemented
- # in Rook itself. Thus we don't set any defaults here in this class.
-
- super(RGWSpec, self).__init__(name=rgw_realm + '.' + rgw_zone,
- placement=placement)
-
- #: List of hosts where RGWs should run. Not for Rook.
- if hosts:
- self.placement = PlacementSpec(hosts=hosts)
-
- #: is multisite
- self.rgw_multisite = rgw_multisite
- self.rgw_zonemaster = rgw_zonemaster
- self.rgw_zonesecondary = rgw_zonesecondary
- self.rgw_multisite_proto = rgw_multisite_proto
- self.rgw_frontend_port = rgw_frontend_port
-
- self.rgw_realm = rgw_realm
- self.rgw_zone = rgw_zone
- self.rgw_zonegroup = rgw_zonegroup
- self.rgw_zone_user = rgw_zone_user
-
- self.system_access_key = system_access_key
- self.system_secret_key = system_secret_key
-
- @property
- def rgw_multisite_endpoint_addr(self):
- """Returns the first host. Not supported for Rook."""
- return self.placement.hosts[0]
-
- @property
- def rgw_multisite_endpoints_list(self):
- return ",".join(["{}://{}:{}".format(self.rgw_multisite_proto,
- host,
- self.rgw_frontend_port) for host in self.placement.hosts])
-
- def genkey(self, nchars):
- """ Returns a random string of nchars
-
- :nchars : Length of the returned string
- """
- # TODO Python 3: use Secrets module instead.
-
- return ''.join(random.choice(string.ascii_uppercase +
- string.ascii_lowercase +
- string.digits) for _ in range(nchars))
-
- @classmethod
- def from_json(cls, json_rgw_spec):
- # type: (dict) -> RGWSpec
- """
- Initialize 'RGWSpec' object data from a json structure
- :param json_rgw_spec: A valid dict with a the RGW settings
- """
- # TODO: also add PlacementSpec(**json_rgw_spec['placement'])
- args = {k:v for k, v in json_rgw_spec.items()}
- return RGWSpec(**args)
-
-
-class InventoryFilter(object):
- """
- When fetching inventory, use this filter to avoid unnecessarily
- scanning the whole estate.
-
- Typical use: filter by node when presenting UI workflow for configuring
- a particular server.
- filter by label when not all of estate is Ceph servers,
- and we want to only learn about the Ceph servers.
- filter by label when we are interested particularly
- in e.g. OSD servers.
-
- """
- def __init__(self, labels=None, nodes=None):
- # type: (Optional[List[str]], Optional[List[str]]) -> None
-
- #: Optional: get info about nodes matching labels
- self.labels = labels
-
- #: Optional: get info about certain named nodes only
- self.nodes = nodes
-
-
-class InventoryNode(object):
- """
- When fetching inventory, all Devices are groups inside of an
- InventoryNode.
- """
- def __init__(self, name, devices=None, labels=None, addr=None):
- # type: (str, Optional[inventory.Devices], Optional[List[str]], Optional[str]) -> None
- if devices is None:
- devices = inventory.Devices([])
- if labels is None:
- labels = []
- assert isinstance(devices, inventory.Devices)
-
- self.name = name # unique within cluster. For example a hostname.
- self.addr = addr or name
- self.devices = devices
- self.labels = labels
-
- def to_json(self):
- return {
- 'name': self.name,
- 'addr': self.addr,
- 'devices': self.devices.to_json(),
- 'labels': self.labels,
- }
-
- @classmethod
- def from_json(cls, data):
- try:
- _data = copy.deepcopy(data)
- name = _data.pop('name')
- addr = _data.pop('addr', None) or name
- devices = inventory.Devices.from_json(_data.pop('devices'))
- if _data:
- error_msg = 'Unknown key(s) in Inventory: {}'.format(','.join(_data.keys()))
- raise OrchestratorValidationError(error_msg)
- labels = _data.get('labels', list())
- return cls(name, devices, labels, addr)
- except KeyError as e:
- error_msg = '{} is required for {}'.format(e, cls.__name__)
- raise OrchestratorValidationError(error_msg)
- except TypeError as e:
- raise OrchestratorValidationError('Failed to read inventory: {}'.format(e))
-
-
- @classmethod
- def from_nested_items(cls, hosts):
- devs = inventory.Devices.from_json
- return [cls(item[0], devs(item[1].data)) for item in hosts]
-
- def __repr__(self):
- return "<InventoryNode>({name})".format(name=self.name)
-
- @staticmethod
- def get_host_names(nodes):
- # type: (List[InventoryNode]) -> List[str]
- return [node.name for node in nodes]
-
- def __eq__(self, other):
- return self.name == other.name and self.devices == other.devices
-
-
-class DeviceLightLoc(namedtuple('DeviceLightLoc', ['host', 'dev', 'path'])):
- """
- Describes a specific device on a specific host. Used for enabling or disabling LEDs
- on devices.
-
- hostname as in :func:`orchestrator.Orchestrator.get_hosts`
-
- device_id: e.g. ``ABC1234DEF567-1R1234_ABC8DE0Q``.
- See ``ceph osd metadata | jq '.[].device_ids'``
- """
- __slots__ = ()
-
-
-def _mk_orch_methods(cls):
- # Needs to be defined outside of for.
- # Otherwise meth is always bound to last key
- def shim(method_name):
- def inner(self, *args, **kwargs):
- completion = self._oremote(method_name, args, kwargs)
- return completion
- return inner
-
- for meth in Orchestrator.__dict__:
- if not meth.startswith('_') and meth not in ['is_orchestrator_module']:
- setattr(cls, meth, shim(meth))
- return cls
-
-
-@_mk_orch_methods
-class OrchestratorClientMixin(Orchestrator):
- """
- A module that inherents from `OrchestratorClientMixin` can directly call
- all :class:`Orchestrator` methods without manually calling remote.
-
- Every interface method from ``Orchestrator`` is converted into a stub method that internally
- calls :func:`OrchestratorClientMixin._oremote`
-
- >>> class MyModule(OrchestratorClientMixin):
- ... def func(self):
- ... completion = self.add_host('somehost') # calls `_oremote()`
- ... self._orchestrator_wait([completion])
- ... self.log.debug(completion.result)
-
- .. note:: Orchestrator implementations should not inherit from `OrchestratorClientMixin`.
- Reason is, that OrchestratorClientMixin magically redirects all methods to the
- "real" implementation of the orchestrator.
-
-
- >>> import mgr_module
- >>> class MyImplentation(mgr_module.MgrModule, Orchestrator):
- ... def __init__(self, ...):
- ... self.orch_client = OrchestratorClientMixin()
- ... self.orch_client.set_mgr(self.mgr))
- """
-
- def set_mgr(self, mgr):
- # type: (MgrModule) -> None
- """
- Useable in the Dashbord that uses a global ``mgr``
- """
-
- self.__mgr = mgr # Make sure we're not overwriting any other `mgr` properties
-
- def __get_mgr(self):
- try:
- return self.__mgr
- except AttributeError:
- return self
-
- def _oremote(self, meth, args, kwargs):
- """
- Helper for invoking `remote` on whichever orchestrator is enabled
-
- :raises RuntimeError: If the remote method failed.
- :raises OrchestratorError: orchestrator failed to perform
- :raises ImportError: no `orchestrator_cli` module or backend not found.
- """
- mgr = self.__get_mgr()
-
- try:
- o = mgr._select_orchestrator()
- except AttributeError:
- o = mgr.remote('orchestrator_cli', '_select_orchestrator')
-
- if o is None:
- raise NoOrchestrator()
-
- mgr.log.debug("_oremote {} -> {}.{}(*{}, **{})".format(mgr.module_name, o, meth, args, kwargs))
- return mgr.remote(o, meth, *args, **kwargs)
-
- def _orchestrator_wait(self, completions):
- # type: (List[Completion]) -> None
- """
- Wait for completions to complete (reads) or
- become persistent (writes).
-
- Waits for writes to be *persistent* but not *effective*.
-
- :param completions: List of Completions
- :raises NoOrchestrator:
- :raises RuntimeError: something went wrong while calling the process method.
- :raises ImportError: no `orchestrator_cli` module or backend not found.
- """
- while any(not c.has_result for c in completions):
- self.process(completions)
- self.__get_mgr().log.info("Operations pending: %s",
- sum(1 for c in completions if not c.has_result))
- if any(c.needs_result for c in completions):
- time.sleep(1)
- else:
- break
-
-
-class OutdatableData(object):
- DATEFMT = '%Y-%m-%d %H:%M:%S.%f'
-
- def __init__(self, data=None, last_refresh=None):
- # type: (Optional[dict], Optional[datetime.datetime]) -> None
- self._data = data
- if data is not None and last_refresh is None:
- self.last_refresh = datetime.datetime.utcnow() # type: Optional[datetime.datetime]
- else:
- self.last_refresh = last_refresh
-
- def json(self):
- if self.last_refresh is not None:
- timestr = self.last_refresh.strftime(self.DATEFMT) # type: Optional[str]
- else:
- timestr = None
-
- return {
- "data": self._data,
- "last_refresh": timestr,
- }
-
- @property
- def data(self):
- return self._data
-
- # @data.setter
- # No setter, as it doesn't work as expected: It's not saved in store automatically
-
- @classmethod
- def time_from_string(cls, timestr):
- if timestr is None:
- return None
- # drop the 'Z' timezone indication, it's always UTC
- timestr = timestr.rstrip('Z')
- return datetime.datetime.strptime(timestr, cls.DATEFMT)
-
- @classmethod
- def from_json(cls, data):
- return cls(data['data'], cls.time_from_string(data['last_refresh']))
-
- def outdated(self, timeout=None):
- if timeout is None:
- timeout = 600
- if self.last_refresh is None:
- return True
- cutoff = datetime.datetime.utcnow() - datetime.timedelta(
- seconds=timeout)
- return self.last_refresh < cutoff
-
- def __repr__(self):
- return 'OutdatableData(data={}, last_refresh={})'.format(self._data, self.last_refresh)
-
-
-class OutdatableDictMixin(object):
- """
- Toolbox for implementing a cache. As every orchestrator has
- different needs, we cannot implement any logic here.
- """
-
- def __getitem__(self, item):
- # type: (str) -> OutdatableData
- return OutdatableData.from_json(super(OutdatableDictMixin, self).__getitem__(item)) # type: ignore
-
- def __setitem__(self, key, value):
- # type: (str, OutdatableData) -> None
- val = None if value is None else value.json()
- super(OutdatableDictMixin, self).__setitem__(key, val) # type: ignore
-
- def items(self):
- ## type: () -> Iterator[Tuple[str, OutdatableData]]
- for item in super(OutdatableDictMixin, self).items(): # type: ignore
- k, v = item
- yield k, OutdatableData.from_json(v)
-
- def items_filtered(self, keys=None):
- if keys:
- return [(host, self[host]) for host in keys]
- else:
- return list(self.items())
-
- def any_outdated(self, timeout=None):
- items = self.items()
- if not list(items):
- return True
- return any([i[1].outdated(timeout) for i in items])
-
- def remove_outdated(self):
- outdated = [item[0] for item in self.items() if item[1].outdated()]
- for o in outdated:
- del self[o] # type: ignore
-
- def invalidate(self, key):
- self[key] = OutdatableData(self[key].data,
- datetime.datetime.fromtimestamp(0))
-
-
-class OutdatablePersistentDict(OutdatableDictMixin, PersistentStoreDict):
- pass
-
-
-class OutdatableDict(OutdatableDictMixin, dict):
- pass
--- /dev/null
+# Orchestrator CLI
+
+See also [orchestrator cli doc](https://docs.ceph.com/docs/master/mgr/orchestrator_cli/).
+
+## Running the Teuthology tests
+
+To run the API tests against a real Ceph cluster, we leverage the Teuthology
+framework and the `test_orchestrator` backend.
+
+`source` the script and run the tests manually:
+
+ $ pushd ../dashboard ; source ./run-backend-api-tests.sh ; popd
+ $ run_teuthology_tests tasks.mgr.test_orchestrator_cli
+ $ cleanup_teuthology
--- /dev/null
+from __future__ import absolute_import
+
+from .module import OrchestratorCli
+
+# usage: e.g. `from orchestrator import ServiceSpec`
+from ._interface import \
+ Completion, TrivialReadCompletion, raise_if_exception, ProgressReference, pretty_print, _Promise, \
+ CLICommand, _cli_write_command, _cli_read_command, \
+ Orchestrator, OrchestratorClientMixin, \
+ OrchestratorValidationError, OrchestratorError, NoOrchestrator, \
+ ServiceSpec, NFSServiceSpec, RGWSpec, HostPlacementSpec, \
+ ServiceDescription, InventoryFilter, PlacementSpec, HostSpec, \
+ DaemonDescription, \
+ InventoryNode, DeviceLightLoc, \
+ OutdatableData, OutdatablePersistentDict, \
+ UpgradeStatusSpec
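
For illustration only (not part of this patch): with the module renamed from `orchestrator_cli` to `orchestrator`, consumers import straight from the new package. The sketch below mirrors the `OrchestratorClientMixin` doctest in the deleted `orchestrator.py` above; `MyModule` and `hosts` are made-up names, and `_orchestrator_wait()`/`Completion.result` are assumed to carry over unchanged into `_interface.py`.

    from orchestrator import OrchestratorClientMixin

    class MyModule(OrchestratorClientMixin):
        """Hypothetical consumer; in practice this is a ceph-mgr module
        (or it calls ``set_mgr()`` with one)."""
        def hosts(self):
            completion = self.get_hosts()          # stub generated by _mk_orch_methods()
            self._orchestrator_wait([completion])  # block until the read has a result
            return completion.result               # list of InventoryNode
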
--- /dev/null
+
+"""
+ceph-mgr orchestrator interface
+
+Please see the ceph-mgr module developer's guide for more information.
+"""
+import copy
+import functools
+import logging
+import pickle
+import sys
+import time
+from collections import namedtuple
+from functools import wraps
+import uuid
+import string
+import random
+import datetime
+import copy
+import re
+import six
+import errno
+
+from ceph.deployment import inventory
+
+from mgr_module import MgrModule, PersistentStoreDict, CLICommand, HandleCommandResult
+from mgr_util import format_bytes
+
+try:
+ from ceph.deployment.drive_group import DriveGroupSpec
+ from typing import TypeVar, Generic, List, Optional, Union, Tuple, Iterator, Callable, Any, \
+ Type, Sequence
+except ImportError:
+ pass
+
+logger = logging.getLogger(__name__)
+
+
+class HostPlacementSpec(namedtuple('HostPlacementSpec', ['hostname', 'network', 'name'])):
+ def __str__(self):
+ res = ''
+ res += self.hostname
+ if self.network:
+ res += ':' + self.network
+ if self.name:
+ res += '=' + self.name
+ return res
+
+ @classmethod
+ def parse(cls, host, require_network=True):
+ # type: (str, bool) -> HostPlacementSpec
+ """
+ Split host into host, network, and (optional) daemon name parts. The network
+ part can be an IP, CIDR, or ceph addrvec like '[v2:1.2.3.4:3300,v1:1.2.3.4:6789]'.
+ e.g.,
+ "myhost"
+ "myhost=name"
+ "myhost:1.2.3.4"
+ "myhost:1.2.3.4=name"
+ "myhost:1.2.3.0/24"
+ "myhost:1.2.3.0/24=name"
+ "myhost:[v2:1.2.3.4:3000]=name"
+ "myhost:[v2:1.2.3.4:3000,v1:1.2.3.4:6789]=name"
+ """
+ # Matches from start to : or = or until end of string
+ host_re = r'^(.*?)(:|=|$)'
+ # Matches from : to = or until end of string
+ ip_re = r':(.*?)(=|$)'
+ # Matches from = to end of string
+ name_re = r'=(.*?)$'
+
+ # assign defaults
+ host_spec = cls('', '', '')
+
+ match_host = re.search(host_re, host)
+ if match_host:
+ host_spec = host_spec._replace(hostname=match_host.group(1))
+
+ name_match = re.search(name_re, host)
+ if name_match:
+ host_spec = host_spec._replace(name=name_match.group(1))
+
+ ip_match = re.search(ip_re, host)
+ if ip_match:
+ host_spec = host_spec._replace(network=ip_match.group(1))
+
+ if not require_network:
+ return host_spec
+
+ from ipaddress import ip_network, ip_address
+ networks = list() # type: List[str]
+ network = host_spec.network
+ # in case we have [v2:1.2.3.4:3000,v1:1.2.3.4:6478]
+ if ',' in network:
+ networks = [x for x in network.split(',')]
+ else:
+ networks.append(network)
+ for network in networks:
+ # only if we have versioned network configs
+ if network.startswith('v') or network.startswith('[v'):
+ network = network.split(':')[1]
+ try:
+ # if subnets are defined, also verify the validity
+ if '/' in network:
+ ip_network(six.text_type(network))
+ else:
+ ip_address(six.text_type(network))
+ except ValueError as e:
+ # logging?
+ raise e
+
+ return host_spec
+
+
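
A brief usage sketch of the new `parse` classmethod (illustration only, not part of the patch), using one of the formats listed in its docstring:

    # require_network defaults to True, so the CIDR part is validated.
    spec = HostPlacementSpec.parse('myhost:1.2.3.0/24=name')
    assert spec == HostPlacementSpec(hostname='myhost', network='1.2.3.0/24', name='name')
    assert str(spec) == 'myhost:1.2.3.0/24=name'
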
+class OrchestratorError(Exception):
+ """
+ General orchestrator specific error.
+
+ Used for deployment, configuration or user errors.
+
+ It's not intended for programming errors or orchestrator internal errors.
+ """
+
+
+class NoOrchestrator(OrchestratorError):
+ """
+    No orchestrator is configured.
+ """
+ def __init__(self, msg="No orchestrator configured (try `ceph orch set backend`)"):
+ super(NoOrchestrator, self).__init__(msg)
+
+
+class OrchestratorValidationError(OrchestratorError):
+ """
+ Raised when an orchestrator doesn't support a specific feature.
+ """
+
+
+def handle_exception(prefix, cmd_args, desc, perm, func):
+ @wraps(func)
+ def wrapper(*args, **kwargs):
+ try:
+ return func(*args, **kwargs)
+ except (OrchestratorError, ImportError) as e:
+ # Do not print Traceback for expected errors.
+ return HandleCommandResult(-errno.ENOENT, stderr=str(e))
+ except NotImplementedError:
+ msg = 'This Orchestrator does not support `{}`'.format(prefix)
+ return HandleCommandResult(-errno.ENOENT, stderr=msg)
+
+ return CLICommand(prefix, cmd_args, desc, perm)(wrapper)
+
+
+def _cli_command(perm):
+ def inner_cli_command(prefix, cmd_args="", desc=""):
+ return lambda func: handle_exception(prefix, cmd_args, desc, perm, func)
+ return inner_cli_command
+
+
+_cli_read_command = _cli_command('r')
+_cli_write_command = _cli_command('rw')
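
To show how these helpers are meant to be consumed, here is a loose sketch of a handler (illustration only, not part of the patch): it is modeled on the `OrchestratorCli` class that `__init__.py` imports from `module.py` (not shown in this diff), but the command prefix and method name are invented, and `raise_if_exception()` and `Completion.result_str()` are assumed to carry over unchanged from the deleted `orchestrator.py` above. The point is that `handle_exception` turns an `OrchestratorError` or `NotImplementedError` raised inside the handler into a clean `HandleCommandResult` instead of a traceback:

    class OrchestratorCli(OrchestratorClientMixin, MgrModule):
        @_cli_read_command('orch example hosts', desc='Hypothetical read-only command')
        def _example_hosts(self):
            completion = self.get_hosts()          # Completion produced by the backend
            self._orchestrator_wait([completion])  # wait for the read to finish
            raise_if_exception(completion)         # OrchestratorError is caught by the wrapper
            return HandleCommandResult(stdout=completion.result_str())
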
+
+
+def _no_result():
+ return object()
+
+
+class _Promise(object):
+ """
+ A completion may need multiple promises to be fulfilled. `_Promise` is one
+ step.
+
+ Typically ``Orchestrator`` implementations inherit from this class to
+ build their own way of finishing a step to fulfil a future.
+
+ They are not exposed in the orchestrator interface and can be seen as a
+ helper to build orchestrator modules.
+ """
+ INITIALIZED = 1 # We have a parent completion and a next completion
+ RUNNING = 2
+ FINISHED = 3 # we have a final result
+
+ NO_RESULT = _no_result() # type: None
+ ASYNC_RESULT = object()
+
+ def __init__(self,
+ _first_promise=None, # type: Optional["_Promise"]
+ value=NO_RESULT, # type: Optional[Any]
+ on_complete=None, # type: Optional[Callable]
+ name=None, # type: Optional[str]
+ ):
+ self._on_complete_ = on_complete
+ self._name = name
+ self._next_promise = None # type: Optional[_Promise]
+
+ self._state = self.INITIALIZED
+ self._exception = None # type: Optional[Exception]
+
+ # Value of this _Promise. may be an intermediate result.
+ self._value = value
+
+ # _Promise is not a continuation monad, as `_result` is of type
+ # T instead of (T -> r) -> r. Therefore we need to store the first promise here.
+ self._first_promise = _first_promise or self # type: '_Promise'
+
+ @property
+ def _exception(self):
+ # type: () -> Optional[Exception]
+ return getattr(self, '_exception_', None)
+
+ @_exception.setter
+ def _exception(self, e):
+ self._exception_ = e
+ self._serialized_exception_ = pickle.dumps(e) if e is not None else None
+
+ @property
+ def _serialized_exception(self):
+ # type: () -> Optional[bytes]
+ return getattr(self, '_serialized_exception_', None)
+
+
+
+ @property
+ def _on_complete(self):
+ # type: () -> Optional[Callable]
+ # https://github.com/python/mypy/issues/4125
+ return self._on_complete_
+
+ @_on_complete.setter
+ def _on_complete(self, val):
+ # type: (Optional[Callable]) -> None
+ self._on_complete_ = val
+
+
+ def __repr__(self):
+        name = self._name or (getattr(self._on_complete, '__name__', '??') if self._on_complete else 'None')
+ val = repr(self._value) if self._value is not self.NO_RESULT else 'NA'
+ return '{}(_s={}, val={}, _on_c={}, id={}, name={}, pr={}, _next={})'.format(
+            self.__class__, self._state, val, self._on_complete, id(self), name, getattr(self, '_progress_reference', 'NA'), repr(self._next_promise)
+ )
+
+ def pretty_print_1(self):
+ if self._name:
+ name = self._name
+ elif self._on_complete is None:
+ name = 'lambda x: x'
+ elif hasattr(self._on_complete, '__name__'):
+ name = getattr(self._on_complete, '__name__')
+ else:
+ name = self._on_complete.__class__.__name__
+ val = repr(self._value) if self._value not in (self.NO_RESULT, self.ASYNC_RESULT) else '...'
+ prefix = {
+ self.INITIALIZED: ' ',
+ self.RUNNING: ' >>>',
+ self.FINISHED: '(done)'
+ }[self._state]
+ return '{} {}({}),'.format(prefix, name, val)
+
+ def then(self, on_complete):
+ # type: (Any, Callable) -> Any
+ """
+ Call ``on_complete`` as soon as this promise is finalized.
+ """
+ assert self._state in (self.INITIALIZED, self.RUNNING)
+ if self._on_complete is not None:
+ assert self._next_promise is None
+ self._set_next_promise(self.__class__(
+ _first_promise=self._first_promise,
+ on_complete=on_complete
+ ))
+ return self._next_promise
+
+ else:
+ self._on_complete = on_complete
+ self._set_next_promise(self.__class__(_first_promise=self._first_promise))
+ return self._next_promise
+
+ def _set_next_promise(self, next):
+ # type: (_Promise) -> None
+ assert self is not next
+ assert self._state in (self.INITIALIZED, self.RUNNING)
+
+ self._next_promise = next
+ assert self._next_promise is not None
+ for p in iter(self._next_promise):
+ p._first_promise = self._first_promise
+
+ def _finalize(self, value=NO_RESULT):
+ """
+ Sets this promise to complete.
+
+ Orchestrators may choose to use this helper function.
+
+ :param value: new value.
+ """
+ if self._state not in (self.INITIALIZED, self.RUNNING):
+ raise ValueError('finalize: {} already finished. {}'.format(repr(self), value))
+
+ self._state = self.RUNNING
+
+ if value is not self.NO_RESULT:
+ self._value = value
+ assert self._value is not self.NO_RESULT, repr(self)
+
+ if self._on_complete:
+ try:
+ next_result = self._on_complete(self._value)
+ except Exception as e:
+ self.fail(e)
+ return
+ else:
+ next_result = self._value
+
+ if isinstance(next_result, _Promise):
+ # hack: _Promise is not a continuation monad.
+ next_result = next_result._first_promise # type: ignore
+ assert next_result not in self, repr(self._first_promise) + repr(next_result)
+ assert self not in next_result
+ next_result._append_promise(self._next_promise)
+ self._set_next_promise(next_result)
+ assert self._next_promise
+ if self._next_promise._value is self.NO_RESULT:
+ self._next_promise._value = self._value
+ self.propagate_to_next()
+ elif next_result is not self.ASYNC_RESULT:
+ # simple map. simply forward
+ if self._next_promise:
+ self._next_promise._value = next_result
+ else:
+ # Hack: next_result is of type U, _value is of type T
+ self._value = next_result # type: ignore
+ self.propagate_to_next()
+ else:
+ # asynchronous promise
+ pass
+
+
+ def propagate_to_next(self):
+ self._state = self.FINISHED
+ logger.debug('finalized {}'.format(repr(self)))
+ if self._next_promise:
+ self._next_promise._finalize()
+
+ def fail(self, e):
+ # type: (Exception) -> None
+ """
+        Sets the whole completion to be failed with this exception and ends the
+        evaluation.
+ """
+ if self._state == self.FINISHED:
+ raise ValueError(
+ 'Invalid State: called fail, but Completion is already finished: {}'.format(str(e)))
+ assert self._state in (self.INITIALIZED, self.RUNNING)
+ logger.exception('_Promise failed')
+ self._exception = e
+ self._value = 'exception'
+ if self._next_promise:
+ self._next_promise.fail(e)
+ self._state = self.FINISHED
+
+ def __contains__(self, item):
+ return any(item is p for p in iter(self._first_promise))
+
+ def __iter__(self):
+ yield self
+ elem = self._next_promise
+ while elem is not None:
+ yield elem
+ elem = elem._next_promise
+
+ def _append_promise(self, other):
+ if other is not None:
+ assert self not in other
+ assert other not in self
+ self._last_promise()._set_next_promise(other)
+
+ def _last_promise(self):
+ # type: () -> _Promise
+ return list(iter(self))[-1]
+
+
+class ProgressReference(object):
+ def __init__(self,
+ message, # type: str
+ mgr,
+ completion=None # type: Optional[Callable[[], Completion]]
+ ):
+ """
+ ProgressReference can be used within Completions::
+
+ +---------------+ +---------------------------------+
+ | | then | |
+ | My Completion | +--> | on_complete=ProgressReference() |
+ | | | |
+ +---------------+ +---------------------------------+
+
+ See :func:`Completion.with_progress` for an easy way to create
+ a progress reference
+
+ """
+ super(ProgressReference, self).__init__()
+ self.progress_id = str(uuid.uuid4())
+ self.message = message
+ self.mgr = mgr
+
+ #: The completion can already have a result, before the write
+ #: operation is effective. progress == 1 means, the services are
+ #: created / removed.
+ self.completion = completion # type: Optional[Callable[[], Completion]]
+
+        #: if an orchestrator module can provide more detailed
+        #: progress information, it needs to also call ``progress.update()``.
+ self.progress = 0.0
+
+ self._completion_has_result = False
+ self.mgr.all_progress_references.append(self)
+
+ def __str__(self):
+ """
+ ``__str__()`` is used for determining the message for progress events.
+ """
+ return self.message or super(ProgressReference, self).__str__()
+
+ def __call__(self, arg):
+ self._completion_has_result = True
+ self.progress = 1.0
+ return arg
+
+ @property
+ def progress(self):
+ return self._progress
+
+ @progress.setter
+ def progress(self, progress):
+ assert progress <= 1.0
+ self._progress = progress
+ try:
+ if self.effective:
+ self.mgr.remote("progress", "complete", self.progress_id)
+ self.mgr.all_progress_references = [p for p in self.mgr.all_progress_references if p is not self]
+ else:
+ self.mgr.remote("progress", "update", self.progress_id, self.message,
+ progress,
+ [("origin", "orchestrator")])
+ except ImportError:
+ # If the progress module is disabled that's fine,
+ # they just won't see the output.
+ pass
+
+ @property
+ def effective(self):
+ return self.progress == 1 and self._completion_has_result
+
+ def update(self):
+ def progress_run(progress):
+ self.progress = progress
+ if self.completion:
+ c = self.completion().then(progress_run)
+ self.mgr.process([c._first_promise])
+ else:
+ self.progress = 1
+
+ def fail(self):
+ self._completion_has_result = True
+ self.progress = 1
+
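+# Wiring sketch (assumes ``mgr`` is an orchestrator module that provides ``remote()`` and
+# an ``all_progress_references`` list, as required above; ``apply_change`` is an
+# illustrative callable, not part of this module):
+#
+#   c = Completion(on_complete=apply_change).add_progress('Updating service', mgr)
+#   # Once the chain is finalized, the ProgressReference is called and progress becomes 1.0.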
+
+class Completion(_Promise):
+ """
+ Combines multiple promises into one overall operation.
+
+    Completions are composable: one completion can call another
+    completion, which makes them re-usable using Promises. E.g.::
+
+ >>> return Orchestrator().get_hosts().then(self._create_osd)
+
+ where ``get_hosts`` returns a Completion of list of hosts and
+ ``_create_osd`` takes a list of hosts.
+
+ The concept behind this is to store the computation steps
+    explicitly and then evaluate the chain explicitly:
+
+ >>> p = Completion(on_complete=lambda x: x*2).then(on_complete=lambda x: str(x))
+ ... p.finalize(2)
+        ... assert p.result == "4"
+
+ or graphically::
+
+ +---------------+ +-----------------+
+ | | then | |
+            | lambda x: x*2 | +--> | lambda x: str(x)|
+ | | | |
+ +---------------+ +-----------------+
+
+ """
+ def __init__(self,
+ _first_promise=None, # type: Optional["Completion"]
+ value=_Promise.NO_RESULT, # type: Any
+ on_complete=None, # type: Optional[Callable]
+ name=None, # type: Optional[str]
+ ):
+ super(Completion, self).__init__(_first_promise, value, on_complete, name)
+
+ @property
+ def _progress_reference(self):
+ # type: () -> Optional[ProgressReference]
+ if hasattr(self._on_complete, 'progress_id'):
+ return self._on_complete # type: ignore
+ return None
+
+ @property
+ def progress_reference(self):
+ # type: () -> Optional[ProgressReference]
+ """
+ ProgressReference. Marks this completion
+        as a write completion.
+ """
+
+ references = [c._progress_reference for c in iter(self) if c._progress_reference is not None]
+ if references:
+ assert len(references) == 1
+ return references[0]
+ return None
+
+ @classmethod
+ def with_progress(cls, # type: Any
+ message, # type: str
+ mgr,
+ _first_promise=None, # type: Optional["Completion"]
+ value=_Promise.NO_RESULT, # type: Any
+ on_complete=None, # type: Optional[Callable]
+ calc_percent=None # type: Optional[Callable[[], Any]]
+ ):
+ # type: (...) -> Any
+
+ c = cls(
+ _first_promise=_first_promise,
+ value=value,
+ on_complete=on_complete
+ ).add_progress(message, mgr, calc_percent)
+
+ return c._first_promise
+
+ def add_progress(self,
+ message, # type: str
+ mgr,
+ calc_percent=None # type: Optional[Callable[[], Any]]
+ ):
+ return self.then(
+ on_complete=ProgressReference(
+ message=message,
+ mgr=mgr,
+ completion=calc_percent
+ )
+ )
+
+ def fail(self, e):
+ super(Completion, self).fail(e)
+ if self._progress_reference:
+ self._progress_reference.fail()
+
+ def finalize(self, result=_Promise.NO_RESULT):
+ if self._first_promise._state == self.INITIALIZED:
+ self._first_promise._finalize(result)
+
+ @property
+ def result(self):
+ """
+        The result of the operation that we waited for.
+        Only valid after calling Orchestrator.process() on this
+        completion.
+ """
+ last = self._last_promise()
+ assert last._state == _Promise.FINISHED
+ return last._value
+
+ def result_str(self):
+ """Force a string."""
+ if self.result is None:
+ return ''
+ if isinstance(self.result, list):
+ return '\n'.join(str(x) for x in self.result)
+ return str(self.result)
+
+ @property
+ def exception(self):
+ # type: () -> Optional[Exception]
+ return self._last_promise()._exception
+
+ @property
+ def serialized_exception(self):
+ # type: () -> Optional[bytes]
+ return self._last_promise()._serialized_exception
+
+ @property
+ def has_result(self):
+ # type: () -> bool
+ """
+        Does the operation already have a result?
+
+ For Write operations, it can already have a
+ result, if the orchestrator's configuration is
+ persistently written. Typically this would
+ indicate that an update had been written to
+ a manifest, but that the update had not
+ necessarily been pushed out to the cluster.
+
+ :return:
+ """
+ return self._last_promise()._state == _Promise.FINISHED
+
+ @property
+ def is_errored(self):
+ # type: () -> bool
+ """
+ Has the completion failed. Default implementation looks for
+ self.exception. Can be overwritten.
+ """
+ return self.exception is not None
+
+ @property
+ def needs_result(self):
+ # type: () -> bool
+ """
+ Could the external operation be deemed as complete,
+ or should we wait?
+ We must wait for a read operation only if it is not complete.
+ """
+ return not self.is_errored and not self.has_result
+
+ @property
+ def is_finished(self):
+ # type: () -> bool
+ """
+ Could the external operation be deemed as complete,
+ or should we wait?
+ We must wait for a read operation only if it is not complete.
+ """
+ return self.is_errored or (self.has_result)
+
+ def pretty_print(self):
+
+ reprs = '\n'.join(p.pretty_print_1() for p in iter(self._first_promise))
+ return """<{}>[\n{}\n]""".format(self.__class__.__name__, reprs)
+
+
+def pretty_print(completions):
+ # type: (Sequence[Completion]) -> str
+ return ', '.join(c.pretty_print() for c in completions)
+
+
+def raise_if_exception(c):
+ # type: (Completion) -> None
+ """
+ :raises OrchestratorError: Some user error or a config error.
+ :raises Exception: Some internal error
+ """
+ if c.serialized_exception is not None:
+ try:
+ e = pickle.loads(c.serialized_exception)
+ except (KeyError, AttributeError):
+ raise Exception('{}: {}'.format(type(c.exception), c.exception))
+ raise e
+
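+# Error-path sketch (illustrative, not part of the interface): an exception raised inside
+# an ``on_complete`` callback is captured by ``fail()`` and re-raised here:
+#
+#   c = Completion(on_complete=lambda x: 1 / x)
+#   c.finalize(0)              # ZeroDivisionError is captured, not raised
+#   assert c.is_errored
+#   raise_if_exception(c)      # re-raises the pickled exception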
+
+class TrivialReadCompletion(Completion):
+ """
+ This is the trivial completion simply wrapping a result.
+ """
+ def __init__(self, result):
+ super(TrivialReadCompletion, self).__init__()
+ if result:
+ self.finalize(result)
+
+
+def _hide_in_features(f):
+ f._hide_in_features = True
+ return f
+
+
+class Orchestrator(object):
+ """
+ Calls in this class may do long running remote operations, with time
+ periods ranging from network latencies to package install latencies and large
+ internet downloads. For that reason, all are asynchronous, and return
+ ``Completion`` objects.
+
+ Methods should only return the completion and not directly execute
+ anything, like network calls. Otherwise the purpose of
+ those completions is defeated.
+
+ Implementations are not required to start work on an operation until
+ the caller waits on the relevant Completion objects. Callers making
+ multiple updates should not wait on Completions until they're done
+ sending operations: this enables implementations to batch up a series
+ of updates when wait() is called on a set of Completion objects.
+
+ Implementations are encouraged to keep reasonably fresh caches of
+ the status of the system: it is better to serve a stale-but-recent
+ result read of e.g. device inventory than it is to keep the caller waiting
+ while you scan hosts every time.
+ """
+
+ @_hide_in_features
+ def is_orchestrator_module(self):
+ """
+ Enable other modules to interrogate this module to discover
+ whether it's usable as an orchestrator module.
+
+ Subclasses do not need to override this.
+ """
+ return True
+
+ @_hide_in_features
+ def available(self):
+ # type: () -> Tuple[bool, str]
+ """
+ Report whether we can talk to the orchestrator. This is the
+ place to give the user a meaningful message if the orchestrator
+ isn't running or can't be contacted.
+
+ This method may be called frequently (e.g. every page load
+ to conditionally display a warning banner), so make sure it's
+ not too expensive. It's okay to give a slightly stale status
+ (e.g. based on a periodic background ping of the orchestrator)
+ if that's necessary to make this method fast.
+
+ .. note::
+ `True` doesn't mean that the desired functionality
+ is actually available in the orchestrator. I.e. this
+ won't work as expected::
+
+ >>> if OrchestratorClientMixin().available()[0]: # wrong.
+ ... OrchestratorClientMixin().get_hosts()
+
+ :return: two-tuple of boolean, string
+ """
+ raise NotImplementedError()
+
+ @_hide_in_features
+ def process(self, completions):
+ # type: (List[Completion]) -> None
+ """
+ Given a list of Completion instances, process any which are
+ incomplete.
+
+ Callers should inspect the detail of each completion to identify
+ partial completion/progress information, and present that information
+ to the user.
+
+ This method should not block, as this would make it slow to query
+ a status, while other long running operations are in progress.
+ """
+ raise NotImplementedError()
+
+ @_hide_in_features
+ def get_feature_set(self):
+ """Describes which methods this orchestrator implements
+
+ .. note::
+ `True` doesn't mean that the desired functionality
+ is actually possible in the orchestrator. I.e. this
+ won't work as expected::
+
+ >>> api = OrchestratorClientMixin()
+ ... if api.get_feature_set()['get_hosts']['available']: # wrong.
+ ... api.get_hosts()
+
+ It's better to ask for forgiveness instead::
+
+ >>> try:
+ ... OrchestratorClientMixin().get_hosts()
+ ... except (OrchestratorError, NotImplementedError):
+ ... ...
+
+ :returns: Dict of API method names to ``{'available': True or False}``
+ """
+ module = self.__class__
+ features = {a: {'available': getattr(Orchestrator, a, None) != getattr(module, a)}
+ for a in Orchestrator.__dict__
+ if not a.startswith('_') and not getattr(getattr(Orchestrator, a), '_hide_in_features', False)
+ }
+ return features
+
+ @_hide_in_features
+ def cancel_completions(self):
+ # type: () -> None
+ """
+        Cancels ongoing completions and unsticks the mgr.
+ """
+ raise NotImplementedError()
+
+    def add_host(self, host_spec):
+        # type: (HostSpec) -> Completion
+        """
+        Add a host to the orchestrator inventory.
+
+        :param host_spec: host specification (hostname plus optional address and labels)
+        """
+ raise NotImplementedError()
+
+ def remove_host(self, host):
+ # type: (str) -> Completion
+ """
+ Remove a host from the orchestrator inventory.
+
+ :param host: hostname
+ """
+ raise NotImplementedError()
+
+ def update_host_addr(self, host, addr):
+ # type: (str, str) -> Completion
+ """
+ Update a host's address
+
+ :param host: hostname
+ :param addr: address (dns name or IP)
+ """
+ raise NotImplementedError()
+
+ def get_hosts(self):
+ # type: () -> Completion
+ """
+ Report the hosts in the cluster.
+
+ The default implementation is extra slow.
+
+ :return: list of InventoryNodes
+ """
+ return self.get_inventory()
+
+ def add_host_label(self, host, label):
+ # type: (str, str) -> Completion
+ """
+ Add a host label
+ """
+ raise NotImplementedError()
+
+ def remove_host_label(self, host, label):
+ # type: (str, str) -> Completion
+ """
+ Remove a host label
+ """
+ raise NotImplementedError()
+
+ def get_inventory(self, node_filter=None, refresh=False):
+ # type: (Optional[InventoryFilter], bool) -> Completion
+ """
+ Returns something that was created by `ceph-volume inventory`.
+
+ :return: list of InventoryNode
+ """
+ raise NotImplementedError()
+
+ def describe_service(self, service_type=None, service_id=None, node_name=None, refresh=False):
+ # type: (Optional[str], Optional[str], Optional[str], bool) -> Completion
+ """
+ Describe a service (of any kind) that is already configured in
+ the orchestrator. For example, when viewing an OSD in the dashboard
+ we might like to also display information about the orchestrator's
+ view of the service (like the kubernetes pod ID).
+
+ When viewing a CephFS filesystem in the dashboard, we would use this
+ to display the pods being currently run for MDS daemons.
+
+ :return: list of ServiceDescription objects.
+ """
+ raise NotImplementedError()
+
+ def list_daemons(self, daemon_type=None, daemon_id=None, host=None, refresh=False):
+ # type: (Optional[str], Optional[str], Optional[str], bool) -> Completion
+ """
+ Describe a daemon (of any kind) that is already configured in
+ the orchestrator.
+
+ :return: list of DaemonDescription objects.
+ """
+ raise NotImplementedError()
+
+ def remove_daemons(self, names, force):
+ # type: (List[str], bool) -> Completion
+ """
+ Remove specific daemon(s).
+
+ :return: None
+ """
+ raise NotImplementedError()
+
+ def remove_service(self, service_type, service_name=None):
+ # type: (str, Optional[str]) -> Completion
+ """
+ Remove a service (a collection of daemons).
+
+ :return: None
+ """
+ raise NotImplementedError()
+
+ def service_action(self, action, service_type, service_name):
+ # type: (str, str, str) -> Completion
+ """
+ Perform an action (start/stop/reload) on a service (i.e., all daemons
+ providing the logical service).
+
+ :param action: one of "start", "stop", "restart", "redeploy", "reconfig"
+ :param service_type: e.g. "mds", "rgw", ...
+ :param service_name: name of logical service ("cephfs", "us-east", ...)
+ :rtype: Completion
+ """
+        # assert action in ["start", "stop", "reload", "restart", "redeploy"]
+ raise NotImplementedError()
+
+ def daemon_action(self, action, daemon_type, daemon_id):
+ # type: (str, str, str) -> Completion
+ """
+ Perform an action (start/stop/reload) on a daemon.
+
+ :param action: one of "start", "stop", "restart", "redeploy", "reconfig"
+ :param name: name of daemon
+ :rtype: Completion
+ """
+        # assert action in ["start", "stop", "reload", "restart", "redeploy"]
+ raise NotImplementedError()
+
+ def create_osds(self, drive_groups):
+ # type: (List[DriveGroupSpec]) -> Completion
+ """
+ Create one or more OSDs within a single Drive Group.
+
+ The principal argument here is the drive_group member
+ of OsdSpec: other fields are advisory/extensible for any
+ finer-grained OSD feature enablement (choice of backing store,
+ compression/encryption, etc).
+
+        :param drive_groups: a list of DriveGroupSpec
+
+        TODO: this method is not yet composable with the other orchestrator
+            methods; each orchestrator can instead use the "get_inventory" method
+            and the "drive_group.host_pattern" attribute to obtain the list of
+            hosts where to apply the operation.
+ """
+ raise NotImplementedError()
+
+ def blink_device_light(self, ident_fault, on, locations):
+ # type: (str, bool, List[DeviceLightLoc]) -> Completion
+ """
+ Instructs the orchestrator to enable or disable either the ident or the fault LED.
+
+ :param ident_fault: either ``"ident"`` or ``"fault"``
+ :param on: ``True`` = on.
+ :param locations: See :class:`orchestrator.DeviceLightLoc`
+ """
+ raise NotImplementedError()
+
+ def add_mon(self, spec):
+ # type: (ServiceSpec) -> Completion
+ """Create mon daemon(s)"""
+ raise NotImplementedError()
+
+ def apply_mon(self, spec):
+ # type: (ServiceSpec) -> Completion
+ """Update mon cluster"""
+ raise NotImplementedError()
+
+ def add_mgr(self, spec):
+ # type: (ServiceSpec) -> Completion
+ """Create mgr daemon(s)"""
+ raise NotImplementedError()
+
+ def apply_mgr(self, spec):
+ # type: (ServiceSpec) -> Completion
+ """Update mgr cluster"""
+ raise NotImplementedError()
+
+ def add_mds(self, spec):
+ # type: (ServiceSpec) -> Completion
+ """Create MDS daemon(s)"""
+ raise NotImplementedError()
+
+ def apply_mds(self, spec):
+ # type: (ServiceSpec) -> Completion
+ """Update MDS cluster"""
+ raise NotImplementedError()
+
+ def add_rbd_mirror(self, spec):
+ # type: (ServiceSpec) -> Completion
+ """Create rbd-mirror daemon(s)"""
+ raise NotImplementedError()
+
+ def apply_rbd_mirror(self, spec):
+ # type: (ServiceSpec) -> Completion
+ """Update rbd-mirror cluster"""
+ raise NotImplementedError()
+
+ def add_nfs(self, spec):
+ # type: (NFSServiceSpec) -> Completion
+ """Create NFS daemon(s)"""
+ raise NotImplementedError()
+
+ def apply_nfs(self, spec):
+ # type: (NFSServiceSpec) -> Completion
+ """Update NFS cluster"""
+ raise NotImplementedError()
+
+ def add_rgw(self, spec):
+ # type: (RGWSpec) -> Completion
+ """Create RGW daemon(s)"""
+ raise NotImplementedError()
+
+ def apply_rgw(self, spec):
+ # type: (RGWSpec) -> Completion
+ """Update RGW cluster"""
+ raise NotImplementedError()
+
+ def add_prometheus(self, spec):
+ # type: (ServiceSpec) -> Completion
+ """Create new prometheus daemon"""
+ raise NotImplementedError()
+
+ def apply_prometheus(self, spec):
+ # type: (ServiceSpec) -> Completion
+ """Update prometheus cluster"""
+ raise NotImplementedError()
+
+ def upgrade_check(self, image, version):
+ # type: (Optional[str], Optional[str]) -> Completion
+ raise NotImplementedError()
+
+ def upgrade_start(self, image, version):
+ # type: (Optional[str], Optional[str]) -> Completion
+ raise NotImplementedError()
+
+ def upgrade_pause(self):
+ # type: () -> Completion
+ raise NotImplementedError()
+
+ def upgrade_resume(self):
+ # type: () -> Completion
+ raise NotImplementedError()
+
+ def upgrade_stop(self):
+ # type: () -> Completion
+ raise NotImplementedError()
+
+ def upgrade_status(self):
+ # type: () -> Completion
+ """
+ If an upgrade is currently underway, report on where
+ we are in the process, or if some error has occurred.
+
+ :return: UpgradeStatusSpec instance
+ """
+ raise NotImplementedError()
+
+ @_hide_in_features
+ def upgrade_available(self):
+ # type: () -> Completion
+ """
+ Report on what versions are available to upgrade to
+
+ :return: List of strings
+ """
+ raise NotImplementedError()
+
+class HostSpec(object):
+ def __init__(self, hostname, addr=None, labels=None):
+ # type: (str, Optional[str], Optional[List[str]]) -> None
+ self.hostname = hostname # the hostname on the host
+ self.addr = addr or hostname # DNS name or IP address to reach it
+ self.labels = labels or [] # initial label(s), if any
+
+class UpgradeStatusSpec(object):
+ # Orchestrator's report on what's going on with any ongoing upgrade
+ def __init__(self):
+ self.in_progress = False # Is an upgrade underway?
+ self.target_image = None
+ self.services_complete = [] # Which daemon types are fully updated?
+ self.message = "" # Freeform description
+
+
+class PlacementSpec(object):
+ """
+ For APIs that need to specify a node subset
+ """
+ def __init__(self, label=None, hosts=None, count=None):
+ # type: (Optional[str], Optional[List], Optional[int]) -> None
+ self.label = label
+ self.hosts = [] # type: List[HostPlacementSpec]
+ if hosts:
+ if all([isinstance(host, HostPlacementSpec) for host in hosts]):
+ self.hosts = hosts
+ else:
+                self.hosts = [HostPlacementSpec.parse(x, require_network=False) for x in hosts if x]
+
+ self.count = count # type: Optional[int]
+
+ def set_hosts(self, hosts):
+ # To backpopulate the .hosts attribute when using labels or count
+ # in the orchestrator backend.
+ self.hosts = hosts
+
+ @classmethod
+ def from_dict(cls, data):
+ _cls = cls(**data)
+ _cls.validate()
+ return _cls
+
+ def validate(self):
+ if self.hosts and self.label:
+ # TODO: a less generic Exception
+ raise Exception('Node and label are mutually exclusive')
+ if self.count is not None and self.count <= 0:
+ raise Exception("num/count must be > 1")
+
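+# Placement sketch (illustrative values): hosts may be given either as HostPlacementSpec
+# objects or as plain strings, which are parsed; a label and an explicit host list are
+# mutually exclusive:
+#
+#   PlacementSpec(hosts=['host1', 'host2=rgw.a'], count=2)
+#   PlacementSpec(label='mon', count=3)
+#   PlacementSpec(label='mon', hosts=['host1']).validate()   # raises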
+
+def handle_type_error(method):
+ @wraps(method)
+ def inner(cls, *args, **kwargs):
+ try:
+ return method(cls, *args, **kwargs)
+ except TypeError as e:
+ error_msg = '{}: {}'.format(cls.__name__, e)
+ raise OrchestratorValidationError(error_msg)
+ return inner
+
+
+class DaemonDescription(object):
+ """
+ For responding to queries about the status of a particular daemon,
+ stateful or stateless.
+
+ This is not about health or performance monitoring of daemons: it's
+ about letting the orchestrator tell Ceph whether and where a
+ daemon is scheduled in the cluster. When an orchestrator tells
+ Ceph "it's running on node123", that's not a promise that the process
+ is literally up this second, it's a description of where the orchestrator
+ has decided the daemon should run.
+ """
+
+ def __init__(self,
+ daemon_type=None,
+ daemon_id=None,
+ nodename=None,
+ container_id=None,
+ container_image_id=None,
+ container_image_name=None,
+ version=None,
+ status=None,
+ status_desc=None):
+ # Node is at the same granularity as InventoryNode
+ self.nodename = nodename
+
+ # Not everyone runs in containers, but enough people do to
+ # justify having the container_id (runtime id) and container_image
+ # (image name)
+ self.container_id = container_id # runtime id
+ self.container_image_id = container_image_id # image hash
+ self.container_image_name = container_image_name # image friendly name
+
+ # The type of service (osd, mon, mgr, etc.)
+ self.daemon_type = daemon_type
+
+ # The orchestrator will have picked some names for daemons,
+ # typically either based on hostnames or on pod names.
+ # This is the <foo> in mds.<foo>, the ID that will appear
+ # in the FSMap/ServiceMap.
+ self.daemon_id = daemon_id
+
+ # Service version that was deployed
+ self.version = version
+
+ # Service status: -1 error, 0 stopped, 1 running
+ self.status = status
+
+ # Service status description when status == -1.
+ self.status_desc = status_desc
+
+ # datetime when this info was last refreshed
+ self.last_refresh = None # type: Optional[datetime.datetime]
+
+ def name(self):
+ return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+ def __repr__(self):
+ return "<DaemonDescription>({type}.{id})".format(type=self.daemon_type,
+ id=self.daemon_id)
+
+ def to_json(self):
+ out = {
+ 'nodename': self.nodename,
+ 'container_id': self.container_id,
+ 'container_image_id': self.container_image_id,
+ 'container_image_name': self.container_image_name,
+ 'daemon_id': self.daemon_id,
+ 'daemon_type': self.daemon_type,
+ 'version': self.version,
+ 'status': self.status,
+ 'status_desc': self.status_desc,
+ }
+ return {k: v for (k, v) in out.items() if v is not None}
+
+ @classmethod
+ @handle_type_error
+ def from_json(cls, data):
+ return cls(**data)
+
+class ServiceDescription(object):
+ """
+ For responding to queries about the status of a particular service,
+ stateful or stateless.
+
+ This is not about health or performance monitoring of services: it's
+ about letting the orchestrator tell Ceph whether and where a
+ service is scheduled in the cluster. When an orchestrator tells
+ Ceph "it's running on node123", that's not a promise that the process
+ is literally up this second, it's a description of where the orchestrator
+ has decided the service should run.
+ """
+
+ def __init__(self, nodename=None,
+ container_id=None, container_image_id=None,
+ container_image_name=None,
+ service=None, service_instance=None,
+ service_type=None, version=None, rados_config_location=None,
+ service_url=None, status=None, status_desc=None):
+ # Node is at the same granularity as InventoryNode
+ self.nodename = nodename # type: Optional[str]
+
+ # Not everyone runs in containers, but enough people do to
+ # justify having the container_id (runtime id) and container_image
+ # (image name)
+ self.container_id = container_id # runtime id
+ self.container_image_id = container_image_id # image hash
+ self.container_image_name = container_image_name # image friendly name
+
+        # Some services can be deployed in groups. For example, an MDS service
+        # can have active and standby daemons, and nfs-ganesha can run daemons
+        # in parallel. This tag refers to a group of daemons as a whole.
+        #
+        # For instance, a group of MDS daemons all serving the same filesystem
+        # will all have the same service value (which may be the
+        # Filesystem name in the FSMap).
+ #
+ # Single-instance services should leave this set to None
+ self.service = service
+
+ # The orchestrator will have picked some names for daemons,
+ # typically either based on hostnames or on pod names.
+ # This is the <foo> in mds.<foo>, the ID that will appear
+ # in the FSMap/ServiceMap.
+ self.service_instance = service_instance
+
+ # The type of service (osd, mon, mgr, etc.)
+ self.service_type = service_type
+
+ # Service version that was deployed
+ self.version = version
+
+ # Location of the service configuration when stored in rados
+ # object. Format: "rados://<pool>/[<namespace/>]<object>"
+ self.rados_config_location = rados_config_location
+
+ # If the service exposes REST-like API, this attribute should hold
+ # the URL.
+ self.service_url = service_url
+
+ # Service status: -1 error, 0 stopped, 1 running
+ self.status = status
+
+ # Service status description when status == -1.
+ self.status_desc = status_desc
+
+ # datetime when this info was last refreshed
+ self.last_refresh = None # type: Optional[datetime.datetime]
+
+ def name(self):
+ if self.service_instance:
+ return '%s.%s' % (self.service_type, self.service_instance)
+ return self.service_type
+
+ def __repr__(self):
+ return "<ServiceDescription>({n_name}:{s_type})".format(n_name=self.nodename,
+ s_type=self.name())
+
+ def to_json(self):
+ out = {
+ 'nodename': self.nodename,
+ 'container_id': self.container_id,
+ 'service': self.service,
+ 'service_instance': self.service_instance,
+ 'service_type': self.service_type,
+ 'version': self.version,
+ 'rados_config_location': self.rados_config_location,
+ 'service_url': self.service_url,
+ 'status': self.status,
+ 'status_desc': self.status_desc,
+ }
+ return {k: v for (k, v) in out.items() if v is not None}
+
+ @classmethod
+ @handle_type_error
+ def from_json(cls, data):
+ return cls(**data)
+
+
+class ServiceSpec(object):
+ """
+ Details of service creation.
+
+ Request to the orchestrator for a cluster of daemons
+ such as MDS, RGW, iscsi gateway, MONs, MGRs, Prometheus
+
+ This structure is supposed to be enough information to
+ start the services.
+
+ """
+
+ def __init__(self, name=None, placement=None):
+ # type: (Optional[str], Optional[PlacementSpec]) -> None
+ self.placement = PlacementSpec() if placement is None else placement # type: PlacementSpec
+
+ #: Give this set of stateless services a name: typically it would
+ #: be the name of a CephFS filesystem, RGW zone, etc. Must be unique
+ #: within one ceph cluster. Note: Not all clusters have a name
+ self.name = name # type: Optional[str]
+
+ if self.placement is not None and self.placement.count is not None:
+ #: Count of service instances. Deprecated.
+ self.count = self.placement.count # type: int
+ else:
+ self.count = 1
+
+ def validate_add(self):
+ if not self.name:
+ raise OrchestratorValidationError('Cannot add Service: Name required')
+
+
+class NFSServiceSpec(ServiceSpec):
+ def __init__(self, name, pool=None, namespace=None, placement=None):
+ super(NFSServiceSpec, self).__init__(name, placement)
+
+ #: RADOS pool where NFS client recovery data is stored.
+ self.pool = pool
+
+ #: RADOS namespace where NFS client recovery data is stored in the pool.
+ self.namespace = namespace
+
+ def validate_add(self):
+ super(NFSServiceSpec, self).validate_add()
+
+ if not self.pool:
+ raise OrchestratorValidationError('Cannot add NFS: No Pool specified')
+
+
+class RGWSpec(ServiceSpec):
+ """
+ Settings to configure a (multisite) Ceph RGW
+
+ """
+ def __init__(self,
+ rgw_realm, # type: str
+ rgw_zone, # type: str
+ placement=None,
+ hosts=None, # type: Optional[List[str]]
+ rgw_multisite=None, # type: Optional[bool]
+ rgw_zonemaster=None, # type: Optional[bool]
+ rgw_zonesecondary=None, # type: Optional[bool]
+ rgw_multisite_proto=None, # type: Optional[str]
+ rgw_frontend_port=None, # type: Optional[int]
+ rgw_zonegroup=None, # type: Optional[str]
+ rgw_zone_user=None, # type: Optional[str]
+ system_access_key=None, # type: Optional[str]
+ system_secret_key=None, # type: Optional[str]
+ count=None # type: Optional[int]
+ ):
+ # Regarding default values. Ansible has a `set_rgwspec_defaults` that sets
+        # default values that make sense for Ansible. Rook has default values implemented
+ # in Rook itself. Thus we don't set any defaults here in this class.
+
+ super(RGWSpec, self).__init__(name=rgw_realm + '.' + rgw_zone,
+ placement=placement)
+
+ #: List of hosts where RGWs should run. Not for Rook.
+ if hosts:
+ self.placement = PlacementSpec(hosts=hosts)
+
+ #: is multisite
+ self.rgw_multisite = rgw_multisite
+ self.rgw_zonemaster = rgw_zonemaster
+ self.rgw_zonesecondary = rgw_zonesecondary
+ self.rgw_multisite_proto = rgw_multisite_proto
+ self.rgw_frontend_port = rgw_frontend_port
+
+ self.rgw_realm = rgw_realm
+ self.rgw_zone = rgw_zone
+ self.rgw_zonegroup = rgw_zonegroup
+ self.rgw_zone_user = rgw_zone_user
+
+ self.system_access_key = system_access_key
+ self.system_secret_key = system_secret_key
+
+ @property
+ def rgw_multisite_endpoint_addr(self):
+ """Returns the first host. Not supported for Rook."""
+ return self.placement.hosts[0]
+
+ @property
+ def rgw_multisite_endpoints_list(self):
+ return ",".join(["{}://{}:{}".format(self.rgw_multisite_proto,
+ host,
+ self.rgw_frontend_port) for host in self.placement.hosts])
+
+ def genkey(self, nchars):
+ """ Returns a random string of nchars
+
+ :nchars : Length of the returned string
+ """
+        # TODO Python 3: use the secrets module instead.
+
+ return ''.join(random.choice(string.ascii_uppercase +
+ string.ascii_lowercase +
+ string.digits) for _ in range(nchars))
+
+ @classmethod
+ def from_json(cls, json_rgw_spec):
+ # type: (dict) -> RGWSpec
+ """
+ Initialize 'RGWSpec' object data from a json structure
+        :param json_rgw_spec: A valid dict with the RGW settings
+ """
+ # TODO: also add PlacementSpec(**json_rgw_spec['placement'])
+ args = {k:v for k, v in json_rgw_spec.items()}
+ return RGWSpec(**args)
+
+
+class InventoryFilter(object):
+ """
+ When fetching inventory, use this filter to avoid unnecessarily
+ scanning the whole estate.
+
+    Typical use: filter by node when presenting a UI workflow for configuring
+                 a particular server.
+                 filter by label when not all of the estate is Ceph servers,
+                 and we only want to learn about the Ceph servers.
+                 filter by label when we are particularly interested
+                 in e.g. OSD servers.
+
+ """
+ def __init__(self, labels=None, nodes=None):
+ # type: (Optional[List[str]], Optional[List[str]]) -> None
+
+ #: Optional: get info about nodes matching labels
+ self.labels = labels
+
+ #: Optional: get info about certain named nodes only
+ self.nodes = nodes
+
+
+class InventoryNode(object):
+ """
+    When fetching inventory, all Devices are grouped inside an
+    InventoryNode.
+ """
+ def __init__(self, name, devices=None, labels=None, addr=None):
+ # type: (str, Optional[inventory.Devices], Optional[List[str]], Optional[str]) -> None
+ if devices is None:
+ devices = inventory.Devices([])
+ if labels is None:
+ labels = []
+ assert isinstance(devices, inventory.Devices)
+
+ self.name = name # unique within cluster. For example a hostname.
+ self.addr = addr or name
+ self.devices = devices
+ self.labels = labels
+
+ def to_json(self):
+ return {
+ 'name': self.name,
+ 'addr': self.addr,
+ 'devices': self.devices.to_json(),
+ 'labels': self.labels,
+ }
+
+ @classmethod
+ def from_json(cls, data):
+ try:
+ _data = copy.deepcopy(data)
+ name = _data.pop('name')
+            addr = _data.pop('addr', None) or name
+            devices = inventory.Devices.from_json(_data.pop('devices'))
+            labels = _data.pop('labels', list())
+            if _data:
+                error_msg = 'Unknown key(s) in Inventory: {}'.format(','.join(_data.keys()))
+                raise OrchestratorValidationError(error_msg)
+            return cls(name, devices, labels, addr)
+ except KeyError as e:
+ error_msg = '{} is required for {}'.format(e, cls.__name__)
+ raise OrchestratorValidationError(error_msg)
+ except TypeError as e:
+ raise OrchestratorValidationError('Failed to read inventory: {}'.format(e))
+
+
+ @classmethod
+ def from_nested_items(cls, hosts):
+ devs = inventory.Devices.from_json
+ return [cls(item[0], devs(item[1].data)) for item in hosts]
+
+ def __repr__(self):
+ return "<InventoryNode>({name})".format(name=self.name)
+
+ @staticmethod
+ def get_host_names(nodes):
+ # type: (List[InventoryNode]) -> List[str]
+ return [node.name for node in nodes]
+
+ def __eq__(self, other):
+ return self.name == other.name and self.devices == other.devices
+
+
+class DeviceLightLoc(namedtuple('DeviceLightLoc', ['host', 'dev', 'path'])):
+ """
+ Describes a specific device on a specific host. Used for enabling or disabling LEDs
+ on devices.
+
+    host: hostname as in :func:`orchestrator.Orchestrator.get_hosts`
+
+    dev: device id, e.g. ``ABC1234DEF567-1R1234_ABC8DE0Q``.
+       See ``ceph osd metadata | jq '.[].device_ids'``
+ """
+ __slots__ = ()
+
+
+def _mk_orch_methods(cls):
+ # Needs to be defined outside of for.
+ # Otherwise meth is always bound to last key
+ def shim(method_name):
+ def inner(self, *args, **kwargs):
+ completion = self._oremote(method_name, args, kwargs)
+ return completion
+ return inner
+
+ for meth in Orchestrator.__dict__:
+ if not meth.startswith('_') and meth not in ['is_orchestrator_module']:
+ setattr(cls, meth, shim(meth))
+ return cls
+
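+# Late-binding note, illustrated with a standalone sketch (not part of this module):
+# closures capture variables, not values, so without the extra ``shim`` scope every
+# generated method would end up calling the last method name of the loop:
+#
+#   fns = [lambda: name for name in ('a', 'b')]                   # both return 'b'
+#   fns = [(lambda n: (lambda: n))(name) for name in ('a', 'b')]  # return 'a' and 'b'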
+
+@_mk_orch_methods
+class OrchestratorClientMixin(Orchestrator):
+ """
+    A module that inherits from `OrchestratorClientMixin` can directly call
+ all :class:`Orchestrator` methods without manually calling remote.
+
+ Every interface method from ``Orchestrator`` is converted into a stub method that internally
+ calls :func:`OrchestratorClientMixin._oremote`
+
+ >>> class MyModule(OrchestratorClientMixin):
+ ... def func(self):
+ ... completion = self.add_host('somehost') # calls `_oremote()`
+ ... self._orchestrator_wait([completion])
+ ... self.log.debug(completion.result)
+
+ .. note:: Orchestrator implementations should not inherit from `OrchestratorClientMixin`.
+        The reason is that OrchestratorClientMixin magically redirects all methods to the
+        "real" implementation of the orchestrator.
+
+
+ >>> import mgr_module
+    >>> class MyImplementation(mgr_module.MgrModule, Orchestrator):
+    ...     def __init__(self, ...):
+    ...         self.orch_client = OrchestratorClientMixin()
+    ...         self.orch_client.set_mgr(self.mgr)
+ """
+
+ def set_mgr(self, mgr):
+ # type: (MgrModule) -> None
+ """
+        Usable in the Dashboard, which uses a global ``mgr``
+ """
+
+ self.__mgr = mgr # Make sure we're not overwriting any other `mgr` properties
+
+ def __get_mgr(self):
+ try:
+ return self.__mgr
+ except AttributeError:
+ return self
+
+ def _oremote(self, meth, args, kwargs):
+ """
+ Helper for invoking `remote` on whichever orchestrator is enabled
+
+ :raises RuntimeError: If the remote method failed.
+ :raises OrchestratorError: orchestrator failed to perform
+ :raises ImportError: no `orchestrator` module or backend not found.
+ """
+ mgr = self.__get_mgr()
+
+ try:
+ o = mgr._select_orchestrator()
+ except AttributeError:
+ o = mgr.remote('orchestrator', '_select_orchestrator')
+
+ if o is None:
+ raise NoOrchestrator()
+
+ mgr.log.debug("_oremote {} -> {}.{}(*{}, **{})".format(mgr.module_name, o, meth, args, kwargs))
+ return mgr.remote(o, meth, *args, **kwargs)
+
+ def _orchestrator_wait(self, completions):
+ # type: (List[Completion]) -> None
+ """
+ Wait for completions to complete (reads) or
+ become persistent (writes).
+
+ Waits for writes to be *persistent* but not *effective*.
+
+ :param completions: List of Completions
+ :raises NoOrchestrator:
+ :raises RuntimeError: something went wrong while calling the process method.
+ :raises ImportError: no `orchestrator` module or backend not found.
+ """
+ while any(not c.has_result for c in completions):
+ self.process(completions)
+ self.__get_mgr().log.info("Operations pending: %s",
+ sum(1 for c in completions if not c.has_result))
+ if any(c.needs_result for c in completions):
+ time.sleep(1)
+ else:
+ break
+
+
+class OutdatableData(object):
+ DATEFMT = '%Y-%m-%d %H:%M:%S.%f'
+
+ def __init__(self, data=None, last_refresh=None):
+ # type: (Optional[dict], Optional[datetime.datetime]) -> None
+ self._data = data
+ if data is not None and last_refresh is None:
+ self.last_refresh = datetime.datetime.utcnow() # type: Optional[datetime.datetime]
+ else:
+ self.last_refresh = last_refresh
+
+ def json(self):
+ if self.last_refresh is not None:
+ timestr = self.last_refresh.strftime(self.DATEFMT) # type: Optional[str]
+ else:
+ timestr = None
+
+ return {
+ "data": self._data,
+ "last_refresh": timestr,
+ }
+
+ @property
+ def data(self):
+ return self._data
+
+ # @data.setter
+ # No setter, as it doesn't work as expected: It's not saved in store automatically
+
+ @classmethod
+ def time_from_string(cls, timestr):
+ if timestr is None:
+ return None
+ # drop the 'Z' timezone indication, it's always UTC
+ timestr = timestr.rstrip('Z')
+ return datetime.datetime.strptime(timestr, cls.DATEFMT)
+
+ @classmethod
+ def from_json(cls, data):
+ return cls(data['data'], cls.time_from_string(data['last_refresh']))
+
+ def outdated(self, timeout=None):
+ if timeout is None:
+ timeout = 600
+ if self.last_refresh is None:
+ return True
+ cutoff = datetime.datetime.utcnow() - datetime.timedelta(
+ seconds=timeout)
+ return self.last_refresh < cutoff
+
+ def __repr__(self):
+ return 'OutdatableData(data={}, last_refresh={})'.format(self._data, self.last_refresh)
+
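+# Freshness sketch (illustrative values): an entry created just now is considered fresh,
+# while an entry without a refresh timestamp is always outdated:
+#
+#   assert not OutdatableData({'a': 1}).outdated(timeout=600)
+#   assert OutdatableData().outdated()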
+
+class OutdatableDictMixin(object):
+ """
+ Toolbox for implementing a cache. As every orchestrator has
+ different needs, we cannot implement any logic here.
+ """
+
+ def __getitem__(self, item):
+ # type: (str) -> OutdatableData
+ return OutdatableData.from_json(super(OutdatableDictMixin, self).__getitem__(item)) # type: ignore
+
+ def __setitem__(self, key, value):
+ # type: (str, OutdatableData) -> None
+ val = None if value is None else value.json()
+ super(OutdatableDictMixin, self).__setitem__(key, val) # type: ignore
+
+ def items(self):
+ ## type: () -> Iterator[Tuple[str, OutdatableData]]
+ for item in super(OutdatableDictMixin, self).items(): # type: ignore
+ k, v = item
+ yield k, OutdatableData.from_json(v)
+
+ def items_filtered(self, keys=None):
+ if keys:
+ return [(host, self[host]) for host in keys]
+ else:
+ return list(self.items())
+
+ def any_outdated(self, timeout=None):
+ items = self.items()
+ if not list(items):
+ return True
+ return any([i[1].outdated(timeout) for i in items])
+
+ def remove_outdated(self):
+ outdated = [item[0] for item in self.items() if item[1].outdated()]
+ for o in outdated:
+ del self[o] # type: ignore
+
+ def invalidate(self, key):
+ self[key] = OutdatableData(self[key].data,
+ datetime.datetime.fromtimestamp(0))
+
+
+class OutdatablePersistentDict(OutdatableDictMixin, PersistentStoreDict):
+ pass
+
+
+class OutdatableDict(OutdatableDictMixin, dict):
+ pass
--- /dev/null
+import datetime
+import errno
+import json
+import yaml
+
+from ceph.deployment.inventory import Device
+from prettytable import PrettyTable
+
+from mgr_util import format_bytes, to_pretty_timedelta
+
+try:
+ from typing import List, Set, Optional
+except ImportError:
+ pass # just for type checking.
+
+
+from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection, \
+ DriveGroupSpecs
+from mgr_module import MgrModule, HandleCommandResult
+
+from ._interface import OrchestratorClientMixin, DeviceLightLoc, _cli_read_command, \
+ raise_if_exception, _cli_write_command, TrivialReadCompletion, OrchestratorError, \
+ NoOrchestrator, ServiceSpec, PlacementSpec, OrchestratorValidationError, NFSServiceSpec, \
+ RGWSpec, InventoryFilter, InventoryNode, HostPlacementSpec, HostSpec
+
+
+class OrchestratorCli(OrchestratorClientMixin, MgrModule):
+ MODULE_OPTIONS = [
+ {
+ 'name': 'orchestrator',
+ 'type': 'str',
+ 'default': None,
+ 'desc': 'Orchestrator backend',
+ 'enum_allowed': ['cephadm', 'rook',
+ 'test_orchestrator'],
+ 'runtime': True,
+ },
+ ]
+ NATIVE_OPTIONS = [] # type: List[dict]
+
+ def __init__(self, *args, **kwargs):
+ super(OrchestratorCli, self).__init__(*args, **kwargs)
+ self.ident = set() # type: Set[str]
+ self.fault = set() # type: Set[str]
+ self._load()
+ self._refresh_health()
+
+ def _load(self):
+ active = self.get_store('active_devices')
+ if active:
+ decoded = json.loads(active)
+ self.ident = set(decoded.get('ident', []))
+ self.fault = set(decoded.get('fault', []))
+ self.log.debug('ident {}, fault {}'.format(self.ident, self.fault))
+
+ def _save(self):
+ encoded = json.dumps({
+ 'ident': list(self.ident),
+ 'fault': list(self.fault),
+ })
+ self.set_store('active_devices', encoded)
+
+ def _refresh_health(self):
+ h = {}
+ if self.ident:
+ h['DEVICE_IDENT_ON'] = {
+ 'severity': 'warning',
+ 'summary': '%d devices have ident light turned on' % len(
+ self.ident),
+ 'detail': ['{} ident light enabled'.format(d) for d in self.ident]
+ }
+ if self.fault:
+ h['DEVICE_FAULT_ON'] = {
+ 'severity': 'warning',
+ 'summary': '%d devices have fault light turned on' % len(
+ self.fault),
+                'detail': ['{} fault light enabled'.format(d) for d in self.fault]
+ }
+ self.set_health_checks(h)
+
+ def _get_device_locations(self, dev_id):
+ # type: (str) -> List[DeviceLightLoc]
+ locs = [d['location'] for d in self.get('devices')['devices'] if d['devid'] == dev_id]
+ return [DeviceLightLoc(**l) for l in sum(locs, [])]
+
+ @_cli_read_command(
+ prefix='device ls-lights',
+ desc='List currently active device indicator lights')
+ def _device_ls(self):
+ return HandleCommandResult(
+ stdout=json.dumps({
+ 'ident': list(self.ident),
+ 'fault': list(self.fault)
+ }, indent=4, sort_keys=True))
+
+ def light_on(self, fault_ident, devid):
+ # type: (str, str) -> HandleCommandResult
+ assert fault_ident in ("fault", "ident")
+ locs = self._get_device_locations(devid)
+        if not locs:
+ return HandleCommandResult(stderr='device {} not found'.format(devid),
+ retval=-errno.ENOENT)
+
+ getattr(self, fault_ident).add(devid)
+ self._save()
+ self._refresh_health()
+ completion = self.blink_device_light(fault_ident, True, locs)
+ self._orchestrator_wait([completion])
+ return HandleCommandResult(stdout=str(completion.result))
+
+ def light_off(self, fault_ident, devid, force):
+ # type: (str, str, bool) -> HandleCommandResult
+ assert fault_ident in ("fault", "ident")
+ locs = self._get_device_locations(devid)
+        if not locs:
+ return HandleCommandResult(stderr='device {} not found'.format(devid),
+ retval=-errno.ENOENT)
+
+ try:
+ completion = self.blink_device_light(fault_ident, False, locs)
+ self._orchestrator_wait([completion])
+
+ if devid in getattr(self, fault_ident):
+ getattr(self, fault_ident).remove(devid)
+ self._save()
+ self._refresh_health()
+ return HandleCommandResult(stdout=str(completion.result))
+
+        except Exception:
+            # There are several reasons the try: block might fail:
+            # 1. the device no longer exists
+            # 2. the device is no longer known to Ceph
+            # 3. the host is not reachable
+ if force and devid in getattr(self, fault_ident):
+ getattr(self, fault_ident).remove(devid)
+ self._save()
+ self._refresh_health()
+ raise
+
+ @_cli_write_command(
+ prefix='device light',
+ cmd_args='name=enable,type=CephChoices,strings=on|off '
+ 'name=devid,type=CephString '
+ 'name=light_type,type=CephChoices,strings=ident|fault,req=false '
+ 'name=force,type=CephBool,req=false',
+ desc='Enable or disable the device light. Default type is `ident`\n'
+ 'Usage: device light (on|off) <devid> [ident|fault] [--force]')
+ def _device_light(self, enable, devid, light_type=None, force=False):
+ # type: (str, str, Optional[str], bool) -> HandleCommandResult
+ light_type = light_type or 'ident'
+ on = enable == 'on'
+ if on:
+ return self.light_on(light_type, devid)
+ else:
+ return self.light_off(light_type, devid, force)
+
+ def _select_orchestrator(self):
+ return self.get_module_option("orchestrator")
+
+ @_cli_write_command(
+ 'orch host add',
+ 'name=host,type=CephString,req=true '
+ 'name=addr,type=CephString,req=false '
+ 'name=labels,type=CephString,n=N,req=false',
+ 'Add a host')
+ def _add_host(self, host, addr=None, labels=None):
+ s = HostSpec(hostname=host, addr=addr, labels=labels)
+ completion = self.add_host(s)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch host rm',
+ "name=host,type=CephString,req=true",
+ 'Remove a host')
+ def _remove_host(self, host):
+ completion = self.remove_host(host)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch host set-addr',
+ 'name=host,type=CephString '
+ 'name=addr,type=CephString',
+ 'Update a host address')
+ def _update_set_addr(self, host, addr):
+ completion = self.update_host_addr(host, addr)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_read_command(
+ 'orch host ls',
+ 'name=format,type=CephChoices,strings=json|plain,req=false',
+ 'List hosts')
+ def _get_hosts(self, format='plain'):
+ completion = self.get_hosts()
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ if format == 'json':
+ hosts = [dict(host=node.name, labels=node.labels)
+ for node in completion.result]
+ output = json.dumps(hosts, sort_keys=True)
+ else:
+ table = PrettyTable(
+ ['HOST', 'ADDR', 'LABELS'],
+ border=False)
+ table.align = 'l'
+ table.left_padding_width = 0
+ table.right_padding_width = 1
+ for node in completion.result:
+ table.add_row((node.name, node.addr, ' '.join(node.labels)))
+ output = table.get_string()
+ return HandleCommandResult(stdout=output)
+
+ @_cli_write_command(
+ 'orch host label add',
+ 'name=host,type=CephString '
+ 'name=label,type=CephString',
+ 'Add a host label')
+ def _host_label_add(self, host, label):
+ completion = self.add_host_label(host, label)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch host label rm',
+ 'name=host,type=CephString '
+ 'name=label,type=CephString',
+        'Remove a host label')
+ def _host_label_rm(self, host, label):
+ completion = self.remove_host_label(host, label)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_read_command(
+ 'orch device ls',
+ "name=host,type=CephString,n=N,req=false "
+ "name=format,type=CephChoices,strings=json|plain,req=false "
+ "name=refresh,type=CephBool,req=false",
+ 'List devices on a node')
+ def _list_devices(self, host=None, format='plain', refresh=False):
+ # type: (Optional[List[str]], str, bool) -> HandleCommandResult
+ """
+ Provide information about storage devices present in cluster hosts
+
+ Note: this does not have to be completely synchronous. Slightly out of
+ date hardware inventory is fine as long as hardware ultimately appears
+ in the output of this command.
+ """
+ nf = InventoryFilter(nodes=host) if host else None
+
+ completion = self.get_inventory(node_filter=nf, refresh=refresh)
+
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+
+ if format == 'json':
+ data = [n.to_json() for n in completion.result]
+ return HandleCommandResult(stdout=json.dumps(data))
+ else:
+ out = []
+
+ table = PrettyTable(
+ ['HOST', 'PATH', 'TYPE', 'SIZE', 'DEVICE', 'AVAIL',
+ 'REJECT REASONS'],
+ border=False)
+ table.align = 'l'
+ table._align['SIZE'] = 'r'
+ table.left_padding_width = 0
+ table.right_padding_width = 1
+ for host_ in completion.result: # type: InventoryNode
+ for d in host_.devices.devices: # type: Device
+ table.add_row(
+ (
+ host_.name,
+ d.path,
+ d.human_readable_type,
+ format_bytes(d.sys_api.get('size', 0), 5),
+ d.device_id,
+ d.available,
+ ', '.join(d.rejected_reasons)
+ )
+ )
+ out.append(table.get_string())
+ return HandleCommandResult(stdout='\n'.join(out))
+
+ @_cli_read_command(
+ 'orch ps',
+ "name=host,type=CephString,req=false "
+ "name=daemon_type,type=CephChoices,strings=mon|mgr|osd|mds|iscsi|nfs|rgw|rbd-mirror,req=false "
+ "name=daemon_id,type=CephString,req=false "
+ "name=format,type=CephChoices,strings=json|plain,req=false "
+ "name=refresh,type=CephBool,req=false",
+ 'List daemons known to orchestrator')
+ def _list_daemons(self, host=None, daemon_type=None, daemon_id=None, format='plain', refresh=False):
+ completion = self.list_daemons(daemon_type,
+ daemon_id=daemon_id,
+ host=host,
+ refresh=refresh)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ daemons = completion.result
+
+ def ukn(s):
+ return '<unknown>' if s is None else s
+ # Sort the list for display
+ daemons.sort(key=lambda s: (ukn(s.daemon_type), ukn(s.nodename), ukn(s.daemon_id)))
+
+ if len(daemons) == 0:
+ return HandleCommandResult(stdout="No daemons reported")
+ elif format == 'json':
+ data = [s.to_json() for s in daemons]
+ return HandleCommandResult(stdout=json.dumps(data))
+ else:
+ now = datetime.datetime.utcnow()
+ table = PrettyTable(
+ ['NAME', 'HOST', 'STATUS', 'REFRESHED',
+ 'VERSION', 'IMAGE NAME', 'IMAGE ID', 'CONTAINER ID'],
+ border=False)
+ table.align = 'l'
+ table.left_padding_width = 0
+ table.right_padding_width = 1
+ for s in sorted(daemons, key=lambda s: s.name()):
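+ # Map the backend's numeric daemon status to a human-readable label.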
+ status = {
+ -1: 'error',
+ 0: 'stopped',
+ 1: 'running',
+ None: '<unknown>'
+ }[s.status]
+
+ if s.last_refresh:
+ age = to_pretty_timedelta(now - s.last_refresh) + ' ago'
+ else:
+ age = '-'
+ table.add_row((
+ s.name(),
+ ukn(s.nodename),
+ status,
+ age,
+ ukn(s.version),
+ ukn(s.container_image_name),
+ ukn(s.container_image_id)[0:12],
+ ukn(s.container_id)[0:12]))
+
+ return HandleCommandResult(stdout=table.get_string())
+
+ @_cli_write_command(
+ 'orch osd create',
+ "name=svc_arg,type=CephString,req=false",
+ 'Create an OSD service. Either --svc_arg=host:drives or -i <drive_group>')
+ def _create_osd(self, svc_arg=None, inbuf=None):
+ # type: (Optional[str], Optional[str]) -> HandleCommandResult
+ """Create one or more OSDs"""
+
+ usage = """
+Usage:
+ ceph orch osd create -i <json_file/yaml_file>
+ ceph orch osd create host:device1,device2,...
+"""
+
+ if inbuf:
+ try:
+ dgs = DriveGroupSpecs(yaml.safe_load(inbuf))
+ drive_groups = dgs.drive_groups
+ except (ValueError, yaml.YAMLError) as e:
+ msg = 'Failed to parse drive group input: {}'.format(str(e)) + usage
+ return HandleCommandResult(-errno.EINVAL, stderr=msg)
+
+ elif svc_arg:
+ try:
+ node_name, block_device = svc_arg.split(":")
+ block_devices = block_device.split(',')
+ except (TypeError, KeyError, ValueError):
+ msg = "Invalid host:device spec: '{}'".format(svc_arg) + usage
+ return HandleCommandResult(-errno.EINVAL, stderr=msg)
+
+ devs = DeviceSelection(paths=block_devices)
+ drive_groups = [DriveGroupSpec(node_name, data_devices=devs)]
+ else:
+ return HandleCommandResult(-errno.EINVAL, stderr=usage)
+
+ completion = self.create_osds(drive_groups)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch daemon add mon',
+ "name=num,type=CephInt,req=false "
+ "name=hosts,type=CephString,n=N,req=false "
+ "name=label,type=CephString,req=false",
+ 'Start monitor daemon(s)')
+ def _daemon_add_mon(self, num=None, hosts=[], label=None):
+ if not num and not hosts and not label:
+ # Improve Error message. Point to parse_host_spec examples
+ raise OrchestratorValidationError("Mons need a placement spec. (num, host, network, name(opt))")
+ placement = PlacementSpec(label=label, count=num, hosts=hosts)
+ placement.validate()
+
+ spec = ServiceSpec(placement=placement)
+
+ completion = self.add_mon(spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch daemon add mgr',
+ "name=num,type=CephInt,req=false "
+ "name=hosts,type=CephString,n=N,req=false",
+ 'Start mgr daemon(s)')
+ def _daemon_add_mgr(self, num=None, hosts=None):
+ spec = ServiceSpec(
+ placement=PlacementSpec(hosts=hosts, count=num))
+ completion = self.add_mgr(spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch daemon add rbd-mirror',
+ "name=num,type=CephInt,req=false "
+ "name=hosts,type=CephString,n=N,req=false",
+ 'Start rbd-mirror daemon(s)')
+ def _rbd_mirror_add(self, num=None, hosts=None):
+ spec = ServiceSpec(
+ None,
+ placement=PlacementSpec(hosts=hosts, count=num))
+ completion = self.add_rbd_mirror(spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch daemon add mds',
+ "name=fs_name,type=CephString "
+ "name=num,type=CephInt,req=false "
+ "name=hosts,type=CephString,n=N,req=false",
+ 'Start MDS daemon(s)')
+ def _mds_add(self, fs_name, num=None, hosts=None):
+ spec = ServiceSpec(
+ fs_name,
+ placement=PlacementSpec(hosts=hosts, count=num))
+ completion = self.add_mds(spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch daemon add rgw',
+ 'name=realm_name,type=CephString '
+ 'name=zone_name,type=CephString '
+ 'name=num,type=CephInt,req=false '
+ "name=hosts,type=CephString,n=N,req=false",
+ 'Start RGW daemon(s)')
+ def _rgw_add(self, realm_name, zone_name, num=1, hosts=None, inbuf=None):
+ usage = """
+Usage:
+ ceph orch rgw add -i <json_file>
+ ceph orch rgw add <realm_name> <zone_name>
+ """
+ if inbuf:
+ try:
+ rgw_spec = RGWSpec.from_json(json.loads(inbuf))
+ except ValueError as e:
+ msg = 'Failed to read JSON input: {}'.format(str(e)) + usage
+ return HandleCommandResult(-errno.EINVAL, stderr=msg)
+ else:
+ # Without -i, build the spec from the positional realm/zone arguments.
+ rgw_spec = RGWSpec(
+ rgw_realm=realm_name,
+ rgw_zone=zone_name,
+ placement=PlacementSpec(hosts=hosts, count=num))
+
+ completion = self.add_rgw(rgw_spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch daemon add nfs',
+ "name=svc_arg,type=CephString "
+ "name=pool,type=CephString "
+ "name=namespace,type=CephString,req=false "
+ 'name=num,type=CephInt,req=false '
+ 'name=hosts,type=CephString,n=N,req=false '
+ 'name=label,type=CephString,req=false',
+ 'Start NFS daemon(s)')
+ def _nfs_add(self, svc_arg, pool, namespace=None, num=None, label=None, hosts=[]):
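+ # svc_arg is the NFS service id; pool and (optional) namespace tell the backend
+ # where to store the NFS service's configuration (assumed RADOS-backed).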
+ spec = NFSServiceSpec(
+ svc_arg,
+ pool=pool,
+ namespace=namespace,
+ placement=PlacementSpec(label=label, hosts=hosts, count=num),
+ )
+ spec.validate_add()
+ completion = self.add_nfs(spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch daemon add prometheus',
+ 'name=num,type=CephInt,req=false '
+ 'name=hosts,type=CephString,n=N,req=false '
+ 'name=label,type=CephString,req=false',
+ 'Add prometheus daemon(s)')
+ def _daemon_add_prometheus(self, num=None, label=None, hosts=[]):
+ # type: (Optional[int], Optional[str], List[str]) -> HandleCommandResult
+ spec = ServiceSpec(
+ placement=PlacementSpec(label=label, hosts=hosts, count=num),
+ )
+ completion = self.add_prometheus(spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch',
+ "name=action,type=CephChoices,strings=start|stop|restart|redeploy|reconfig "
+ "name=svc_name,type=CephString",
+ 'Start, stop, restart, redeploy, or reconfig an entire service (i.e. all daemons)')
+ def _service_action(self, action, svc_name):
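+ # svc_name is either a bare service type (e.g. "mon") or "type.id" (e.g. "mds.cephfs").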
+ if '.' in svc_name:
+ (service_type, service_id) = svc_name.split('.', 1)
+ else:
+ service_type = svc_name
+ service_id = None
+ completion = self.service_action(action, service_type, service_id)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch daemon',
+ "name=action,type=CephChoices,strings=start|stop|restart|redeploy|reconfig "
+ "name=name,type=CephString",
+ 'Start, stop, restart, redeploy, or reconfig a specific daemon')
+ def _daemon_action(self, action, name):
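+ # Daemon names are always "type.id", e.g. "osd.3" or "mgr.x".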
+ if '.' not in name:
+ raise OrchestratorError('%s is not a valid daemon name' % name)
+ (daemon_type, daemon_id) = name.split('.', 1)
+ completion = self.daemon_action(action, daemon_type, daemon_id)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch daemon rm',
+ "name=names,type=CephString,n=N "
+ 'name=force,type=CephBool,req=false',
+ 'Remove specific daemon(s)')
+ def _daemon_rm(self, names, force=False):
+ for name in names:
+ if '.' not in name:
+ raise OrchestratorError('%s is not a valid daemon name' % name)
+ completion = self.remove_daemons(names, force)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch rm',
+ "name=name,type=CephString",
+ 'Remove a service')
+ def _service_rm(self, name):
+ if '.' in name:
+ (service_type, service_name) = name.split('.', 1)
+ else:
+ service_type = name
+ service_name = None
+ if name in ['mon', 'mgr']:
+ raise OrchestratorError('The mon and mgr services cannot be removed')
+ completion = self.remove_service(service_type, service_name)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch apply mgr',
+ "name=num,type=CephInt,req=false "
+ "name=hosts,type=CephString,n=N,req=false "
+ "name=label,type=CephString,req=false",
+ 'Update the size or placement of managers')
+ def _apply_mgr(self, num=None, hosts=[], label=None):
+ placement = PlacementSpec(
+ label=label, count=num, hosts=hosts)
+ placement.validate()
+
+ spec = ServiceSpec(placement=placement)
+
+ completion = self.apply_mgr(spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch apply mon',
+ "name=num,type=CephInt,req=false "
+ "name=hosts,type=CephString,n=N,req=false "
+ "name=label,type=CephString,req=false",
+ 'Update the number of monitor instances')
+ def _apply_mon(self, num=None, hosts=[], label=None):
+ if not num and not hosts and not label:
+ # Improve Error message. Point to parse_host_spec examples
+ raise OrchestratorValidationError("Mons need a placement spec. (num, host, network, name(opt))")
+ placement = PlacementSpec(label=label, count=num, hosts=hosts)
+ placement.validate()
+
+ spec = ServiceSpec(placement=placement)
+
+ completion = self.apply_mon(spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch apply mds',
+ "name=fs_name,type=CephString "
+ "name=num,type=CephInt,req=false "
+ "name=hosts,type=CephString,n=N,req=false "
+ "name=label,type=CephString,req=false",
+ 'Update the number of MDS instances for the given fs_name')
+ def _apply_mds(self, fs_name, num=None, label=None, hosts=[]):
+ placement = PlacementSpec(label=label, count=num, hosts=hosts)
+ placement.validate()
+
+ spec = ServiceSpec(
+ fs_name,
+ placement=placement)
+
+ completion = self.apply_mds(spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch apply rbd-mirror',
+ "name=num,type=CephInt,req=false "
+ "name=hosts,type=CephString,n=N,req=false "
+ "name=label,type=CephString,req=false",
+ 'Update the number of rbd-mirror instances')
+ def _apply_rbd_mirror(self, num=None, label=None, hosts=[]):
+ spec = ServiceSpec(
+ placement=PlacementSpec(hosts=hosts, count=num, label=label))
+ completion = self.apply_rbd_mirror(spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch apply rgw',
+ 'name=realm_name,type=CephString '
+ 'name=zone_name,type=CephString '
+ 'name=num,type=CephInt,req=false '
+ 'name=hosts,type=CephString,n=N,req=false '
+ 'name=label,type=CephString,req=false',
+ 'Update the number of RGW instances for the given zone')
+ def _apply_rgw(self, zone_name, realm_name, num=None, label=None, hosts=[]):
+ spec = RGWSpec(
+ rgw_realm=realm_name,
+ rgw_zone=zone_name,
+ placement=PlacementSpec(hosts=hosts, label=label, count=num))
+ completion = self.apply_rgw(spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch apply nfs',
+ "name=svc_id,type=CephString "
+ 'name=num,type=CephInt,req=false '
+ 'name=hosts,type=CephString,n=N,req=false '
+ 'name=label,type=CephString,req=false',
+ 'Scale an NFS service')
+ def _apply_nfs(self, svc_id, num=None, label=None, hosts=[]):
+ # type: (str, Optional[int], Optional[str], List[str]) -> HandleCommandResult
+ spec = NFSServiceSpec(
+ svc_id,
+ placement=PlacementSpec(label=label, hosts=hosts, count=num),
+ )
+ completion = self.apply_nfs(spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch apply prometheus',
+ 'name=num,type=CephInt,req=false '
+ 'name=hosts,type=CephString,n=N,req=false '
+ 'name=label,type=CephString,req=false',
+ 'Scale prometheus service')
+ def _apply_prometheus(self, num=None, label=None, hosts=[]):
+ # type: (Optional[int], Optional[str], List[str]) -> HandleCommandResult
+ spec = ServiceSpec(
+ placement=PlacementSpec(label=label, hosts=hosts, count=num),
+ )
+ completion = self.apply_prometheus(spec)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'orch set backend',
+ "name=module_name,type=CephString,req=true",
+ 'Select orchestrator module backend')
+ def _set_backend(self, module_name):
+ """
+ We implement a setter command instead of just having the user
+ modify the setting directly, so that we can validate they're setting
+ it to a module that really exists and is enabled.
+
+ There isn't a mechanism for ensuring they don't *disable* the module
+ later, but this is better than nothing.
+ """
+ mgr_map = self.get("mgr_map")
+
+ if module_name is None or module_name == "":
+ self.set_module_option("orchestrator", None)
+ return HandleCommandResult()
+
+ for module in mgr_map['available_modules']:
+ if module['name'] != module_name:
+ continue
+
+ if not module['can_run']:
+ continue
+
+ enabled = module['name'] in mgr_map['modules']
+ if not enabled:
+ return HandleCommandResult(-errno.EINVAL,
+ stderr="Module '{module_name}' is not enabled. \n Run "
+ "`ceph mgr module enable {module_name}` "
+ "to enable.".format(module_name=module_name))
+
+ try:
+ is_orchestrator = self.remote(module_name,
+ "is_orchestrator_module")
+ except NameError:
+ is_orchestrator = False
+
+ if not is_orchestrator:
+ return HandleCommandResult(-errno.EINVAL,
+ stderr="'{0}' is not an orchestrator module".format(module_name))
+
+ self.set_module_option("orchestrator", module_name)
+
+ return HandleCommandResult()
+
+ return HandleCommandResult(-errno.EINVAL, stderr="Module '{0}' not found".format(module_name))
+
+ @_cli_write_command(
+ 'orch cancel',
+ desc='Cancel ongoing operations')
+ def _cancel(self):
+ """
+ ProgressReferences might get stuck. Let's unstick them.
+ """
+ self.cancel_completions()
+ return HandleCommandResult()
+
+ @_cli_read_command(
+ 'orch status',
+ desc='Report configured backend and its status')
+ def _status(self):
+ o = self._select_orchestrator()
+ if o is None:
+ raise NoOrchestrator()
+
+ avail, why = self.available()
+ if avail is None:
+ # The module does not report its availability
+ return HandleCommandResult(stdout="Backend: {0}".format(o))
+ else:
+ return HandleCommandResult(stdout="Backend: {0}\nAvailable: {1}{2}".format(
+ o, avail,
+ " ({0})".format(why) if not avail else ""
+ ))
+
+ def self_test(self):
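+ # Round-trip the backend setting and verify that exceptions raised in a remote
+ # module are re-raised here with their original type and arguments.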
+ old_orch = self._select_orchestrator()
+ self._set_backend('')
+ assert self._select_orchestrator() is None
+ self._set_backend(old_orch)
+
+ e1 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "ZeroDivisionError")
+ try:
+ raise_if_exception(e1)
+ assert False
+ except ZeroDivisionError as e:
+ assert e.args == ('hello', 'world')
+
+ e2 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "OrchestratorError")
+ try:
+ raise_if_exception(e2)
+ assert False
+ except OrchestratorError as e:
+ assert e.args == ('hello', 'world')
+
+ c = TrivialReadCompletion(result=True)
+ assert c.has_result
+
+ @_cli_write_command(
+ 'upgrade check',
+ 'name=image,type=CephString,req=false '
+ 'name=ceph_version,type=CephString,req=false',
+ desc='Check service versions vs available and target containers')
+ def _upgrade_check(self, image=None, ceph_version=None):
+ completion = self.upgrade_check(image=image, version=ceph_version)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'upgrade status',
+ desc='Check the status of any in-progress upgrade')
+ def _upgrade_status(self):
+ completion = self.upgrade_status()
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
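+ # Summarize the upgrade status object returned by the backend as JSON.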
+ r = {
+ 'target_image': completion.result.target_image,
+ 'in_progress': completion.result.in_progress,
+ 'services_complete': completion.result.services_complete,
+ 'message': completion.result.message,
+ }
+ out = json.dumps(r, indent=4)
+ return HandleCommandResult(stdout=out)
+
+ @_cli_write_command(
+ 'upgrade start',
+ 'name=image,type=CephString,req=false '
+ 'name=ceph_version,type=CephString,req=false',
+ desc='Initiate upgrade')
+ def _upgrade_start(self, image=None, ceph_version=None):
+ completion = self.upgrade_start(image, ceph_version)
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'upgrade pause',
+ desc='Pause an in-progress upgrade')
+ def _upgrade_pause(self):
+ completion = self.upgrade_pause()
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'upgrade resume',
+ desc='Resume paused upgrade')
+ def _upgrade_resume(self):
+ completion = self.upgrade_resume()
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+
+ @_cli_write_command(
+ 'upgrade stop',
+ desc='Stop an in-progress upgrade')
+ def _upgrade_stop(self):
+ completion = self.upgrade_stop()
+ self._orchestrator_wait([completion])
+ raise_if_exception(completion)
+ return HandleCommandResult(stdout=completion.result_str())
+++ /dev/null
-# Orchestrator CLI
-
-See also [orchestrator cli doc](https://docs.ceph.com/docs/master/mgr/orchestrator_cli/).
-
-## Running the Teuthology tests
-
-To run the API tests against a real Ceph cluster, we leverage the Teuthology
-framework and the `test_orchestrator` backend.
-
-``source`` the script and run the tests manually::
-
- $ pushd ../dashboard ; source ./run-backend-api-tests.sh ; popd
- $ run_teuthology_tests tasks.mgr.test_orchestrator_cli
- $ cleanup_teuthology
+++ /dev/null
-from __future__ import absolute_import
-
-from .module import OrchestratorCli
+++ /dev/null
-import datetime
-import errno
-import json
-import yaml
-from functools import wraps
-
-from ceph.deployment.inventory import Device
-from prettytable import PrettyTable
-
-from mgr_util import format_bytes, to_pretty_timedelta
-
-try:
- from typing import List, Set, Optional
-except ImportError:
- pass # just for type checking.
-
-
-from ceph.deployment.drive_group import DriveGroupSpec, DriveGroupValidationError, \
- DeviceSelection, DriveGroupSpecs
-from mgr_module import MgrModule, CLICommand, HandleCommandResult
-
-import orchestrator
-
-
-class OrchestratorCli(orchestrator.OrchestratorClientMixin, MgrModule):
- MODULE_OPTIONS = [
- {
- 'name': 'orchestrator',
- 'type': 'str',
- 'default': None,
- 'desc': 'Orchestrator backend',
- 'enum_allowed': ['cephadm', 'rook',
- 'test_orchestrator'],
- 'runtime': True,
- },
- ]
- NATIVE_OPTIONS = [] # type: List[dict]
-
- def __init__(self, *args, **kwargs):
- super(OrchestratorCli, self).__init__(*args, **kwargs)
- self.ident = set() # type: Set[str]
- self.fault = set() # type: Set[str]
- self._load()
- self._refresh_health()
-
- def _load(self):
- active = self.get_store('active_devices')
- if active:
- decoded = json.loads(active)
- self.ident = set(decoded.get('ident', []))
- self.fault = set(decoded.get('fault', []))
- self.log.debug('ident {}, fault {}'.format(self.ident, self.fault))
-
- def _save(self):
- encoded = json.dumps({
- 'ident': list(self.ident),
- 'fault': list(self.fault),
- })
- self.set_store('active_devices', encoded)
-
- def _refresh_health(self):
- h = {}
- if self.ident:
- h['DEVICE_IDENT_ON'] = {
- 'severity': 'warning',
- 'summary': '%d devices have ident light turned on' % len(
- self.ident),
- 'detail': ['{} ident light enabled'.format(d) for d in self.ident]
- }
- if self.fault:
- h['DEVICE_FAULT_ON'] = {
- 'severity': 'warning',
- 'summary': '%d devices have fault light turned on' % len(
- self.fault),
- 'detail': ['{} fault light enabled'.format(d) for d in self.ident]
- }
- self.set_health_checks(h)
-
- def _get_device_locations(self, dev_id):
- # type: (str) -> List[orchestrator.DeviceLightLoc]
- locs = [d['location'] for d in self.get('devices')['devices'] if d['devid'] == dev_id]
- return [orchestrator.DeviceLightLoc(**l) for l in sum(locs, [])]
-
- @orchestrator._cli_read_command(
- prefix='device ls-lights',
- desc='List currently active device indicator lights')
- def _device_ls(self):
- return HandleCommandResult(
- stdout=json.dumps({
- 'ident': list(self.ident),
- 'fault': list(self.fault)
- }, indent=4, sort_keys=True))
-
- def light_on(self, fault_ident, devid):
- # type: (str, str) -> HandleCommandResult
- assert fault_ident in ("fault", "ident")
- locs = self._get_device_locations(devid)
- if locs is None:
- return HandleCommandResult(stderr='device {} not found'.format(devid),
- retval=-errno.ENOENT)
-
- getattr(self, fault_ident).add(devid)
- self._save()
- self._refresh_health()
- completion = self.blink_device_light(fault_ident, True, locs)
- self._orchestrator_wait([completion])
- return HandleCommandResult(stdout=str(completion.result))
-
- def light_off(self, fault_ident, devid, force):
- # type: (str, str, bool) -> HandleCommandResult
- assert fault_ident in ("fault", "ident")
- locs = self._get_device_locations(devid)
- if locs is None:
- return HandleCommandResult(stderr='device {} not found'.format(devid),
- retval=-errno.ENOENT)
-
- try:
- completion = self.blink_device_light(fault_ident, False, locs)
- self._orchestrator_wait([completion])
-
- if devid in getattr(self, fault_ident):
- getattr(self, fault_ident).remove(devid)
- self._save()
- self._refresh_health()
- return HandleCommandResult(stdout=str(completion.result))
-
- except:
- # There are several reasons the try: block might fail:
- # 1. the device no longer exist
- # 2. the device is no longer known to Ceph
- # 3. the host is not reachable
- if force and devid in getattr(self, fault_ident):
- getattr(self, fault_ident).remove(devid)
- self._save()
- self._refresh_health()
- raise
-
- @orchestrator._cli_write_command(
- prefix='device light',
- cmd_args='name=enable,type=CephChoices,strings=on|off '
- 'name=devid,type=CephString '
- 'name=light_type,type=CephChoices,strings=ident|fault,req=false '
- 'name=force,type=CephBool,req=false',
- desc='Enable or disable the device light. Default type is `ident`\n'
- 'Usage: device light (on|off) <devid> [ident|fault] [--force]')
- def _device_light(self, enable, devid, light_type=None, force=False):
- # type: (str, str, Optional[str], bool) -> HandleCommandResult
- light_type = light_type or 'ident'
- on = enable == 'on'
- if on:
- return self.light_on(light_type, devid)
- else:
- return self.light_off(light_type, devid, force)
-
- def _select_orchestrator(self):
- return self.get_module_option("orchestrator")
-
- @orchestrator._cli_write_command(
- 'orch host add',
- 'name=host,type=CephString,req=true '
- 'name=addr,type=CephString,req=false '
- 'name=labels,type=CephString,n=N,req=false',
- 'Add a host')
- def _add_host(self, host, addr=None, labels=None):
- s = orchestrator.HostSpec(hostname=host, addr=addr, labels=labels)
- completion = self.add_host(s)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch host rm',
- "name=host,type=CephString,req=true",
- 'Remove a host')
- def _remove_host(self, host):
- completion = self.remove_host(host)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch host set-addr',
- 'name=host,type=CephString '
- 'name=addr,type=CephString',
- 'Update a host address')
- def _update_set_addr(self, host, addr):
- completion = self.update_host_addr(host, addr)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_read_command(
- 'orch host ls',
- 'name=format,type=CephChoices,strings=json|plain,req=false',
- 'List hosts')
- def _get_hosts(self, format='plain'):
- completion = self.get_hosts()
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- if format == 'json':
- hosts = [dict(host=node.name, labels=node.labels)
- for node in completion.result]
- output = json.dumps(hosts, sort_keys=True)
- else:
- table = PrettyTable(
- ['HOST', 'ADDR', 'LABELS'],
- border=False)
- table.align = 'l'
- table.left_padding_width = 0
- table.right_padding_width = 1
- for node in completion.result:
- table.add_row((node.name, node.addr, ' '.join(node.labels)))
- output = table.get_string()
- return HandleCommandResult(stdout=output)
-
- @orchestrator._cli_write_command(
- 'orch host label add',
- 'name=host,type=CephString '
- 'name=label,type=CephString',
- 'Add a host label')
- def _host_label_add(self, host, label):
- completion = self.add_host_label(host, label)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch host label rm',
- 'name=host,type=CephString '
- 'name=label,type=CephString',
- 'Add a host label')
- def _host_label_rm(self, host, label):
- completion = self.remove_host_label(host, label)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_read_command(
- 'orch device ls',
- "name=host,type=CephString,n=N,req=false "
- "name=format,type=CephChoices,strings=json|plain,req=false "
- "name=refresh,type=CephBool,req=false",
- 'List devices on a node')
- def _list_devices(self, host=None, format='plain', refresh=False):
- # type: (Optional[List[str]], str, bool) -> HandleCommandResult
- """
- Provide information about storage devices present in cluster hosts
-
- Note: this does not have to be completely synchronous. Slightly out of
- date hardware inventory is fine as long as hardware ultimately appears
- in the output of this command.
- """
- nf = orchestrator.InventoryFilter(nodes=host) if host else None
-
- completion = self.get_inventory(node_filter=nf, refresh=refresh)
-
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
-
- if format == 'json':
- data = [n.to_json() for n in completion.result]
- return HandleCommandResult(stdout=json.dumps(data))
- else:
- out = []
-
- table = PrettyTable(
- ['HOST', 'PATH', 'TYPE', 'SIZE', 'DEVICE', 'AVAIL',
- 'REJECT REASONS'],
- border=False)
- table.align = 'l'
- table._align['SIZE'] = 'r'
- table.left_padding_width = 0
- table.right_padding_width = 1
- for host_ in completion.result: # type: orchestrator.InventoryNode
- for d in host_.devices.devices: # type: Device
- table.add_row(
- (
- host_.name,
- d.path,
- d.human_readable_type,
- format_bytes(d.sys_api.get('size', 0), 5),
- d.device_id,
- d.available,
- ', '.join(d.rejected_reasons)
- )
- )
- out.append(table.get_string())
- return HandleCommandResult(stdout='\n'.join(out))
-
- @orchestrator._cli_read_command(
- 'orch ps',
- "name=host,type=CephString,req=false "
- "name=daemon_type,type=CephChoices,strings=mon|mgr|osd|mds|iscsi|nfs|rgw|rbd-mirror,req=false "
- "name=daemon_id,type=CephString,req=false "
- "name=format,type=CephChoices,strings=json|plain,req=false "
- "name=refresh,type=CephBool,req=false",
- 'List daemons known to orchestrator')
- def _list_daemons(self, host=None, daemon_type=None, daemon_id=None, format='plain', refresh=False):
- completion = self.list_daemons(daemon_type,
- daemon_id=daemon_id,
- host=host,
- refresh=refresh)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- daemons = completion.result
-
- def ukn(s):
- return '<unknown>' if s is None else s
- # Sort the list for display
- daemons.sort(key=lambda s: (ukn(s.daemon_type), ukn(s.nodename), ukn(s.daemon_id)))
-
- if len(daemons) == 0:
- return HandleCommandResult(stdout="No daemons reported")
- elif format == 'json':
- data = [s.to_json() for s in daemons]
- return HandleCommandResult(stdout=json.dumps(data))
- else:
- now = datetime.datetime.utcnow()
- table = PrettyTable(
- ['NAME', 'HOST', 'STATUS', 'REFRESHED',
- 'VERSION', 'IMAGE NAME', 'IMAGE ID', 'CONTAINER ID'],
- border=False)
- table.align = 'l'
- table.left_padding_width = 0
- table.right_padding_width = 1
- for s in sorted(daemons, key=lambda s: s.name()):
- status = {
- -1: 'error',
- 0: 'stopped',
- 1: 'running',
- None: '<unknown>'
- }[s.status]
-
- if s.last_refresh:
- age = to_pretty_timedelta(now - s.last_refresh) + ' ago'
- else:
- age = '-'
- table.add_row((
- s.name(),
- ukn(s.nodename),
- status,
- age,
- ukn(s.version),
- ukn(s.container_image_name),
- ukn(s.container_image_id)[0:12],
- ukn(s.container_id)[0:12]))
-
- return HandleCommandResult(stdout=table.get_string())
-
- @orchestrator._cli_write_command(
- 'orch osd create',
- "name=svc_arg,type=CephString,req=false",
- 'Create an OSD service. Either --svc_arg=host:drives or -i <drive_group>')
- def _create_osd(self, svc_arg=None, inbuf=None):
- # type: (Optional[str], Optional[str]) -> HandleCommandResult
- """Create one or more OSDs"""
-
- usage = """
-Usage:
- ceph orch osd create -i <json_file/yaml_file>
- ceph orch osd create host:device1,device2,...
-"""
-
- if inbuf:
- try:
- dgs = DriveGroupSpecs(yaml.load(inbuf))
- drive_groups = dgs.drive_groups
- except ValueError as e:
- msg = 'Failed to read JSON input: {}'.format(str(e)) + usage
- return HandleCommandResult(-errno.EINVAL, stderr=msg)
-
- elif svc_arg:
- try:
- node_name, block_device = svc_arg.split(":")
- block_devices = block_device.split(',')
- except (TypeError, KeyError, ValueError):
- msg = "Invalid host:device spec: '{}'".format(svc_arg) + usage
- return HandleCommandResult(-errno.EINVAL, stderr=msg)
-
- devs = DeviceSelection(paths=block_devices)
- drive_groups = [DriveGroupSpec(node_name, data_devices=devs)]
- else:
- return HandleCommandResult(-errno.EINVAL, stderr=usage)
-
- completion = self.create_osds(drive_groups)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch daemon add mon',
- "name=num,type=CephInt,req=false "
- "name=hosts,type=CephString,n=N,req=false "
- "name=label,type=CephString,req=false",
- 'Start monitor daemon(s)')
- def _daemon_add_mon(self, num=None, hosts=[], label=None):
- if not num and not hosts and not label:
- # Improve Error message. Point to parse_host_spec examples
- raise orchestrator.OrchestratorValidationError("Mons need a placement spec. (num, host, network, name(opt))")
- placement = orchestrator.PlacementSpec(label=label, count=num, hosts=hosts)
- placement.validate()
-
- spec = orchestrator.ServiceSpec(placement=placement)
-
- completion = self.add_mon(spec)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch daemon add mgr',
- "name=num,type=CephInt,req=false "
- "name=hosts,type=CephString,n=N,req=false",
- 'Start rbd-mirror daemon(s)')
- def _daemon_add_mgr(self, num=None, hosts=None):
- spec = orchestrator.ServiceSpec(
- placement=orchestrator.PlacementSpec(hosts=hosts, count=num))
- completion = self.add_mgr(spec)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch daemon add rbd-mirror',
- "name=num,type=CephInt,req=false "
- "name=hosts,type=CephString,n=N,req=false",
- 'Start rbd-mirror daemon(s)')
- def _rbd_mirror_add(self, num=None, hosts=None):
- spec = orchestrator.ServiceSpec(
- None,
- placement=orchestrator.PlacementSpec(hosts=hosts, count=num))
- completion = self.add_rbd_mirror(spec)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch daemon add mds',
- "name=fs_name,type=CephString "
- "name=num,type=CephInt,req=false "
- "name=hosts,type=CephString,n=N,req=false",
- 'Start MDS daemon(s)')
- def _mds_add(self, fs_name, num=None, hosts=None):
- spec = orchestrator.ServiceSpec(
- fs_name,
- placement=orchestrator.PlacementSpec(hosts=hosts, count=num))
- completion = self.add_mds(spec)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch daemon add rgw',
- 'name=realm_name,type=CephString '
- 'name=zone_name,type=CephString '
- 'name=num,type=CephInt,req=false '
- "name=hosts,type=CephString,n=N,req=false",
- 'Start RGW daemon(s)')
- def _rgw_add(self, realm_name, zone_name, num=1, hosts=None, inbuf=None):
- usage = """
-Usage:
- ceph orch rgw add -i <json_file>
- ceph orch rgw add <realm_name> <zone_name>
- """
- if inbuf:
- try:
- rgw_spec = orchestrator.RGWSpec.from_json(json.loads(inbuf))
- except ValueError as e:
- msg = 'Failed to read JSON input: {}'.format(str(e)) + usage
- return HandleCommandResult(-errno.EINVAL, stderr=msg)
- rgw_spec = orchestrator.RGWSpec(
- rgw_realm=realm_name,
- rgw_zone=zone_name,
- placement=orchestrator.PlacementSpec(hosts=hosts, count=num))
-
- completion = self.add_rgw(rgw_spec)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch daemon add nfs',
- "name=svc_arg,type=CephString "
- "name=pool,type=CephString "
- "name=namespace,type=CephString,req=false "
- 'name=num,type=CephInt,req=false '
- 'name=hosts,type=CephString,n=N,req=false '
- 'name=label,type=CephString,req=false',
- 'Start NFS daemon(s)')
- def _nfs_add(self, svc_arg, pool, namespace=None, num=None, label=None, hosts=[]):
- spec = orchestrator.NFSServiceSpec(
- svc_arg,
- pool=pool,
- namespace=namespace,
- placement=orchestrator.PlacementSpec(label=label, hosts=hosts, count=num),
- )
- spec.validate_add()
- completion = self.add_nfs(spec)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch daemon add prometheus',
- 'name=num,type=CephInt,req=false '
- 'name=hosts,type=CephString,n=N,req=false '
- 'name=label,type=CephString,req=false',
- 'Add prometheus daemon(s)')
- def _daemon_add_prometheus(self, num=None, label=None, hosts=[]):
- # type: (Optional[int], Optional[str], List[str]) -> HandleCommandResult
- spec = orchestrator.ServiceSpec(
- placement=orchestrator.PlacementSpec(label=label, hosts=hosts, count=num),
- )
- completion = self.add_prometheus(spec)
- self._orchestrator_wait([completion])
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch',
- "name=action,type=CephChoices,strings=start|stop|restart|redeploy|reconfig "
- "name=svc_name,type=CephString",
- 'Start, stop, restart, redeploy, or reconfig an entire service (i.e. all daemons)')
- def _service_action(self, action, svc_name):
- if '.' in svc_name:
- (service_type, service_id) = svc_name.split('.', 1)
- else:
- service_type = svc_name;
- service_id = None
- completion = self.service_action(action, service_type, service_id)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch daemon',
- "name=action,type=CephChoices,strings=start|stop|restart|redeploy|reconfig "
- "name=name,type=CephString",
- 'Start, stop, restart, redeploy, or reconfig a specific daemon')
- def _daemon_action(self, action, name):
- if '.' not in name:
- raise orchestrator.OrchestratorError('%s is not a valid daemon name' % name)
- (daemon_type, daemon_id) = name.split('.', 1)
- completion = self.daemon_action(action, daemon_type, daemon_id)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch daemon rm',
- "name=names,type=CephString,n=N "
- 'name=force,type=CephBool,req=false',
- 'Remove specific daemon(s)')
- def _daemon_rm(self, names, force=False):
- for name in names:
- if '.' not in name:
- raise orchestrator.OrchestratorError('%s is not a valid daemon name' % name)
- completion = self.remove_daemons(names, force)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch rm',
- "name=name,type=CephString",
- 'Remove a service')
- def _service_rm(self, name):
- if '.' in name:
- (service_type, service_name) = name.split('.')
- else:
- service_type = name;
- service_name = None
- if name in ['mon', 'mgr']:
- raise orchestrator.OrchestratorError('The mon and mgr services cannot be removed')
- completion = self.remove_service(service_type, service_name)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch apply mgr',
- "name=num,type=CephInt,req=false "
- "name=hosts,type=CephString,n=N,req=false "
- "name=label,type=CephString,req=false",
- 'Update the size or placement of managers')
- def _apply_mgr(self, num=None, hosts=[], label=None):
- placement = orchestrator.PlacementSpec(
- label=label, count=num, hosts=hosts)
- placement.validate()
-
- spec = orchestrator.ServiceSpec(placement=placement)
-
- completion = self.apply_mgr(spec)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch apply mon',
- "name=num,type=CephInt,req=false "
- "name=hosts,type=CephString,n=N,req=false "
- "name=label,type=CephString,req=false",
- 'Update the number of monitor instances')
- def _apply_mon(self, num=None, hosts=[], label=None):
- if not num and not hosts and not label:
- # Improve Error message. Point to parse_host_spec examples
- raise orchestrator.OrchestratorValidationError("Mons need a placement spec. (num, host, network, name(opt))")
- placement = orchestrator.PlacementSpec(label=label, count=num, hosts=hosts)
- placement.validate()
-
- spec = orchestrator.ServiceSpec(placement=placement)
-
- completion = self.apply_mon(spec)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch apply mds',
- "name=fs_name,type=CephString "
- "name=num,type=CephInt,req=false "
- "name=hosts,type=CephString,n=N,req=false "
- "name=label,type=CephString,req=false",
- 'Update the number of MDS instances for the given fs_name')
- def _apply_mds(self, fs_name, num=None, label=None, hosts=[]):
- placement = orchestrator.PlacementSpec(label=label, count=num, hosts=hosts)
- placement.validate()
-
- spec = orchestrator.ServiceSpec(
- fs_name,
- placement=placement)
-
- completion = self.apply_mds(spec)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch apply rbd-mirror',
- "name=num,type=CephInt,req=false "
- "name=hosts,type=CephString,n=N,req=false "
- "name=label,type=CephString,req=false",
- 'Update the number of rbd-mirror instances')
- def _apply_rbd_mirror(self, num, label=None, hosts=[]):
- spec = orchestrator.ServiceSpec(
- placement=orchestrator.PlacementSpec(hosts=hosts, count=num, label=label))
- completion = self.apply_rbd_mirror(spec)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch apply rgw',
- 'name=realm_name,type=CephString '
- 'name=zone_name,type=CephString '
- 'name=num,type=CephInt,req=false '
- 'name=hosts,type=CephString,n=N,req=false '
- 'name=label,type=CephString,req=false',
- 'Update the number of RGW instances for the given zone')
- def _apply_rgw(self, zone_name, realm_name, num=None, label=None, hosts=[]):
- spec = orchestrator.RGWSpec(
- rgw_realm=realm_name,
- rgw_zone=zone_name,
- placement=orchestrator.PlacementSpec(hosts=hosts, label=label, count=num))
- completion = self.apply_rgw(spec)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch apply nfs',
- "name=svc_id,type=CephString "
- 'name=num,type=CephInt,req=false '
- 'name=hosts,type=CephString,n=N,req=false '
- 'name=label,type=CephString,req=false',
- 'Scale an NFS service')
- def _apply_nfs(self, svc_id, num=None, label=None, hosts=[]):
- # type: (str, Optional[int], Optional[str], List[str]) -> HandleCommandResult
- spec = orchestrator.NFSServiceSpec(
- svc_id,
- placement=orchestrator.PlacementSpec(label=label, hosts=hosts, count=num),
- )
- completion = self.apply_nfs(spec)
- self._orchestrator_wait([completion])
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch apply prometheus',
- 'name=num,type=CephInt,req=false '
- 'name=hosts,type=CephString,n=N,req=false '
- 'name=label,type=CephString,req=false',
- 'Scale prometheus service')
- def _apply_prometheus(self, num=None, label=None, hosts=[]):
- # type: (Optional[int], Optional[str], List[str]) -> HandleCommandResult
- spec = orchestrator.ServiceSpec(
- placement=orchestrator.PlacementSpec(label=label, hosts=hosts, count=num),
- )
- completion = self.apply_prometheus(spec)
- self._orchestrator_wait([completion])
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'orch set backend',
- "name=module_name,type=CephString,req=true",
- 'Select orchestrator module backend')
- def _set_backend(self, module_name):
- """
- We implement a setter command instead of just having the user
- modify the setting directly, so that we can validate they're setting
- it to a module that really exists and is enabled.
-
- There isn't a mechanism for ensuring they don't *disable* the module
- later, but this is better than nothing.
- """
- mgr_map = self.get("mgr_map")
-
- if module_name is None or module_name == "":
- self.set_module_option("orchestrator", None)
- return HandleCommandResult()
-
- for module in mgr_map['available_modules']:
- if module['name'] != module_name:
- continue
-
- if not module['can_run']:
- continue
-
- enabled = module['name'] in mgr_map['modules']
- if not enabled:
- return HandleCommandResult(-errno.EINVAL,
- stderr="Module '{module_name}' is not enabled. \n Run "
- "`ceph mgr module enable {module_name}` "
- "to enable.".format(module_name=module_name))
-
- try:
- is_orchestrator = self.remote(module_name,
- "is_orchestrator_module")
- except NameError:
- is_orchestrator = False
-
- if not is_orchestrator:
- return HandleCommandResult(-errno.EINVAL,
- stderr="'{0}' is not an orchestrator module".format(module_name))
-
- self.set_module_option("orchestrator", module_name)
-
- return HandleCommandResult()
-
- return HandleCommandResult(-errno.EINVAL, stderr="Module '{0}' not found".format(module_name))
-
- @orchestrator._cli_write_command(
- 'orch cancel',
- desc='cancels ongoing operations')
- def _cancel(self):
- """
- ProgressReferences might get stuck. Let's unstuck them.
- """
- self.cancel_completions()
- return HandleCommandResult()
-
- @orchestrator._cli_read_command(
- 'orch status',
- desc='Report configured backend and its status')
- def _status(self):
- o = self._select_orchestrator()
- if o is None:
- raise orchestrator.NoOrchestrator()
-
- avail, why = self.available()
- if avail is None:
- # The module does not report its availability
- return HandleCommandResult(stdout="Backend: {0}".format(o))
- else:
- return HandleCommandResult(stdout="Backend: {0}\nAvailable: {1}{2}".format(
- o, avail,
- " ({0})".format(why) if not avail else ""
- ))
-
- def self_test(self):
- old_orch = self._select_orchestrator()
- self._set_backend('')
- assert self._select_orchestrator() is None
- self._set_backend(old_orch)
-
- e1 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "ZeroDivisionError")
- try:
- orchestrator.raise_if_exception(e1)
- assert False
- except ZeroDivisionError as e:
- assert e.args == ('hello', 'world')
-
- e2 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "OrchestratorError")
- try:
- orchestrator.raise_if_exception(e2)
- assert False
- except orchestrator.OrchestratorError as e:
- assert e.args == ('hello', 'world')
-
- c = orchestrator.TrivialReadCompletion(result=True)
- assert c.has_result
-
- @orchestrator._cli_write_command(
- 'upgrade check',
- 'name=image,type=CephString,req=false '
- 'name=ceph_version,type=CephString,req=false',
- desc='Check service versions vs available and target containers')
- def _upgrade_check(self, image=None, ceph_version=None):
- completion = self.upgrade_check(image=image, version=ceph_version)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'upgrade status',
- desc='Check service versions vs available and target containers')
- def _upgrade_status(self):
- completion = self.upgrade_status()
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- r = {
- 'target_image': completion.result.target_image,
- 'in_progress': completion.result.in_progress,
- 'services_complete': completion.result.services_complete,
- 'message': completion.result.message,
- }
- out = json.dumps(r, indent=4)
- return HandleCommandResult(stdout=out)
-
- @orchestrator._cli_write_command(
- 'upgrade start',
- 'name=image,type=CephString,req=false '
- 'name=ceph_version,type=CephString,req=false',
- desc='Initiate upgrade')
- def _upgrade_start(self, image=None, ceph_version=None):
- completion = self.upgrade_start(image, ceph_version)
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'upgrade pause',
- desc='Pause an in-progress upgrade')
- def _upgrade_pause(self):
- completion = self.upgrade_pause()
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'upgrade resume',
- desc='Resume paused upgrade')
- def _upgrade_resume(self):
- completion = self.upgrade_resume()
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
-
- @orchestrator._cli_write_command(
- 'upgrade stop',
- desc='Stop an in-progress upgrade')
- def _upgrade_stop(self):
- completion = self.upgrade_stop()
- self._orchestrator_wait([completion])
- orchestrator.raise_if_exception(completion)
- return HandleCommandResult(stdout=completion.result_str())
+++ /dev/null
-[tox]
-envlist = py3
-skipsdist = true
-toxworkdir = {env:CEPH_BUILD_DIR}/orchestrator_cli
-minversion = 2.5
-
-[testenv]
-deps = -rrequirements.txt
-setenv=
- UNITTEST = true
- py3: PYTHONPATH = {toxinidir}/../../../../build/lib/cython_modules/lib.3
-
-commands=
- {envbindir}/py.test .
cephadm/module.py \
mgr_module.py \
mgr_util.py \
- orchestrator.py \
- orchestrator_cli/module.py \
+ orchestrator/__init__.py \
progress/module.py \
rook/module.py \
test_orchestrator/module.py