mgr/orchestrator_cli: rename to mgr/orchestrator
author     Sebastian Wagner <sebastian.wagner@suse.com>
           Fri, 24 Jan 2020 12:08:02 +0000 (13:08 +0100)
committer  Sebastian Wagner <sebastian.wagner@suse.com>
           Mon, 17 Feb 2020 09:24:01 +0000 (10:24 +0100)
* Move `mgr/orchestrator.py` to `orchestrator/_interface.py`

Signed-off-by: Sebastian Wagner <sebastian.wagner@suse.com>
13 files changed:
qa/tasks/mgr/test_module_selftest.py
qa/tasks/mgr/test_orchestrator_cli.py
src/pybind/mgr/orchestrator.py [deleted file]
src/pybind/mgr/orchestrator/README.md [new file with mode: 0644]
src/pybind/mgr/orchestrator/__init__.py [new file with mode: 0644]
src/pybind/mgr/orchestrator/_interface.py [new file with mode: 0644]
src/pybind/mgr/orchestrator/module.py [new file with mode: 0644]
src/pybind/mgr/orchestrator_cli/.gitignore [deleted file]
src/pybind/mgr/orchestrator_cli/README.md [deleted file]
src/pybind/mgr/orchestrator_cli/__init__.py [deleted file]
src/pybind/mgr/orchestrator_cli/module.py [deleted file]
src/pybind/mgr/orchestrator_cli/tox.ini [deleted file]
src/pybind/mgr/tox.ini

index 966a92a6977325dbc5c8dda69fc6c6ae07a73cca..969d31a7dc8e01b6249c67b420212f1f1550a0e0 100644 (file)
@@ -79,8 +79,8 @@ class TestModuleSelftest(MgrTestCase):
     def test_crash(self):
         self._selftest_plugin("crash")
 
-    def test_orchestrator_cli(self):
-        self._selftest_plugin("orchestrator_cli")
+    def test_orchestrator(self):
+        self._selftest_plugin("orchestrator")
 
 
     def test_selftest_config_update(self):
index 8faa40eb6d99d860c08ecadfddaa1a1fc9bf0fef..4c465a7831979c47cb6de71e51c91e1a4686aca8 100644 (file)
@@ -35,7 +35,7 @@ class TestOrchestratorCli(MgrTestCase):
     def setUp(self):
         super(TestOrchestratorCli, self).setUp()
 
-        self._load_module("orchestrator_cli")
+        self._load_module("orchestrator")
         self._load_module("test_orchestrator")
         self._orch_cmd("set", "backend", "test_orchestrator")
 
diff --git a/src/pybind/mgr/orchestrator.py b/src/pybind/mgr/orchestrator.py
deleted file mode 100644 (file)
index 18dc0a1..0000000
+++ /dev/null
@@ -1,1736 +0,0 @@
-
-"""
-ceph-mgr orchestrator interface
-
-Please see the ceph-mgr module developer's guide for more information.
-"""
-import copy
-import functools
-import logging
-import pickle
-import sys
-import time
-from collections import namedtuple
-from functools import wraps
-import uuid
-import string
-import random
-import datetime
-import re
-import six
-import errno
-
-from ceph.deployment import inventory
-
-from mgr_module import MgrModule, PersistentStoreDict, CLICommand, HandleCommandResult
-from mgr_util import format_bytes
-
-try:
-    from ceph.deployment.drive_group import DriveGroupSpec
-    from typing import TypeVar, Generic, List, Optional, Union, Tuple, Iterator, Callable, Any, \
-        Type, Sequence
-except ImportError:
-    pass
-
-logger = logging.getLogger(__name__)
-
-
-class HostPlacementSpec(namedtuple('HostPlacementSpec', ['hostname', 'network', 'name'])):
-    def __str__(self):
-        res = ''
-        res += self.hostname
-        if self.network:
-            res += ':' + self.network
-        if self.name:
-            res += '=' + self.name
-        return res
-
-
-def parse_host_placement_specs(host, require_network=True):
-    # type: (str, Optional[bool]) -> HostPlacementSpec
-    """
-    Split host into host, network, and (optional) daemon name parts.  The network
-    part can be an IP, CIDR, or ceph addrvec like '[v2:1.2.3.4:3300,v1:1.2.3.4:6789]'.
-    e.g.,
-      "myhost"
-      "myhost=name"
-      "myhost:1.2.3.4"
-      "myhost:1.2.3.4=name"
-      "myhost:1.2.3.0/24"
-      "myhost:1.2.3.0/24=name"
-      "myhost:[v2:1.2.3.4:3000]=name"
-      "myhost:[v2:1.2.3.4:3000,v1:1.2.3.4:6789]=name"
-    """
-    # Matches from start to : or = or until end of string
-    host_re = r'^(.*?)(:|=|$)'
-    # Matches from : to = or until end of string
-    ip_re = r':(.*?)(=|$)'
-    # Matches from = to end of string
-    name_re = r'=(.*?)$'
-
-    # assign defaults
-    host_spec = HostPlacementSpec('', '', '')
-
-    match_host = re.search(host_re, host)
-    if match_host:
-        host_spec = host_spec._replace(hostname=match_host.group(1))
-
-    name_match = re.search(name_re, host)
-    if name_match:
-        host_spec = host_spec._replace(name=name_match.group(1))
-
-    ip_match = re.search(ip_re, host)
-    if ip_match:
-        host_spec = host_spec._replace(network=ip_match.group(1))
-
-    if not require_network:
-        return host_spec
-
-    from ipaddress import ip_network, ip_address
-    networks = list()  # type: List[str]
-    network = host_spec.network
-    # in case we have [v2:1.2.3.4:3000,v1:1.2.3.4:6789]
-    if ',' in network:
-        networks = network.split(',')
-    else:
-        networks.append(network)
-    for network in networks:
-        # only if we have versioned network configs
-        if network.startswith('v') or network.startswith('[v'):
-            network = network.split(':')[1]
-        try:
-            # if subnets are defined, also verify the validity
-            if '/' in network:
-                ip_network(six.text_type(network))
-            else:
-                ip_address(six.text_type(network))
-        except ValueError as e:
-            # logging?
-            raise e
-
-    return host_spec
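
A quick illustration of the parser (a sketch; the values follow directly
from the regexes above)::

    >>> parse_host_placement_specs('myhost:1.2.3.0/24=name')
    HostPlacementSpec(hostname='myhost', network='1.2.3.0/24', name='name')
    >>> str(parse_host_placement_specs('myhost', require_network=False))
    'myhost'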
-
-
-class OrchestratorError(Exception):
-    """
-    General orchestrator specific error.
-
-    Used for deployment, configuration or user errors.
-
-    It's not intended for programming errors or orchestrator internal errors.
-    """
-
-
-class NoOrchestrator(OrchestratorError):
-    """
-    No orchestrator is configured.
-    """
-    def __init__(self, msg="No orchestrator configured (try `ceph orch set backend`)"):
-        super(NoOrchestrator, self).__init__(msg)
-
-
-class OrchestratorValidationError(OrchestratorError):
-    """
-    Raised when an orchestrator doesn't support a specific feature.
-    """
-
-
-def handle_exception(prefix, cmd_args, desc, perm, func):
-    @wraps(func)
-    def wrapper(*args, **kwargs):
-        try:
-            return func(*args, **kwargs)
-        except (OrchestratorError, ImportError) as e:
-            # Do not print Traceback for expected errors.
-            return HandleCommandResult(-errno.ENOENT, stderr=str(e))
-        except NotImplementedError:
-            msg = 'This Orchestrator does not support `{}`'.format(prefix)
-            return HandleCommandResult(-errno.ENOENT, stderr=msg)
-
-    return CLICommand(prefix, cmd_args, desc, perm)(wrapper)
-
-
-def _cli_command(perm):
-    def inner_cli_command(prefix, cmd_args="", desc=""):
-        return lambda func: handle_exception(prefix, cmd_args, desc, perm, func)
-    return inner_cli_command
-
-
-_cli_read_command = _cli_command('r')
-_cli_write_command = _cli_command('rw')
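
These decorators wrap a handler so that expected failures surface as clean
CLI results instead of tracebacks. A hypothetical handler (a sketch; the
``orch example`` prefix and the ``backend`` attribute are made up)::

    @_cli_read_command('orch example', desc='hypothetical read-only command')
    def _example(self):
        if getattr(self, 'backend', None) is None:  # assumed attribute
            raise NoOrchestrator()  # reported as -ENOENT, without a traceback
        return HandleCommandResult(stdout='ok')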
-
-
-def _no_result():
-    return object()
-
-
-class _Promise(object):
-    """
-    A completion may need multiple promises to be fulfilled. `_Promise` is one
-    step.
-
-    Typically ``Orchestrator`` implementations inherit from this class to
-    build their own way of finishing a step to fulfil a future.
-
-    They are not exposed in the orchestrator interface and can be seen as a
-    helper to build orchestrator modules.
-    """
-    INITIALIZED = 1  # We have a parent completion and a next completion
-    RUNNING = 2
-    FINISHED = 3  # we have a final result
-
-    NO_RESULT = _no_result()  # type: None
-    ASYNC_RESULT = object()
-
-    def __init__(self,
-                 _first_promise=None,  # type: Optional["_Promise"]
-                 value=NO_RESULT,  # type: Optional[Any]
-                 on_complete=None,    # type: Optional[Callable]
-                 name=None,  # type: Optional[str]
-                 ):
-        self._on_complete_ = on_complete
-        self._name = name
-        self._next_promise = None  # type: Optional[_Promise]
-
-        self._state = self.INITIALIZED
-        self._exception = None  # type: Optional[Exception]
-
-        # Value of this _Promise. may be an intermediate result.
-        self._value = value
-
-        # _Promise is not a continuation monad, as `_result` is of type
-        # T instead of (T -> r) -> r. Therefore we need to store the first promise here.
-        self._first_promise = _first_promise or self  # type: '_Promise'
-
-    @property
-    def _exception(self):
-        # type: () -> Optional[Exception]
-        return getattr(self, '_exception_', None)
-
-    @_exception.setter
-    def _exception(self, e):
-        self._exception_ = e
-        self._serialized_exception_ = pickle.dumps(e) if e is not None else None
-
-    @property
-    def _serialized_exception(self):
-        # type: () -> Optional[bytes]
-        return getattr(self, '_serialized_exception_', None)
-
-
-    @property
-    def _on_complete(self):
-        # type: () -> Optional[Callable]
-        # https://github.com/python/mypy/issues/4125
-        return self._on_complete_
-
-    @_on_complete.setter
-    def _on_complete(self, val):
-        # type: (Optional[Callable]) -> None
-        self._on_complete_ = val
-
-    def __repr__(self):
-        name = self._name or getattr(self._on_complete, '__name__', '??') if self._on_complete else 'None'
-        val = repr(self._value) if self._value is not self.NO_RESULT else 'NA'
-        return '{}(_s={}, val={}, _on_c={}, id={}, name={}, pr={}, _next={})'.format(
-            self.__class__, self._state, val, self._on_complete, id(self), name, getattr(self, '_progress_reference', 'NA'), repr(self._next_promise)
-        )
-
-    def pretty_print_1(self):
-        if self._name:
-            name = self._name
-        elif self._on_complete is None:
-            name = 'lambda x: x'
-        elif hasattr(self._on_complete, '__name__'):
-            name = getattr(self._on_complete, '__name__')
-        else:
-            name = self._on_complete.__class__.__name__
-        val = repr(self._value) if self._value not in (self.NO_RESULT, self.ASYNC_RESULT) else '...'
-        prefix = {
-            self.INITIALIZED: '      ',
-            self.RUNNING:     '   >>>',
-            self.FINISHED:    '(done)'
-        }[self._state]
-        return '{} {}({}),'.format(prefix, name, val)
-
-    def then(self, on_complete):
-        # type: (Any, Callable) -> Any
-        """
-        Call ``on_complete`` as soon as this promise is finalized.
-        """
-        assert self._state in (self.INITIALIZED, self.RUNNING)
-        if self._on_complete is not None:
-            assert self._next_promise is None
-            self._set_next_promise(self.__class__(
-                _first_promise=self._first_promise,
-                on_complete=on_complete
-            ))
-            return self._next_promise
-
-        else:
-            self._on_complete = on_complete
-            self._set_next_promise(self.__class__(_first_promise=self._first_promise))
-            return self._next_promise
-
-    def _set_next_promise(self, next):
-        # type: (_Promise) -> None
-        assert self is not next
-        assert self._state in (self.INITIALIZED, self.RUNNING)
-
-        self._next_promise = next
-        assert self._next_promise is not None
-        for p in iter(self._next_promise):
-            p._first_promise = self._first_promise
-
-    def _finalize(self, value=NO_RESULT):
-        """
-        Sets this promise to complete.
-
-        Orchestrators may choose to use this helper function.
-
-        :param value: new value.
-        """
-        if self._state not in (self.INITIALIZED, self.RUNNING):
-            raise ValueError('finalize: {} already finished. {}'.format(repr(self), value))
-
-        self._state = self.RUNNING
-
-        if value is not self.NO_RESULT:
-            self._value = value
-        assert self._value is not self.NO_RESULT, repr(self)
-
-        if self._on_complete:
-            try:
-                next_result = self._on_complete(self._value)
-            except Exception as e:
-                self.fail(e)
-                return
-        else:
-            next_result = self._value
-
-        if isinstance(next_result, _Promise):
-            # hack: _Promise is not a continuation monad.
-            next_result = next_result._first_promise  # type: ignore
-            assert next_result not in self, repr(self._first_promise) + repr(next_result)
-            assert self not in next_result
-            next_result._append_promise(self._next_promise)
-            self._set_next_promise(next_result)
-            assert self._next_promise
-            if self._next_promise._value is self.NO_RESULT:
-                self._next_promise._value = self._value
-            self.propagate_to_next()
-        elif next_result is not self.ASYNC_RESULT:
-            # simple map. simply forward
-            if self._next_promise:
-                self._next_promise._value = next_result
-            else:
-                # Hack: next_result is of type U, _value is of type T
-                self._value = next_result  # type: ignore
-            self.propagate_to_next()
-        else:
-            # asynchronous promise
-            pass
-
-
-    def propagate_to_next(self):
-        self._state = self.FINISHED
-        logger.debug('finalized {}'.format(repr(self)))
-        if self._next_promise:
-            self._next_promise._finalize()
-
-    def fail(self, e):
-        # type: (Exception) -> None
-        """
-        Sets the whole completion to be failed with this exception and ends the
-        evaluation.
-        """
-        if self._state == self.FINISHED:
-            raise ValueError(
-                'Invalid State: called fail, but Completion is already finished: {}'.format(str(e)))
-        assert self._state in (self.INITIALIZED, self.RUNNING)
-        logger.exception('_Promise failed')
-        self._exception = e
-        self._value = 'exception'
-        if self._next_promise:
-            self._next_promise.fail(e)
-        self._state = self.FINISHED
-
-    def __contains__(self, item):
-        return any(item is p for p in iter(self._first_promise))
-
-    def __iter__(self):
-        yield self
-        elem = self._next_promise
-        while elem is not None:
-            yield elem
-            elem = elem._next_promise
-
-    def _append_promise(self, other):
-        if other is not None:
-            assert self not in other
-            assert other not in self
-            self._last_promise()._set_next_promise(other)
-
-    def _last_promise(self):
-        # type: () -> _Promise
-        return list(iter(self))[-1]
-
-
-class ProgressReference(object):
-    def __init__(self,
-                 message,  # type: str
-                 mgr,
-                 completion=None  # type: Optional[Callable[[], Completion]]
-                ):
-        """
-        ProgressReference can be used within Completions::
-
-            +---------------+      +---------------------------------+
-            |               | then |                                 |
-            | My Completion | +--> | on_complete=ProgressReference() |
-            |               |      |                                 |
-            +---------------+      +---------------------------------+
-
-        See :func:`Completion.with_progress` for an easy way to create
-        a progress reference
-
-        """
-        super(ProgressReference, self).__init__()
-        self.progress_id = str(uuid.uuid4())
-        self.message = message
-        self.mgr = mgr
-
-        #: The completion can already have a result, before the write
-        #: operation is effective. progress == 1 means, the services are
-        #: created / removed.
-        self.completion = completion  # type: Optional[Callable[[], Completion]]
-
-        #: if an orchestrator module can provide more detailed
-        #: progress information, it needs to also call ``progress.update()``.
-        self.progress = 0.0
-
-        self._completion_has_result = False
-        self.mgr.all_progress_references.append(self)
-
-    def __str__(self):
-        """
-        ``__str__()`` is used for determining the message for progress events.
-        """
-        return self.message or super(ProgressReference, self).__str__()
-
-    def __call__(self, arg):
-        self._completion_has_result = True
-        self.progress = 1.0
-        return arg
-
-    @property
-    def progress(self):
-        return self._progress
-
-    @progress.setter
-    def progress(self, progress):
-        assert progress <= 1.0
-        self._progress = progress
-        try:
-            if self.effective:
-                self.mgr.remote("progress", "complete", self.progress_id)
-                self.mgr.all_progress_references = [p for p in self.mgr.all_progress_references if p is not self]
-            else:
-                self.mgr.remote("progress", "update", self.progress_id, self.message,
-                                progress,
-                                [("origin", "orchestrator")])
-        except ImportError:
-            # If the progress module is disabled that's fine,
-            # they just won't see the output.
-            pass
-
-    @property
-    def effective(self):
-        return self.progress == 1 and self._completion_has_result
-
-    def update(self):
-        def progress_run(progress):
-            self.progress = progress
-        if self.completion:
-            c = self.completion().then(progress_run)
-            self.mgr.process([c._first_promise])
-        else:
-            self.progress = 1
-
-    def fail(self):
-        self._completion_has_result = True
-        self.progress = 1
-
-
-class Completion(_Promise):
-    """
-    Combines multiple promises into one overall operation.
-
-    Completions are composable: one completion can be called from
-    another completion, which makes them re-usable via promises. E.g.::
-
-        >>> return Orchestrator().get_hosts().then(self._create_osd)
-
-    where ``get_hosts`` returns a Completion of a list of hosts and
-    ``_create_osd`` takes a list of hosts.
-
-    The concept behind this is to store the computation steps
-    explicitly and then explicitly evaluate the chain:
-
-        >>> p = Completion(on_complete=lambda x: x*2).then(on_complete=lambda x: str(x))
-        ... p.finalize(2)
-        ... assert p.result == "4"
-
-    or graphically::
-
-        +---------------+      +-----------------+
-        |               | then |                 |
-        | lambda x: x*2 | +--> | lambda x: str(x)|
-        |               |      |                 |
-        +---------------+      +-----------------+
-
-    """
-    def __init__(self,
-                 _first_promise=None,  # type: Optional["Completion"]
-                 value=_Promise.NO_RESULT,  # type: Any
-                 on_complete=None,  # type: Optional[Callable]
-                 name=None,  # type: Optional[str]
-                 ):
-        super(Completion, self).__init__(_first_promise, value, on_complete, name)
-
-    @property
-    def _progress_reference(self):
-        # type: () -> Optional[ProgressReference]
-        if hasattr(self._on_complete, 'progress_id'):
-            return self._on_complete  # type: ignore
-        return None
-
-    @property
-    def progress_reference(self):
-        # type: () -> Optional[ProgressReference]
-        """
-        The ProgressReference of this completion, if any.
-        Marks this completion as a write completion.
-        """
-
-        references = [c._progress_reference for c in iter(self) if c._progress_reference is not None]
-        if references:
-            assert len(references) == 1
-            return references[0]
-        return None
-
-    @classmethod
-    def with_progress(cls,  # type: Any
-                      message,  # type: str
-                      mgr,
-                      _first_promise=None,  # type: Optional["Completion"]
-                      value=_Promise.NO_RESULT,  # type: Any
-                      on_complete=None,  # type: Optional[Callable]
-                      calc_percent=None  # type: Optional[Callable[[], Any]]
-                      ):
-        # type: (...) -> Any
-
-        c = cls(
-            _first_promise=_first_promise,
-            value=value,
-            on_complete=on_complete
-        ).add_progress(message, mgr, calc_percent)
-
-        return c._first_promise
-
-    def add_progress(self,
-                     message,  # type: str
-                     mgr,
-                     calc_percent=None  # type: Optional[Callable[[], Any]]
-                     ):
-        return self.then(
-            on_complete=ProgressReference(
-                message=message,
-                mgr=mgr,
-                completion=calc_percent
-            )
-        )
-
-    def fail(self, e):
-        super(Completion, self).fail(e)
-        if self._progress_reference:
-            self._progress_reference.fail()
-
-    def finalize(self, result=_Promise.NO_RESULT):
-        if self._first_promise._state == self.INITIALIZED:
-            self._first_promise._finalize(result)
-
-    @property
-    def result(self):
-        """
-        The result of the operation that we waited
-        for.  Only valid after calling Orchestrator.process() on this
-        completion.
-        """
-        last = self._last_promise()
-        assert last._state == _Promise.FINISHED
-        return last._value
-
-    def result_str(self):
-        """Force a string."""
-        if self.result is None:
-            return ''
-        if isinstance(self.result, list):
-            return '\n'.join(str(x) for x in self.result)
-        return str(self.result)
-
-    @property
-    def exception(self):
-        # type: () -> Optional[Exception]
-        return self._last_promise()._exception
-
-    @property
-    def serialized_exception(self):
-        # type: () -> Optional[bytes]
-        return self._last_promise()._serialized_exception
-
-    @property
-    def has_result(self):
-        # type: () -> bool
-        """
-        Does the operation already have a result?
-
-        For write operations, it can already have a
-        result if the orchestrator's configuration is
-        persistently written. Typically this would
-        indicate that an update had been written to
-        a manifest, but that the update had not
-        necessarily been pushed out to the cluster.
-
-        :return:
-        """
-        return self._last_promise()._state == _Promise.FINISHED
-
-    @property
-    def is_errored(self):
-        # type: () -> bool
-        """
-        Has the completion failed? The default implementation looks for
-        self.exception. Can be overridden.
-        """
-        return self.exception is not None
-
-    @property
-    def needs_result(self):
-        # type: () -> bool
-        """
-        Could the external operation be deemed as complete,
-        or should we wait?
-        We must wait for a read operation only if it is not complete.
-        """
-        return not self.is_errored and not self.has_result
-
-    @property
-    def is_finished(self):
-        # type: () -> bool
-        """
-        Is the operation finished? That is, has it either
-        errored or produced a result?
-        """
-        return self.is_errored or self.has_result
-
-    def pretty_print(self):
-        reprs = '\n'.join(p.pretty_print_1() for p in iter(self._first_promise))
-        return """<{}>[\n{}\n]""".format(self.__class__.__name__, reprs)
-
-
-def pretty_print(completions):
-    # type: (Sequence[Completion]) -> str
-    return ', '.join(c.pretty_print() for c in completions)
-
-
-def raise_if_exception(c):
-    # type: (Completion) -> None
-    """
-    :raises OrchestratorError: Some user error or a config error.
-    :raises Exception: Some internal error
-    """
-    if c.serialized_exception is not None:
-        try:
-            e = pickle.loads(c.serialized_exception)
-        except (KeyError, AttributeError):
-            raise Exception('{}: {}'.format(type(c.exception), c.exception))
-        raise e
-
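
Failures travel through the chain as pickled exceptions and are re-raised
on the caller's side. A sketch::

    c = Completion()
    c.fail(OrchestratorError('backend down'))
    raise_if_exception(c)     # re-raises the unpickled OrchestratorError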
-
-class TrivialReadCompletion(Completion):
-    """
-    This is the trivial completion simply wrapping a result.
-    """
-    def __init__(self, result):
-        super(TrivialReadCompletion, self).__init__()
-        if result:
-            self.finalize(result)
-
-
-def _hide_in_features(f):
-    f._hide_in_features = True
-    return f
-
-
-class Orchestrator(object):
-    """
-    Calls in this class may do long running remote operations, with time
-    periods ranging from network latencies to package install latencies and large
-    internet downloads.  For that reason, all are asynchronous, and return
-    ``Completion`` objects.
-
-    Methods should only return the completion and not directly execute
-    anything, like network calls. Otherwise the purpose of
-    those completions is defeated.
-
-    Implementations are not required to start work on an operation until
-    the caller waits on the relevant Completion objects.  Callers making
-    multiple updates should not wait on Completions until they're done
-    sending operations: this enables implementations to batch up a series
-    of updates when wait() is called on a set of Completion objects.
-
-    Implementations are encouraged to keep reasonably fresh caches of
-    the status of the system: it is better to serve a stale-but-recent
-    result read of e.g. device inventory than it is to keep the caller waiting
-    while you scan hosts every time.
-    """
-
-    @_hide_in_features
-    def is_orchestrator_module(self):
-        """
-        Enable other modules to interrogate this module to discover
-        whether it's usable as an orchestrator module.
-
-        Subclasses do not need to override this.
-        """
-        return True
-
-    @_hide_in_features
-    def available(self):
-        # type: () -> Tuple[bool, str]
-        """
-        Report whether we can talk to the orchestrator.  This is the
-        place to give the user a meaningful message if the orchestrator
-        isn't running or can't be contacted.
-
-        This method may be called frequently (e.g. every page load
-        to conditionally display a warning banner), so make sure it's
-        not too expensive.  It's okay to give a slightly stale status
-        (e.g. based on a periodic background ping of the orchestrator)
-        if that's necessary to make this method fast.
-
-        .. note::
-            `True` doesn't mean that the desired functionality
-            is actually available in the orchestrator. I.e. this
-            won't work as expected::
-
-                >>> if OrchestratorClientMixin().available()[0]:  # wrong.
-                ...     OrchestratorClientMixin().get_hosts()
-
-        :return: two-tuple of boolean, string
-        """
-        raise NotImplementedError()
-
-    @_hide_in_features
-    def process(self, completions):
-        # type: (List[Completion]) -> None
-        """
-        Given a list of Completion instances, process any which are
-        incomplete.
-
-        Callers should inspect the detail of each completion to identify
-        partial completion/progress information, and present that information
-        to the user.
-
-        This method should not block, as this would make it slow to query
-        a status, while other long running operations are in progress.
-        """
-        raise NotImplementedError()
-
-    @_hide_in_features
-    def get_feature_set(self):
-        """Describes which methods this orchestrator implements
-
-        .. note::
-            `True` doesn't mean that the desired functionality
-            is actually possible in the orchestrator. I.e. this
-            won't work as expected::
-
-                >>> api = OrchestratorClientMixin()
-                ... if api.get_feature_set()['get_hosts']['available']:  # wrong.
-                ...     api.get_hosts()
-
-            It's better to ask for forgiveness instead::
-
-                >>> try:
-                ...     OrchestratorClientMixin().get_hosts()
-                ... except (OrchestratorError, NotImplementedError):
-                ...     ...
-
-        :returns: Dict of API method names to ``{'available': True or False}``
-        """
-        module = self.__class__
-        features = {a: {'available': getattr(Orchestrator, a, None) != getattr(module, a)}
-                    for a in Orchestrator.__dict__
-                    if not a.startswith('_') and not getattr(getattr(Orchestrator, a), '_hide_in_features', False)
-                    }
-        return features
-
-    @_hide_in_features
-    def cancel_completions(self):
-        # type: () -> None
-        """
-        Cancels ongoing completions to unstick the mgr.
-        """
-        raise NotImplementedError()
-
-    def add_host(self, host_spec):
-        # type: (HostSpec) -> Completion
-        """
-        Add a host to the orchestrator inventory.
-
-        :param host_spec: hostname and optional addr and labels (a ``HostSpec``)
-        """
-        raise NotImplementedError()
-
-    def remove_host(self, host):
-        # type: (str) -> Completion
-        """
-        Remove a host from the orchestrator inventory.
-
-        :param host: hostname
-        """
-        raise NotImplementedError()
-
-    def update_host_addr(self, host, addr):
-        # type: (str, str) -> Completion
-        """
-        Update a host's address
-
-        :param host: hostname
-        :param addr: address (dns name or IP)
-        """
-        raise NotImplementedError()
-
-    def get_hosts(self):
-        # type: () -> Completion
-        """
-        Report the hosts in the cluster.
-
-        The default implementation is extra slow, as it falls back to ``get_inventory()``.
-
-        :return: list of InventoryNodes
-        """
-        return self.get_inventory()
-
-    def add_host_label(self, host, label):
-        # type: (str, str) -> Completion
-        """
-        Add a host label
-        """
-        raise NotImplementedError()
-
-    def remove_host_label(self, host, label):
-        # type: (str, str) -> Completion
-        """
-        Remove a host label
-        """
-        raise NotImplementedError()
-
-    def get_inventory(self, node_filter=None, refresh=False):
-        # type: (Optional[InventoryFilter], bool) -> Completion
-        """
-        Returns the hardware inventory, as created by `ceph-volume inventory`.
-
-        :return: list of InventoryNode
-        """
-        raise NotImplementedError()
-
-    def describe_service(self, service_type=None, service_id=None, node_name=None, refresh=False):
-        # type: (Optional[str], Optional[str], Optional[str], bool) -> Completion
-        """
-        Describe a service (of any kind) that is already configured in
-        the orchestrator.  For example, when viewing an OSD in the dashboard
-        we might like to also display information about the orchestrator's
-        view of the service (like the kubernetes pod ID).
-
-        When viewing a CephFS filesystem in the dashboard, we would use this
-        to display the pods being currently run for MDS daemons.
-
-        :return: list of ServiceDescription objects.
-        """
-        raise NotImplementedError()
-
-    def list_daemons(self, daemon_type=None, daemon_id=None, host=None, refresh=False):
-        # type: (Optional[str], Optional[str], Optional[str], bool) -> Completion
-        """
-        Describe a daemon (of any kind) that is already configured in
-        the orchestrator.
-
-        :return: list of DaemonDescription objects.
-        """
-        raise NotImplementedError()
-
-    def remove_daemons(self, names, force):
-        # type: (List[str], bool) -> Completion
-        """
-        Remove specific daemon(s).
-
-        :return: None
-        """
-        raise NotImplementedError()
-
-    def remove_service(self, service_type, service_name=None):
-        # type: (str, Optional[str]) -> Completion
-        """
-        Remove a service (a collection of daemons).
-
-        :return: None
-        """
-        raise NotImplementedError()
-
-    def service_action(self, action, service_type, service_name):
-        # type: (str, str, str) -> Completion
-        """
-        Perform an action (start/stop/reload) on a service (i.e., all daemons
-        providing the logical service).
-
-        :param action: one of "start", "stop", "restart", "redeploy", "reconfig"
-        :param service_type: e.g. "mds", "rgw", ...
-        :param service_name: name of logical service ("cephfs", "us-east", ...)
-        :rtype: Completion
-        """
-        # assert action in ["start", "stop", "reload", "restart", "redeploy"]
-        raise NotImplementedError()
-
-    def daemon_action(self, action, daemon_type, daemon_id):
-        # type: (str, str, str) -> Completion
-        """
-        Perform an action (start/stop/reload) on a daemon.
-
-        :param action: one of "start", "stop", "restart", "redeploy", "reconfig"
-        :param name: name of daemon
-        :rtype: Completion
-        """
-        # assert action in ["start", "stop", "reload", "restart", "redeploy"]
-        raise NotImplementedError()
-
-    def create_osds(self, drive_groups):
-        # type: (List[DriveGroupSpec]) -> Completion
-        """
-        Create one or more OSDs within a single Drive Group.
-
-        The principal argument here is the drive_group member
-        of OsdSpec: other fields are advisory/extensible for any
-        finer-grained OSD feature enablement (choice of backing store,
-        compression/encryption, etc).
-
-        :param drive_groups: a list of DriveGroupSpec
-        :param all_hosts: TODO, this is required because the orchestrator methods are not composable
-                Probably this parameter can easily be removed, because each orchestrator can use
-                the "get_inventory" method and the "drive_group.host_pattern" attribute
-                to obtain the list of hosts on which to apply the operation
-        """
-        raise NotImplementedError()
-
-    def blink_device_light(self, ident_fault, on, locations):
-        # type: (str, bool, List[DeviceLightLoc]) -> Completion
-        """
-        Instructs the orchestrator to enable or disable either the ident or the fault LED.
-
-        :param ident_fault: either ``"ident"`` or ``"fault"``
-        :param on: ``True`` = on.
-        :param locations: See :class:`orchestrator.DeviceLightLoc`
-        """
-        raise NotImplementedError()
-
-    def add_mon(self, spec):
-        # type: (ServiceSpec) -> Completion
-        """Create mon daemon(s)"""
-        raise NotImplementedError()
-
-    def apply_mon(self, spec):
-        # type: (ServiceSpec) -> Completion
-        """Update mon cluster"""
-        raise NotImplementedError()
-
-    def add_mgr(self, spec):
-        # type: (ServiceSpec) -> Completion
-        """Create mgr daemon(s)"""
-        raise NotImplementedError()
-
-    def apply_mgr(self, spec):
-        # type: (ServiceSpec) -> Completion
-        """Update mgr cluster"""
-        raise NotImplementedError()
-
-    def add_mds(self, spec):
-        # type: (ServiceSpec) -> Completion
-        """Create MDS daemon(s)"""
-        raise NotImplementedError()
-
-    def apply_mds(self, spec):
-        # type: (ServiceSpec) -> Completion
-        """Update MDS cluster"""
-        raise NotImplementedError()
-
-    def add_rbd_mirror(self, spec):
-        # type: (ServiceSpec) -> Completion
-        """Create rbd-mirror daemon(s)"""
-        raise NotImplementedError()
-
-    def apply_rbd_mirror(self, spec):
-        # type: (ServiceSpec) -> Completion
-        """Update rbd-mirror cluster"""
-        raise NotImplementedError()
-
-    def add_nfs(self, spec):
-        # type: (NFSServiceSpec) -> Completion
-        """Create NFS daemon(s)"""
-        raise NotImplementedError()
-
-    def apply_nfs(self, spec):
-        # type: (NFSServiceSpec) -> Completion
-        """Update NFS cluster"""
-        raise NotImplementedError()
-
-    def add_rgw(self, spec):
-        # type: (RGWSpec) -> Completion
-        """Create RGW daemon(s)"""
-        raise NotImplementedError()
-
-    def apply_rgw(self, spec):
-        # type: (RGWSpec) -> Completion
-        """Update RGW cluster"""
-        raise NotImplementedError()
-
-    def add_prometheus(self, spec):
-        # type: (ServiceSpec) -> Completion
-        """Create new prometheus daemon"""
-        raise NotImplementedError()
-
-    def apply_prometheus(self, spec):
-        # type: (ServiceSpec) -> Completion
-        """Update prometheus cluster"""
-        raise NotImplementedError()
-
-    def upgrade_check(self, image, version):
-        # type: (Optional[str], Optional[str]) -> Completion
-        raise NotImplementedError()
-
-    def upgrade_start(self, image, version):
-        # type: (Optional[str], Optional[str]) -> Completion
-        raise NotImplementedError()
-
-    def upgrade_pause(self):
-        # type: () -> Completion
-        raise NotImplementedError()
-
-    def upgrade_resume(self):
-        # type: () -> Completion
-        raise NotImplementedError()
-
-    def upgrade_stop(self):
-        # type: () -> Completion
-        raise NotImplementedError()
-
-    def upgrade_status(self):
-        # type: () -> Completion
-        """
-        If an upgrade is currently underway, report on where
-        we are in the process, or if some error has occurred.
-
-        :return: UpgradeStatusSpec instance
-        """
-        raise NotImplementedError()
-
-    @_hide_in_features
-    def upgrade_available(self):
-        # type: () -> Completion
-        """
-        Report on what versions are available to upgrade to
-
-        :return: List of strings
-        """
-        raise NotImplementedError()
-
-class HostSpec(object):
-    def __init__(self, hostname, addr=None, labels=None):
-        # type: (str, Optional[str], Optional[List[str]]) -> None
-        self.hostname = hostname       # the hostname on the host
-        self.addr = addr or hostname   # DNS name or IP address to reach it
-        self.labels = labels or []     # initial label(s), if any
-
-class UpgradeStatusSpec(object):
-    # Orchestrator's report on what's going on with any ongoing upgrade
-    def __init__(self):
-        self.in_progress = False  # Is an upgrade underway?
-        self.target_image = None
-        self.services_complete = []  # Which daemon types are fully updated?
-        self.message = ""  # Freeform description
-
-
-class PlacementSpec(object):
-    """
-    For APIs that need to specify a node subset
-    """
-    def __init__(self, label=None, hosts=None, count=None):
-        # type: (Optional[str], Optional[List], Optional[int]) -> None
-        self.label = label
-        self.hosts = []  # type: List[HostPlacementSpec]
-        if hosts:
-            if all([isinstance(host, HostPlacementSpec) for host in hosts]):
-                self.hosts = hosts
-            else:
-                self.hosts = [parse_host_placement_specs(x, require_network=False) for x in hosts if x]
-
-        self.count = count  # type: Optional[int]
-
-    def set_hosts(self, hosts):
-        # To backpopulate the .hosts attribute when using labels or count
-        # in the orchestrator backend.
-        self.hosts = hosts
-
-    @classmethod
-    def from_dict(cls, data):
-        _cls = cls(**data)
-        _cls.validate()
-        return _cls
-
-    def validate(self):
-        if self.hosts and self.label:
-            # TODO: a less generic Exception
-            raise Exception('hosts and label are mutually exclusive')
-        if self.count is not None and self.count <= 0:
-            raise Exception("num/count must be >= 1")
-
-
-def handle_type_error(method):
-    @wraps(method)
-    def inner(cls, *args, **kwargs):
-        try:
-            return method(cls, *args, **kwargs)
-        except TypeError as e:
-            error_msg = '{}: {}'.format(cls.__name__, e)
-        raise OrchestratorValidationError(error_msg)
-    return inner
-
-
-class DaemonDescription(object):
-    """
-    For responding to queries about the status of a particular daemon,
-    stateful or stateless.
-
-    This is not about health or performance monitoring of daemons: it's
-    about letting the orchestrator tell Ceph whether and where a
-    daemon is scheduled in the cluster.  When an orchestrator tells
-    Ceph "it's running on node123", that's not a promise that the process
-    is literally up this second, it's a description of where the orchestrator
-    has decided the daemon should run.
-    """
-
-    def __init__(self,
-                 daemon_type=None,
-                 daemon_id=None,
-                 nodename=None,
-                 container_id=None,
-                 container_image_id=None,
-                 container_image_name=None,
-                 version=None,
-                 status=None,
-                 status_desc=None):
-        # Node is at the same granularity as InventoryNode
-        self.nodename = nodename
-
-        # Not everyone runs in containers, but enough people do to
-        # justify having the container_id (runtime id) and container_image
-        # (image name)
-        self.container_id = container_id                  # runtime id
-        self.container_image_id = container_image_id      # image hash
-        self.container_image_name = container_image_name  # image friendly name
-
-        # The type of service (osd, mon, mgr, etc.)
-        self.daemon_type = daemon_type
-
-        # The orchestrator will have picked some names for daemons,
-        # typically either based on hostnames or on pod names.
-        # This is the <foo> in mds.<foo>, the ID that will appear
-        # in the FSMap/ServiceMap.
-        self.daemon_id = daemon_id
-
-        # Service version that was deployed
-        self.version = version
-
-        # Service status: -1 error, 0 stopped, 1 running
-        self.status = status
-
-        # Service status description when status == -1.
-        self.status_desc = status_desc
-
-        # datetime when this info was last refreshed
-        self.last_refresh = None   # type: Optional[datetime.datetime]
-
-    def name(self):
-        return '%s.%s' % (self.daemon_type, self.daemon_id)
-
-    def __repr__(self):
-        return "<DaemonDescription>({type}.{id})".format(type=self.daemon_type,
-                                                         id=self.daemon_id)
-
-    def to_json(self):
-        out = {
-            'nodename': self.nodename,
-            'container_id': self.container_id,
-            'container_image_id': self.container_image_id,
-            'container_image_name': self.container_image_name,
-            'daemon_id': self.daemon_id,
-            'daemon_type': self.daemon_type,
-            'version': self.version,
-            'status': self.status,
-            'status_desc': self.status_desc,
-        }
-        return {k: v for (k, v) in out.items() if v is not None}
-
-    @classmethod
-    @handle_type_error
-    def from_json(cls, data):
-        return cls(**data)
-
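``from_json`` combined with ``@handle_type_error`` turns unknown keys into
friendly validation errors. A sketch::

    dd = DaemonDescription.from_json({'daemon_type': 'mds', 'daemon_id': 'a'})
    assert dd.name() == 'mds.a'
    DaemonDescription.from_json({'bogus': 1})  # OrchestratorValidationError
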
-class ServiceDescription(object):
-    """
-    For responding to queries about the status of a particular service,
-    stateful or stateless.
-
-    This is not about health or performance monitoring of services: it's
-    about letting the orchestrator tell Ceph whether and where a
-    service is scheduled in the cluster.  When an orchestrator tells
-    Ceph "it's running on node123", that's not a promise that the process
-    is literally up this second, it's a description of where the orchestrator
-    has decided the service should run.
-    """
-
-    def __init__(self, nodename=None,
-                 container_id=None, container_image_id=None,
-                 container_image_name=None,
-                 service=None, service_instance=None,
-                 service_type=None, version=None, rados_config_location=None,
-                 service_url=None, status=None, status_desc=None):
-        # Node is at the same granularity as InventoryNode
-        self.nodename = nodename  # type: Optional[str]
-
-        # Not everyone runs in containers, but enough people do to
-        # justify having the container_id (runtime id) and container_image
-        # (image name)
-        self.container_id = container_id                  # runtime id
-        self.container_image_id = container_image_id      # image hash
-        self.container_image_name = container_image_name  # image friendly name
-
-        # Some services can be deployed in groups. For example, MDSs can
-        # have active and standby daemons, and nfs-ganesha can run daemons
-        # in parallel. This tag refers to a group of daemons as a whole.
-        #
-        # For instance, a cluster of MDSs all serving the same fs
-        # will all have the same service value (which may be the
-        # Filesystem name in the FSMap).
-        #
-        # Single-instance services should leave this set to None
-        self.service = service
-
-        # The orchestrator will have picked some names for daemons,
-        # typically either based on hostnames or on pod names.
-        # This is the <foo> in mds.<foo>, the ID that will appear
-        # in the FSMap/ServiceMap.
-        self.service_instance = service_instance
-
-        # The type of service (osd, mon, mgr, etc.)
-        self.service_type = service_type
-
-        # Service version that was deployed
-        self.version = version
-
-        # Location of the service configuration when stored in rados
-        # object. Format: "rados://<pool>/[<namespace/>]<object>"
-        self.rados_config_location = rados_config_location
-
-        # If the service exposes REST-like API, this attribute should hold
-        # the URL.
-        self.service_url = service_url
-
-        # Service status: -1 error, 0 stopped, 1 running
-        self.status = status
-
-        # Service status description when status == -1.
-        self.status_desc = status_desc
-
-        # datetime when this info was last refreshed
-        self.last_refresh = None   # type: Optional[datetime.datetime]
-
-    def name(self):
-        if self.service_instance:
-            return '%s.%s' % (self.service_type, self.service_instance)
-        return self.service_type
-
-    def __repr__(self):
-        return "<ServiceDescription>({n_name}:{s_type})".format(n_name=self.nodename,
-                                                                s_type=self.name())
-
-    def to_json(self):
-        out = {
-            'nodename': self.nodename,
-            'container_id': self.container_id,
-            'service': self.service,
-            'service_instance': self.service_instance,
-            'service_type': self.service_type,
-            'version': self.version,
-            'rados_config_location': self.rados_config_location,
-            'service_url': self.service_url,
-            'status': self.status,
-            'status_desc': self.status_desc,
-        }
-        return {k: v for (k, v) in out.items() if v is not None}
-
-    @classmethod
-    @handle_type_error
-    def from_json(cls, data):
-        return cls(**data)
-
-
-class ServiceSpec(object):
-    """
-    Details of service creation.
-
-    Request to the orchestrator for a cluster of daemons
-    such as MDS, RGW, iscsi gateway, MONs, MGRs, Prometheus
-
-    This structure is supposed to be enough information to
-    start the services.
-
-    """
-
-    def __init__(self, name=None, placement=None):
-        # type: (Optional[str], Optional[PlacementSpec]) -> None
-        self.placement = PlacementSpec() if placement is None else placement  # type: PlacementSpec
-
-        #: Give this set of stateless services a name: typically it would
-        #: be the name of a CephFS filesystem, RGW zone, etc.  Must be unique
-        #: within one ceph cluster. Note: Not all clusters have a name
-        self.name = name  # type: Optional[str]
-
-        if self.placement is not None and self.placement.count is not None:
-            #: Count of service instances. Deprecated.
-            self.count = self.placement.count  # type: int
-        else:
-            self.count = 1
-
-    def validate_add(self):
-        if not self.name:
-            raise OrchestratorValidationError('Cannot add Service: Name required')
-
-
-class NFSServiceSpec(ServiceSpec):
-    def __init__(self, name, pool=None, namespace=None, placement=None):
-        super(NFSServiceSpec, self).__init__(name, placement)
-
-        #: RADOS pool where NFS client recovery data is stored.
-        self.pool = pool
-
-        #: RADOS namespace where NFS client recovery data is stored in the pool.
-        self.namespace = namespace
-
-    def validate_add(self):
-        super(NFSServiceSpec, self).validate_add()
-
-        if not self.pool:
-            raise OrchestratorValidationError('Cannot add NFS: No Pool specified')
-
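A minimal sketch of the validation above::

    spec = NFSServiceSpec('mynfs', pool='nfs-ganesha')
    spec.validate_add()                     # fine
    NFSServiceSpec('mynfs').validate_add()  # raises: No Pool specified
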
-
-class RGWSpec(ServiceSpec):
-    """
-    Settings to configure a (multisite) Ceph RGW
-
-    """
-    def __init__(self,
-                 rgw_realm,  # type: str
-                 rgw_zone,  # type: str
-                 placement=None,
-                 hosts=None,  # type: Optional[List[str]]
-                 rgw_multisite=None,  # type: Optional[bool]
-                 rgw_zonemaster=None,  # type: Optional[bool]
-                 rgw_zonesecondary=None,  # type: Optional[bool]
-                 rgw_multisite_proto=None,  # type: Optional[str]
-                 rgw_frontend_port=None,  # type: Optional[int]
-                 rgw_zonegroup=None,  # type: Optional[str]
-                 rgw_zone_user=None,  # type: Optional[str]
-                 system_access_key=None,  # type: Optional[str]
-                 system_secret_key=None,  # type: Optional[str]
-                 count=None  # type: Optional[int]
-                 ):
-        # Regarding default values: Ansible has a `set_rgwspec_defaults` that sets
-        # default values that make sense for Ansible. Rook has default values implemented
-        # in Rook itself. Thus we don't set any defaults here in this class.
-
-        super(RGWSpec, self).__init__(name=rgw_realm + '.' + rgw_zone,
-                                      placement=placement)
-
-        #: List of hosts where RGWs should run. Not for Rook.
-        if hosts:
-            self.placement = PlacementSpec(hosts=hosts)
-
-        #: is multisite
-        self.rgw_multisite = rgw_multisite
-        self.rgw_zonemaster = rgw_zonemaster
-        self.rgw_zonesecondary = rgw_zonesecondary
-        self.rgw_multisite_proto = rgw_multisite_proto
-        self.rgw_frontend_port = rgw_frontend_port
-
-        self.rgw_realm = rgw_realm
-        self.rgw_zone = rgw_zone
-        self.rgw_zonegroup = rgw_zonegroup
-        self.rgw_zone_user = rgw_zone_user
-
-        self.system_access_key = system_access_key
-        self.system_secret_key = system_secret_key
-
-    @property
-    def rgw_multisite_endpoint_addr(self):
-        """Returns the first host. Not supported for Rook."""
-        return self.placement.hosts[0]
-
-    @property
-    def rgw_multisite_endpoints_list(self):
-        return ",".join(["{}://{}:{}".format(self.rgw_multisite_proto,
-                             host,
-                             self.rgw_frontend_port) for host in self.placement.hosts])
-
-    def genkey(self, nchars):
-        """ Returns a random string of nchars
-
-        :nchars : Length of the returned string
-        """
-        # TODO Python 3: use Secrets module instead.
-
-        return ''.join(random.choice(string.ascii_uppercase +
-                                     string.ascii_lowercase +
-                                     string.digits) for _ in range(nchars))
-
-    @classmethod
-    def from_json(cls, json_rgw_spec):
-        # type: (dict) -> RGWSpec
-        """
-        Initialize 'RGWSpec' object data from a json structure
-        :param json_rgw_spec: A valid dict with the RGW settings
-        """
-        # TODO: also add PlacementSpec(**json_rgw_spec['placement'])
-        return RGWSpec(**json_rgw_spec)
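
The derived service name is ``<realm>.<zone>``. A sketch::

    spec = RGWSpec.from_json({'rgw_realm': 'default', 'rgw_zone': 'us-east'})
    assert spec.name == 'default.us-east'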
-
-
-class InventoryFilter(object):
-    """
-    When fetching inventory, use this filter to avoid unnecessarily
-    scanning the whole estate.
-
-    Typical use:
-
-    * filter by node when presenting UI workflow for configuring
-      a particular server.
-    * filter by label when not all of estate is Ceph servers,
-      and we want to only learn about the Ceph servers.
-    * filter by label when we are interested particularly
-      in e.g. OSD servers.
-
-    """
-    def __init__(self, labels=None, nodes=None):
-        # type: (Optional[List[str]], Optional[List[str]]) -> None
-
-        #: Optional: get info about nodes matching labels
-        self.labels = labels
-
-        #: Optional: get info about certain named nodes only
-        self.nodes = nodes
-
-
-class InventoryNode(object):
-    """
-    When fetching inventory, all devices are grouped inside
-    an InventoryNode.
-    """
-    def __init__(self, name, devices=None, labels=None, addr=None):
-        # type: (str, Optional[inventory.Devices], Optional[List[str]], Optional[str]) -> None
-        if devices is None:
-            devices = inventory.Devices([])
-        if labels is None:
-            labels = []
-        assert isinstance(devices, inventory.Devices)
-
-        self.name = name  # unique within cluster.  For example a hostname.
-        self.addr = addr or name
-        self.devices = devices
-        self.labels = labels
-
-    def to_json(self):
-        return {
-            'name': self.name,
-            'addr': self.addr,
-            'devices': self.devices.to_json(),
-            'labels': self.labels,
-        }
-
-    @classmethod
-    def from_json(cls, data):
-        try:
-            _data = copy.deepcopy(data)
-            name = _data.pop('name')
-            addr = _data.pop('addr', None) or name
-            devices = inventory.Devices.from_json(_data.pop('devices'))
-            labels = _data.pop('labels', list())
-            if _data:
-                error_msg = 'Unknown key(s) in Inventory: {}'.format(','.join(_data.keys()))
-                raise OrchestratorValidationError(error_msg)
-            return cls(name, devices, labels, addr)
-        except KeyError as e:
-            error_msg = '{} is required for {}'.format(e, cls.__name__)
-            raise OrchestratorValidationError(error_msg)
-        except TypeError as e:
-            raise OrchestratorValidationError('Failed to read inventory: {}'.format(e))
-
-    @classmethod
-    def from_nested_items(cls, hosts):
-        devs = inventory.Devices.from_json
-        return [cls(item[0], devs(item[1].data)) for item in hosts]
-
-    def __repr__(self):
-        return "<InventoryNode>({name})".format(name=self.name)
-
-    @staticmethod
-    def get_host_names(nodes):
-        # type: (List[InventoryNode]) -> List[str]
-        return [node.name for node in nodes]
-
-    def __eq__(self, other):
-        return self.name == other.name and self.devices == other.devices
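
A round trip through JSON (a sketch; it assumes ``inventory.Devices.from_json``
accepts an empty list)::

    node = InventoryNode.from_json({'name': 'node1', 'addr': '10.0.0.1',
                                    'devices': []})
    assert node.to_json()['addr'] == '10.0.0.1'
    InventoryNode.from_json({'name': 'n', 'devices': [], 'oops': 1})
    # -> OrchestratorValidationError: Unknown key(s) in Inventory: oops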
-
-
-class DeviceLightLoc(namedtuple('DeviceLightLoc', ['host', 'dev', 'path'])):
-    """
-    Describes a specific device on a specific host. Used for enabling or disabling LEDs
-    on devices.
-
-    hostname as in :func:`orchestrator.Orchestrator.get_hosts`
-
-    device_id: e.g. ``ABC1234DEF567-1R1234_ABC8DE0Q``.
-       See ``ceph osd metadata | jq '.[].device_ids'``
-    """
-    __slots__ = ()
-
-
-def _mk_orch_methods(cls):
-    # `shim` needs to be defined outside of the for loop.
-    # Otherwise `method_name` would always be bound to the last key.
-    def shim(method_name):
-        def inner(self, *args, **kwargs):
-            completion = self._oremote(method_name, args, kwargs)
-            return completion
-        return inner
-
-    for meth in Orchestrator.__dict__:
-        if not meth.startswith('_') and meth not in ['is_orchestrator_module']:
-            setattr(cls, meth, shim(meth))
-    return cls
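
The extra ``shim`` function is needed because Python closures capture
variables, not values. The classic pitfall, for illustration::

    fns = [lambda: meth for meth in ('add_host', 'remove_host')]
    assert [f() for f in fns] == ['remove_host', 'remove_host']  # late binding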
-
-
-@_mk_orch_methods
-class OrchestratorClientMixin(Orchestrator):
-    """
-    A module that inherits from `OrchestratorClientMixin` can directly call
-    all :class:`Orchestrator` methods without manually calling remote.
-
-    Every interface method from ``Orchestrator`` is converted into a stub method that internally
-    calls :func:`OrchestratorClientMixin._oremote`
-
-    >>> class MyModule(OrchestratorClientMixin):
-    ...    def func(self):
-    ...        completion = self.add_host('somehost')  # calls `_oremote()`
-    ...        self._orchestrator_wait([completion])
-    ...        self.log.debug(completion.result)
-
-    .. note:: Orchestrator implementations should not inherit from `OrchestratorClientMixin`.
-        The reason is that OrchestratorClientMixin magically redirects all methods to the
-        "real" implementation of the orchestrator.
-
-
-    >>> import mgr_module
-    >>> class MyImplementation(mgr_module.MgrModule, Orchestrator):
-    ...     def __init__(self, ...):
-    ...         self.orch_client = OrchestratorClientMixin()
-    ...         self.orch_client.set_mgr(self.mgr)
-    """
-
-    def set_mgr(self, mgr):
-        # type: (MgrModule) -> None
-        """
-        Usable in the Dashboard, which uses a global ``mgr``
-        """
-
-        self.__mgr = mgr  # Make sure we're not overwriting any other `mgr` properties
-
-    def __get_mgr(self):
-        try:
-            return self.__mgr
-        except AttributeError:
-            return self
-
-    def _oremote(self, meth, args, kwargs):
-        """
-        Helper for invoking `remote` on whichever orchestrator is enabled
-
-        :raises RuntimeError: If the remote method failed.
-        :raises OrchestratorError: orchestrator failed to perform
-        :raises ImportError: no `orchestrator_cli` module or backend not found.
-        """
-        mgr = self.__get_mgr()
-
-        try:
-            o = mgr._select_orchestrator()
-        except AttributeError:
-            o = mgr.remote('orchestrator_cli', '_select_orchestrator')
-
-        if o is None:
-            raise NoOrchestrator()
-
-        mgr.log.debug("_oremote {} -> {}.{}(*{}, **{})".format(mgr.module_name, o, meth, args, kwargs))
-        return mgr.remote(o, meth, *args, **kwargs)
-
-    def _orchestrator_wait(self, completions):
-        # type: (List[Completion]) -> None
-        """
-        Wait for completions to complete (reads) or
-        become persistent (writes).
-
-        Waits for writes to be *persistent* but not *effective*.
-
-        :param completions: List of Completions
-        :raises NoOrchestrator:
-        :raises RuntimeError: something went wrong while calling the process method.
-        :raises ImportError: no `orchestrator_cli` module or backend not found.
-        """
-        while any(not c.has_result for c in completions):
-            self.process(completions)
-            self.__get_mgr().log.info("Operations pending: %s",
-                                      sum(1 for c in completions if not c.has_result))
-            if any(c.needs_result for c in completions):
-                time.sleep(1)
-            else:
-                break
-
-
-class OutdatableData(object):
-    DATEFMT = '%Y-%m-%d %H:%M:%S.%f'
-
-    def __init__(self, data=None, last_refresh=None):
-        # type: (Optional[dict], Optional[datetime.datetime]) -> None
-        self._data = data
-        if data is not None and last_refresh is None:
-            self.last_refresh = datetime.datetime.utcnow()  # type: Optional[datetime.datetime]
-        else:
-            self.last_refresh = last_refresh
-
-    def json(self):
-        if self.last_refresh is not None:
-            timestr = self.last_refresh.strftime(self.DATEFMT)  # type: Optional[str]
-        else:
-            timestr = None
-
-        return {
-            "data": self._data,
-            "last_refresh": timestr,
-        }
-
-    @property
-    def data(self):
-        return self._data
-
-    # @data.setter
-    # No setter, as it doesn't work as expected: it's not saved in the store automatically.
-
-    @classmethod
-    def time_from_string(cls, timestr):
-        if timestr is None:
-            return None
-        # drop the 'Z' timezone indication, it's always UTC
-        timestr = timestr.rstrip('Z')
-        return datetime.datetime.strptime(timestr, cls.DATEFMT)
-
-    @classmethod
-    def from_json(cls, data):
-        return cls(data['data'], cls.time_from_string(data['last_refresh']))
-
-    def outdated(self, timeout=None):
-        if timeout is None:
-            timeout = 600
-        if self.last_refresh is None:
-            return True
-        cutoff = datetime.datetime.utcnow() - datetime.timedelta(
-            seconds=timeout)
-        return self.last_refresh < cutoff
-
-    def __repr__(self):
-        return 'OutdatableData(data={}, last_refresh={})'.format(self._data, self.last_refresh)
-
-
-class OutdatableDictMixin(object):
-    """
-    Toolbox for implementing a cache. As every orchestrator has
-    different needs, we cannot implement any logic here.
-    """
-
-    def __getitem__(self, item):
-        # type: (str) -> OutdatableData
-        return OutdatableData.from_json(super(OutdatableDictMixin, self).__getitem__(item))  # type: ignore
-
-    def __setitem__(self, key, value):
-        # type: (str, OutdatableData) -> None
-        val = None if value is None else value.json()
-        super(OutdatableDictMixin, self).__setitem__(key, val)  # type: ignore
-
-    def items(self):
-        ## type: () -> Iterator[Tuple[str, OutdatableData]]
-        for item in super(OutdatableDictMixin, self).items():  # type: ignore
-            k, v = item
-            yield k, OutdatableData.from_json(v)
-
-    def items_filtered(self, keys=None):
-        if keys:
-            return [(host, self[host]) for host in keys]
-        else:
-            return list(self.items())
-
-    def any_outdated(self, timeout=None):
-        items = list(self.items())
-        if not items:
-            return True
-        return any(i[1].outdated(timeout) for i in items)
-
-    def remove_outdated(self):
-        outdated = [item[0] for item in self.items() if item[1].outdated()]
-        for o in outdated:
-            del self[o]  # type: ignore
-
-    def invalidate(self, key):
-        self[key] = OutdatableData(self[key].data,
-                                   datetime.datetime.fromtimestamp(0))
-
-
-class OutdatablePersistentDict(OutdatableDictMixin, PersistentStoreDict):
-    pass
-
-
-class OutdatableDict(OutdatableDictMixin, dict):
-    pass
diff --git a/src/pybind/mgr/orchestrator/README.md b/src/pybind/mgr/orchestrator/README.md
new file mode 100644 (file)
index 0000000..d70e88c
--- /dev/null
@@ -0,0 +1,14 @@
+# Orchestrator CLI
+
+See also [orchestrator cli doc](https://docs.ceph.com/docs/master/mgr/orchestrator_cli/).
+
+## Running the Teuthology tests
+
+To run the API tests against a real Ceph cluster, we leverage the Teuthology
+framework and the `test_orchestrator` backend.
+
+`source` the script and run the tests manually:
+
+    $ pushd ../dashboard ; source ./run-backend-api-tests.sh ; popd
+    $ run_teuthology_tests tasks.mgr.test_orchestrator_cli
+    $ cleanup_teuthology
diff --git a/src/pybind/mgr/orchestrator/__init__.py b/src/pybind/mgr/orchestrator/__init__.py
new file mode 100644 (file)
index 0000000..946ddb0
--- /dev/null
@@ -0,0 +1,16 @@
+from __future__ import absolute_import
+
+from .module import OrchestratorCli
+
+# usage: e.g. `from orchestrator import ServiceSpec`
+from ._interface import \
+    Completion, TrivialReadCompletion, raise_if_exception, ProgressReference, pretty_print, _Promise, \
+    CLICommand, _cli_write_command, _cli_read_command, \
+    Orchestrator, OrchestratorClientMixin, \
+    OrchestratorValidationError, OrchestratorError, NoOrchestrator, \
+    ServiceSpec, NFSServiceSpec, RGWSpec, HostPlacementSpec, \
+    ServiceDescription, InventoryFilter, PlacementSpec,  HostSpec, \
+    DaemonDescription, \
+    InventoryNode, DeviceLightLoc, \
+    OutdatableData, OutdatablePersistentDict, \
+    UpgradeStatusSpec
diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py
new file mode 100644 (file)
index 0000000..576b21d
--- /dev/null
@@ -0,0 +1,1736 @@
+
+"""
+ceph-mgr orchestrator interface
+
+Please see the ceph-mgr module developer's guide for more information.
+"""
+import copy
+import functools
+import logging
+import pickle
+import sys
+import time
+from collections import namedtuple
+from functools import wraps
+import uuid
+import string
+import random
+import datetime
+import re
+import six
+import errno
+
+from ceph.deployment import inventory
+
+from mgr_module import MgrModule, PersistentStoreDict, CLICommand, HandleCommandResult
+from mgr_util import format_bytes
+
+try:
+    from ceph.deployment.drive_group import DriveGroupSpec
+    from typing import TypeVar, Generic, List, Optional, Union, Tuple, Iterator, Callable, Any, \
+        Type, Sequence
+except ImportError:
+    pass
+
+logger = logging.getLogger(__name__)
+
+
+class HostPlacementSpec(namedtuple('HostPlacementSpec', ['hostname', 'network', 'name'])):
+    def __str__(self):
+        res = ''
+        res += self.hostname
+        if self.network:
+            res += ':' + self.network
+        if self.name:
+            res += '=' + self.name
+        return res
+
+    @classmethod
+    def parse(cls, host, require_network=True):
+        # type: (str, bool) -> HostPlacementSpec
+        """
+        Split host into host, network, and (optional) daemon name parts.  The network
+        part can be an IP, CIDR, or ceph addrvec like '[v2:1.2.3.4:3300,v1:1.2.3.4:6789]'.
+        e.g.,
+          "myhost"
+          "myhost=name"
+          "myhost:1.2.3.4"
+          "myhost:1.2.3.4=name"
+          "myhost:1.2.3.0/24"
+          "myhost:1.2.3.0/24=name"
+          "myhost:[v2:1.2.3.4:3000]=name"
+          "myhost:[v2:1.2.3.4:3000,v1:1.2.3.4:6789]=name"
+        """
+        # Matches from start to : or = or until end of string
+        host_re = r'^(.*?)(:|=|$)'
+        # Matches from : to = or until end of string
+        ip_re = r':(.*?)(=|$)'
+        # Matches from = to end of string
+        name_re = r'=(.*?)$'
+
+        # assign defaults
+        host_spec = cls('', '', '')
+
+        match_host = re.search(host_re, host)
+        if match_host:
+            host_spec = host_spec._replace(hostname=match_host.group(1))
+
+        name_match = re.search(name_re, host)
+        if name_match:
+            host_spec = host_spec._replace(name=name_match.group(1))
+
+        ip_match = re.search(ip_re, host)
+        if ip_match:
+            host_spec = host_spec._replace(network=ip_match.group(1))
+
+        if not require_network:
+            return host_spec
+
+        from ipaddress import ip_network, ip_address
+        networks = list()  # type: List[str]
+        network = host_spec.network
+        # in case we have [v2:1.2.3.4:3000,v1:1.2.3.4:6478]
+        if ',' in network:
+            networks = network.split(',')
+        else:
+            networks.append(network)
+        for network in networks:
+            # only if we have versioned network configs
+            if network.startswith('v') or network.startswith('[v'):
+                network = network.split(':')[1]
+            try:
+                # if subnets are defined, also verify the validity
+                if '/' in network:
+                    ip_network(six.text_type(network))
+                else:
+                    ip_address(six.text_type(network))
+            except ValueError as e:
+                # logging?
+                raise e
+
+        return host_spec
+
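+# A minimal usage sketch of HostPlacementSpec.parse; the host string below is
+# illustrative, following the formats documented in the docstring:
+#
+#   >>> spec = HostPlacementSpec.parse('myhost:1.2.3.0/24=rgw.a')
+#   >>> (spec.hostname, spec.network, spec.name)
+#   ('myhost', '1.2.3.0/24', 'rgw.a')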
+
+class OrchestratorError(Exception):
+    """
+    General orchestrator specific error.
+
+    Used for deployment, configuration or user errors.
+
+    It's not intended for programming errors or orchestrator internal errors.
+    """
+
+
+class NoOrchestrator(OrchestratorError):
+    """
+    No orchestrator is configured.
+    """
+    def __init__(self, msg="No orchestrator configured (try `ceph orch set backend`)"):
+        super(NoOrchestrator, self).__init__(msg)
+
+
+class OrchestratorValidationError(OrchestratorError):
+    """
+    Raised when an orchestrator doesn't support a specific feature.
+    """
+
+
+def handle_exception(prefix, cmd_args, desc, perm, func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except (OrchestratorError, ImportError) as e:
+            # Do not print Traceback for expected errors.
+            return HandleCommandResult(-errno.ENOENT, stderr=str(e))
+        except NotImplementedError:
+            msg = 'This Orchestrator does not support `{}`'.format(prefix)
+            return HandleCommandResult(-errno.ENOENT, stderr=msg)
+
+    return CLICommand(prefix, cmd_args, desc, perm)(wrapper)
+
+
+def _cli_command(perm):
+    def inner_cli_command(prefix, cmd_args="", desc=""):
+        return lambda func: handle_exception(prefix, cmd_args, desc, perm, func)
+    return inner_cli_command
+
+
+_cli_read_command = _cli_command('r')
+_cli_write_command = _cli_command('rw')
+
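+# A sketch of how a CLI handler would use these decorators; the command
+# prefix and handler below are illustrative, not defined in this module:
+#
+#   @_cli_read_command('orch example', desc='show an example')
+#   def _example(self):
+#       return HandleCommandResult(stdout='example')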
+
+def _no_result():
+    return object()
+
+
+class _Promise(object):
+    """
+    A completion may need multiple promises to be fulfilled. `_Promise` is one
+    step.
+
+    Typically ``Orchestrator`` implementations inherit from this class to
+    build their own way of finishing a step to fulfil a future.
+
+    They are not exposed in the orchestrator interface and can be seen as a
+    helper to build orchestrator modules.
+    """
+    INITIALIZED = 1  # We have a parent completion and a next completion
+    RUNNING = 2
+    FINISHED = 3  # we have a final result
+
+    NO_RESULT = _no_result()  # type: None
+    ASYNC_RESULT = object()
+
+    def __init__(self,
+                 _first_promise=None,  # type: Optional["_Promise"]
+                 value=NO_RESULT,  # type: Optional[Any]
+                 on_complete=None,    # type: Optional[Callable]
+                 name=None,  # type: Optional[str]
+                 ):
+        self._on_complete_ = on_complete
+        self._name = name
+        self._next_promise = None  # type: Optional[_Promise]
+
+        self._state = self.INITIALIZED
+        self._exception = None  # type: Optional[Exception]
+
+        # Value of this _Promise. May be an intermediate result.
+        self._value = value
+
+        # _Promise is not a continuation monad, as `_result` is of type
+        # T instead of (T -> r) -> r. Therefore we need to store the first promise here.
+        self._first_promise = _first_promise or self  # type: '_Promise'
+
+    @property
+    def _exception(self):
+        # type: () -> Optional[Exception]
+        return getattr(self, '_exception_', None)
+
+    @_exception.setter
+    def _exception(self, e):
+        self._exception_ = e
+        self._serialized_exception_ = pickle.dumps(e) if e is not None else None
+
+    @property
+    def _serialized_exception(self):
+        # type: () -> Optional[bytes]
+        return getattr(self, '_serialized_exception_', None)
+
+    @property
+    def _on_complete(self):
+        # type: () -> Optional[Callable]
+        # https://github.com/python/mypy/issues/4125
+        return self._on_complete_
+
+    @_on_complete.setter
+    def _on_complete(self, val):
+        # type: (Optional[Callable]) -> None
+        self._on_complete_ = val
+
+
+    def __repr__(self):
+        name = self._name or getattr(self._on_complete, '__name__', '??') if self._on_complete else 'None'
+        val = repr(self._value) if self._value is not self.NO_RESULT else 'NA'
+        return '{}(_s={}, val={}, _on_c={}, id={}, name={}, pr={}, _next={})'.format(
+            self.__class__, self._state, val, self._on_complete, id(self), name, getattr(self._next_promise, '_progress_reference', 'NA'), repr(self._next_promise)
+        )
+
+    def pretty_print_1(self):
+        if self._name:
+            name = self._name
+        elif self._on_complete is None:
+            name = 'lambda x: x'
+        elif hasattr(self._on_complete, '__name__'):
+            name = getattr(self._on_complete, '__name__')
+        else:
+            name = self._on_complete.__class__.__name__
+        val = repr(self._value) if self._value not in (self.NO_RESULT, self.ASYNC_RESULT) else '...'
+        prefix = {
+            self.INITIALIZED: '      ',
+            self.RUNNING:     '   >>>',
+            self.FINISHED:    '(done)'
+        }[self._state]
+        return '{} {}({}),'.format(prefix, name, val)
+
+    def then(self, on_complete):
+        # type: (Any, Callable) -> Any
+        """
+        Call ``on_complete`` as soon as this promise is finalized.
+        """
+        assert self._state in (self.INITIALIZED, self.RUNNING)
+        if self._on_complete is not None:
+            assert self._next_promise is None
+            self._set_next_promise(self.__class__(
+                _first_promise=self._first_promise,
+                on_complete=on_complete
+            ))
+            return self._next_promise
+
+        else:
+            self._on_complete = on_complete
+            self._set_next_promise(self.__class__(_first_promise=self._first_promise))
+            return self._next_promise
+
+    def _set_next_promise(self, next):
+        # type: (_Promise) -> None
+        assert self is not next
+        assert self._state in (self.INITIALIZED, self.RUNNING)
+
+        self._next_promise = next
+        assert self._next_promise is not None
+        for p in iter(self._next_promise):
+            p._first_promise = self._first_promise
+
+    def _finalize(self, value=NO_RESULT):
+        """
+        Sets this promise to complete.
+
+        Orchestrators may choose to use this helper function.
+
+        :param value: new value.
+        """
+        if self._state not in (self.INITIALIZED, self.RUNNING):
+            raise ValueError('finalize: {} already finished. {}'.format(repr(self), value))
+
+        self._state = self.RUNNING
+
+        if value is not self.NO_RESULT:
+            self._value = value
+        assert self._value is not self.NO_RESULT, repr(self)
+
+        if self._on_complete:
+            try:
+                next_result = self._on_complete(self._value)
+            except Exception as e:
+                self.fail(e)
+                return
+        else:
+            next_result = self._value
+
+        if isinstance(next_result, _Promise):
+            # hack: _Promise is not a continuation monad.
+            next_result = next_result._first_promise  # type: ignore
+            assert next_result not in self, repr(self._first_promise) + repr(next_result)
+            assert self not in next_result
+            next_result._append_promise(self._next_promise)
+            self._set_next_promise(next_result)
+            assert self._next_promise
+            if self._next_promise._value is self.NO_RESULT:
+                self._next_promise._value = self._value
+            self.propagate_to_next()
+        elif next_result is not self.ASYNC_RESULT:
+            # simple map. simply forward
+            if self._next_promise:
+                self._next_promise._value = next_result
+            else:
+                # Hack: next_result is of type U, _value is of type T
+                self._value = next_result  # type: ignore
+            self.propagate_to_next()
+        else:
+            # asynchronous promise
+            pass
+
+
+    def propagate_to_next(self):
+        self._state = self.FINISHED
+        logger.debug('finalized {}'.format(repr(self)))
+        if self._next_promise:
+            self._next_promise._finalize()
+
+    def fail(self, e):
+        # type: (Exception) -> None
+        """
+        Sets the whole completion to failed with this exception and ends the
+        evaluation.
+        """
+        if self._state == self.FINISHED:
+            raise ValueError(
+                'Invalid State: called fail, but Completion is already finished: {}'.format(str(e)))
+        assert self._state in (self.INITIALIZED, self.RUNNING)
+        logger.exception('_Promise failed')
+        self._exception = e
+        self._value = 'exception'
+        if self._next_promise:
+            self._next_promise.fail(e)
+        self._state = self.FINISHED
+
+    def __contains__(self, item):
+        return any(item is p for p in iter(self._first_promise))
+
+    def __iter__(self):
+        yield self
+        elem = self._next_promise
+        while elem is not None:
+            yield elem
+            elem = elem._next_promise
+
+    def _append_promise(self, other):
+        if other is not None:
+            assert self not in other
+            assert other not in self
+            self._last_promise()._set_next_promise(other)
+
+    def _last_promise(self):
+        # type: () -> _Promise
+        return list(iter(self))[-1]
+
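+# A small sketch of chaining two promises (values illustrative): each then()
+# appends a step, and finalizing the first promise evaluates the chain:
+#
+#   >>> p = _Promise(on_complete=lambda x: x + 1).then(lambda x: x * 2)
+#   >>> p._first_promise._finalize(3)
+#   >>> p._value
+#   8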
+
+class ProgressReference(object):
+    def __init__(self,
+                 message,  # type: str
+                 mgr,
+                 completion=None  # type: Optional[Callable[[], Completion]]
+                ):
+        """
+        ProgressReference can be used within Completions::
+
+            +---------------+      +---------------------------------+
+            |               | then |                                 |
+            | My Completion | +--> | on_complete=ProgressReference() |
+            |               |      |                                 |
+            +---------------+      +---------------------------------+
+
+        See :func:`Completion.with_progress` for an easy way to create
+        a progress reference
+
+        """
+        super(ProgressReference, self).__init__()
+        self.progress_id = str(uuid.uuid4())
+        self.message = message
+        self.mgr = mgr
+
+        #: The completion can already have a result before the write
+        #: operation is effective. progress == 1 means the services are
+        #: created / removed.
+        self.completion = completion  # type: Optional[Callable[[], Completion]]
+
+        #: if an orchestrator module can provide a more detailed
+        #: progress information, it needs to also call ``progress.update()``.
+        self.progress = 0.0
+
+        self._completion_has_result = False
+        self.mgr.all_progress_references.append(self)
+
+    def __str__(self):
+        """
+        ``__str__()`` is used for determining the message for progress events.
+        """
+        return self.message or super(ProgressReference, self).__str__()
+
+    def __call__(self, arg):
+        self._completion_has_result = True
+        self.progress = 1.0
+        return arg
+
+    @property
+    def progress(self):
+        return self._progress
+
+    @progress.setter
+    def progress(self, progress):
+        assert progress <= 1.0
+        self._progress = progress
+        try:
+            if self.effective:
+                self.mgr.remote("progress", "complete", self.progress_id)
+                self.mgr.all_progress_references = [p for p in self.mgr.all_progress_references if p is not self]
+            else:
+                self.mgr.remote("progress", "update", self.progress_id, self.message,
+                                progress,
+                                [("origin", "orchestrator")])
+        except ImportError:
+            # If the progress module is disabled that's fine,
+            # they just won't see the output.
+            pass
+
+    @property
+    def effective(self):
+        return self.progress == 1 and self._completion_has_result
+
+    def update(self):
+        def progress_run(progress):
+            self.progress = progress
+        if self.completion:
+            c = self.completion().then(progress_run)
+            self.mgr.process([c._first_promise])
+        else:
+            self.progress = 1
+
+    def fail(self):
+        self._completion_has_result = True
+        self.progress = 1
+
+
+class Completion(_Promise):
+    """
+    Combines multiple promises into one overall operation.
+
+    Completions are composable: one completion can call another,
+    which makes them re-usable as promises. E.g.::
+
+        >>> return Orchestrator().get_hosts().then(self._create_osd)
+
+    where ``get_hosts`` returns a Completion of list of hosts and
+    ``_create_osd`` takes a list of hosts.
+
+    The concept behind this is to store the computation steps
+    explicitly and then evaluate the chain:
+
+        >>> p = Completion(on_complete=lambda x: x*2).then(lambda x: str(x))
+        ... p.finalize(2)
+        ... assert p.result == "4"
+
+    or graphically::
+
+        +---------------+      +-----------------+
+        |               | then |                 |
+        | lambda x: x*2 | +--> | lambda x: str(x)|
+        |               |      |                 |
+        +---------------+      +-----------------+
+
+    """
+    def __init__(self,
+                 _first_promise=None,  # type: Optional["Completion"]
+                 value=_Promise.NO_RESULT,  # type: Any
+                 on_complete=None,  # type: Optional[Callable]
+                 name=None,  # type: Optional[str]
+                 ):
+        super(Completion, self).__init__(_first_promise, value, on_complete, name)
+
+    @property
+    def _progress_reference(self):
+        # type: () -> Optional[ProgressReference]
+        if hasattr(self._on_complete, 'progress_id'):
+            return self._on_complete  # type: ignore
+        return None
+
+    @property
+    def progress_reference(self):
+        # type: () -> Optional[ProgressReference]
+        """
+        ProgressReference. Marks this completion
+        as a write completion.
+        """
+
+        references = [c._progress_reference for c in iter(self) if c._progress_reference is not None]
+        if references:
+            assert len(references) == 1
+            return references[0]
+        return None
+
+    @classmethod
+    def with_progress(cls,  # type: Any
+                      message,  # type: str
+                      mgr,
+                      _first_promise=None,  # type: Optional["Completion"]
+                      value=_Promise.NO_RESULT,  # type: Any
+                      on_complete=None,  # type: Optional[Callable]
+                      calc_percent=None  # type: Optional[Callable[[], Any]]
+                      ):
+        # type: (...) -> Any
+
+        c = cls(
+            _first_promise=_first_promise,
+            value=value,
+            on_complete=on_complete
+        ).add_progress(message, mgr, calc_percent)
+
+        return c._first_promise
+
+    def add_progress(self,
+                     message,  # type: str
+                     mgr,
+                     calc_percent=None  # type: Optional[Callable[[], Any]]
+                     ):
+        return self.then(
+            on_complete=ProgressReference(
+                message=message,
+                mgr=mgr,
+                completion=calc_percent
+            )
+        )
+
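+    # A sketch of attaching progress to a write completion; `apply_change`
+    # and `mgr` below are illustrative:
+    #
+    #   >>> c = Completion(on_complete=apply_change)
+    #   >>> c = c.add_progress('Updating service', mgr)
+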
+    def fail(self, e):
+        super(Completion, self).fail(e)
+        if self._progress_reference:
+            self._progress_reference.fail()
+
+    def finalize(self, result=_Promise.NO_RESULT):
+        if self._first_promise._state == self.INITIALIZED:
+            self._first_promise._finalize(result)
+
+    @property
+    def result(self):
+        """
+        The result of the operation that we waited for.
+        Only valid after calling Orchestrator.process() on this
+        completion.
+        """
+        last = self._last_promise()
+        assert last._state == _Promise.FINISHED
+        return last._value
+
+    def result_str(self):
+        """Force a string."""
+        if self.result is None:
+            return ''
+        if isinstance(self.result, list):
+            return '\n'.join(str(x) for x in self.result)
+        return str(self.result)
+
+    @property
+    def exception(self):
+        # type: () -> Optional[Exception]
+        return self._last_promise()._exception
+
+    @property
+    def serialized_exception(self):
+        # type: () -> Optional[bytes]
+        return self._last_promise()._serialized_exception
+
+    @property
+    def has_result(self):
+        # type: () -> bool
+        """
+        Does the operation already have a result?
+
+        For write operations, it can already have a
+        result if the orchestrator's configuration is
+        persistently written. Typically this would
+        indicate that an update had been written to
+        a manifest, but that the update had not
+        necessarily been pushed out to the cluster.
+
+        :return:
+        """
+        return self._last_promise()._state == _Promise.FINISHED
+
+    @property
+    def is_errored(self):
+        # type: () -> bool
+        """
+        Has the completion failed? The default implementation looks for
+        self.exception. Can be overridden.
+        """
+        return self.exception is not None
+
+    @property
+    def needs_result(self):
+        # type: () -> bool
+        """
+        Could the external operation be deemed as complete,
+        or should we wait?
+        We must wait for a read operation only if it is not complete.
+        """
+        return not self.is_errored and not self.has_result
+
+    @property
+    def is_finished(self):
+        # type: () -> bool
+        """
+        Could the external operation be deemed as complete,
+        or should we wait?
+        We must wait for a read operation only if it is not complete.
+        """
+        return self.is_errored or (self.has_result)
+
+    def pretty_print(self):
+        reprs = '\n'.join(p.pretty_print_1() for p in iter(self._first_promise))
+        return """<{}>[\n{}\n]""".format(self.__class__.__name__, reprs)
+
+
+def pretty_print(completions):
+    # type: (Sequence[Completion]) -> str
+    return ', '.join(c.pretty_print() for c in completions)
+
+
+def raise_if_exception(c):
+    # type: (Completion) -> None
+    """
+    :raises OrchestratorError: Some user error or a config error.
+    :raises Exception: Some internal error
+    """
+    if c.serialized_exception is not None:
+        try:
+            e = pickle.loads(c.serialized_exception)
+        except (KeyError, AttributeError):
+            raise Exception('{}: {}'.format(type(c.exception), c.exception))
+        raise e
+
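+# The typical caller pattern, as a sketch: wait for the completion, surface
+# any remote exception, then read the result (`orch` is illustrative):
+#
+#   >>> completion = orch.get_hosts()
+#   >>> orch._orchestrator_wait([completion])
+#   >>> raise_if_exception(completion)
+#   >>> hosts = completion.result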
+
+class TrivialReadCompletion(Completion):
+    """
+    This is the trivial completion simply wrapping a result.
+    """
+    def __init__(self, result):
+        super(TrivialReadCompletion, self).__init__()
+        if result:
+            self.finalize(result)
+
+
+def _hide_in_features(f):
+    f._hide_in_features = True
+    return f
+
+
+class Orchestrator(object):
+    """
+    Calls in this class may do long running remote operations, with time
+    periods ranging from network latencies to package install latencies and large
+    internet downloads.  For that reason, all are asynchronous, and return
+    ``Completion`` objects.
+
+    Methods should only return the completion and not directly execute
+    anything, like network calls. Otherwise the purpose of
+    those completions is defeated.
+
+    Implementations are not required to start work on an operation until
+    the caller waits on the relevant Completion objects.  Callers making
+    multiple updates should not wait on Completions until they're done
+    sending operations: this enables implementations to batch up a series
+    of updates when wait() is called on a set of Completion objects.
+
+    Implementations are encouraged to keep reasonably fresh caches of
+    the status of the system: it is better to serve a stale-but-recent
+    result read of e.g. device inventory than it is to keep the caller waiting
+    while you scan hosts every time.
+    """
+
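+    # How a backend might implement one of the read calls below; a sketch
+    # only, where `self._cached_hosts` is hypothetical:
+    #
+    #   def get_hosts(self):
+    #       return TrivialReadCompletion(self._cached_hosts)
+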
+    @_hide_in_features
+    def is_orchestrator_module(self):
+        """
+        Enable other modules to interrogate this module to discover
+        whether it's usable as an orchestrator module.
+
+        Subclasses do not need to override this.
+        """
+        return True
+
+    @_hide_in_features
+    def available(self):
+        # type: () -> Tuple[bool, str]
+        """
+        Report whether we can talk to the orchestrator.  This is the
+        place to give the user a meaningful message if the orchestrator
+        isn't running or can't be contacted.
+
+        This method may be called frequently (e.g. every page load
+        to conditionally display a warning banner), so make sure it's
+        not too expensive.  It's okay to give a slightly stale status
+        (e.g. based on a periodic background ping of the orchestrator)
+        if that's necessary to make this method fast.
+
+        .. note::
+            `True` doesn't mean that the desired functionality
+            is actually available in the orchestrator. I.e. this
+            won't work as expected::
+
+                >>> if OrchestratorClientMixin().available()[0]:  # wrong.
+                ...     OrchestratorClientMixin().get_hosts()
+
+        :return: two-tuple of boolean, string
+        """
+        raise NotImplementedError()
+
+    @_hide_in_features
+    def process(self, completions):
+        # type: (List[Completion]) -> None
+        """
+        Given a list of Completion instances, process any which are
+        incomplete.
+
+        Callers should inspect the detail of each completion to identify
+        partial completion/progress information, and present that information
+        to the user.
+
+        This method should not block, as this would make it slow to query
+        a status while other long running operations are in progress.
+        """
+        raise NotImplementedError()
+
+    @_hide_in_features
+    def get_feature_set(self):
+        """Describes which methods this orchestrator implements
+
+        .. note::
+            `True` doesn't mean that the desired functionality
+            is actually possible in the orchestrator. I.e. this
+            won't work as expected::
+
+                >>> api = OrchestratorClientMixin()
+                ... if api.get_feature_set()['get_hosts']['available']:  # wrong.
+                ...     api.get_hosts()
+
+            It's better to ask for forgiveness instead::
+
+                >>> try:
+                ...     OrchestratorClientMixin().get_hosts()
+                ... except (OrchestratorError, NotImplementedError):
+                ...     ...
+
+        :returns: Dict of API method names to ``{'available': True or False}``
+        """
+        module = self.__class__
+        features = {a: {'available': getattr(Orchestrator, a, None) != getattr(module, a)}
+                    for a in Orchestrator.__dict__
+                    if not a.startswith('_') and not getattr(getattr(Orchestrator, a), '_hide_in_features', False)
+                    }
+        return features
+
+    @_hide_in_features
+    def cancel_completions(self):
+        # type: () -> None
+        """
+        Cancels ongoing completions. Unsticks the mgr.
+        """
+        raise NotImplementedError()
+
+    def add_host(self, host_spec):
+        # type: (HostSpec) -> Completion
+        """
+        Add a host to the orchestrator inventory.
+
+        :param host_spec: the HostSpec describing the host to add
+        """
+        raise NotImplementedError()
+
+    def remove_host(self, host):
+        # type: (str) -> Completion
+        """
+        Remove a host from the orchestrator inventory.
+
+        :param host: hostname
+        """
+        raise NotImplementedError()
+
+    def update_host_addr(self, host, addr):
+        # type: (str, str) -> Completion
+        """
+        Update a host's address
+
+        :param host: hostname
+        :param addr: address (dns name or IP)
+        """
+        raise NotImplementedError()
+
+    def get_hosts(self):
+        # type: () -> Completion
+        """
+        Report the hosts in the cluster.
+
+        The default implementation is extra slow, as it fetches the full inventory.
+
+        :return: list of InventoryNodes
+        """
+        return self.get_inventory()
+
+    def add_host_label(self, host, label):
+        # type: (str, str) -> Completion
+        """
+        Add a host label
+        """
+        raise NotImplementedError()
+
+    def remove_host_label(self, host, label):
+        # type: (str, str) -> Completion
+        """
+        Remove a host label
+        """
+        raise NotImplementedError()
+
+    def get_inventory(self, node_filter=None, refresh=False):
+        # type: (Optional[InventoryFilter], bool) -> Completion
+        """
+        Returns inventory data in the format produced by `ceph-volume inventory`.
+
+        :return: list of InventoryNode
+        """
+        raise NotImplementedError()
+
+    def describe_service(self, service_type=None, service_id=None, node_name=None, refresh=False):
+        # type: (Optional[str], Optional[str], Optional[str], bool) -> Completion
+        """
+        Describe a service (of any kind) that is already configured in
+        the orchestrator.  For example, when viewing an OSD in the dashboard
+        we might like to also display information about the orchestrator's
+        view of the service (like the kubernetes pod ID).
+
+        When viewing a CephFS filesystem in the dashboard, we would use this
+        to display the pods being currently run for MDS daemons.
+
+        :return: list of ServiceDescription objects.
+        """
+        raise NotImplementedError()
+
+    def list_daemons(self, daemon_type=None, daemon_id=None, host=None, refresh=False):
+        # type: (Optional[str], Optional[str], Optional[str], bool) -> Completion
+        """
+        Describe a daemon (of any kind) that is already configured in
+        the orchestrator.
+
+        :return: list of DaemonDescription objects.
+        """
+        raise NotImplementedError()
+
+    def remove_daemons(self, names, force):
+        # type: (List[str], bool) -> Completion
+        """
+        Remove specific daemon(s).
+
+        :return: None
+        """
+        raise NotImplementedError()
+
+    def remove_service(self, service_type, service_name=None):
+        # type: (str, Optional[str]) -> Completion
+        """
+        Remove a service (a collection of daemons).
+
+        :return: None
+        """
+        raise NotImplementedError()
+
+    def service_action(self, action, service_type, service_name):
+        # type: (str, str, str) -> Completion
+        """
+        Perform an action (start/stop/reload) on a service (i.e., all daemons
+        providing the logical service).
+
+        :param action: one of "start", "stop", "restart", "redeploy", "reconfig"
+        :param service_type: e.g. "mds", "rgw", ...
+        :param service_name: name of logical service ("cephfs", "us-east", ...)
+        :rtype: Completion
+        """
+        #assert action in ["start", "stop", "reload", "restart", "redeploy"]
+        raise NotImplementedError()
+
+    def daemon_action(self, action, daemon_type, daemon_id):
+        # type: (str, str, str) -> Completion
+        """
+        Perform an action (start/stop/reload) on a daemon.
+
+        :param action: one of "start", "stop", "restart", "redeploy", "reconfig"
+        :param daemon_type: daemon type (e.g. "mds", "rgw", ...)
+        :param daemon_id: the daemon id (the <foo> in mds.<foo>)
+        :rtype: Completion
+        """
+        #assert action in ["start", "stop", "reload", "restart", "redeploy"]
+        raise NotImplementedError()
+
+    def create_osds(self, drive_groups):
+        # type: (List[DriveGroupSpec]) -> Completion
+        """
+        Create one or more OSDs within a single Drive Group.
+
+        The principal argument here is the list of DriveGroupSpecs:
+        other fields are advisory/extensible for any
+        finer-grained OSD feature enablement (choice of backing store,
+        compression/encryption, etc).
+
+        :param drive_groups: a list of DriveGroupSpec
+        :param all_hosts: TODO, this is required because the orchestrator methods are not composable
+                Probably this parameter can be easily removed because each orchestrator can use
+                the "get_inventory" method and the "drive_group.host_pattern" attribute
+                to obtain the list of hosts where to apply the operation
+        """
+        raise NotImplementedError()
+
+    def blink_device_light(self, ident_fault, on, locations):
+        # type: (str, bool, List[DeviceLightLoc]) -> Completion
+        """
+        Instructs the orchestrator to enable or disable either the ident or the fault LED.
+
+        :param ident_fault: either ``"ident"`` or ``"fault"``
+        :param on: ``True`` = on.
+        :param locations: See :class:`orchestrator.DeviceLightLoc`
+        """
+        raise NotImplementedError()
+
+    def add_mon(self, spec):
+        # type: (ServiceSpec) -> Completion
+        """Create mon daemon(s)"""
+        raise NotImplementedError()
+
+    def apply_mon(self, spec):
+        # type: (ServiceSpec) -> Completion
+        """Update mon cluster"""
+        raise NotImplementedError()
+
+    def add_mgr(self, spec):
+        # type: (ServiceSpec) -> Completion
+        """Create mgr daemon(s)"""
+        raise NotImplementedError()
+
+    def apply_mgr(self, spec):
+        # type: (ServiceSpec) -> Completion
+        """Update mgr cluster"""
+        raise NotImplementedError()
+
+    def add_mds(self, spec):
+        # type: (ServiceSpec) -> Completion
+        """Create MDS daemon(s)"""
+        raise NotImplementedError()
+
+    def apply_mds(self, spec):
+        # type: (ServiceSpec) -> Completion
+        """Update MDS cluster"""
+        raise NotImplementedError()
+
+    def add_rbd_mirror(self, spec):
+        # type: (ServiceSpec) -> Completion
+        """Create rbd-mirror daemon(s)"""
+        raise NotImplementedError()
+
+    def apply_rbd_mirror(self, spec):
+        # type: (ServiceSpec) -> Completion
+        """Update rbd-mirror cluster"""
+        raise NotImplementedError()
+
+    def add_nfs(self, spec):
+        # type: (NFSServiceSpec) -> Completion
+        """Create NFS daemon(s)"""
+        raise NotImplementedError()
+
+    def apply_nfs(self, spec):
+        # type: (NFSServiceSpec) -> Completion
+        """Update NFS cluster"""
+        raise NotImplementedError()
+
+    def add_rgw(self, spec):
+        # type: (RGWSpec) -> Completion
+        """Create RGW daemon(s)"""
+        raise NotImplementedError()
+
+    def apply_rgw(self, spec):
+        # type: (RGWSpec) -> Completion
+        """Update RGW cluster"""
+        raise NotImplementedError()
+
+    def add_prometheus(self, spec):
+        # type: (ServiceSpec) -> Completion
+        """Create new prometheus daemon"""
+        raise NotImplementedError()
+
+    def apply_prometheus(self, spec):
+        # type: (ServiceSpec) -> Completion
+        """Update prometheus cluster"""
+        raise NotImplementedError()
+
+    def upgrade_check(self, image, version):
+        # type: (Optional[str], Optional[str]) -> Completion
+        raise NotImplementedError()
+
+    def upgrade_start(self, image, version):
+        # type: (Optional[str], Optional[str]) -> Completion
+        raise NotImplementedError()
+
+    def upgrade_pause(self):
+        # type: () -> Completion
+        raise NotImplementedError()
+
+    def upgrade_resume(self):
+        # type: () -> Completion
+        raise NotImplementedError()
+
+    def upgrade_stop(self):
+        # type: () -> Completion
+        raise NotImplementedError()
+
+    def upgrade_status(self):
+        # type: () -> Completion
+        """
+        If an upgrade is currently underway, report on where
+        we are in the process, or if some error has occurred.
+
+        :return: UpgradeStatusSpec instance
+        """
+        raise NotImplementedError()
+
+    @_hide_in_features
+    def upgrade_available(self):
+        # type: () -> Completion
+        """
+        Report on what versions are available to upgrade to
+
+        :return: List of strings
+        """
+        raise NotImplementedError()
+
+class HostSpec(object):
+    def __init__(self, hostname, addr=None, labels=None):
+        # type: (str, Optional[str], Optional[List[str]]) -> None
+        self.hostname = hostname       # the hostname on the host
+        self.addr = addr or hostname   # DNS name or IP address to reach it
+        self.labels = labels or []     # initial label(s), if any
+
+class UpgradeStatusSpec(object):
+    # Orchestrator's report on what's going on with any ongoing upgrade
+    def __init__(self):
+        self.in_progress = False  # Is an upgrade underway?
+        self.target_image = None
+        self.services_complete = []  # Which daemon types are fully updated?
+        self.message = ""  # Freeform description
+
+
+class PlacementSpec(object):
+    """
+    For APIs that need to specify a node subset
+    """
+    def __init__(self, label=None, hosts=None, count=None):
+        # type: (Optional[str], Optional[List], Optional[int]) -> None
+        self.label = label
+        self.hosts = []  # type: List[HostPlacementSpec]
+        if hosts:
+            if all([isinstance(host, HostPlacementSpec) for host in hosts]):
+                self.hosts = hosts
+            else:
+                self.hosts = [HostPlacementSpec.parse(x, require_network=False) for x in hosts if x]
+
+        self.count = count  # type: Optional[int]
+
+    def set_hosts(self, hosts):
+        # To backpopulate the .hosts attribute when using labels or count
+        # in the orchestrator backend.
+        self.hosts = hosts
+
+    @classmethod
+    def from_dict(cls, data):
+        _cls = cls(**data)
+        _cls.validate()
+        return _cls
+
+    def validate(self):
+        if self.hosts and self.label:
+            # TODO: a less generic Exception
+            raise Exception('hosts and label are mutually exclusive')
+        if self.count is not None and self.count <= 0:
+            raise Exception("num/count must be >= 1")
+
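+# A short sketch of building a PlacementSpec from a dict (values illustrative):
+#
+#   >>> spec = PlacementSpec.from_dict({'label': 'mon', 'count': 3})
+#   >>> (spec.label, spec.count, spec.hosts)
+#   ('mon', 3, [])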
+
+def handle_type_error(method):
+    @wraps(method)
+    def inner(cls, *args, **kwargs):
+        try:
+            return method(cls, *args, **kwargs)
+        except TypeError as e:
+            error_msg = '{}: {}'.format(cls.__name__, e)
+        raise OrchestratorValidationError(error_msg)
+    return inner
+
+
+class DaemonDescription(object):
+    """
+    For responding to queries about the status of a particular daemon,
+    stateful or stateless.
+
+    This is not about health or performance monitoring of daemons: it's
+    about letting the orchestrator tell Ceph whether and where a
+    daemon is scheduled in the cluster.  When an orchestrator tells
+    Ceph "it's running on node123", that's not a promise that the process
+    is literally up this second, it's a description of where the orchestrator
+    has decided the daemon should run.
+    """
+
+    def __init__(self,
+                 daemon_type=None,
+                 daemon_id=None,
+                 nodename=None,
+                 container_id=None,
+                 container_image_id=None,
+                 container_image_name=None,
+                 version=None,
+                 status=None,
+                 status_desc=None):
+        # Node is at the same granularity as InventoryNode
+        self.nodename = nodename
+
+        # Not everyone runs in containers, but enough people do to
+        # justify having the container_id (runtime id) and container_image
+        # (image name)
+        self.container_id = container_id                  # runtime id
+        self.container_image_id = container_image_id      # image hash
+        self.container_image_name = container_image_name  # image friendly name
+
+        # The type of service (osd, mon, mgr, etc.)
+        self.daemon_type = daemon_type
+
+        # The orchestrator will have picked some names for daemons,
+        # typically either based on hostnames or on pod names.
+        # This is the <foo> in mds.<foo>, the ID that will appear
+        # in the FSMap/ServiceMap.
+        self.daemon_id = daemon_id
+
+        # Service version that was deployed
+        self.version = version
+
+        # Service status: -1 error, 0 stopped, 1 running
+        self.status = status
+
+        # Service status description when status == -1.
+        self.status_desc = status_desc
+
+        # datetime when this info was last refreshed
+        self.last_refresh = None   # type: Optional[datetime.datetime]
+
+    def name(self):
+        return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+    def __repr__(self):
+        return "<DaemonDescription>({type}.{id})".format(type=self.daemon_type,
+                                                         id=self.daemon_id)
+
+    def to_json(self):
+        out = {
+            'nodename': self.nodename,
+            'container_id': self.container_id,
+            'container_image_id': self.container_image_id,
+            'container_image_name': self.container_image_name,
+            'daemon_id': self.daemon_id,
+            'daemon_type': self.daemon_type,
+            'version': self.version,
+            'status': self.status,
+            'status_desc': self.status_desc,
+        }
+        return {k: v for (k, v) in out.items() if v is not None}
+
+    @classmethod
+    @handle_type_error
+    def from_json(cls, data):
+        return cls(**data)
+
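+# A JSON round-trip sketch with illustrative values, assuming the layout
+# produced by to_json() above:
+#
+#   >>> dd = DaemonDescription(daemon_type='mgr', daemon_id='x', nodename='node1')
+#   >>> DaemonDescription.from_json(dd.to_json()).name()
+#   'mgr.x'
+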
+class ServiceDescription(object):
+    """
+    For responding to queries about the status of a particular service,
+    stateful or stateless.
+
+    This is not about health or performance monitoring of services: it's
+    about letting the orchestrator tell Ceph whether and where a
+    service is scheduled in the cluster.  When an orchestrator tells
+    Ceph "it's running on node123", that's not a promise that the process
+    is literally up this second, it's a description of where the orchestrator
+    has decided the service should run.
+    """
+
+    def __init__(self, nodename=None,
+                 container_id=None, container_image_id=None,
+                 container_image_name=None,
+                 service=None, service_instance=None,
+                 service_type=None, version=None, rados_config_location=None,
+                 service_url=None, status=None, status_desc=None):
+        # Node is at the same granularity as InventoryNode
+        self.nodename = nodename  # type: Optional[str]
+
+        # Not everyone runs in containers, but enough people do to
+        # justify having the container_id (runtime id) and container_image
+        # (image name)
+        self.container_id = container_id                  # runtime id
+        self.container_image_id = container_image_id      # image hash
+        self.container_image_name = container_image_name  # image friendly name
+
+        # Some services can be deployed in groups. For example, MDSs can
+        # have active and standby daemons, and nfs-ganesha can run daemons
+        # in parallel. This tag refers to a group of daemons as a whole.
+        #
+        # For instance, a cluster of MDSs all serving the same filesystem
+        # will all have the same service value (which may be the
+        # filesystem name in the FSMap).
+        #
+        # Single-instance services should leave this set to None
+        self.service = service
+
+        # The orchestrator will have picked some names for daemons,
+        # typically either based on hostnames or on pod names.
+        # This is the <foo> in mds.<foo>, the ID that will appear
+        # in the FSMap/ServiceMap.
+        self.service_instance = service_instance
+
+        # The type of service (osd, mon, mgr, etc.)
+        self.service_type = service_type
+
+        # Service version that was deployed
+        self.version = version
+
+        # Location of the service configuration when stored in rados
+        # object. Format: "rados://<pool>/[<namespace/>]<object>"
+        self.rados_config_location = rados_config_location
+
+        # If the service exposes REST-like API, this attribute should hold
+        # the URL.
+        self.service_url = service_url
+
+        # Service status: -1 error, 0 stopped, 1 running
+        self.status = status
+
+        # Service status description when status == -1.
+        self.status_desc = status_desc
+
+        # datetime when this info was last refreshed
+        self.last_refresh = None   # type: Optional[datetime.datetime]
+
+    def name(self):
+        if self.service_instance:
+            return '%s.%s' % (self.service_type, self.service_instance)
+        return self.service_type
+
+    def __repr__(self):
+        return "<ServiceDescription>({n_name}:{s_type})".format(n_name=self.nodename,
+                                                                s_type=self.name())
+
+    def to_json(self):
+        out = {
+            'nodename': self.nodename,
+            'container_id': self.container_id,
+            'service': self.service,
+            'service_instance': self.service_instance,
+            'service_type': self.service_type,
+            'version': self.version,
+            'rados_config_location': self.rados_config_location,
+            'service_url': self.service_url,
+            'status': self.status,
+            'status_desc': self.status_desc,
+        }
+        return {k: v for (k, v) in out.items() if v is not None}
+
+    @classmethod
+    @handle_type_error
+    def from_json(cls, data):
+        return cls(**data)
+
+
+class ServiceSpec(object):
+    """
+    Details of service creation.
+
+    Request to the orchestrator for a cluster of daemons
+    such as MDS, RGW, iscsi gateway, MONs, MGRs, Prometheus
+
+    This structure is supposed to be enough information to
+    start the services.
+
+    """
+
+    def __init__(self, name=None, placement=None):
+        # type: (Optional[str], Optional[PlacementSpec]) -> None
+        self.placement = PlacementSpec() if placement is None else placement  # type: PlacementSpec
+
+        #: Give this set of stateless services a name: typically it would
+        #: be the name of a CephFS filesystem, RGW zone, etc.  Must be unique
+        #: within one ceph cluster. Note: Not all clusters have a name
+        self.name = name  # type: Optional[str]
+
+        if self.placement is not None and self.placement.count is not None:
+            #: Count of service instances. Deprecated.
+            self.count = self.placement.count  # type: int
+        else:
+            self.count = 1
+
+    def validate_add(self):
+        if not self.name:
+            raise OrchestratorValidationError('Cannot add Service: Name required')
+
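+# A sketch of a named spec with an explicit placement (values illustrative):
+#
+#   >>> spec = ServiceSpec(name='cephfs', placement=PlacementSpec(count=3))
+#   >>> (spec.name, spec.count)
+#   ('cephfs', 3)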
+
+class NFSServiceSpec(ServiceSpec):
+    def __init__(self, name, pool=None, namespace=None, placement=None):
+        super(NFSServiceSpec, self).__init__(name, placement)
+
+        #: RADOS pool where NFS client recovery data is stored.
+        self.pool = pool
+
+        #: RADOS namespace where NFS client recovery data is stored in the pool.
+        self.namespace = namespace
+
+    def validate_add(self):
+        super(NFSServiceSpec, self).validate_add()
+
+        if not self.pool:
+            raise OrchestratorValidationError('Cannot add NFS: No Pool specified')
+
+
+class RGWSpec(ServiceSpec):
+    """
+    Settings to configure a (multisite) Ceph RGW
+
+    """
+    def __init__(self,
+                 rgw_realm,  # type: str
+                 rgw_zone,  # type: str
+                 placement=None,
+                 hosts=None,  # type: Optional[List[str]]
+                 rgw_multisite=None,  # type: Optional[bool]
+                 rgw_zonemaster=None,  # type: Optional[bool]
+                 rgw_zonesecondary=None,  # type: Optional[bool]
+                 rgw_multisite_proto=None,  # type: Optional[str]
+                 rgw_frontend_port=None,  # type: Optional[int]
+                 rgw_zonegroup=None,  # type: Optional[str]
+                 rgw_zone_user=None,  # type: Optional[str]
+                 system_access_key=None,  # type: Optional[str]
+                 system_secret_key=None,  # type: Optional[str]
+                 count=None  # type: Optional[int]
+                 ):
+        # Regarding default values: Ansible has a `set_rgwspec_defaults` that sets
+        # default values that make sense for Ansible; Rook has default values implemented
+        # in Rook itself. Thus we don't set any defaults here in this class.
+
+        super(RGWSpec, self).__init__(name=rgw_realm + '.' + rgw_zone,
+                                      placement=placement)
+
+        #: List of hosts where RGWs should run. Not for Rook.
+        if hosts:
+            self.placement = PlacementSpec(hosts=hosts)
+
+        #: Whether this is a multisite deployment
+        self.rgw_multisite = rgw_multisite
+        self.rgw_zonemaster = rgw_zonemaster
+        self.rgw_zonesecondary = rgw_zonesecondary
+        self.rgw_multisite_proto = rgw_multisite_proto
+        self.rgw_frontend_port = rgw_frontend_port
+
+        self.rgw_realm = rgw_realm
+        self.rgw_zone = rgw_zone
+        self.rgw_zonegroup = rgw_zonegroup
+        self.rgw_zone_user = rgw_zone_user
+
+        self.system_access_key = system_access_key
+        self.system_secret_key = system_secret_key
+
+    @property
+    def rgw_multisite_endpoint_addr(self):
+        """Returns the first host. Not supported for Rook."""
+        return self.placement.hosts[0]
+
+    @property
+    def rgw_multisite_endpoints_list(self):
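+        # e.g. hosts ['rgw1', 'rgw2'] with proto 'http' and port 8000 give
+        # 'http://rgw1:8000,http://rgw2:8000' (illustrative values).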
+        return ",".join(["{}://{}:{}".format(self.rgw_multisite_proto,
+                             host,
+                             self.rgw_frontend_port) for host in self.placement.hosts])
+
+    def genkey(self, nchars):
+        """ Returns a random string of nchars
+
+        :param nchars: Length of the returned string
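+
+        A quick sketch (realm and zone names are illustrative):
+
+        >>> len(RGWSpec('myrealm', 'myzone').genkey(20))
+        20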
+        """
+        # TODO Python 3: use the ``secrets`` module instead.
+
+        return ''.join(random.choice(string.ascii_uppercase +
+                                     string.ascii_lowercase +
+                                     string.digits) for _ in range(nchars))
+
+    @classmethod
+    def from_json(cls, json_rgw_spec):
+        # type: (dict) -> RGWSpec
+        """
+        Initialize 'RGWSpec' object data from a JSON structure.
+        :param json_rgw_spec: A valid dict with the RGW settings
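+
+        A minimal sketch (keys and values are illustrative):
+
+        >>> spec = RGWSpec.from_json({'rgw_realm': 'myrealm', 'rgw_zone': 'myzone'})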
+        """
+        # TODO: also add PlacementSpec(**json_rgw_spec['placement'])
+        args = {k: v for k, v in json_rgw_spec.items()}
+        return RGWSpec(**args)
+
+
+class InventoryFilter(object):
+    """
+    When fetching inventory, use this filter to avoid unnecessarily
+    scanning the whole estate.
+
+    Typical uses:
+
+    * filter by node when presenting a UI workflow for configuring
+      a particular server.
+    * filter by label when not all of the estate consists of Ceph servers,
+      and we only want to learn about the Ceph servers.
+    * filter by label when we are particularly interested in
+      e.g. OSD servers.
+
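+    A construction sketch (hostnames are illustrative):
+
+    >>> node_filter = InventoryFilter(nodes=['node1', 'node2'])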
+    """
+    def __init__(self, labels=None, nodes=None):
+        # type: (Optional[List[str]], Optional[List[str]]) -> None
+
+        #: Optional: get info about nodes matching labels
+        self.labels = labels
+
+        #: Optional: get info about certain named nodes only
+        self.nodes = nodes
+
+
+class InventoryNode(object):
+    """
+    When fetching inventory, all Devices are grouped inside an
+    InventoryNode.
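+
+    A JSON round-trip sketch (hostname is illustrative):
+
+    >>> node = InventoryNode.from_json({'name': 'node1', 'devices': []})
+    >>> node.to_json()['addr']
+    'node1'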
+    """
+    def __init__(self, name, devices=None, labels=None, addr=None):
+        # type: (str, Optional[inventory.Devices], Optional[List[str]], Optional[str]) -> None
+        if devices is None:
+            devices = inventory.Devices([])
+        if labels is None:
+            labels = []
+        assert isinstance(devices, inventory.Devices)
+
+        self.name = name  # unique within cluster.  For example a hostname.
+        self.addr = addr or name
+        self.devices = devices
+        self.labels = labels
+
+    def to_json(self):
+        return {
+            'name': self.name,
+            'addr': self.addr,
+            'devices': self.devices.to_json(),
+            'labels': self.labels,
+        }
+
+    @classmethod
+    def from_json(cls, data):
+        try:
+            _data = copy.deepcopy(data)
+            name = _data.pop('name')
+            addr = _data.pop('addr', None) or name
+            devices = inventory.Devices.from_json(_data.pop('devices'))
+            labels = _data.pop('labels', list())
+            if _data:
+                error_msg = 'Unknown key(s) in Inventory: {}'.format(','.join(_data.keys()))
+                raise OrchestratorValidationError(error_msg)
+            return cls(name, devices, labels, addr)
+        except KeyError as e:
+            error_msg = '{} is required for {}'.format(e, cls.__name__)
+            raise OrchestratorValidationError(error_msg)
+        except TypeError as e:
+            raise OrchestratorValidationError('Failed to read inventory: {}'.format(e))
+
+    @classmethod
+    def from_nested_items(cls, hosts):
+        devs = inventory.Devices.from_json
+        return [cls(item[0], devs(item[1].data)) for item in hosts]
+
+    def __repr__(self):
+        return "<InventoryNode>({name})".format(name=self.name)
+
+    @staticmethod
+    def get_host_names(nodes):
+        # type: (List[InventoryNode]) -> List[str]
+        return [node.name for node in nodes]
+
+    def __eq__(self, other):
+        return self.name == other.name and self.devices == other.devices
+
+
+class DeviceLightLoc(namedtuple('DeviceLightLoc', ['host', 'dev', 'path'])):
+    """
+    Describes a specific device on a specific host. Used for enabling or disabling LEDs
+    on devices.
+
+    ``host``: hostname as in :func:`orchestrator.Orchestrator.get_hosts`
+
+    ``dev``: device id, e.g. ``ABC1234DEF567-1R1234_ABC8DE0Q``.
+       See ``ceph osd metadata | jq '.[].device_ids'``
+    """
+    __slots__ = ()
+
+
+def _mk_orch_methods(cls):
+    # ``shim`` needs to be defined outside of the for loop; otherwise
+    # ``meth`` would be late-bound and every generated method would
+    # dispatch to the last name in the loop.
+    def shim(method_name):
+        def inner(self, *args, **kwargs):
+            completion = self._oremote(method_name, args, kwargs)
+            return completion
+        return inner
+
+    for meth in Orchestrator.__dict__:
+        if not meth.startswith('_') and meth not in ['is_orchestrator_module']:
+            setattr(cls, meth, shim(meth))
+    return cls
+
+
+@_mk_orch_methods
+class OrchestratorClientMixin(Orchestrator):
+    """
+    A module that inherits from `OrchestratorClientMixin` can directly call
+    all :class:`Orchestrator` methods without manually calling remote.
+
+    Every interface method from ``Orchestrator`` is converted into a stub method that internally
+    calls :func:`OrchestratorClientMixin._oremote`
+
+    >>> class MyModule(OrchestratorClientMixin):
+    ...    def func(self):
+    ...        completion = self.add_host('somehost')  # calls `_oremote()`
+    ...        self._orchestrator_wait([completion])
+    ...        self.log.debug(completion.result)
+
+    .. note:: Orchestrator implementations should not inherit from `OrchestratorClientMixin`.
+        The reason is that OrchestratorClientMixin magically redirects all
+        methods to the "real" implementation of the orchestrator.
+
+
+    >>> import mgr_module
+    >>> class MyImplementation(mgr_module.MgrModule, Orchestrator):
+    ...     def __init__(self, *args, **kwargs):
+    ...         self.orch_client = OrchestratorClientMixin()
+    ...         self.orch_client.set_mgr(self.mgr)
+    """
+
+    def set_mgr(self, mgr):
+        # type: (MgrModule) -> None
+        """
+        Usable in the Dashboard, which uses a global ``mgr``.
+        """
+
+        self.__mgr = mgr  # the name mangling of ``__mgr`` makes sure we're not overwriting any other ``mgr`` attribute
+
+    def __get_mgr(self):
+        try:
+            return self.__mgr
+        except AttributeError:
+            return self
+
+    def _oremote(self, meth, args, kwargs):
+        """
+        Helper for invoking `remote` on whichever orchestrator is enabled
+
+        :raises RuntimeError: If the remote method failed.
+        :raises OrchestratorError: orchestrator failed to perform
+        :raises ImportError: no `orchestrator` module or backend not found.
+        """
+        mgr = self.__get_mgr()
+
+        try:
+            o = mgr._select_orchestrator()
+        except AttributeError:
+            o = mgr.remote('orchestrator', '_select_orchestrator')
+
+        if o is None:
+            raise NoOrchestrator()
+
+        mgr.log.debug("_oremote {} -> {}.{}(*{}, **{})".format(mgr.module_name, o, meth, args, kwargs))
+        return mgr.remote(o, meth, *args, **kwargs)
+
+    def _orchestrator_wait(self, completions):
+        # type: (List[Completion]) -> None
+        """
+        Wait for completions to complete (reads) or
+        become persistent (writes).
+
+        Waits for writes to be *persistent* but not *effective*.
+
+        :param completions: List of Completions
+        :raises NoOrchestrator:
+        :raises RuntimeError: something went wrong while calling the process method.
+        :raises ImportError: no `orchestrator` module or backend not found.
+        """
+        while any(not c.has_result for c in completions):
+            self.process(completions)
+            self.__get_mgr().log.info("Operations pending: %s",
+                                      sum(1 for c in completions if not c.has_result))
+            if any(c.needs_result for c in completions):
+                time.sleep(1)
+            else:
+                break
+
+
+class OutdatableData(object):
+    DATEFMT = '%Y-%m-%d %H:%M:%S.%f'
+
+    def __init__(self, data=None, last_refresh=None):
+        # type: (Optional[dict], Optional[datetime.datetime]) -> None
+        self._data = data
+        if data is not None and last_refresh is None:
+            self.last_refresh = datetime.datetime.utcnow()  # type: Optional[datetime.datetime]
+        else:
+            self.last_refresh = last_refresh
+
+    def json(self):
+        if self.last_refresh is not None:
+            timestr = self.last_refresh.strftime(self.DATEFMT)  # type: Optional[str]
+        else:
+            timestr = None
+
+        return {
+            "data": self._data,
+            "last_refresh": timestr,
+        }
+
+    @property
+    def data(self):
+        return self._data
+
+    # @data.setter
+    # No setter, as it doesn't work as expected: It's not saved in store automatically
+
+    @classmethod
+    def time_from_string(cls, timestr):
+        if timestr is None:
+            return None
+        # drop the 'Z' timezone indication, it's always UTC
+        timestr = timestr.rstrip('Z')
+        return datetime.datetime.strptime(timestr, cls.DATEFMT)
+
+    @classmethod
+    def from_json(cls, data):
+        return cls(data['data'], cls.time_from_string(data['last_refresh']))
+
+    def outdated(self, timeout=None):
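+        # A record counts as outdated when it was never refreshed or its
+        # last refresh lies more than ``timeout`` seconds (default 600)
+        # in the past.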
+        if timeout is None:
+            timeout = 600
+        if self.last_refresh is None:
+            return True
+        cutoff = datetime.datetime.utcnow() - datetime.timedelta(
+            seconds=timeout)
+        return self.last_refresh < cutoff
+
+    def __repr__(self):
+        return 'OutdatableData(data={}, last_refresh={})'.format(self._data, self.last_refresh)
+
+
+class OutdatableDictMixin(object):
+    """
+    Toolbox for implementing a cache. As every orchestrator has
+    different needs, we cannot implement any logic here.
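+
+    A usage sketch with the plain-dict variant ``OutdatableDict`` defined below:
+
+    >>> cache = OutdatableDict()
+    >>> cache['node1'] = OutdatableData({'devices': []})
+    >>> cache.any_outdated(timeout=600)
+    False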
+    """
+
+    def __getitem__(self, item):
+        # type: (str) -> OutdatableData
+        return OutdatableData.from_json(super(OutdatableDictMixin, self).__getitem__(item))  # type: ignore
+
+    def __setitem__(self, key, value):
+        # type: (str, OutdatableData) -> None
+        val = None if value is None else value.json()
+        super(OutdatableDictMixin, self).__setitem__(key, val)  # type: ignore
+
+    def items(self):
+        ## type: () -> Iterator[Tuple[str, OutdatableData]]
+        for item in super(OutdatableDictMixin, self).items():  # type: ignore
+            k, v = item
+            yield k, OutdatableData.from_json(v)
+
+    def items_filtered(self, keys=None):
+        if keys:
+            return [(host, self[host]) for host in keys]
+        else:
+            return list(self.items())
+
+    def any_outdated(self, timeout=None):
+        # ``items()`` is a generator; materialize it once so the emptiness
+        # check below does not exhaust it before ``any()`` runs.
+        items = list(self.items())
+        if not items:
+            return True
+        return any(i[1].outdated(timeout) for i in items)
+
+    def remove_outdated(self):
+        outdated = [item[0] for item in self.items() if item[1].outdated()]
+        for o in outdated:
+            del self[o]  # type: ignore
+
+    def invalidate(self, key):
+        self[key] = OutdatableData(self[key].data,
+                                   datetime.datetime.fromtimestamp(0))
+
+
+class OutdatablePersistentDict(OutdatableDictMixin, PersistentStoreDict):
+    pass
+
+
+class OutdatableDict(OutdatableDictMixin, dict):
+    pass
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py
new file mode 100644 (file)
index 0000000..277743f
--- /dev/null
@@ -0,0 +1,866 @@
+import datetime
+import errno
+import json
+import yaml
+
+from ceph.deployment.inventory import Device
+from prettytable import PrettyTable
+
+from mgr_util import format_bytes, to_pretty_timedelta
+
+try:
+    from typing import List, Set, Optional
+except ImportError:
+    pass  # just for type checking.
+
+
+from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection, \
+    DriveGroupSpecs
+from mgr_module import MgrModule, HandleCommandResult
+
+from ._interface import OrchestratorClientMixin, DeviceLightLoc, _cli_read_command, \
+    raise_if_exception, _cli_write_command, TrivialReadCompletion, OrchestratorError, \
+    NoOrchestrator, ServiceSpec, PlacementSpec, OrchestratorValidationError, NFSServiceSpec, \
+    RGWSpec, InventoryFilter, InventoryNode, HostPlacementSpec, HostSpec
+
+
+class OrchestratorCli(OrchestratorClientMixin, MgrModule):
+    MODULE_OPTIONS = [
+        {
+            'name': 'orchestrator',
+            'type': 'str',
+            'default': None,
+            'desc': 'Orchestrator backend',
+            'enum_allowed': ['cephadm', 'rook',
+                             'test_orchestrator'],
+            'runtime': True,
+        },
+    ]
+    NATIVE_OPTIONS = []  # type: List[dict]
+
+    def __init__(self, *args, **kwargs):
+        super(OrchestratorCli, self).__init__(*args, **kwargs)
+        self.ident = set()  # type: Set[str]
+        self.fault = set()  # type: Set[str]
+        self._load()
+        self._refresh_health()
+
+    def _load(self):
+        active = self.get_store('active_devices')
+        if active:
+            decoded = json.loads(active)
+            self.ident = set(decoded.get('ident', []))
+            self.fault = set(decoded.get('fault', []))
+        self.log.debug('ident {}, fault {}'.format(self.ident, self.fault))
+
+    def _save(self):
+        encoded = json.dumps({
+            'ident': list(self.ident),
+            'fault': list(self.fault),
+            })
+        self.set_store('active_devices', encoded)
+
+    def _refresh_health(self):
+        h = {}
+        if self.ident:
+            h['DEVICE_IDENT_ON'] = {
+                'severity': 'warning',
+                'summary': '%d devices have ident light turned on' % len(
+                    self.ident),
+                'detail': ['{} ident light enabled'.format(d) for d in self.ident]
+            }
+        if self.fault:
+            h['DEVICE_FAULT_ON'] = {
+                'severity': 'warning',
+                'summary': '%d devices have fault light turned on' % len(
+                    self.fault),
+                'detail': ['{} fault light enabled'.format(d) for d in self.fault]
+            }
+        self.set_health_checks(h)
+
+    def _get_device_locations(self, dev_id):
+        # type: (str) -> List[DeviceLightLoc]
+        locs = [d['location'] for d in self.get('devices')['devices'] if d['devid'] == dev_id]
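+        # ``locs`` is a list of location lists (one per matching device
+        # entry); ``sum(locs, [])`` flattens it into a single list.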
+        return [DeviceLightLoc(**l) for l in sum(locs, [])]
+
+    @_cli_read_command(
+        prefix='device ls-lights',
+        desc='List currently active device indicator lights')
+    def _device_ls(self):
+        return HandleCommandResult(
+            stdout=json.dumps({
+                'ident': list(self.ident),
+                'fault': list(self.fault)
+                }, indent=4, sort_keys=True))
+
+    def light_on(self, fault_ident, devid):
+        # type: (str, str) -> HandleCommandResult
+        assert fault_ident in ("fault", "ident")
+        locs = self._get_device_locations(devid)
+        if not locs:
+            return HandleCommandResult(stderr='device {} not found'.format(devid),
+                                       retval=-errno.ENOENT)
+
+        getattr(self, fault_ident).add(devid)
+        self._save()
+        self._refresh_health()
+        completion = self.blink_device_light(fault_ident, True, locs)
+        self._orchestrator_wait([completion])
+        return HandleCommandResult(stdout=str(completion.result))
+
+    def light_off(self, fault_ident, devid, force):
+        # type: (str, str, bool) -> HandleCommandResult
+        assert fault_ident in ("fault", "ident")
+        locs = self._get_device_locations(devid)
+        if not locs:
+            return HandleCommandResult(stderr='device {} not found'.format(devid),
+                                       retval=-errno.ENOENT)
+
+        try:
+            completion = self.blink_device_light(fault_ident, False, locs)
+            self._orchestrator_wait([completion])
+
+            if devid in getattr(self, fault_ident):
+                getattr(self, fault_ident).remove(devid)
+                self._save()
+                self._refresh_health()
+            return HandleCommandResult(stdout=str(completion.result))
+
+        except Exception:
+            # There are several reasons the try: block might fail:
+            # 1. the device no longer exists
+            # 2. the device is no longer known to Ceph
+            # 3. the host is not reachable
+            if force and devid in getattr(self, fault_ident):
+                getattr(self, fault_ident).remove(devid)
+                self._save()
+                self._refresh_health()
+            raise
+
+    @_cli_write_command(
+        prefix='device light',
+        cmd_args='name=enable,type=CephChoices,strings=on|off '
+                 'name=devid,type=CephString '
+                 'name=light_type,type=CephChoices,strings=ident|fault,req=false '
+                 'name=force,type=CephBool,req=false',
+        desc='Enable or disable the device light. Default type is `ident`\n'
+             'Usage: device light (on|off) <devid> [ident|fault] [--force]')
+    def _device_light(self, enable, devid, light_type=None, force=False):
+        # type: (str, str, Optional[str], bool) -> HandleCommandResult
+        light_type = light_type or 'ident'
+        on = enable == 'on'
+        if on:
+            return self.light_on(light_type, devid)
+        else:
+            return self.light_off(light_type, devid, force)
+
+    def _select_orchestrator(self):
+        return self.get_module_option("orchestrator")
+
+    @_cli_write_command(
+        'orch host add',
+        'name=host,type=CephString,req=true '
+        'name=addr,type=CephString,req=false '
+        'name=labels,type=CephString,n=N,req=false',
+        'Add a host')
+    def _add_host(self, host, addr=None, labels=None):
+        s = HostSpec(hostname=host, addr=addr, labels=labels)
+        completion = self.add_host(s)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch host rm',
+        "name=host,type=CephString,req=true",
+        'Remove a host')
+    def _remove_host(self, host):
+        completion = self.remove_host(host)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch host set-addr',
+        'name=host,type=CephString '
+        'name=addr,type=CephString',
+        'Update a host address')
+    def _update_set_addr(self, host, addr):
+        completion = self.update_host_addr(host, addr)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_read_command(
+        'orch host ls',
+        'name=format,type=CephChoices,strings=json|plain,req=false',
+        'List hosts')
+    def _get_hosts(self, format='plain'):
+        completion = self.get_hosts()
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        if format == 'json':
+            hosts = [dict(host=node.name, labels=node.labels)
+                     for node in completion.result]
+            output = json.dumps(hosts, sort_keys=True)
+        else:
+            table = PrettyTable(
+                ['HOST', 'ADDR', 'LABELS'],
+                border=False)
+            table.align = 'l'
+            table.left_padding_width = 0
+            table.right_padding_width = 1
+            for node in completion.result:
+                table.add_row((node.name, node.addr, ' '.join(node.labels)))
+            output = table.get_string()
+        return HandleCommandResult(stdout=output)
+
+    @_cli_write_command(
+        'orch host label add',
+        'name=host,type=CephString '
+        'name=label,type=CephString',
+        'Add a host label')
+    def _host_label_add(self, host, label):
+        completion = self.add_host_label(host, label)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch host label rm',
+        'name=host,type=CephString '
+        'name=label,type=CephString',
+        'Remove a host label')
+    def _host_label_rm(self, host, label):
+        completion = self.remove_host_label(host, label)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_read_command(
+        'orch device ls',
+        "name=host,type=CephString,n=N,req=false "
+        "name=format,type=CephChoices,strings=json|plain,req=false "
+        "name=refresh,type=CephBool,req=false",
+        'List devices on a node')
+    def _list_devices(self, host=None, format='plain', refresh=False):
+        # type: (Optional[List[str]], str, bool) -> HandleCommandResult
+        """
+        Provide information about storage devices present in cluster hosts
+
+        Note: this does not have to be completely synchronous. Slightly out of
+        date hardware inventory is fine as long as hardware ultimately appears
+        in the output of this command.
+        """
+        nf = InventoryFilter(nodes=host) if host else None
+
+        completion = self.get_inventory(node_filter=nf, refresh=refresh)
+
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+
+        if format == 'json':
+            data = [n.to_json() for n in completion.result]
+            return HandleCommandResult(stdout=json.dumps(data))
+        else:
+            out = []
+
+            table = PrettyTable(
+                ['HOST', 'PATH', 'TYPE', 'SIZE', 'DEVICE', 'AVAIL',
+                 'REJECT REASONS'],
+                border=False)
+            table.align = 'l'
+            table._align['SIZE'] = 'r'
+            table.left_padding_width = 0
+            table.right_padding_width = 1
+            for host_ in completion.result: # type: InventoryNode
+                for d in host_.devices.devices:  # type: Device
+                    table.add_row(
+                        (
+                            host_.name,
+                            d.path,
+                            d.human_readable_type,
+                            format_bytes(d.sys_api.get('size', 0), 5),
+                            d.device_id,
+                            d.available,
+                            ', '.join(d.rejected_reasons)
+                        )
+                    )
+            out.append(table.get_string())
+            return HandleCommandResult(stdout='\n'.join(out))
+
+    @_cli_read_command(
+        'orch ps',
+        "name=host,type=CephString,req=false "
+        "name=daemon_type,type=CephChoices,strings=mon|mgr|osd|mds|iscsi|nfs|rgw|rbd-mirror,req=false "
+        "name=daemon_id,type=CephString,req=false "
+        "name=format,type=CephChoices,strings=json|plain,req=false "
+        "name=refresh,type=CephBool,req=false",
+        'List daemons known to orchestrator')
+    def _list_daemons(self, host=None, daemon_type=None, daemon_id=None, format='plain', refresh=False):
+        completion = self.list_daemons(daemon_type,
+                                       daemon_id=daemon_id,
+                                       host=host,
+                                       refresh=refresh)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        daemons = completion.result
+
+        def ukn(s):
+            return '<unknown>' if s is None else s
+        # Sort the list for display
+        daemons.sort(key=lambda s: (ukn(s.daemon_type), ukn(s.nodename), ukn(s.daemon_id)))
+
+        if len(daemons) == 0:
+            return HandleCommandResult(stdout="No daemons reported")
+        elif format == 'json':
+            data = [s.to_json() for s in daemons]
+            return HandleCommandResult(stdout=json.dumps(data))
+        else:
+            now = datetime.datetime.utcnow()
+            table = PrettyTable(
+                ['NAME', 'HOST', 'STATUS', 'REFRESHED',
+                 'VERSION', 'IMAGE NAME', 'IMAGE ID', 'CONTAINER ID'],
+                border=False)
+            table.align = 'l'
+            table.left_padding_width = 0
+            table.right_padding_width = 1
+            for s in sorted(daemons, key=lambda s: s.name()):
+                status = {
+                    -1: 'error',
+                    0: 'stopped',
+                    1: 'running',
+                    None: '<unknown>'
+                }[s.status]
+
+                if s.last_refresh:
+                    age = to_pretty_timedelta(now - s.last_refresh) + ' ago'
+                else:
+                    age = '-'
+                table.add_row((
+                    s.name(),
+                    ukn(s.nodename),
+                    status,
+                    age,
+                    ukn(s.version),
+                    ukn(s.container_image_name),
+                    ukn(s.container_image_id)[0:12],
+                    ukn(s.container_id)[0:12]))
+
+            return HandleCommandResult(stdout=table.get_string())
+
+    @_cli_write_command(
+        'orch osd create',
+        "name=svc_arg,type=CephString,req=false",
+        'Create an OSD service. Either --svc_arg=host:drives or -i <drive_group>')
+    def _create_osd(self, svc_arg=None, inbuf=None):
+        # type: (Optional[str], Optional[str]) -> HandleCommandResult
+        """Create one or more OSDs"""
+
+        usage = """
+Usage:
+  ceph orch osd create -i <json_file/yaml_file>
+  ceph orch osd create host:device1,device2,...
+"""
+
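+        # A minimal drive-group YAML sketch for ``-i`` (group name, host
+        # pattern and selection are illustrative; the accepted fields are
+        # defined by ceph.deployment.drive_group):
+        #
+        #   default_drive_group:
+        #     host_pattern: '*'
+        #     data_devices:
+        #       all: true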
+        if inbuf:
+            try:
+                dgs = DriveGroupSpecs(yaml.safe_load(inbuf))
+                drive_groups = dgs.drive_groups
+            except (ValueError, yaml.YAMLError) as e:
+                msg = 'Failed to read input: {}'.format(str(e)) + usage
+                return HandleCommandResult(-errno.EINVAL, stderr=msg)
+
+        elif svc_arg:
+            try:
+                node_name, block_device = svc_arg.split(":")
+                block_devices = block_device.split(',')
+            except (TypeError, KeyError, ValueError):
+                msg = "Invalid host:device spec: '{}'".format(svc_arg) + usage
+                return HandleCommandResult(-errno.EINVAL, stderr=msg)
+
+            devs = DeviceSelection(paths=block_devices)
+            drive_groups = [DriveGroupSpec(node_name, data_devices=devs)]
+        else:
+            return HandleCommandResult(-errno.EINVAL, stderr=usage)
+
+        completion = self.create_osds(drive_groups)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch daemon add mon',
+        "name=num,type=CephInt,req=false "
+        "name=hosts,type=CephString,n=N,req=false "
+        "name=label,type=CephString,req=false",
+        'Start monitor daemon(s)')
+    def _daemon_add_mon(self, num=None, hosts=[], label=None):
+        if not num and not hosts and not label:
+            # Improve Error message. Point to parse_host_spec examples
+            raise OrchestratorValidationError("Mons need a placement spec. (num, host, network, name(opt))")
+        placement = PlacementSpec(label=label, count=num, hosts=hosts)
+        placement.validate()
+
+        spec = ServiceSpec(placement=placement)
+
+        completion = self.add_mon(spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch daemon add mgr',
+        "name=num,type=CephInt,req=false "
+        "name=hosts,type=CephString,n=N,req=false",
+        'Start manager daemon(s)')
+    def _daemon_add_mgr(self, num=None, hosts=None):
+        spec = ServiceSpec(
+            placement=PlacementSpec(hosts=hosts, count=num))
+        completion = self.add_mgr(spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch daemon add rbd-mirror',
+        "name=num,type=CephInt,req=false "
+        "name=hosts,type=CephString,n=N,req=false",
+        'Start rbd-mirror daemon(s)')
+    def _rbd_mirror_add(self, num=None, hosts=None):
+        spec = ServiceSpec(
+            None,
+            placement=PlacementSpec(hosts=hosts, count=num))
+        completion = self.add_rbd_mirror(spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch daemon add mds',
+        "name=fs_name,type=CephString "
+        "name=num,type=CephInt,req=false "
+        "name=hosts,type=CephString,n=N,req=false",
+        'Start MDS daemon(s)')
+    def _mds_add(self, fs_name, num=None, hosts=None):
+        spec = ServiceSpec(
+            fs_name,
+            placement=PlacementSpec(hosts=hosts, count=num))
+        completion = self.add_mds(spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch daemon add rgw',
+        'name=realm_name,type=CephString '
+        'name=zone_name,type=CephString '
+        'name=num,type=CephInt,req=false '
+        "name=hosts,type=CephString,n=N,req=false",
+        'Start RGW daemon(s)')
+    def _rgw_add(self, realm_name, zone_name, num=1, hosts=None, inbuf=None):
+        usage = """
+Usage:
+  ceph orch rgw add -i <json_file>
+  ceph orch rgw add <realm_name> <zone_name>
+        """
+        if inbuf:
+            try:
+                rgw_spec = RGWSpec.from_json(json.loads(inbuf))
+            except ValueError as e:
+                msg = 'Failed to read JSON input: {}'.format(str(e)) + usage
+                return HandleCommandResult(-errno.EINVAL, stderr=msg)
+        else:
+            rgw_spec = RGWSpec(
+                rgw_realm=realm_name,
+                rgw_zone=zone_name,
+                placement=PlacementSpec(hosts=hosts, count=num))
+
+        completion = self.add_rgw(rgw_spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch daemon add nfs',
+        "name=svc_arg,type=CephString "
+        "name=pool,type=CephString "
+        "name=namespace,type=CephString,req=false "
+        'name=num,type=CephInt,req=false '
+        'name=hosts,type=CephString,n=N,req=false '
+        'name=label,type=CephString,req=false',
+        'Start NFS daemon(s)')
+    def _nfs_add(self, svc_arg, pool, namespace=None, num=None, label=None, hosts=[]):
+        spec = NFSServiceSpec(
+            svc_arg,
+            pool=pool,
+            namespace=namespace,
+            placement=PlacementSpec(label=label, hosts=hosts, count=num),
+        )
+        spec.validate_add()
+        completion = self.add_nfs(spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch daemon add prometheus',
+        'name=num,type=CephInt,req=false '
+        'name=hosts,type=CephString,n=N,req=false '
+        'name=label,type=CephString,req=false',
+        'Add prometheus daemon(s)')
+    def _daemon_add_prometheus(self, num=None, label=None, hosts=[]):
+        # type: (Optional[int], Optional[str], List[str]) -> HandleCommandResult
+        spec = ServiceSpec(
+            placement=PlacementSpec(label=label, hosts=hosts, count=num),
+        )
+        completion = self.add_prometheus(spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch',
+        "name=action,type=CephChoices,strings=start|stop|restart|redeploy|reconfig "
+        "name=svc_name,type=CephString",
+        'Start, stop, restart, redeploy, or reconfig an entire service (i.e. all daemons)')
+    def _service_action(self, action, svc_name):
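+        # Service names are '<type>' or '<type>.<id>': e.g. 'mds.cephfs'
+        # acts on the cephfs MDS service, a bare 'mds' on all MDS services.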
+        if '.' in svc_name:
+            (service_type, service_id) = svc_name.split('.', 1)
+        else:
+            service_type = svc_name
+            service_id = None
+        completion = self.service_action(action, service_type, service_id)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch daemon',
+        "name=action,type=CephChoices,strings=start|stop|restart|redeploy|reconfig "
+        "name=name,type=CephString",
+        'Start, stop, restart, redeploy, or reconfig a specific daemon')
+    def _daemon_action(self, action, name):
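+        # Daemon names are '<type>.<id>', e.g. 'osd.12'.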
+        if '.' not in name:
+            raise OrchestratorError('%s is not a valid daemon name' % name)
+        (daemon_type, daemon_id) = name.split('.', 1)
+        completion = self.daemon_action(action, daemon_type, daemon_id)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch daemon rm',
+        "name=names,type=CephString,n=N "
+        'name=force,type=CephBool,req=false',
+        'Remove specific daemon(s)')
+    def _daemon_rm(self, names, force=False):
+        for name in names:
+            if '.' not in name:
+                raise OrchestratorError('%s is not a valid daemon name' % name)
+        completion = self.remove_daemons(names, force)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch rm',
+        "name=name,type=CephString",
+        'Remove a service')
+    def _service_rm(self, name):
+        if '.' in name:
+            (service_type, service_name) = name.split('.', 1)
+        else:
+            service_type = name
+            service_name = None
+        if name in ['mon', 'mgr']:
+            raise OrchestratorError('The mon and mgr services cannot be removed')
+        completion = self.remove_service(service_type, service_name)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch apply mgr',
+        "name=num,type=CephInt,req=false "
+        "name=hosts,type=CephString,n=N,req=false "
+        "name=label,type=CephString,req=false",
+        'Update the size or placement of managers')
+    def _apply_mgr(self, num=None, hosts=[], label=None):
+        placement = PlacementSpec(
+            label=label, count=num, hosts=hosts)
+        placement.validate()
+
+        spec = ServiceSpec(placement=placement)
+
+        completion = self.apply_mgr(spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch apply mon',
+        "name=num,type=CephInt,req=false "
+        "name=hosts,type=CephString,n=N,req=false "
+        "name=label,type=CephString,req=false",
+        'Update the number of monitor instances')
+    def _apply_mon(self, num=None, hosts=[], label=None):
+        if not num and not hosts and not label:
+            # Improve Error message. Point to parse_host_spec examples
+            raise OrchestratorValidationError("Mons need a placement spec. (num, host, network, name(opt))")
+        placement = PlacementSpec(label=label, count=num, hosts=hosts)
+        placement.validate()
+
+        spec = ServiceSpec(placement=placement)
+
+        completion = self.apply_mon(spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch apply mds',
+        "name=fs_name,type=CephString "
+        "name=num,type=CephInt,req=false "
+        "name=hosts,type=CephString,n=N,req=false "
+        "name=label,type=CephString,req=false",
+        'Update the number of MDS instances for the given fs_name')
+    def _apply_mds(self, fs_name, num=None, label=None, hosts=[]):
+        placement = PlacementSpec(label=label, count=num, hosts=hosts)
+        placement.validate()
+
+        spec = ServiceSpec(
+            fs_name,
+            placement=placement)
+
+        completion = self.apply_mds(spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch apply rbd-mirror',
+        "name=num,type=CephInt,req=false "
+        "name=hosts,type=CephString,n=N,req=false "
+        "name=label,type=CephString,req=false",
+        'Update the number of rbd-mirror instances')
+    def _apply_rbd_mirror(self, num=None, label=None, hosts=[]):
+        spec = ServiceSpec(
+            placement=PlacementSpec(hosts=hosts, count=num, label=label))
+        completion = self.apply_rbd_mirror(spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch apply rgw',
+        'name=realm_name,type=CephString '
+        'name=zone_name,type=CephString '
+        'name=num,type=CephInt,req=false '
+        'name=hosts,type=CephString,n=N,req=false '
+        'name=label,type=CephString,req=false',
+        'Update the number of RGW instances for the given zone')
+    def _apply_rgw(self, zone_name, realm_name, num=None, label=None, hosts=[]):
+        spec = RGWSpec(
+            rgw_realm=realm_name,
+            rgw_zone=zone_name,
+            placement=PlacementSpec(hosts=hosts, label=label, count=num))
+        completion = self.apply_rgw(spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch apply nfs',
+        "name=svc_id,type=CephString "
+        'name=num,type=CephInt,req=false '
+        'name=hosts,type=CephString,n=N,req=false '
+        'name=label,type=CephString,req=false',
+        'Scale an NFS service')
+    def _apply_nfs(self, svc_id, num=None, label=None, hosts=[]):
+        # type: (str, Optional[int], Optional[str], List[str]) -> HandleCommandResult
+        spec = NFSServiceSpec(
+            svc_id,
+            placement=PlacementSpec(label=label, hosts=hosts, count=num),
+        )
+        completion = self.apply_nfs(spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch apply prometheus',
+        'name=num,type=CephInt,req=false '
+        'name=hosts,type=CephString,n=N,req=false '
+        'name=label,type=CephString,req=false',
+        'Scale prometheus service')
+    def _apply_prometheus(self, num=None, label=None, hosts=[]):
+        # type: (Optional[int], Optional[str], List[str]) -> HandleCommandResult
+        spec = ServiceSpec(
+            placement=PlacementSpec(label=label, hosts=hosts, count=num),
+        )
+        completion = self.apply_prometheus(spec)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'orch set backend',
+        "name=module_name,type=CephString,req=true",
+        'Select orchestrator module backend')
+    def _set_backend(self, module_name):
+        """
+        We implement a setter command instead of just having the user
+        modify the setting directly, so that we can validate they're setting
+        it to a module that really exists and is enabled.
+
+        There isn't a mechanism for ensuring they don't *disable* the module
+        later, but this is better than nothing.
+        """
+        mgr_map = self.get("mgr_map")
+
+        if module_name is None or module_name == "":
+            self.set_module_option("orchestrator", None)
+            return HandleCommandResult()
+
+        for module in mgr_map['available_modules']:
+            if module['name'] != module_name:
+                continue
+
+            if not module['can_run']:
+                continue
+
+            enabled = module['name'] in mgr_map['modules']
+            if not enabled:
+                return HandleCommandResult(-errno.EINVAL,
+                                           stderr="Module '{module_name}' is not enabled. \n Run "
+                                                  "`ceph mgr module enable {module_name}` "
+                                                  "to enable.".format(module_name=module_name))
+
+            try:
+                is_orchestrator = self.remote(module_name,
+                                              "is_orchestrator_module")
+            except NameError:
+                is_orchestrator = False
+
+            if not is_orchestrator:
+                return HandleCommandResult(-errno.EINVAL,
+                                           stderr="'{0}' is not an orchestrator module".format(module_name))
+
+            self.set_module_option("orchestrator", module_name)
+
+            return HandleCommandResult()
+
+        return HandleCommandResult(-errno.EINVAL, stderr="Module '{0}' not found".format(module_name))
+
+    @_cli_write_command(
+        'orch cancel',
+        desc='cancels ongoing operations')
+    def _cancel(self):
+        """
+        ProgressReferences might get stuck. Let's unstuck them.
+        """
+        self.cancel_completions()
+        return HandleCommandResult()
+
+    @_cli_read_command(
+        'orch status',
+        desc='Report configured backend and its status')
+    def _status(self):
+        o = self._select_orchestrator()
+        if o is None:
+            raise NoOrchestrator()
+
+        avail, why = self.available()
+        if avail is None:
+            # The module does not report its availability
+            return HandleCommandResult(stdout="Backend: {0}".format(o))
+        else:
+            return HandleCommandResult(stdout="Backend: {0}\nAvailable: {1}{2}".format(
+                                           o, avail,
+                                           " ({0})".format(why) if not avail else ""
+                                       ))
+
+    def self_test(self):
+        old_orch = self._select_orchestrator()
+        self._set_backend('')
+        assert self._select_orchestrator() is None
+        self._set_backend(old_orch)
+
+        e1 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "ZeroDivisionError")
+        try:
+            raise_if_exception(e1)
+            assert False
+        except ZeroDivisionError as e:
+            assert e.args == ('hello', 'world')
+
+        e2 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "OrchestratorError")
+        try:
+            raise_if_exception(e2)
+            assert False
+        except OrchestratorError as e:
+            assert e.args == ('hello', 'world')
+
+        c = TrivialReadCompletion(result=True)
+        assert c.has_result
+
+    @_cli_write_command(
+        'upgrade check',
+        'name=image,type=CephString,req=false '
+        'name=ceph_version,type=CephString,req=false',
+        desc='Check service versions vs available and target containers')
+    def _upgrade_check(self, image=None, ceph_version=None):
+        completion = self.upgrade_check(image=image, version=ceph_version)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'upgrade status',
+        desc='Report the status of any ongoing upgrade')
+    def _upgrade_status(self):
+        completion = self.upgrade_status()
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        r = {
+            'target_image': completion.result.target_image,
+            'in_progress': completion.result.in_progress,
+            'services_complete': completion.result.services_complete,
+            'message': completion.result.message,
+        }
+        out = json.dumps(r, indent=4)
+        return HandleCommandResult(stdout=out)
+
+    @_cli_write_command(
+        'upgrade start',
+        'name=image,type=CephString,req=false '
+        'name=ceph_version,type=CephString,req=false',
+        desc='Initiate upgrade')
+    def _upgrade_start(self, image=None, ceph_version=None):
+        completion = self.upgrade_start(image, ceph_version)
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'upgrade pause',
+        desc='Pause an in-progress upgrade')
+    def _upgrade_pause(self):
+        completion = self.upgrade_pause()
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'upgrade resume',
+        desc='Resume paused upgrade')
+    def _upgrade_resume(self):
+        completion = self.upgrade_resume()
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
+
+    @_cli_write_command(
+        'upgrade stop',
+        desc='Stop an in-progress upgrade')
+    def _upgrade_stop(self):
+        completion = self.upgrade_stop()
+        self._orchestrator_wait([completion])
+        raise_if_exception(completion)
+        return HandleCommandResult(stdout=completion.result_str())
diff --git a/src/pybind/mgr/orchestrator_cli/.gitignore b/src/pybind/mgr/orchestrator_cli/.gitignore
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/src/pybind/mgr/orchestrator_cli/README.md b/src/pybind/mgr/orchestrator_cli/README.md
deleted file mode 100644 (file)
index d70e88c..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-# Orchestrator CLI
-
-See also [orchestrator cli doc](https://docs.ceph.com/docs/master/mgr/orchestrator_cli/).
-
-## Running the Teuthology tests
-
-To run the API tests against a real Ceph cluster, we leverage the Teuthology
-framework and the `test_orchestrator` backend.
-
-``source`` the script and run the tests manually::
-
-    $ pushd ../dashboard ; source ./run-backend-api-tests.sh ; popd
-    $ run_teuthology_tests tasks.mgr.test_orchestrator_cli
-    $ cleanup_teuthology
diff --git a/src/pybind/mgr/orchestrator_cli/__init__.py b/src/pybind/mgr/orchestrator_cli/__init__.py
deleted file mode 100644 (file)
index ef27d74..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-from __future__ import absolute_import
-
-from .module import OrchestratorCli
diff --git a/src/pybind/mgr/orchestrator_cli/module.py b/src/pybind/mgr/orchestrator_cli/module.py
deleted file mode 100644 (file)
index 204a663..0000000
+++ /dev/null
@@ -1,864 +0,0 @@
-import datetime
-import errno
-import json
-import yaml
-from functools import wraps
-
-from ceph.deployment.inventory import Device
-from prettytable import PrettyTable
-
-from mgr_util import format_bytes, to_pretty_timedelta
-
-try:
-    from typing import List, Set, Optional
-except ImportError:
-    pass  # just for type checking.
-
-
-from ceph.deployment.drive_group import DriveGroupSpec, DriveGroupValidationError, \
-    DeviceSelection, DriveGroupSpecs
-from mgr_module import MgrModule, CLICommand, HandleCommandResult
-
-import orchestrator
-
-
-class OrchestratorCli(orchestrator.OrchestratorClientMixin, MgrModule):
-    MODULE_OPTIONS = [
-        {
-            'name': 'orchestrator',
-            'type': 'str',
-            'default': None,
-            'desc': 'Orchestrator backend',
-            'enum_allowed': ['cephadm', 'rook',
-                             'test_orchestrator'],
-            'runtime': True,
-        },
-    ]
-    NATIVE_OPTIONS = []  # type: List[dict]
-
-    def __init__(self, *args, **kwargs):
-        super(OrchestratorCli, self).__init__(*args, **kwargs)
-        self.ident = set()  # type: Set[str]
-        self.fault = set()  # type: Set[str]
-        self._load()
-        self._refresh_health()
-
-    def _load(self):
-        active = self.get_store('active_devices')
-        if active:
-            decoded = json.loads(active)
-            self.ident = set(decoded.get('ident', []))
-            self.fault = set(decoded.get('fault', []))
-        self.log.debug('ident {}, fault {}'.format(self.ident, self.fault))
-
-    def _save(self):
-        encoded = json.dumps({
-            'ident': list(self.ident),
-            'fault': list(self.fault),
-            })
-        self.set_store('active_devices', encoded)
-
-    def _refresh_health(self):
-        h = {}
-        if self.ident:
-            h['DEVICE_IDENT_ON'] = {
-                'severity': 'warning',
-                'summary': '%d devices have ident light turned on' % len(
-                    self.ident),
-                'detail': ['{} ident light enabled'.format(d) for d in self.ident]
-            }
-        if self.fault:
-            h['DEVICE_FAULT_ON'] = {
-                'severity': 'warning',
-                'summary': '%d devices have fault light turned on' % len(
-                    self.fault),
-                'detail': ['{} fault light enabled'.format(d) for d in self.ident]
-            }
-        self.set_health_checks(h)
-
-    def _get_device_locations(self, dev_id):
-        # type: (str) -> List[orchestrator.DeviceLightLoc]
-        locs = [d['location'] for d in self.get('devices')['devices'] if d['devid'] == dev_id]
-        return [orchestrator.DeviceLightLoc(**l) for l in sum(locs, [])]
-
-    @orchestrator._cli_read_command(
-        prefix='device ls-lights',
-        desc='List currently active device indicator lights')
-    def _device_ls(self):
-        return HandleCommandResult(
-            stdout=json.dumps({
-                'ident': list(self.ident),
-                'fault': list(self.fault)
-                }, indent=4, sort_keys=True))
-
-    def light_on(self, fault_ident, devid):
-        # type: (str, str) -> HandleCommandResult
-        assert fault_ident in ("fault", "ident")
-        locs = self._get_device_locations(devid)
-        if locs is None:
-            return HandleCommandResult(stderr='device {} not found'.format(devid),
-                                       retval=-errno.ENOENT)
-
-        getattr(self, fault_ident).add(devid)
-        self._save()
-        self._refresh_health()
-        completion = self.blink_device_light(fault_ident, True, locs)
-        self._orchestrator_wait([completion])
-        return HandleCommandResult(stdout=str(completion.result))
-
-    def light_off(self, fault_ident, devid, force):
-        # type: (str, str, bool) -> HandleCommandResult
-        assert fault_ident in ("fault", "ident")
-        locs = self._get_device_locations(devid)
-        if locs is None:
-            return HandleCommandResult(stderr='device {} not found'.format(devid),
-                                       retval=-errno.ENOENT)
-
-        try:
-            completion = self.blink_device_light(fault_ident, False, locs)
-            self._orchestrator_wait([completion])
-
-            if devid in getattr(self, fault_ident):
-                getattr(self, fault_ident).remove(devid)
-                self._save()
-                self._refresh_health()
-            return HandleCommandResult(stdout=str(completion.result))
-
-        except:
-            # There are several reasons the try: block might fail:
-            # 1. the device no longer exist
-            # 2. the device is no longer known to Ceph
-            # 3. the host is not reachable
-            if force and devid in getattr(self, fault_ident):
-                getattr(self, fault_ident).remove(devid)
-                self._save()
-                self._refresh_health()
-            raise
-
-    @orchestrator._cli_write_command(
-        prefix='device light',
-        cmd_args='name=enable,type=CephChoices,strings=on|off '
-                 'name=devid,type=CephString '
-                 'name=light_type,type=CephChoices,strings=ident|fault,req=false '
-                 'name=force,type=CephBool,req=false',
-        desc='Enable or disable the device light. Default type is `ident`\n'
-             'Usage: device light (on|off) <devid> [ident|fault] [--force]')
-    def _device_light(self, enable, devid, light_type=None, force=False):
-        # type: (str, str, Optional[str], bool) -> HandleCommandResult
-        light_type = light_type or 'ident'
-        on = enable == 'on'
-        if on:
-            return self.light_on(light_type, devid)
-        else:
-            return self.light_off(light_type, devid, force)
-
-    def _select_orchestrator(self):
-        return self.get_module_option("orchestrator")
-
-    @orchestrator._cli_write_command(
-        'orch host add',
-        'name=host,type=CephString,req=true '
-        'name=addr,type=CephString,req=false '
-        'name=labels,type=CephString,n=N,req=false',
-        'Add a host')
-    def _add_host(self, host, addr=None, labels=None):
-        s = orchestrator.HostSpec(hostname=host, addr=addr, labels=labels)
-        completion = self.add_host(s)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch host rm',
-        "name=host,type=CephString,req=true",
-        'Remove a host')
-    def _remove_host(self, host):
-        completion = self.remove_host(host)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch host set-addr',
-        'name=host,type=CephString '
-        'name=addr,type=CephString',
-        'Update a host address')
-    def _update_set_addr(self, host, addr):
-        completion = self.update_host_addr(host, addr)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_read_command(
-        'orch host ls',
-        'name=format,type=CephChoices,strings=json|plain,req=false',
-        'List hosts')
-    def _get_hosts(self, format='plain'):
-        completion = self.get_hosts()
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        if format == 'json':
-            hosts = [dict(host=node.name, labels=node.labels)
-                     for node in completion.result]
-            output = json.dumps(hosts, sort_keys=True)
-        else:
-            table = PrettyTable(
-                ['HOST', 'ADDR', 'LABELS'],
-                border=False)
-            table.align = 'l'
-            table.left_padding_width = 0
-            table.right_padding_width = 1
-            for node in completion.result:
-                table.add_row((node.name, node.addr, ' '.join(node.labels)))
-            output = table.get_string()
-        return HandleCommandResult(stdout=output)
-
-    @orchestrator._cli_write_command(
-        'orch host label add',
-        'name=host,type=CephString '
-        'name=label,type=CephString',
-        'Add a host label')
-    def _host_label_add(self, host, label):
-        completion = self.add_host_label(host, label)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch host label rm',
-        'name=host,type=CephString '
-        'name=label,type=CephString',
-        'Remove a host label')
-    def _host_label_rm(self, host, label):
-        completion = self.remove_host_label(host, label)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_read_command(
-        'orch device ls',
-        "name=host,type=CephString,n=N,req=false "
-        "name=format,type=CephChoices,strings=json|plain,req=false "
-        "name=refresh,type=CephBool,req=false",
-        'List devices on a node')
-    def _list_devices(self, host=None, format='plain', refresh=False):
-        # type: (Optional[List[str]], str, bool) -> HandleCommandResult
-        """
-        Provide information about storage devices present in cluster hosts
-
-        Note: this does not have to be completely synchronous. Slightly out of
-        date hardware inventory is fine as long as hardware ultimately appears
-        in the output of this command.
-        """
-        nf = orchestrator.InventoryFilter(nodes=host) if host else None
-
-        completion = self.get_inventory(node_filter=nf, refresh=refresh)
-
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-
-        if format == 'json':
-            data = [n.to_json() for n in completion.result]
-            return HandleCommandResult(stdout=json.dumps(data))
-        else:
-            out = []
-
-            table = PrettyTable(
-                ['HOST', 'PATH', 'TYPE', 'SIZE', 'DEVICE', 'AVAIL',
-                 'REJECT REASONS'],
-                border=False)
-            table.align = 'l'
-            table._align['SIZE'] = 'r'
-            table.left_padding_width = 0
-            table.right_padding_width = 1
-            for host_ in completion.result: # type: orchestrator.InventoryNode
-                for d in host_.devices.devices:  # type: Device
-                    table.add_row(
-                        (
-                            host_.name,
-                            d.path,
-                            d.human_readable_type,
-                            format_bytes(d.sys_api.get('size', 0), 5),
-                            d.device_id,
-                            d.available,
-                            ', '.join(d.rejected_reasons)
-                        )
-                    )
-            out.append(table.get_string())
-            return HandleCommandResult(stdout='\n'.join(out))
-
-    @orchestrator._cli_read_command(
-        'orch ps',
-        "name=host,type=CephString,req=false "
-        "name=daemon_type,type=CephChoices,strings=mon|mgr|osd|mds|iscsi|nfs|rgw|rbd-mirror,req=false "
-        "name=daemon_id,type=CephString,req=false "
-        "name=format,type=CephChoices,strings=json|plain,req=false "
-        "name=refresh,type=CephBool,req=false",
-        'List daemons known to orchestrator')
-    def _list_daemons(self, host=None, daemon_type=None, daemon_id=None, format='plain', refresh=False):
-        completion = self.list_daemons(daemon_type,
-                                       daemon_id=daemon_id,
-                                       host=host,
-                                       refresh=refresh)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        daemons = completion.result
-
-        def ukn(s):
-            return '<unknown>' if s is None else s
-        # Sort the list for display
-        daemons.sort(key=lambda s: (ukn(s.daemon_type), ukn(s.nodename), ukn(s.daemon_id)))
-
-        if len(daemons) == 0:
-            return HandleCommandResult(stdout="No daemons reported")
-        elif format == 'json':
-            data = [s.to_json() for s in daemons]
-            return HandleCommandResult(stdout=json.dumps(data))
-        else:
-            now = datetime.datetime.utcnow()
-            table = PrettyTable(
-                ['NAME', 'HOST', 'STATUS', 'REFRESHED',
-                 'VERSION', 'IMAGE NAME', 'IMAGE ID', 'CONTAINER ID'],
-                border=False)
-            table.align = 'l'
-            table.left_padding_width = 0
-            table.right_padding_width = 1
-            for s in sorted(daemons, key=lambda s: s.name()):
-                status = {
-                    -1: 'error',
-                    0: 'stopped',
-                    1: 'running',
-                    None: '<unknown>'
-                }[s.status]
-
-                if s.last_refresh:
-                    age = to_pretty_timedelta(now - s.last_refresh) + ' ago'
-                else:
-                    age = '-'
-                table.add_row((
-                    s.name(),
-                    ukn(s.nodename),
-                    status,
-                    age,
-                    ukn(s.version),
-                    ukn(s.container_image_name),
-                    ukn(s.container_image_id)[0:12],
-                    ukn(s.container_id)[0:12]))
-
-            return HandleCommandResult(stdout=table.get_string())
-
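-    # Usage sketch for `orch ps` (filter values are illustrative):
-    #
-    #   ceph orch ps
-    #   ceph orch ps --daemon_type=osd --format=json --refresh
-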
-    @orchestrator._cli_write_command(
-        'orch osd create',
-        "name=svc_arg,type=CephString,req=false",
-        'Create an OSD service. Either --svc_arg=host:drives or -i <drive_group>')
-    def _create_osd(self, svc_arg=None, inbuf=None):
-        # type: (Optional[str], Optional[str]) -> HandleCommandResult
-        """Create one or more OSDs"""
-
-        usage = """
-Usage:
-  ceph orch osd create -i <json_file/yaml_file>
-  ceph orch osd create host:device1,device2,...
-"""
-
-        if inbuf:
-            try:
-                dgs = DriveGroupSpecs(yaml.safe_load(inbuf))
-                drive_groups = dgs.drive_groups
-            except (ValueError, yaml.YAMLError) as e:
-                msg = 'Failed to read JSON/YAML input: {}'.format(str(e)) + usage
-                return HandleCommandResult(-errno.EINVAL, stderr=msg)
-
-        elif svc_arg:
-            try:
-                node_name, block_device = svc_arg.split(":")
-                block_devices = block_device.split(',')
-            except (TypeError, KeyError, ValueError):
-                msg = "Invalid host:device spec: '{}'".format(svc_arg) + usage
-                return HandleCommandResult(-errno.EINVAL, stderr=msg)
-
-            devs = DeviceSelection(paths=block_devices)
-            drive_groups = [DriveGroupSpec(node_name, data_devices=devs)]
-        else:
-            return HandleCommandResult(-errno.EINVAL, stderr=usage)
-
-        completion = self.create_osds(drive_groups)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
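-    # Input sketch for `orch osd create -i <file>`: a minimal drive group.
-    # The group name and field names are illustrative and have to match what
-    # DriveGroupSpecs in ceph.deployment.drive_group accepts in this tree:
-    #
-    #   default_drive_group:
-    #     host_pattern: '*'
-    #     data_devices:
-    #       all: true
-    #
-    # The inline form takes explicit paths, e.g.
-    # `ceph orch osd create node1:/dev/sdb,/dev/sdc` (hypothetical devices).
-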
-    @orchestrator._cli_write_command(
-        'orch daemon add mon',
-        "name=num,type=CephInt,req=false "
-        "name=hosts,type=CephString,n=N,req=false "
-        "name=label,type=CephString,req=false",
-        'Start monitor daemon(s)')
-    def _daemon_add_mon(self, num=None, hosts=[], label=None):
-        if not num and not hosts and not label:
-            # TODO: improve this error message and point to placement spec examples
-            raise orchestrator.OrchestratorValidationError("Mons need a placement spec. (num, host, network, name(opt))")
-        placement = orchestrator.PlacementSpec(label=label, count=num, hosts=hosts)
-        placement.validate()
-
-        spec = orchestrator.ServiceSpec(placement=placement)
-
-        completion = self.add_mon(spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch daemon add mgr',
-        "name=num,type=CephInt,req=false "
-        "name=hosts,type=CephString,n=N,req=false",
-        'Start manager daemon(s)')
-    def _daemon_add_mgr(self, num=None, hosts=None):
-        spec = orchestrator.ServiceSpec(
-            placement=orchestrator.PlacementSpec(hosts=hosts, count=num))
-        completion = self.add_mgr(spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch daemon add rbd-mirror',
-        "name=num,type=CephInt,req=false "
-        "name=hosts,type=CephString,n=N,req=false",
-        'Start rbd-mirror daemon(s)')
-    def _rbd_mirror_add(self, num=None, hosts=None):
-        spec = orchestrator.ServiceSpec(
-            None,
-            placement=orchestrator.PlacementSpec(hosts=hosts, count=num))
-        completion = self.add_rbd_mirror(spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch daemon add mds',
-        "name=fs_name,type=CephString "
-        "name=num,type=CephInt,req=false "
-        "name=hosts,type=CephString,n=N,req=false",
-        'Start MDS daemon(s)')
-    def _mds_add(self, fs_name, num=None, hosts=None):
-        spec = orchestrator.ServiceSpec(
-            fs_name,
-            placement=orchestrator.PlacementSpec(hosts=hosts, count=num))
-        completion = self.add_mds(spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch daemon add rgw',
-        'name=realm_name,type=CephString '
-        'name=zone_name,type=CephString '
-        'name=num,type=CephInt,req=false '
-        "name=hosts,type=CephString,n=N,req=false",
-        'Start RGW daemon(s)')
-    def _rgw_add(self, realm_name, zone_name, num=1, hosts=None, inbuf=None):
-        usage = """
-Usage:
-  ceph orch rgw add -i <json_file>
-  ceph orch rgw add <realm_name> <zone_name>
-        """
-        if inbuf:
-            try:
-                rgw_spec = orchestrator.RGWSpec.from_json(json.loads(inbuf))
-            except ValueError as e:
-                msg = 'Failed to read JSON input: {}'.format(str(e)) + usage
-                return HandleCommandResult(-errno.EINVAL, stderr=msg)
-        else:
-            rgw_spec = orchestrator.RGWSpec(
-                rgw_realm=realm_name,
-                rgw_zone=zone_name,
-                placement=orchestrator.PlacementSpec(hosts=hosts, count=num))
-
-        completion = self.add_rgw(rgw_spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch daemon add nfs',
-        "name=svc_arg,type=CephString "
-        "name=pool,type=CephString "
-        "name=namespace,type=CephString,req=false "
-        'name=num,type=CephInt,req=false '
-        'name=hosts,type=CephString,n=N,req=false '
-        'name=label,type=CephString,req=false',
-        'Start NFS daemon(s)')
-    def _nfs_add(self, svc_arg, pool, namespace=None, num=None, label=None, hosts=[]):
-        spec = orchestrator.NFSServiceSpec(
-            svc_arg,
-            pool=pool,
-            namespace=namespace,
-            placement=orchestrator.PlacementSpec(label=label, hosts=hosts, count=num),
-        )
-        spec.validate_add()
-        completion = self.add_nfs(spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
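-    # Usage sketch for `orch daemon add nfs` (service id, pool and namespace
-    # are hypothetical):
-    #
-    #   ceph orch daemon add nfs mynfs nfs-ganesha --namespace=nfs-ns
-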
-    @orchestrator._cli_write_command(
-        'orch daemon add prometheus',
-        'name=num,type=CephInt,req=false '
-        'name=hosts,type=CephString,n=N,req=false '
-        'name=label,type=CephString,req=false',
-        'Add prometheus daemon(s)')
-    def _daemon_add_prometheus(self, num=None, label=None, hosts=[]):
-        # type: (Optional[int], Optional[str], List[str]) -> HandleCommandResult
-        spec = orchestrator.ServiceSpec(
-            placement=orchestrator.PlacementSpec(label=label, hosts=hosts, count=num),
-        )
-        completion = self.add_prometheus(spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch',
-        "name=action,type=CephChoices,strings=start|stop|restart|redeploy|reconfig "
-        "name=svc_name,type=CephString",
-        'Start, stop, restart, redeploy, or reconfig an entire service (i.e. all daemons)')
-    def _service_action(self, action, svc_name):
-        if '.' in svc_name:
-            (service_type, service_id) = svc_name.split('.', 1)
-        else:
-            service_type = svc_name
-            service_id = None
-        completion = self.service_action(action, service_type, service_id)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
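-    # Usage sketch for the service-level action command (service names are
-    # hypothetical):
-    #
-    #   ceph orch restart rgw.myrealm.myzone
-    #   ceph orch reconfig mds.cephfs
-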
-    @orchestrator._cli_write_command(
-        'orch daemon',
-        "name=action,type=CephChoices,strings=start|stop|restart|redeploy|reconfig "
-        "name=name,type=CephString",
-        'Start, stop, restart, redeploy, or reconfig a specific daemon')
-    def _daemon_action(self, action, name):
-        if '.' not in name:
-            raise orchestrator.OrchestratorError('%s is not a valid daemon name' % name)
-        (daemon_type, daemon_id) = name.split('.', 1)
-        completion = self.daemon_action(action, daemon_type, daemon_id)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
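-    # Usage sketch for the daemon-level action command; names must have the
-    # form <type>.<id> (the ids below are hypothetical):
-    #
-    #   ceph orch daemon restart osd.3
-    #   ceph orch daemon redeploy mgr.node1
-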
-    @orchestrator._cli_write_command(
-        'orch daemon rm',
-        "name=names,type=CephString,n=N "
-        'name=force,type=CephBool,req=false',
-        'Remove specific daemon(s)')
-    def _daemon_rm(self, names, force=False):
-        for name in names:
-            if '.' not in name:
-                raise orchestrator.OrchestratorError('%s is not a valid daemon name' % name)
-        completion = self.remove_daemons(names, force)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch rm',
-        "name=name,type=CephString",
-        'Remove a service')
-    def _service_rm(self, name):
-        if '.' in name:
-            (service_type, service_name) = name.split('.', 1)
-        else:
-            service_type = name
-            service_name = None
-        if service_type in ['mon', 'mgr']:
-            raise orchestrator.OrchestratorError('The mon and mgr services cannot be removed')
-        completion = self.remove_service(service_type, service_name)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch apply mgr',
-        "name=num,type=CephInt,req=false "
-        "name=hosts,type=CephString,n=N,req=false "
-        "name=label,type=CephString,req=false",
-        'Update the size or placement of managers')
-    def _apply_mgr(self, num=None, hosts=[], label=None):
-        placement = orchestrator.PlacementSpec(
-            label=label, count=num, hosts=hosts)
-        placement.validate()
-
-        spec = orchestrator.ServiceSpec(placement=placement)
-
-        completion = self.apply_mgr(spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch apply mon',
-        "name=num,type=CephInt,req=false "
-        "name=hosts,type=CephString,n=N,req=false "
-        "name=label,type=CephString,req=false",
-        'Update the number of monitor instances')
-    def _apply_mon(self, num=None, hosts=[], label=None):
-        if not num and not hosts and not label:
-            # TODO: improve this error message and point to placement spec examples
-            raise orchestrator.OrchestratorValidationError("Mons need a placement spec. (num, host, network, name(opt))")
-        placement = orchestrator.PlacementSpec(label=label, count=num, hosts=hosts)
-        placement.validate()
-
-        spec = orchestrator.ServiceSpec(placement=placement)
-
-        completion = self.apply_mon(spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch apply mds',
-        "name=fs_name,type=CephString "
-        "name=num,type=CephInt,req=false "
-        "name=hosts,type=CephString,n=N,req=false "
-        "name=label,type=CephString,req=false",
-        'Update the number of MDS instances for the given fs_name')
-    def _apply_mds(self, fs_name, num=None, label=None, hosts=[]):
-        placement = orchestrator.PlacementSpec(label=label, count=num, hosts=hosts)
-        placement.validate()
-
-        spec = orchestrator.ServiceSpec(
-            fs_name,
-            placement=placement)
-
-        completion = self.apply_mds(spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch apply rbd-mirror',
-        "name=num,type=CephInt,req=false "
-        "name=hosts,type=CephString,n=N,req=false "
-        "name=label,type=CephString,req=false",
-        'Update the number of rbd-mirror instances')
-    def _apply_rbd_mirror(self, num=None, label=None, hosts=[]):
-        spec = orchestrator.ServiceSpec(
-            placement=orchestrator.PlacementSpec(hosts=hosts, count=num, label=label))
-        completion = self.apply_rbd_mirror(spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch apply rgw',
-        'name=realm_name,type=CephString '
-        'name=zone_name,type=CephString '
-        'name=num,type=CephInt,req=false '
-        'name=hosts,type=CephString,n=N,req=false '
-        'name=label,type=CephString,req=false',
-        'Update the number of RGW instances for the given zone')
-    def _apply_rgw(self, realm_name, zone_name, num=None, label=None, hosts=[]):
-        spec = orchestrator.RGWSpec(
-            rgw_realm=realm_name,
-            rgw_zone=zone_name,
-            placement=orchestrator.PlacementSpec(hosts=hosts, label=label, count=num))
-        completion = self.apply_rgw(spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch apply nfs',
-        "name=svc_id,type=CephString "
-        'name=num,type=CephInt,req=false '
-        'name=hosts,type=CephString,n=N,req=false '
-        'name=label,type=CephString,req=false',
-        'Scale an NFS service')
-    def _apply_nfs(self, svc_id, num=None, label=None, hosts=[]):
-        # type: (str, Optional[int], Optional[str], List[str]) -> HandleCommandResult
-        spec = orchestrator.NFSServiceSpec(
-            svc_id,
-            placement=orchestrator.PlacementSpec(label=label, hosts=hosts, count=num),
-        )
-        completion = self.apply_nfs(spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch apply prometheus',
-        'name=num,type=CephInt,req=false '
-        'name=hosts,type=CephString,n=N,req=false '
-        'name=label,type=CephString,req=false',
-        'Scale prometheus service')
-    def _apply_prometheus(self, num=None, label=None, hosts=[]):
-        # type: (Optional[int], Optional[str], List[str]) -> HandleCommandResult
-        spec = orchestrator.ServiceSpec(
-            placement=orchestrator.PlacementSpec(label=label, hosts=hosts, count=num),
-        )
-        completion = self.apply_prometheus(spec)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'orch set backend',
-        "name=module_name,type=CephString,req=true",
-        'Select orchestrator module backend')
-    def _set_backend(self, module_name):
-        """
-        We implement a setter command instead of just having the user
-        modify the setting directly, so that we can validate they're setting
-        it to a module that really exists and is enabled.
-
-        There isn't a mechanism for ensuring they don't *disable* the module
-        later, but this is better than nothing.
-        """
-        mgr_map = self.get("mgr_map")
-
-        if module_name is None or module_name == "":
-            self.set_module_option("orchestrator", None)
-            return HandleCommandResult()
-
-        for module in mgr_map['available_modules']:
-            if module['name'] != module_name:
-                continue
-
-            if not module['can_run']:
-                continue
-
-            enabled = module['name'] in mgr_map['modules']
-            if not enabled:
-                return HandleCommandResult(-errno.EINVAL,
-                                           stderr="Module '{module_name}' is not enabled. \n Run "
-                                                  "`ceph mgr module enable {module_name}` "
-                                                  "to enable.".format(module_name=module_name))
-
-            try:
-                is_orchestrator = self.remote(module_name,
-                                              "is_orchestrator_module")
-            except NameError:
-                is_orchestrator = False
-
-            if not is_orchestrator:
-                return HandleCommandResult(-errno.EINVAL,
-                                           stderr="'{0}' is not an orchestrator module".format(module_name))
-
-            self.set_module_option("orchestrator", module_name)
-
-            return HandleCommandResult()
-
-        return HandleCommandResult(-errno.EINVAL, stderr="Module '{0}' not found".format(module_name))
-
-    @orchestrator._cli_write_command(
-        'orch cancel',
-        desc='Cancel ongoing operations')
-    def _cancel(self):
-        """
-        ProgressReferences might get stuck. Let's unstick them.
-        """
-        self.cancel_completions()
-        return HandleCommandResult()
-
-    @orchestrator._cli_read_command(
-        'orch status',
-        desc='Report configured backend and its status')
-    def _status(self):
-        o = self._select_orchestrator()
-        if o is None:
-            raise orchestrator.NoOrchestrator()
-
-        avail, why = self.available()
-        if avail is None:
-            # The module does not report its availability
-            return HandleCommandResult(stdout="Backend: {0}".format(o))
-        else:
-            return HandleCommandResult(stdout="Backend: {0}\nAvailable: {1}{2}".format(
-                                           o, avail,
-                                           " ({0})".format(why) if not avail else ""
-                                       ))
-
-    def self_test(self):
-        old_orch = self._select_orchestrator()
-        self._set_backend('')
-        assert self._select_orchestrator() is None
-        self._set_backend(old_orch)
-
-        e1 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "ZeroDivisionError")
-        try:
-            orchestrator.raise_if_exception(e1)
-            assert False
-        except ZeroDivisionError as e:
-            assert e.args == ('hello', 'world')
-
-        e2 = self.remote('selftest', 'remote_from_orchestrator_cli_self_test', "OrchestratorError")
-        try:
-            orchestrator.raise_if_exception(e2)
-            assert False
-        except orchestrator.OrchestratorError as e:
-            assert e.args == ('hello', 'world')
-
-        c = orchestrator.TrivialReadCompletion(result=True)
-        assert c.has_result
-
-    @orchestrator._cli_write_command(
-        'upgrade check',
-        'name=image,type=CephString,req=false '
-        'name=ceph_version,type=CephString,req=false',
-        desc='Check service versions vs available and target containers')
-    def _upgrade_check(self, image=None, ceph_version=None):
-        completion = self.upgrade_check(image=image, version=ceph_version)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'upgrade status',
-        desc='Report the status of an ongoing upgrade')
-    def _upgrade_status(self):
-        completion = self.upgrade_status()
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        r = {
-            'target_image': completion.result.target_image,
-            'in_progress': completion.result.in_progress,
-            'services_complete': completion.result.services_complete,
-            'message': completion.result.message,
-        }
-        out = json.dumps(r, indent=4)
-        return HandleCommandResult(stdout=out)
-
-    @orchestrator._cli_write_command(
-        'upgrade start',
-        'name=image,type=CephString,req=false '
-        'name=ceph_version,type=CephString,req=false',
-        desc='Initiate upgrade')
-    def _upgrade_start(self, image=None, ceph_version=None):
-        completion = self.upgrade_start(image, ceph_version)
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
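-    # Usage sketch for the upgrade lifecycle, using the command prefixes
-    # declared in this class (the version string is hypothetical):
-    #
-    #   ceph upgrade check --ceph_version 15.2.0
-    #   ceph upgrade start --ceph_version 15.2.0
-    #   ceph upgrade status
-    #   ceph upgrade pause
-    #   ceph upgrade resume
-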
-    @orchestrator._cli_write_command(
-        'upgrade pause',
-        desc='Pause an in-progress upgrade')
-    def _upgrade_pause(self):
-        completion = self.upgrade_pause()
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'upgrade resume',
-        desc='Resume paused upgrade')
-    def _upgrade_resume(self):
-        completion = self.upgrade_resume()
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
-
-    @orchestrator._cli_write_command(
-        'upgrade stop',
-        desc='Stop an in-progress upgrade')
-    def _upgrade_stop(self):
-        completion = self.upgrade_stop()
-        self._orchestrator_wait([completion])
-        orchestrator.raise_if_exception(completion)
-        return HandleCommandResult(stdout=completion.result_str())
diff --git a/src/pybind/mgr/orchestrator_cli/tox.ini b/src/pybind/mgr/orchestrator_cli/tox.ini
deleted file mode 100644 (file)
index 60a6902..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-[tox]
-envlist = py3
-skipsdist = true
-toxworkdir = {env:CEPH_BUILD_DIR}/orchestrator_cli
-minversion = 2.5
-
-[testenv]
-deps = -rrequirements.txt
-setenv=
-    UNITTEST = true
-    py3:  PYTHONPATH = {toxinidir}/../../../../build/lib/cython_modules/lib.3
-
-commands=
-    {envbindir}/py.test .
index d9ea1ef5fd0575c43e3fab0b53f325a3bdd2c4a5..d0c2fdf597a78e4bb92915594900c5a97efddb91 100644 (file)
@@ -16,8 +16,7 @@ commands = mypy --config-file=../../mypy.ini \
            cephadm/module.py \
            mgr_module.py \
            mgr_util.py \
-           orchestrator.py \
-           orchestrator_cli/module.py \
+           orchestrator/__init__.py \
            progress/module.py \
            rook/module.py \
            test_orchestrator/module.py