From 2e6ac9a43d2c47b30bc44bec403d5721aa056e3b Mon Sep 17 00:00:00 2001
From: John Spray
Date: Thu, 2 Aug 2018 15:32:38 +0100
Subject: [PATCH] doc: add general documentation on orchestrator

Signed-off-by: John Spray
---
 doc/mgr/index.rst                |   2 +
 doc/mgr/orchestrator_cli.rst     |  54 +++++++++++
 doc/mgr/orchestrator_modules.rst | 158 +++++++++++++++++++++++++++++++
 doc/mgr/plugins.rst              |  18 +++-
 src/pybind/mgr/orchestrator.py   | 112 +++++++--------------
 5 files changed, 264 insertions(+), 80 deletions(-)
 create mode 100644 doc/mgr/orchestrator_cli.rst
 create mode 100644 doc/mgr/orchestrator_modules.rst

diff --git a/doc/mgr/index.rst b/doc/mgr/index.rst
index ff85b8de4d7a9..e00c0381a052b 100644
--- a/doc/mgr/index.rst
+++ b/doc/mgr/index.rst
@@ -27,6 +27,7 @@ sensible.
     Installation and Configuration
     Writing plugins
+    Writing orchestrator plugins <orchestrator_modules>
     Balancer plugin
     Dashboard plugin
     Local pool plugin
@@ -40,4 +41,5 @@ sensible.
     Iostat plugin
     Crash plugin
     Devicehealth plugin
+    Orchestrator CLI plugin <orchestrator_cli>
     Rook plugin
diff --git a/doc/mgr/orchestrator_cli.rst b/doc/mgr/orchestrator_cli.rst
new file mode 100644
index 0000000000000..5b4e4de497fe0
--- /dev/null
+++ b/doc/mgr/orchestrator_cli.rst
@@ -0,0 +1,54 @@
+
+.. _orchestrator-cli-module:
+
+================
+Orchestrator CLI
+================
+
+This module provides a command line interface (CLI) to orchestrator
+modules (ceph-mgr modules which interface with external orchestration services).
+
+Configuration
+=============
+
+You can select the orchestrator module to use with the ``set backend`` command:
+
+::
+
+    ceph orchestrator set backend <module>
+
+For example, to enable the Rook orchestrator module and use it with the CLI:
+
+::
+
+    ceph mgr module enable orchestrator_cli
+    ceph mgr module enable rook
+    ceph orchestrator set backend rook
+
+
+Usage
+=====
+
+Print a list of discovered devices, grouped by node and optionally
+filtered to a particular node:
+
+::
+
+    orchestrator device ls [node]
+
+Query the status of a particular service (mon, osd, mds, rgw). For OSDs
+the id is the numeric OSD ID; for MDS services it is the filesystem name:
+
+::
+
+    orchestrator service status <type> <id>
+
+Create a service. For an OSD, the "what" is <node>:<device>, where the
+device naming should match what was reported in ``device ls``. For an MDS
+service, the "what" is the filesystem name:
+
+::
+
+    orchestrator service add <type> <what>
+
+
diff --git a/doc/mgr/orchestrator_modules.rst b/doc/mgr/orchestrator_modules.rst
new file mode 100644
index 0000000000000..d417e573c6472
--- /dev/null
+++ b/doc/mgr/orchestrator_modules.rst
@@ -0,0 +1,158 @@
+
+
+.. _orchestrator-modules:
+
+.. py:currentmodule:: orchestrator
+
+ceph-mgr orchestrator modules
+=============================
+
+.. warning::
+
+    This is developer documentation, describing Ceph internals that
+    are only relevant to people writing ceph-mgr orchestrator modules.
+
+In this context, *orchestrator* refers to some external service that
+provides the ability to discover devices and create Ceph services. This
+includes external projects such as ceph-ansible, DeepSea, and Rook.
+
+An *orchestrator module* is a ceph-mgr module (:ref:`mgr-module-dev`)
+which implements common management operations using a particular
+orchestrator.
+
+Orchestrator modules subclass the ``Orchestrator`` class: this class is
+an interface that only provides method definitions to be implemented
+by subclasses.
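+
+As a purely illustrative sketch (the class name and method bodies below are
+placeholders, not taken from any real backend), an orchestrator module
+combines the usual ceph-mgr module base class with this interface and fills
+in the operations that its backend supports:
+
+.. code-block:: python
+
+    import orchestrator
+    from mgr_module import MgrModule
+
+    class MyOrchestrator(MgrModule, orchestrator.Orchestrator):
+        # Hypothetical skeleton: a real module would implement these (and
+        # the other interface methods, such as describe_service and
+        # create_osds) by talking to its external orchestration service.
+        def get_inventory(self, node_filter=None):
+            # Return a read completion that will yield InventoryNode objects.
+            raise NotImplementedError()
+
+        def wait(self, completions):
+            # Progress any completions that are not yet complete, e.g. by
+            # polling the external orchestrator.
+            raise NotImplementedError()
+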
The purpose of defining this common interface +for different orchestrators is to enable common UI code, such as +the dashboard, to work with various different backends. + +Behind all the abstraction, the purpose of orchestrator modules is simple: +enable Ceph to do things like discover available hardware, create and +destroy OSDs, and run MDS and RGW services. + +A tutorial is not included here: for full and concrete examples, see +the existing implemented orchestrator modules in the Ceph source tree. + +Glossary +-------- + +Stateful service + a daemon that uses local storage, such as OSD or mon. + +Stateless service + a daemon that doesn't use any local storage, such + as an MDS, RGW, nfs-ganesha, iSCSI gateway. + +Label + arbitrary string tags that may be applied by administrators + to nodes. Typically administrators use labels to indicate + which nodes should run which kinds of service. Labels are + advisory (from human input) and do not guarantee that nodes + have particular physical capabilities. + +Drive group + collection of block devices with common/shared OSD + formatting (typically one or more SSDs acting as + journals/dbs for a group of HDDs). + +Placement + choice of which node is used to run a service. + +Key Concepts +------------ + +The underlying orchestrator remains the source of truth for information +about whether a service is running, what is running where, which +nodes are available, etc. Orchestrator modules should avoid taking +any internal copies of this information, and read it directly from +the orchestrator backend as much as possible. + +Bootstrapping nodes and adding them to the underlying orchestration +system is outside the scope of Ceph's orchestrator interface. Ceph +can only work on nodes when the orchestrator is already aware of them. + +Calls to orchestrator modules are all asynchronous, and return *completion* +objects (see below) rather than returning values immediately. + +Where possible, placement of stateless services should be left up to the +orchestrator. + +Completions and batching +------------------------ + +All methods that read or modify the state of the system can potentially +be long running. To handle that, all such methods return a *completion* +object (a *ReadCompletion* or a *WriteCompletion*). Orchestrator modules +must implement the *wait* method: this takes a list of completions, and +is responsible for checking if they're finished, and advancing the underlying +operations as needed. + +Each orchestrator module implements its own underlying mechanisms +for completions. This might involve running the underlying operations +in threads, or batching the operations up before later executing +in one go in the background. If implementing such a batching pattern, the +module would do no work on any operation until it appeared in a list +of completions passed into *wait*. + +*WriteCompletion* objects have a two-stage execution. First they become +*persistent*, meaning that the write has made it to the orchestrator +itself, and been persisted there (e.g. a manifest file has been updated). +If ceph-mgr crashed at this point, the operation would still eventually take +effect. Second, the completion becomes *effective*, meaning that the operation has really happened (e.g. a service has actually been started). + +.. automethod:: Orchestrator.wait + +.. autoclass:: ReadCompletion +.. 
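+
+As a simplified sketch of the batching pattern described above (the class,
+attribute, and helper names here are illustrative, not part of the
+interface), a backend might store the deferred operation inside the
+completion object and only run it when ``wait`` is called:
+
+.. code-block:: python
+
+    import orchestrator
+
+    class DeferredReadCompletion(orchestrator.ReadCompletion):
+        # Hypothetical completion type for a batching backend.
+        def __init__(self, read_fn):
+            self._read_fn = read_fn  # callable that queries the backend
+            self.result = None
+            self.done = False
+
+    class BatchingBackend(orchestrator.Orchestrator):
+        # A real module would also inherit from MgrModule; this sketch only
+        # shows how completions might be driven.
+        def get_inventory(self, node_filter=None):
+            # No work happens yet: the read is deferred into the completion.
+            # _fetch_inventory is a made-up helper for this sketch.
+            return DeferredReadCompletion(
+                lambda: self._fetch_inventory(node_filter))
+
+        def wait(self, completions):
+            # Progress anything that is not finished. A real backend might
+            # instead poll jobs that it started earlier.
+            for c in completions:
+                if isinstance(c, DeferredReadCompletion) and not c.done:
+                    c.result = c._read_fn()
+                    c.done = True
+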
+.. autoclass:: WriteCompletion
+
+Placement
+---------
+
+In general, stateless services do not require any specific placement
+rules, as they can run anywhere that sufficient system resources
+are available. However, some orchestrators may not include the
+functionality to choose a location in this way, so we can optionally
+specify a location when creating a stateless service.
+
+OSD services generally require a specific placement choice, as this
+will determine which storage devices are used.
+
+Excluded functionality
+----------------------
+
+- Ceph's orchestrator interface is not a general-purpose framework for
+  managing Linux servers -- it is deliberately constrained to manage
+  the Ceph cluster's services only.
+- Multipathed storage is not handled (multipathing is unnecessary for
+  Ceph clusters). Each drive is assumed to be visible only on
+  a single node.
+
+Inventory and status
+--------------------
+
+.. automethod:: Orchestrator.get_inventory
+.. autoclass:: InventoryFilter
+.. autoclass:: InventoryNode
+.. autoclass:: InventoryDevice
+
+.. automethod:: Orchestrator.describe_service
+.. autoclass:: ServiceDescription
+.. autoclass:: ServiceLocation
+
+OSD management
+--------------
+
+.. automethod:: Orchestrator.create_osds
+.. automethod:: Orchestrator.replace_osds
+.. automethod:: Orchestrator.remove_osds
+.. autoclass:: OsdCreationSpec
+.. autoclass:: DriveGroupSpec
+
+Upgrades
+--------
+
+.. automethod:: Orchestrator.upgrade_available
+.. automethod:: Orchestrator.upgrade_start
+.. automethod:: Orchestrator.upgrade_status
+.. autoclass:: UpgradeSpec
+.. autoclass:: UpgradeStatusSpec
diff --git a/doc/mgr/plugins.rst b/doc/mgr/plugins.rst
index 6ccb1bf38ac3c..d1196dea6f7c5 100644
--- a/doc/mgr/plugins.rst
+++ b/doc/mgr/plugins.rst
@@ -1,6 +1,14 @@
-ceph-mgr plugin author guide
-============================
+
+.. _mgr-module-dev:
+
+ceph-mgr module developer's guide
+=================================
+
+.. warning::
+
+    This is developer documentation, describing Ceph internals that
+    are only relevant to people writing ceph-mgr modules.
 
 Creating a plugin
 -----------------
 
@@ -18,6 +26,12 @@ The most important methods to override are:
 * a ``handle_command`` member function if your module exposes
   CLI commands.
 
+Some modules interface with external orchestrators to deploy
+Ceph services. These also inherit from ``Orchestrator``, which adds
+additional methods to the base ``MgrModule`` class. See
+:ref:`Orchestrator modules <orchestrator-modules>` for more on
+creating these modules.
+
 Installing a plugin
 -------------------
 
diff --git a/src/pybind/mgr/orchestrator.py b/src/pybind/mgr/orchestrator.py
index 4bec52b6cdab0..38033a8320325 100644
--- a/src/pybind/mgr/orchestrator.py
+++ b/src/pybind/mgr/orchestrator.py
@@ -2,62 +2,7 @@
 """
 ceph-mgr orchestrator interface
 
-This is a DRAFT for discussion.
-
-Goal: enable UI workflows for cluster service management
-      (such as creating OSDs, in addition to stateless services)
-      using common concepts that are implemented by
-      diverse backends such as Rook, DeepSea, ceph-ansible
-
-Concepts:
-    "Stateful service": a daemon that uses local storage, such as OSD or mon.
-    "Stateless service": a daemon that doesn't use any local storage, such
-                         as an MDS, RGW, nfs-ganesha, iSCSI gateway.
-    "Label": arbitrary string tags that may be applied by administrators
-             to nodes. Typically administrators use labels to indicate
-             which nodes should run which kinds of service.
Labels are - advisory (from human input) and do not guarantee that nodes - have particular physical capabilities. - "Drive group": collection of block devices with common/shared OSD - formatting (typically one or more SSDs acting as - journals/dbs for a group of HDDs). - "Placement": choice of which node is used to run a service. - -Design choices: - 1. The orchestrator is to be the source of truth for - all the physical information, and will be queried directly - as needed (i.e. no in-Ceph database of hardware etc). - 2. The orchestrator handles placement of collections of stateless - services. - 3. The orchestrator accepts explicit placement of individual stateful - services, and optionally also accepts label-based automatic placement. - (i.e. it *must* support "create OSD at host1:/dev/sdb", and it *may* - support "create OSDs on nodes with label=ceph-osd") - 4. Bootstrapping nodes and connecting them to the orchestrator's - infrastructure is out of scope: this interface operates only - on nodes that are already visible to the orchestrator. - 5. Methods all run in background, returning an instance of WriteCompletion - or ReadCompletion, to be polled by the caller using the wait() method - -Optional features: - 1. Extensions to OSDs, such as block-level encryption. See OsdSpec.extended - 2. Label-based placement of OSDs. If an orchestrator does not support - a labelling concept then only explicit per-node placement will work. - 3. Explicit placement of stateless services. Some orchestrators - may only support a basic round-robin placement of stateless services, - in which case they would also enable users to do explicit placement - for - -Excluded functionality: - 1. No support for multipathed drives: all block devices are to be - reported from one node only. - 2. No networking inventory or configuration. Network configuration - is not Ceph-specific functionality, and by the time ceph-mgr - starts, we know that some external entity has already taken - care of at least the public network configuration. This does - not preclude orchestrators implementing smart networking functionality - internally, it just isn't exposed up into ceph-mgr. - 3. No OSD configuration outside the scope of Drive Group rules. +Please see the ceph-mgr module developer's guide for more information. """ @@ -157,6 +102,13 @@ class Orchestrator(object): while you scan hosts every time. """ + def is_orchestrator_module(self): + """ + Enable other modules to interrogate this module to discover + whether it's usable as an orchestrator module. + """ + return True + def wait(self, completions): """ Given a list of Completion instances, progress any which are @@ -172,7 +124,11 @@ class Orchestrator(object): raise NotImplementedError() def get_inventory(self, node_filter=None): - # Return list of InventoryHost + """ + + :param node_filter: + :return: list of InventoryNode + """ raise NotImplementedError() def describe_service(self, service_type, service_id): @@ -187,25 +143,6 @@ class Orchestrator(object): """ raise NotImplementedError() - def add_mon(self, node_name): - """ - We operate on a node rather than a particular device: it is - assumed/expected that proper SSD storage is already available - and accessible in /var. - - :param node_name: - :return: - """ - raise NotImplementedError() - - def remove_mon(self, node_name): - """ - - :param node_name: - :return: - """ - raise NotImplementedError() - def create_osds(self, osd_spec): """ Create one or more OSDs within a single Drive Group. 
@@ -248,6 +185,25 @@ class Orchestrator(object): def remove_stateless_service(self, service_type, id_): raise NotImplementedError() + def add_mon(self, node_name): + """ + We operate on a node rather than a particular device: it is + assumed/expected that proper SSD storage is already available + and accessible in /var. + + :param node_name: + :return: + """ + raise NotImplementedError() + + def remove_mon(self, node_name): + """ + + :param node_name: + :return: + """ + raise NotImplementedError() + def upgrade_start(self, upgrade_spec): assert isinstance(upgrade_spec, UpgradeSpec) raise NotImplementedError() @@ -441,7 +397,7 @@ class InventoryFilter(object): self.nodes = None # Optional: get info about certain named nodes only -class InventoryBlockDevice(object): +class InventoryDevice(object): """ When fetching inventory, block devices are reported in this format. @@ -475,4 +431,4 @@ class InventoryNode(object): def __init__(self, name, devices): assert isinstance(devices, list) self.name = name # unique within cluster. For example a hostname. - self.devices = devices # list of InventoryBlockDevice + self.devices = devices # list of InventoryDevice -- 2.39.5