From 2e6ac9a43d2c47b30bc44bec403d5721aa056e3b Mon Sep 17 00:00:00 2001
From: John Spray
Date: Thu, 2 Aug 2018 15:32:38 +0100
Subject: [PATCH] doc: add general documentation on orchestrator

Signed-off-by: John Spray
---
 doc/mgr/index.rst                |   2 +
 doc/mgr/orchestrator_cli.rst     |  54 +++++++++++
 doc/mgr/orchestrator_modules.rst | 158 +++++++++++++++++++++++++++++++
 doc/mgr/plugins.rst              |  18 +++-
 src/pybind/mgr/orchestrator.py   | 112 +++++++--------------
 5 files changed, 264 insertions(+), 80 deletions(-)
 create mode 100644 doc/mgr/orchestrator_cli.rst
 create mode 100644 doc/mgr/orchestrator_modules.rst

diff --git a/doc/mgr/index.rst b/doc/mgr/index.rst
index ff85b8de4d7a9..e00c0381a052b 100644
--- a/doc/mgr/index.rst
+++ b/doc/mgr/index.rst
@@ -27,6 +27,7 @@ sensible.
     Installation and Configuration
     Writing plugins
+    Writing orchestrator plugins <orchestrator_modules>
     Balancer plugin
     Dashboard plugin
     Local pool plugin
@@ -40,4 +41,5 @@ sensible.
     Iostat plugin
     Crash plugin
     Devicehealth plugin
+    Orchestrator CLI plugin <orchestrator_cli>
     Rook plugin
diff --git a/doc/mgr/orchestrator_cli.rst b/doc/mgr/orchestrator_cli.rst
new file mode 100644
index 0000000000000..5b4e4de497fe0
--- /dev/null
+++ b/doc/mgr/orchestrator_cli.rst
@@ -0,0 +1,54 @@
+
+.. _orchestrator-cli-module:
+
+================
+Orchestrator CLI
+================
+
+This module provides a command line interface (CLI) to orchestrator
+modules (ceph-mgr modules which interface with external orchestration services).
+
+Configuration
+=============
+
+You can select the orchestrator module to use with the ``set backend`` command:
+
+::
+
+    ceph orchestrator set backend <module>
+
+For example, to enable the Rook orchestrator module and use it with the CLI:
+
+::
+
+    ceph mgr module enable orchestrator_cli
+    ceph mgr module enable rook
+    ceph orchestrator set backend rook
+
+
+Usage
+=====
+
+Print a list of discovered devices, grouped by node and optionally
+filtered to a particular node:
+
+::
+
+    orchestrator device ls [node]
+
+Query the status of a particular service (mon, osd, mds, rgw). For OSDs
+the id is the numeric OSD ID; for MDS services it is the filesystem name:
+
+::
+
+    orchestrator service status <type> <id>
+
+Create a service. For an OSD, the "what" is <node>:<device>, where the
+device naming should match what was reported in ``device ls``. For an MDS
+service, the "what" is the filesystem name:
+
+::
+
+    orchestrator service add <type> <what>
+
+
diff --git a/doc/mgr/orchestrator_modules.rst b/doc/mgr/orchestrator_modules.rst
new file mode 100644
index 0000000000000..d417e573c6472
--- /dev/null
+++ b/doc/mgr/orchestrator_modules.rst
@@ -0,0 +1,158 @@
+
+
+.. _orchestrator-modules:
+
+.. py:currentmodule:: orchestrator
+
+ceph-mgr orchestrator modules
+=============================
+
+.. warning::
+
+    This is developer documentation, describing Ceph internals that
+    are only relevant to people writing ceph-mgr orchestrator modules.
+
+In this context, *orchestrator* refers to some external service that
+provides the ability to discover devices and create Ceph services. This
+includes external projects such as ceph-ansible, DeepSea, and Rook.
+
+An *orchestrator module* is a ceph-mgr module (:ref:`mgr-module-dev`)
+which implements common management operations using a particular
+orchestrator.
+
+Orchestrator modules subclass the ``Orchestrator`` class: this class is
+an interface that only provides method definitions to be implemented
+by subclasses.
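+
+As a purely illustrative sketch (the class name and method bodies below are
+placeholders, not taken from any real backend), an orchestrator module
+combines the usual ceph-mgr module base class with this interface and fills
+in the operations that its backend supports:
+
+.. code-block:: python
+
+    import orchestrator
+    from mgr_module import MgrModule
+
+    class MyOrchestrator(MgrModule, orchestrator.Orchestrator):
+        # Hypothetical skeleton: a real module would implement these (and
+        # the other interface methods, such as describe_service and
+        # create_osds) by talking to its external orchestration service.
+        def get_inventory(self, node_filter=None):
+            # Return a read completion that will yield InventoryNode objects.
+            raise NotImplementedError()
+
+        def wait(self, completions):
+            # Progress any completions that are not yet complete, e.g. by
+            # polling the external orchestrator.
+            raise NotImplementedError()
+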
The purpose of defining this common interface +for different orchestrators is to enable common UI code, such as +the dashboard, to work with various different backends. + +Behind all the abstraction, the purpose of orchestrator modules is simple: +enable Ceph to do things like discover available hardware, create and +destroy OSDs, and run MDS and RGW services. + +A tutorial is not included here: for full and concrete examples, see +the existing implemented orchestrator modules in the Ceph source tree. + +Glossary +-------- + +Stateful service + a daemon that uses local storage, such as OSD or mon. + +Stateless service + a daemon that doesn't use any local storage, such + as an MDS, RGW, nfs-ganesha, iSCSI gateway. + +Label + arbitrary string tags that may be applied by administrators + to nodes. Typically administrators use labels to indicate + which nodes should run which kinds of service. Labels are + advisory (from human input) and do not guarantee that nodes + have particular physical capabilities. + +Drive group + collection of block devices with common/shared OSD + formatting (typically one or more SSDs acting as + journals/dbs for a group of HDDs). + +Placement + choice of which node is used to run a service. + +Key Concepts +------------ + +The underlying orchestrator remains the source of truth for information +about whether a service is running, what is running where, which +nodes are available, etc. Orchestrator modules should avoid taking +any internal copies of this information, and read it directly from +the orchestrator backend as much as possible. + +Bootstrapping nodes and adding them to the underlying orchestration +system is outside the scope of Ceph's orchestrator interface. Ceph +can only work on nodes when the orchestrator is already aware of them. + +Calls to orchestrator modules are all asynchronous, and return *completion* +objects (see below) rather than returning values immediately. + +Where possible, placement of stateless services should be left up to the +orchestrator. + +Completions and batching +------------------------ + +All methods that read or modify the state of the system can potentially +be long running. To handle that, all such methods return a *completion* +object (a *ReadCompletion* or a *WriteCompletion*). Orchestrator modules +must implement the *wait* method: this takes a list of completions, and +is responsible for checking if they're finished, and advancing the underlying +operations as needed. + +Each orchestrator module implements its own underlying mechanisms +for completions. This might involve running the underlying operations +in threads, or batching the operations up before later executing +in one go in the background. If implementing such a batching pattern, the +module would do no work on any operation until it appeared in a list +of completions passed into *wait*. + +*WriteCompletion* objects have a two-stage execution. First they become +*persistent*, meaning that the write has made it to the orchestrator +itself, and been persisted there (e.g. a manifest file has been updated). +If ceph-mgr crashed at this point, the operation would still eventually take +effect. Second, the completion becomes *effective*, meaning that the operation has really happened (e.g. a service has actually been started). + +.. automethod:: Orchestrator.wait + +.. autoclass:: ReadCompletion +.. 
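+
+As a simplified sketch of the batching pattern described above (the class,
+attribute, and helper names here are illustrative, not part of the
+interface), a backend might store the deferred operation inside the
+completion object and only run it when ``wait`` is called:
+
+.. code-block:: python
+
+    import orchestrator
+
+    class DeferredReadCompletion(orchestrator.ReadCompletion):
+        # Hypothetical completion type for a batching backend.
+        def __init__(self, read_fn):
+            self._read_fn = read_fn  # callable that queries the backend
+            self.result = None
+            self.done = False
+
+    class BatchingBackend(orchestrator.Orchestrator):
+        # A real module would also inherit from MgrModule; this sketch only
+        # shows how completions might be driven.
+        def get_inventory(self, node_filter=None):
+            # No work happens yet: the read is deferred into the completion.
+            # _fetch_inventory is a made-up helper for this sketch.
+            return DeferredReadCompletion(
+                lambda: self._fetch_inventory(node_filter))
+
+        def wait(self, completions):
+            # Progress anything that is not finished. A real backend might
+            # instead poll jobs that it started earlier.
+            for c in completions:
+                if isinstance(c, DeferredReadCompletion) and not c.done:
+                    c.result = c._read_fn()
+                    c.done = True
+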
+.. autoclass:: WriteCompletion
+
+Placement
+---------
+
+In general, stateless services do not require any specific placement
+rules, as they can run anywhere that sufficient system resources
+are available. However, some orchestrators may not include the
+functionality to choose a location in this way, so we can optionally
+specify a location when creating a stateless service.
+
+OSD services generally require a specific placement choice, as this
+will determine which storage devices are used.
+
+Excluded functionality
+----------------------
+
+- Ceph's orchestrator interface is not a general-purpose framework for
+  managing Linux servers -- it is deliberately constrained to manage
+  the Ceph cluster's services only.
+- Multipathed storage is not handled (multipathing is unnecessary for
+  Ceph clusters). Each drive is assumed to be visible only on
+  a single node.
+
+Inventory and status
+--------------------
+
+.. automethod:: Orchestrator.get_inventory
+.. autoclass:: InventoryFilter
+.. autoclass:: InventoryNode
+.. autoclass:: InventoryDevice
+
+.. automethod:: Orchestrator.describe_service
+.. autoclass:: ServiceDescription
+.. autoclass:: ServiceLocation
+
+OSD management
+--------------
+
+.. automethod:: Orchestrator.create_osds
+.. automethod:: Orchestrator.replace_osds
+.. automethod:: Orchestrator.remove_osds
+.. autoclass:: OsdCreationSpec
+.. autoclass:: DriveGroupSpec
+
+Upgrades
+--------
+
+.. automethod:: Orchestrator.upgrade_available
+.. automethod:: Orchestrator.upgrade_start
+.. automethod:: Orchestrator.upgrade_status
+.. autoclass:: UpgradeSpec
+.. autoclass:: UpgradeStatusSpec
diff --git a/doc/mgr/plugins.rst b/doc/mgr/plugins.rst
index 6ccb1bf38ac3c..d1196dea6f7c5 100644
--- a/doc/mgr/plugins.rst
+++ b/doc/mgr/plugins.rst
@@ -1,6 +1,14 @@
-ceph-mgr plugin author guide
-============================
+
+.. _mgr-module-dev:
+
+ceph-mgr module developer's guide
+=================================
+
+.. warning::
+
+    This is developer documentation, describing Ceph internals that
+    are only relevant to people writing ceph-mgr modules.
 
 Creating a plugin
 -----------------
 
@@ -18,6 +26,12 @@ The most important methods to override are:
 * a ``handle_command`` member function if your module exposes
   CLI commands.
 
+Some modules interface with external orchestrators to deploy
+Ceph services. These also inherit from ``Orchestrator``, which adds
+additional methods to the base ``MgrModule`` class. See
+:ref:`Orchestrator modules <orchestrator-modules>` for more on
+creating these modules.
+
 Installing a plugin
 -------------------
 
diff --git a/src/pybind/mgr/orchestrator.py b/src/pybind/mgr/orchestrator.py
index 4bec52b6cdab0..38033a8320325 100644
--- a/src/pybind/mgr/orchestrator.py
+++ b/src/pybind/mgr/orchestrator.py
@@ -2,62 +2,7 @@
 """
 ceph-mgr orchestrator interface
 
-This is a DRAFT for discussion.
-
-Goal: enable UI workflows for cluster service management
-      (such as creating OSDs, in addition to stateless services)
-      using common concepts that are implemented by
-      diverse backends such as Rook, DeepSea, ceph-ansible
-
-Concepts:
-    "Stateful service": a daemon that uses local storage, such as OSD or mon.
-    "Stateless service": a daemon that doesn't use any local storage, such
-                         as an MDS, RGW, nfs-ganesha, iSCSI gateway.
-    "Label": arbitrary string tags that may be applied by administrators
-             to nodes. Typically administrators use labels to indicate
-             which nodes should run which kinds of service.
Labels are - advisory (from human input) and do not guarantee that nodes - have particular physical capabilities. - "Drive group": collection of block devices with common/shared OSD - formatting (typically one or more SSDs acting as - journals/dbs for a group of HDDs). - "Placement": choice of which node is used to run a service. - -Design choices: - 1. The orchestrator is to be the source of truth for - all the physical information, and will be queried directly - as needed (i.e. no in-Ceph database of hardware etc). - 2. The orchestrator handles placement of collections of stateless - services. - 3. The orchestrator accepts explicit placement of individual stateful - services, and optionally also accepts label-based automatic placement. - (i.e. it *must* support "create OSD at host1:/dev/sdb", and it *may* - support "create OSDs on nodes with label=ceph-osd") - 4. Bootstrapping nodes and connecting them to the orchestrator's - infrastructure is out of scope: this interface operates only - on nodes that are already visible to the orchestrator. - 5. Methods all run in background, returning an instance of WriteCompletion - or ReadCompletion, to be polled by the caller using the wait() method - -Optional features: - 1. Extensions to OSDs, such as block-level encryption. See OsdSpec.extended - 2. Label-based placement of OSDs. If an orchestrator does not support - a labelling concept then only explicit per-node placement will work. - 3. Explicit placement of stateless services. Some orchestrators - may only support a basic round-robin placement of stateless services, - in which case they would also enable users to do explicit placement - for - -Excluded functionality: - 1. No support for multipathed drives: all block devices are to be - reported from one node only. - 2. No networking inventory or configuration. Network configuration - is not Ceph-specific functionality, and by the time ceph-mgr - starts, we know that some external entity has already taken - care of at least the public network configuration. This does - not preclude orchestrators implementing smart networking functionality - internally, it just isn't exposed up into ceph-mgr. - 3. No OSD configuration outside the scope of Drive Group rules. +Please see the ceph-mgr module developer's guide for more information. """ @@ -157,6 +102,13 @@ class Orchestrator(object): while you scan hosts every time. """ + def is_orchestrator_module(self): + """ + Enable other modules to interrogate this module to discover + whether it's usable as an orchestrator module. + """ + return True + def wait(self, completions): """ Given a list of Completion instances, progress any which are @@ -172,7 +124,11 @@ class Orchestrator(object): raise NotImplementedError() def get_inventory(self, node_filter=None): - # Return list of InventoryHost + """ + + :param node_filter: + :return: list of InventoryNode + """ raise NotImplementedError() def describe_service(self, service_type, service_id): @@ -187,25 +143,6 @@ class Orchestrator(object): """ raise NotImplementedError() - def add_mon(self, node_name): - """ - We operate on a node rather than a particular device: it is - assumed/expected that proper SSD storage is already available - and accessible in /var. - - :param node_name: - :return: - """ - raise NotImplementedError() - - def remove_mon(self, node_name): - """ - - :param node_name: - :return: - """ - raise NotImplementedError() - def create_osds(self, osd_spec): """ Create one or more OSDs within a single Drive Group. 
@@ -248,6 +185,25 @@ class Orchestrator(object): def remove_stateless_service(self, service_type, id_): raise NotImplementedError() + def add_mon(self, node_name): + """ + We operate on a node rather than a particular device: it is + assumed/expected that proper SSD storage is already available + and accessible in /var. + + :param node_name: + :return: + """ + raise NotImplementedError() + + def remove_mon(self, node_name): + """ + + :param node_name: + :return: + """ + raise NotImplementedError() + def upgrade_start(self, upgrade_spec): assert isinstance(upgrade_spec, UpgradeSpec) raise NotImplementedError() @@ -441,7 +397,7 @@ class InventoryFilter(object): self.nodes = None # Optional: get info about certain named nodes only -class InventoryBlockDevice(object): +class InventoryDevice(object): """ When fetching inventory, block devices are reported in this format. @@ -475,4 +431,4 @@ class InventoryNode(object): def __init__(self, name, devices): assert isinstance(devices, list) self.name = name # unique within cluster. For example a hostname. - self.devices = devices # list of InventoryBlockDevice + self.devices = devices # list of InventoryDevice -- 2.39.5