/src/pybind/mgr/orchestrator_cli @ceph/orchestrators
/src/pybind/mgr/orchestrator.py @ceph/orchestrators
/src/pybind/mgr/rook @ceph/orchestrators
-/src/pybind/mgr/ssh @ceph/orchestrators
+/src/pybind/mgr/cephadm @ceph/orchestrators
/src/pybind/mgr/test_orchestrator @ceph/orchestrators
/src/python-common/ceph/deployment @ceph/orchestrators
/qa/workunits/cephadm/test_cephadm.sh @ceph/orchestrators
/qa/tasks/ceph2.py @ceph/orchestrators
/qa/tasks/mgr/test_orchestrator_cli.py @ceph/orchestrators
-/qa/tasks/mgr/test_ssh_orchestrator.py @ceph/orchestrators
+/qa/tasks/mgr/test_cephadm_orchestrator.py @ceph/orchestrators
/doc/mgr/orchestrator_cli.rst @ceph/orchestrators
/doc/mgr/orchestrator_modules.rst @ceph/orchestrators
Recommends: ceph-mgr-diskprediction-cloud = %{_epoch_prefix}%{version}-%{release}
Recommends: ceph-mgr-rook = %{_epoch_prefix}%{version}-%{release}
Recommends: ceph-mgr-k8sevents = %{_epoch_prefix}%{version}-%{release}
-Recommends: ceph-mgr-ssh = %{_epoch_prefix}%{version}-%{release}
+Recommends: ceph-mgr-cephadm = %{_epoch_prefix}%{version}-%{release}
Recommends: python%{_python_buildid}-influxdb
%endif
%if 0%{?rhel} == 7
ceph-mgr-k8sevents is a ceph-mgr plugin that sends every ceph-events
to kubernetes' events API
-%package mgr-ssh
-Summary: Ceph Manager plugin for SSH-based orchestration
+%package mgr-cephadm
+Summary: Ceph Manager plugin for cephadm-based orchestration
BuildArch: noarch
%if 0%{?suse_version}
Group: System/Filesystems
%if 0%{?rhel} || 0%{?fedora}
Requires: openssh-clients
%endif
-%description mgr-ssh
-ceph-mgr-ssh is a ceph-mgr module for orchestration functions using
-direct SSH connections for management operations.
+%description mgr-cephadm
+ceph-mgr-cephadm is a ceph-mgr module for orchestration functions using
+the integrated cephadm deployment tool for management operations.
%package fuse
Summary: Ceph fuse-based client
%{?python_provide:%python_provide python-ceph-common}
%description -n python-ceph-common
This package contains data structures, classes and functions used by Ceph.
-It also contains utilities used for the SSH orchestrator.
+It also contains utilities used for the cephadm orchestrator.
%endif
%package -n python%{python3_pkgversion}-ceph-common
%{?python_provide:%python_provide python%{python3_pkgversion}-ceph-common}
%description -n python%{python3_pkgversion}-ceph-common
This package contains data structures, classes and functions used by Ceph.
-It also contains utilities used for the SSH orchestrator.
+It also contains utilities used for the cephadm orchestrator.
%if 0%{with cephfs_shell}
%package -n cephfs-shell
%pre -n cephadm
# create user
if ! getent passwd | grep -q '^cephadm:'; then
- useradd -r -s /bin/bash -c "cephadm user for mgr/ssh" -m cephadm
+ useradd -r -s /bin/bash -c "cephadm user for mgr/cephadm" -m cephadm
fi
# set up (initially empty) .ssh/authorized_keys file
if ! test -d /home/cephadm/.ssh; then
/usr/bin/systemctl try-restart ceph-mgr.target >/dev/null 2>&1 || :
fi
-%files mgr-ssh
-%{_datadir}/ceph/mgr/ssh
+%files mgr-cephadm
+%{_datadir}/ceph/mgr/cephadm
-%post mgr-ssh
+%post mgr-cephadm
if [ $1 -eq 1 ] ; then
/usr/bin/systemctl try-restart ceph-mgr.target >/dev/null 2>&1 || :
fi
-%postun mgr-ssh
+%postun mgr-cephadm
if [ $1 -eq 1 ] ; then
/usr/bin/systemctl try-restart ceph-mgr.target >/dev/null 2>&1 || :
fi
--- /dev/null
+usr/share/ceph/mgr/cephadm
--- /dev/null
+#!/bin/sh
+# vim: set noet ts=8:
+# postinst script for ceph-mgr-cephadm
+#
+# see: dh_installdeb(1)
+
+set -e
+
+# summary of how this script can be called:
+#
+# postinst configure <most-recently-configured-version>
+# old-postinst abort-upgrade <new-version>
+# conflictor's-postinst abort-remove in-favour <package> <new-version>
+# postinst abort-remove
+# deconfigured's-postinst abort-deconfigure in-favour <failed-install-package> <version> [<removing conflicting-package> <version>]
+#
+
+# for details, see http://www.debian.org/doc/debian-policy/ or
+# the debian-policy package
+
+case "$1" in
+ configure)
+ # attempt to load the plugin if the mgr is running
+ deb-systemd-invoke try-restart ceph-mgr.target
+ ;;
+ abort-upgrade|abort-remove|abort-deconfigure)
+ :
+ ;;
+
+ *)
+ echo "postinst called with unknown argument \`$1'" >&2
+ exit 1
+ ;;
+esac
+
+# dh_installdeb will replace this with shell code automatically
+# generated by other debhelper scripts.
+
+#DEBHELPER#
+
+exit 0
--- /dev/null
+#!/bin/sh
+# vim: set noet ts=8:
+
+set -e
+
+#DEBHELPER#
+
+exit 0
+++ /dev/null
-usr/share/ceph/mgr/ssh
+++ /dev/null
-#!/bin/sh
-# vim: set noet ts=8:
-# postinst script for ceph-mgr-ssh
-#
-# see: dh_installdeb(1)
-
-set -e
-
-# summary of how this script can be called:
-#
-# postinst configure <most-recently-configured-version>
-# old-postinst abort-upgrade <new-version>
-# conflictor's-postinst abort-remove in-favour <package> <new-version>
-# postinst abort-remove
-# deconfigured's-postinst abort-deconfigure in-favour <failed-install-package> <version> [<removing conflicting-package> <version>]
-#
-
-# for details, see http://www.debian.org/doc/debian-policy/ or
-# the debian-policy package
-
-case "$1" in
- configure)
- # attempt to load the plugin if the mgr is running
- deb-systemd-invoke try-restart ceph-mgr.target
- ;;
- abort-upgrade|abort-remove|abort-deconfigure)
- :
- ;;
-
- *)
- echo "postinst called with unknown argument \`$1'" >&2
- exit 1
- ;;
-esac
-
-# dh_installdeb will replace this with shell code automatically
-# generated by other debhelper scripts.
-
-#DEBHELPER#
-
-exit 0
-
-
+++ /dev/null
-#!/bin/sh
-# vim: set noet ts=8:
-
-set -e
-
-#DEBHELPER#
-
-exit 0
# 1. create user if not existing
if ! getent passwd | grep -q "^cephadm:"; then
echo -n "Adding system user cephadm.."
- adduser --quiet --system --disabled-password --gecos 'Ceph-dameon user for mgr/ssh' --shell /bin/bash cephadm 2>/dev/null || true
+    adduser --quiet --system --disabled-password --gecos 'Ceph daemon user for cephadm' --shell /bin/bash cephadm 2>/dev/null || true
echo "..done"
fi
ceph-mgr-diskprediction-cloud,
ceph-mgr-rook,
ceph-mgr-k8sevents,
- ceph-mgr-ssh
+ ceph-mgr-cephadm
Suggests: python-influxdb
Replaces: ceph (<< 0.93-417),
Breaks: ceph (<< 0.93-417),
ceph related events to the kubernetes events API, and track all events
that occur within the rook-ceph namespace.
-Package: ceph-mgr-ssh
+Package: ceph-mgr-cephadm
Architecture: all
Depends: ceph-mgr (= ${binary:Version}),
cephadm,
${misc:Depends},
${python:Depends},
openssh-client
-Description: ssh orchestrator plugin for ceph-mgr
+Description: cephadm orchestrator plugin for ceph-mgr
Ceph is a massively scalable, open-source, distributed
storage system that runs on commodity hardware and delivers object,
block and file system storage.
.
- This package contains the SSH plugin for ceph-mgr's orchestration
+ This package contains the cephadm plugin for ceph-mgr's orchestration
functionality, to allow ceph-mgr to perform orchestration functions
over a standard SSH connection.
block and file system storage.
.
This package contains data structures, classes and functions used by Ceph.
- It also contains utilities used for the SSH orchestrator.
+ It also contains utilities used for the cephadm orchestrator.
Package: python3-ceph-common
Architecture: all
block and file system storage.
.
This package contains data structures, classes and functions used by Ceph.
- It also contains utilities used for the SSH orchestrator.
+ It also contains utilities used for the cephadm orchestrator.
Package: libcephfs-java
Section: java
--- /dev/null
+====================
+cephadm orchestrator
+====================
+
+The cephadm orchestrator is an orchestrator module that does not rely on a separate
+system such as Rook or Ansible, but rather manages nodes in a cluster by
+establishing an SSH connection and issuing explicit management commands.
+
+Orchestrator modules only provide services to other modules, which in turn
+provide user interfaces. To try out the cephadm module, you might like
+to use the :ref:`Orchestrator CLI <orchestrator-cli-module>` module.
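+
+As a quick sketch (the same commands used by the bootstrap script and QA
+tests in this change), enable the module and select it as the backend:
+
+::
+
+   # ceph mgr module enable cephadm
+   # ceph orchestrator set backend cephadm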
+
+Requirements
+------------
+
+- The Python `remoto` library version 0.35 or newer
+
+Configuration
+-------------
+
+The cephadm orchestrator can be configured to use an SSH configuration file. This is
+useful for specifying private keys and other SSH connection options.
+
+::
+
+ # ceph config set mgr mgr/cephadm/ssh_config_file /path/to/config
+
+Alternatively, the contents of an SSH configuration file can be provided
+directly, without requiring a file system path accessible to the manager as
+the method above does.
+
+::
+
+ # ceph cephadm set-ssh-config -i /path/to/config
+
+To clear this value use the command:
+
+::
+
+ # ceph cephadm clear-ssh-config
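+
+If neither a file path nor a stored configuration is provided, the module
+falls back to a built-in default. As a rough sketch, a permissive lab
+configuration might look like the default shipped in this change:
+
+::
+
+   Host *
+   User root
+   StrictHostKeyChecking no
+   UserKnownHostsFile /dev/null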
Telemetry module <telemetry>
Iostat module <iostat>
Crash module <crash>
+ Insights module <insights>
Orchestrator CLI module <orchestrator_cli>
+ Cephadm orchestrator <cephadm>
Rook module <rook>
DeepSea module <deepsea>
- Insights module <insights>
Ansible module <ansible>
- SSH orchestrator <ssh>
+++ /dev/null
-================
-SSH orchestrator
-================
-
-The SSH orchestrator is an orchestrator module that does not rely on a separate
-system such as Rook or Ansible, but rather manages nodes in a cluster by
-establishing an SSH connection and issuing explicit management commands.
-
-Orchestrator modules only provide services to other modules, which in turn
-provide user interfaces. To try out the SSH module, you might like
-to use the :ref:`Orchestrator CLI <orchestrator-cli-module>` module.
-
-Requirements
-------------
-
-- The Python `remoto` library version 0.35 or newer
-
-Configuration
--------------
-
-The SSH orchestrator can be configured to use an SSH configuration file. This is
-useful for specifying private keys and other SSH connection options.
-
-::
-
- # ceph config set mgr mgr/ssh/ssh_config_file /path/to/config
-
-An SSH configuration file can be provided without requiring an accessible file
-system path as the method above does.
-
-::
-
- # ceph ssh set-ssh-config -i /path/to/config
-
-To clear this value use the command:
-
-::
-
- # ceph ssh clear-ssh-config
- ceph-mgr-diskprediction-cloud
- ceph-mgr-diskprediction-local
- ceph-mgr-rook
- - ceph-mgr-ssh
+ - ceph-mgr-cephadm
- ceph-fuse
- libcephfs2
- libcephfs-devel
--- /dev/null
+../.qa
\ No newline at end of file
--- /dev/null
+.qa/clusters/fixed-2.yaml
\ No newline at end of file
--- /dev/null
+overrides:
+ ceph2:
+ cephadm_mode: cephadm-package
+ install:
+ extra_packages: [cephadm]
--- /dev/null
+overrides:
+ ceph2:
+ cephadm_mode: root
--- /dev/null
+.qa/msgr
\ No newline at end of file
--- /dev/null
+tasks:
+- install:
+- ceph2:
--- /dev/null
+.qa/distros/supported-random-distro$
\ No newline at end of file
--- /dev/null
+../../basic/tasks/rados_api_tests.yaml
\ No newline at end of file
--- /dev/null
+../../basic/tasks/rados_python.yaml
\ No newline at end of file
--- /dev/null
+
+tasks:
+ - install:
+ - ceph:
+ # tests may leave mgrs broken, so don't try and call into them
+ # to invoke e.g. pg dump during teardown.
+ wait-for-scrub: false
+ log-whitelist:
+ - overall HEALTH_
+ - \(MGR_DOWN\)
+ - \(MGR_INSIGHTS_WARNING\)
+ - \(insights_health_check
+ - \(PG_
+ - replacing it with standby
+ - No standby daemons available
+ - cephfs_test_runner:
+ modules:
+ - tasks.mgr.test_cephadm_orchestrator
+++ /dev/null
-
-tasks:
- - install:
- - ceph:
- # tests may leave mgrs broken, so don't try and call into them
- # to invoke e.g. pg dump during teardown.
- wait-for-scrub: false
- log-whitelist:
- - overall HEALTH_
- - \(MGR_DOWN\)
- - \(MGR_INSIGHTS_WARNING\)
- - \(insights_health_check
- - \(PG_
- - replacing it with standby
- - No standby daemons available
- - cephfs_test_runner:
- modules:
- - tasks.mgr.test_ssh_orchestrator
+++ /dev/null
-../.qa
\ No newline at end of file
+++ /dev/null
-.qa/clusters/fixed-2.yaml
\ No newline at end of file
+++ /dev/null
-overrides:
- ceph2:
- cephadm_mode: cephadm-package
- install:
- extra_packages: [cephadm]
+++ /dev/null
-overrides:
- ceph2:
- cephadm_mode: root
+++ /dev/null
-.qa/msgr
\ No newline at end of file
+++ /dev/null
-tasks:
-- install:
-- ceph2:
+++ /dev/null
-.qa/distros/supported-random-distro$
\ No newline at end of file
+++ /dev/null
-../../basic/tasks/rados_api_tests.yaml
\ No newline at end of file
+++ /dev/null
-../../basic/tasks/rados_python.yaml
\ No newline at end of file
- ceph-mgr-diskprediction-local
- ceph-mgr-diskprediction-cloud
- ceph-mgr-rook
- - ceph-mgr-ssh
+ - ceph-mgr-cephadm
- cephadm
- ceph-mgr
- libcephfs2
- ceph-mgr-diskprediction-local
- ceph-mgr-diskprediction-cloud
- ceph-mgr-rook
- - ceph-mgr-ssh
+ - ceph-mgr-cephadm
- cephadm
- ceph-mgr
- libcephfs2
- ceph-mgr-diskprediction-local
- ceph-mgr-diskprediction-cloud
- ceph-mgr-rook
- - ceph-mgr-ssh
+ - ceph-mgr-cephadm
- cephadm
- ceph-mgr
- libcephfs2
- ceph-mgr-diskprediction-local
- ceph-mgr-diskprediction-cloud
- ceph-mgr-rook
- - ceph-mgr-ssh
+ - ceph-mgr-cephadm
- cephadm
extra_packages: ['librados2']
- install.upgrade:
- ceph-mgr-diskprediction-local
- ceph-mgr-diskprediction-cloud
- ceph-mgr-rook
- - ceph-mgr-ssh
+ - ceph-mgr-cephadm
- cephadm
extra_packages: ['librados2']
- install.upgrade:
- ceph-mgr-diskprediction-local
- ceph-mgr-diskprediction-cloud
- ceph-mgr-rook
- - ceph-mgr-ssh
+ - ceph-mgr-cephadm
- cephadm
extra_packages: ['librados2']
- install.upgrade:
- ceph-mgr-diskprediction-local
- ceph-mgr-diskprediction-cloud
- ceph-mgr-rook
- - ceph-mgr-ssh
+ - ceph-mgr-cephadm
- cephadm
extra_packages: ['librados2']
- install.upgrade:
- ceph-mgr-diskprediction-local
- ceph-mgr-diskprediction-cloud
- ceph-mgr-rook
- - ceph-mgr-ssh
+ - ceph-mgr-cephadm
- cephadm
extra_packages: ['librados2']
- print: "**** done install mimic"
- ceph-mgr-diskprediction-local
- ceph-mgr-diskprediction-cloud
- ceph-mgr-rook
- - ceph-mgr-ssh
+ - ceph-mgr-cephadm
- cephadm
extra_packages: ['librados2']
- print: "**** done installing mimic"
- ceph-mgr-diskprediction-local
- ceph-mgr-diskprediction-cloud
- ceph-mgr-rook
- - ceph-mgr-ssh
+ - ceph-mgr-cephadm
- cephadm
extra_packages: ['librados2']
- print: "**** done install mimic"
- ceph-mgr-diskprediction-local
- ceph-mgr-diskprediction-cloud
- ceph-mgr-rook
- - ceph-mgr-ssh
+ - ceph-mgr-cephadm
- cephadm
extra_packages: ['librados2']
- print: "**** done install nautilus"
--- /dev/null
+import json
+import logging
+from tempfile import NamedTemporaryFile
+from teuthology.exceptions import CommandFailedError
+from mgr_test_case import MgrTestCase
+
+log = logging.getLogger(__name__)
+
+class TestOrchestratorCli(MgrTestCase):
+ MGRS_REQUIRED = 1
+
+ def _orch_cmd(self, *args):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("orchestrator", *args)
+
+ def setUp(self):
+ super(TestOrchestratorCli, self).setUp()
+ self._load_module("orchestrator_cli")
+ self._load_module("cephadm")
+ self._orch_cmd("set", "backend", "cephadm")
+
+ def test_host_ls(self):
+ self._orch_cmd("host", "add", "osd0")
+ self._orch_cmd("host", "add", "mon0")
+ ret = self._orch_cmd("host", "ls")
+ self.assertIn("osd0", ret)
+ self.assertIn("mon0", ret)
+++ /dev/null
-import json
-import logging
-from tempfile import NamedTemporaryFile
-from teuthology.exceptions import CommandFailedError
-from mgr_test_case import MgrTestCase
-
-log = logging.getLogger(__name__)
-
-class TestOrchestratorCli(MgrTestCase):
- MGRS_REQUIRED = 1
-
- def _orch_cmd(self, *args):
- return self.mgr_cluster.mon_manager.raw_cluster_cmd("orchestrator", *args)
-
- def setUp(self):
- super(TestOrchestratorCli, self).setUp()
- self._load_module("orchestrator_cli")
- self._load_module("ssh")
- self._orch_cmd("set", "backend", "ssh")
-
- def test_host_ls(self):
- self._orch_cmd("host", "add", "osd0")
- self._orch_cmd("host", "add", "mon0")
- ret = self._orch_cmd("host", "ls")
- self.assertIn("osd0", ret)
- self.assertIn("mon0", ret)
# ssh
if not args.skip_ssh:
- logger.info('Enabling ssh module...')
- cli(['mgr', 'module', 'enable', 'ssh'])
+ logger.info('Enabling cephadm module...')
+ cli(['mgr', 'module', 'enable', 'cephadm'])
-    logger.info('Setting orchestrator backend to ssh...')
+    logger.info('Setting orchestrator backend to cephadm...')
- cli(['orchestrator', 'set', 'backend', 'ssh'])
+ cli(['orchestrator', 'set', 'backend', 'cephadm'])
logger.info('Generating ssh key...')
- cli(['ssh', 'generate-key'])
- ssh_pub = cli(['ssh', 'get-pub-key'])
+ cli(['cephadm', 'generate-key'])
+ ssh_pub = cli(['cephadm', 'get-pub-key'])
with open(args.output_pub_ssh_key, 'w') as f:
f.write(ssh_pub)
.add_see_also("mon_host"),
Option("container_image", Option::TYPE_STR, Option::LEVEL_BASIC)
- .set_description("container image (used by ssh orchestrator)")
+ .set_description("container image (used by cephadm orchestrator)")
.set_flag(Option::FLAG_STARTUP)
.set_default("ceph/daemon-base:latest-master-devel"),
profile_grants.push_back(MonCapGrant("auth", MON_CAP_R | MON_CAP_X));
profile_grants.push_back(MonCapGrant("config-key", MON_CAP_R | MON_CAP_W));
profile_grants.push_back(MonCapGrant("config", MON_CAP_R | MON_CAP_W));
- // ssh orchestrator provisions new daemon keys
+ // cephadm orchestrator provisions new daemon keys
profile_grants.push_back(MonCapGrant("auth get-or-create"));
profile_grants.push_back(MonCapGrant("auth rm"));
// tell commands (this is a bit of a kludge)
--- /dev/null
+.vagrant
+ssh-config
--- /dev/null
+Development
+===========
+
+
+There are multiple ways to set up a development environment for the cephadm orchestrator.
+In the following I'll use the `vstart` method.
+
+1) Make sure remoto is installed (0.35 or newer)
+
+2) Use vstart to spin up a cluster
+
+
+::
+
+ # ../src/vstart.sh -n --cephadm
+
+*Note that when you specify `--cephadm` you must have passwordless SSH access to localhost.*
+
+It will add your ~/.ssh/id_rsa and ~/.ssh/id_rsa.pub to `mgr/cephadm/ssh_identity_{key, pub}`
+and add your $HOSTNAME to the list of known hosts.
+
+This will also enable the cephadm mgr module and set it as the orchestrator backend.
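+
+You can inspect the identity and connection user that were set up (both
+commands are provided by the cephadm module in this change):
+
+::
+
+   # ceph cephadm get-pub-key
+   # ceph cephadm get-user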
+
+*Optional:*
+
+While the above is sufficient for most operations, you may want to add a second host to the mix.
+There is a `Vagrantfile` for creating a minimal cluster in `src/pybind/mgr/cephadm/`.
+
+If you wish to extend the one-node localhost cluster, e.g. to test more sophisticated OSD deployments, you can follow the next steps:
+
+From within the `src/pybind/mgr/cephadm` directory.
+
+
+1) Spawn VMs
+
+::
+
+ # vagrant up
+
+This will spawn three machines: mon0, mgr0 and osd0.
+
+NUM_DAEMONS can be used to increase the number of VMs created (defaults to 1).
+
+It will also come with the necessary packages preinstalled, as well as your ~/.ssh/id_rsa.pub key
+injected (for the root and vagrant users; the cephadm orchestrator currently connects as root).
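+
+For example, to bring up three machines of each type (assuming a working
+vagrant/libvirt setup as described above):
+
+::
+
+   # NUM_DAEMONS=3 vagrant up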
+
+
+2) Update the ssh-config
+
+The cephadm orchestrator needs to understand how to connect to the new node. Most likely the VM isn't reachable with the default settings used:
+
+::
+
+   Host *
+   User root
+   StrictHostKeyChecking no
+
+You want to adjust this by retrieving an adapted ssh_config from Vagrant.
+
+::
+
+ # vagrant ssh-config > ssh-config
+
+
+Now set the newly created config for Ceph.
+
+::
+
+ # ceph cephadm set-ssh-config -i <path_to_ssh_conf>
+
+
+3) Add the new host
+
+Add the newly created host(s) to the inventory.
+
+::
+
+   # ceph orchestrator host add <host>
+
+
+4) Verify the inventory
+
+::
+
+ # ceph orchestrator host ls
+
+
+You should see the hostname in the list.
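+
+You can also sanity-check that the orchestrator can reach and manage the new
+host; ``check-host`` is exposed by the cephadm module in this change:
+
+::
+
+   # ceph cephadm check-host <host>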
+
+Understanding ``AsyncCompletion``
+=================================
+
+How can I store temporary variables?
+------------------------------------
+
+Let's imagine you want to write code similar to
+
+.. code:: python
+
+ hosts = self.get_hosts()
+ inventory = self.get_inventory(hosts)
+ return self._create_osd(hosts, drive_group, inventory)
+
+That won't work, as ``get_hosts`` and ``get_inventory`` return objects
+of type ``AsyncCompletion``.
+
+Now let's imagine a Python 3 world, where we can use ``async`` and
+``await``. Then we actually can write this like so:
+
+.. code:: python
+
+ hosts = await self.get_hosts()
+ inventory = await self.get_inventory(hosts)
+ return self._create_osd(hosts, drive_group, inventory)
+
+Let's use a simple example to make this clear:
+
+.. code:: python
+
+ val = await func_1()
+ return func_2(val)
+
+As we're not yet in Python 3, we need to write ``await`` manually by
+calling ``orchestrator.Completion.then()``:
+
+.. code:: python
+
+ func_1().then(lambda val: func_2(val))
+
+ # or
+ func_1().then(func_2)
+
+Now let's desugar the original example:
+
+.. code:: python
+
+ hosts = await self.get_hosts()
+ inventory = await self.get_inventory(hosts)
+ return self._create_osd(hosts, drive_group, inventory)
+
+Now let's replace one ``async`` at a time:
+
+.. code:: python
+
+ hosts = await self.get_hosts()
+ return self.get_inventory(hosts).then(lambda inventory:
+ self._create_osd(hosts, drive_group, inventory))
+
+Then finally:
+
+.. code:: python
+
+ self.get_hosts().then(lambda hosts:
+ self.get_inventory(hosts).then(lambda inventory:
+ self._create_osd(hosts,
+ drive_group, inventory)))
+
+This also works without lambdas:
+
+.. code:: python
+
+ def call_inventory(hosts):
+        def call_create(inventory):
+ return self._create_osd(hosts, drive_group, inventory)
+
+ return self.get_inventory(hosts).then(call_create)
+
+    self.get_hosts().then(call_inventory)
+
+We should add support for ``await`` as soon as we're on Python 3.
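+
+For experimenting with this chaining style outside of the mgr, here is a
+minimal, self-contained toy stand-in (not the orchestrator's ``Completion``
+class) that illustrates the same ``then`` desugaring:
+
+.. code:: python
+
+    class MiniCompletion(object):
+        """Toy completion: collects callbacks, runs them on finalize()."""
+        def __init__(self, on_complete=None):
+            self._steps = [on_complete] if on_complete else []
+            self.result = None
+
+        def then(self, on_complete):
+            # queue another step, like orchestrator.Completion.then()
+            self._steps.append(on_complete)
+            return self
+
+        def finalize(self, value):
+            # run all queued steps in order on the initial value
+            for step in self._steps:
+                value = step(value)
+            self.result = value
+            return self.result
+
+    p = MiniCompletion(on_complete=lambda x: x * 2).then(str)
+    assert p.finalize(2) == "4"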
+
+I want to call my function for every host!
+------------------------------------------
+
+Imagine you have a function that looks like so:
+
+.. code:: python
+
+ @async_completion
+ def deploy_stuff(name, node):
+ ...
+
+And you want to call ``deploy_stuff`` like so:
+
+.. code:: python
+
+ return [deploy_stuff(name, node) for node in nodes]
+
+This won't work as expected. The number of ``AsyncCompletion`` objects
+created should be ``O(1)``. But there is a solution:
+``@async_map_completion``
+
+.. code:: python
+
+ @async_map_completion
+ def deploy_stuff(name, node):
+ ...
+
+ return deploy_stuff([(name, node) for node in nodes])
+
+This way, we're only creating one ``AsyncCompletion`` object. Note that
+you should not create new ``AsyncCompletion`` within ``deploy_stuff``, as
+we then no longer have ``O(1)`` completions:
+
+.. code:: python
+
+ @async_completion
+ def other_async_function():
+ ...
+
+ @async_map_completion
+ def deploy_stuff(name, node):
+ return other_async_function() # wrong!
+
+Why do we need this?
+--------------------
+
+I've tried to look into making Completions composable by being able to
+call one completion from another completion, i.e. making them re-usable
+using Promises, e.g.:
+
+.. code:: python
+
+ >>> return self.get_hosts().then(self._create_osd)
+
+where ``get_hosts`` returns a Completion of list of hosts and
+``_create_osd`` takes a list of hosts.
+
+The concept behind this is to store the computation steps explicitly and
+then evaluate the chain:
+
+.. code:: python
+
+ p = Completion(on_complete=lambda x: x*2).then(on_complete=lambda x: str(x))
+ p.finalize(2)
+    assert p.result == "4"
+
+or graphically:
+
+::
+
+ +---------------+ +-----------------+
+ | | then | |
+ | lambda x: x*x | +--> | lambda x: str(x)|
+ | | | |
+ +---------------+ +-----------------+
--- /dev/null
+# vi: set ft=ruby :
+
+NUM_DAEMONS = ENV["NUM_DAEMONS"] ? ENV["NUM_DAEMONS"].to_i : 1
+
+Vagrant.configure("2") do |config|
+ config.vm.synced_folder ".", "/vagrant", disabled: true
+ config.vm.network "private_network", type: "dhcp"
+ config.vm.box = "centos/7"
+
+ (0..NUM_DAEMONS - 1).each do |i|
+ config.vm.define "mon#{i}" do |mon|
+ mon.vm.hostname = "mon#{i}"
+ end
+ config.vm.define "mgr#{i}" do |mgr|
+ mgr.vm.hostname = "mgr#{i}"
+ end
+ config.vm.define "osd#{i}" do |osd|
+ osd.vm.hostname = "osd#{i}"
+ osd.vm.provider :libvirt do |libvirt|
+ libvirt.storage :file, :size => '5G'
+ libvirt.storage :file, :size => '5G'
+ end
+ end
+ end
+
+ config.vm.provision "file", source: "~/.ssh/id_rsa.pub", destination: "~/.ssh/id_rsa.pub"
+ config.vm.provision "shell", inline: <<-SHELL
+ cat /home/vagrant/.ssh/id_rsa.pub >> /home/vagrant/.ssh/authorized_keys
+ sudo cp -r /home/vagrant/.ssh /root/.ssh
+ SHELL
+
+ config.vm.provision "shell", inline: <<-SHELL
+ sudo yum install -y yum-utils
+ sudo yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
+ sudo rpm --import 'https://download.ceph.com/keys/release.asc'
+ curl -L https://shaman.ceph.com/api/repos/ceph/master/latest/centos/7/repo/ | sudo tee /etc/yum.repos.d/shaman.repo
+ sudo yum install -y python36 podman ceph
+ sudo ln -s /usr/bin/python36 /usr/bin/python3 || true
+ SHELL
+end
--- /dev/null
+import os
+
+if 'UNITTEST' in os.environ:
+ import tests
+
+from .module import CephadmOrchestrator
--- /dev/null
+[ceph]
+name=Ceph packages for $basearch
+baseurl=https://download.ceph.com/rpm-mimic/el7/$basearch
+enabled=1
+priority=2
+gpgcheck=1
+gpgkey=https://download.ceph.com/keys/release.asc
+
+[ceph-noarch]
+name=Ceph noarch packages
+baseurl=https://download.ceph.com/rpm-mimic/el7/noarch
+enabled=1
+priority=2
+gpgcheck=1
+gpgkey=https://download.ceph.com/keys/release.asc
+
+[ceph-source]
+name=Ceph source packages
+baseurl=https://download.ceph.com/rpm-mimic/el7/SRPMS
+enabled=0
+priority=2
+gpgcheck=1
+gpgkey=https://download.ceph.com/keys/release.asc
--- /dev/null
+import json
+import errno
+import logging
+from functools import wraps
+
+import string
+try:
+ from typing import List, Dict, Optional, Callable, TypeVar, Type, Any
+except ImportError:
+ pass # just for type checking
+
+
+import six
+import os
+import random
+import tempfile
+import multiprocessing.pool
+import shutil
+import subprocess
+
+from ceph.deployment import inventory
+from mgr_module import MgrModule
+import orchestrator
+from orchestrator import OrchestratorError
+
+from . import remotes
+
+try:
+ import remoto
+ import remoto.process
+except ImportError as e:
+ remoto = None
+ remoto_import_error = str(e)
+
+try:
+ from typing import List
+except ImportError:
+ pass
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_SSH_CONFIG = ('Host *\n'
+ 'User root\n'
+ 'StrictHostKeyChecking no\n'
+ 'UserKnownHostsFile /dev/null\n')
+
+# for py2 compat
+try:
+ from tempfile import TemporaryDirectory # py3
+except ImportError:
+ # define a minimal (but sufficient) equivalent for <= py 3.2
+ class TemporaryDirectory(object): # type: ignore
+ def __init__(self):
+ self.name = tempfile.mkdtemp()
+
+ def __enter__(self):
+ if not self.name:
+ self.name = tempfile.mkdtemp()
+ return self.name
+
+ def cleanup(self):
+ shutil.rmtree(self.name)
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.cleanup()
+
+
+# high-level TODO:
+# - bring over some of the protections from ceph-deploy that guard against
+# multiple bootstrapping / initialization
+
+def _name_to_entity_name(name):
+ """
+ Map from service names to ceph entity names (as seen in config)
+ """
+ if name.startswith('rgw.') or name.startswith('rbd-mirror'):
+ return 'client.' + name
+ else:
+ return name
+
+
+class AsyncCompletion(orchestrator.Completion):
+ def __init__(self,
+ _first_promise=None, # type: Optional[orchestrator.Completion]
+ value=orchestrator._Promise.NO_RESULT, # type: Any
+ on_complete=None, # type: Optional[Callable]
+ name=None, # type: Optional[str]
+ many=False, # type: bool
+ ):
+
+ assert CephadmOrchestrator.instance is not None
+ self.many = many
+ if name is None and on_complete is not None:
+ name = on_complete.__name__
+ super(AsyncCompletion, self).__init__(_first_promise, value, on_complete, name)
+
+ @property
+ def _progress_reference(self):
+ # type: () -> Optional[orchestrator.ProgressReference]
+ if hasattr(self._on_complete_, 'progress_id'): # type: ignore
+ return self._on_complete_ # type: ignore
+ return None
+
+ @property
+ def _on_complete(self):
+ # type: () -> Optional[Callable]
+ if self._on_complete_ is None:
+ return None
+
+ def callback(result):
+ try:
+ self._on_complete_ = None
+ self._finalize(result)
+ except Exception as e:
+ self.fail(e)
+
+ def error_callback(e):
+ self.fail(e)
+
+ if six.PY3:
+ _callback = self._on_complete_
+ else:
+ def _callback(*args, **kwargs):
+ # Py2 only: _worker_pool doesn't call error_callback
+ try:
+ return self._on_complete_(*args, **kwargs)
+ except Exception as e:
+ self.fail(e)
+
+ def run(value):
+ assert CephadmOrchestrator.instance
+ if self.many:
+ if not value:
+ logger.info('calling map_async without values')
+ callback([])
+ if six.PY3:
+ CephadmOrchestrator.instance._worker_pool.map_async(_callback, value,
+ callback=callback,
+ error_callback=error_callback)
+ else:
+ CephadmOrchestrator.instance._worker_pool.map_async(_callback, value,
+ callback=callback)
+ else:
+ if six.PY3:
+ CephadmOrchestrator.instance._worker_pool.apply_async(_callback, (value,),
+ callback=callback, error_callback=error_callback)
+ else:
+ CephadmOrchestrator.instance._worker_pool.apply_async(_callback, (value,),
+ callback=callback)
+ return self.ASYNC_RESULT
+
+ return run
+
+ @_on_complete.setter
+ def _on_complete(self, inner):
+ # type: (Callable) -> None
+ self._on_complete_ = inner
+
+
+def ssh_completion(cls=AsyncCompletion, **c_kwargs):
+ # type: (Type[orchestrator.Completion], Any) -> Callable
+ """
+ See ./HACKING.rst for a how-to
+ """
+ def decorator(f):
+ @wraps(f)
+ def wrapper(*args):
+
+ name = f.__name__
+ many = c_kwargs.get('many', False)
+
+            # Some weird logic to make calling functions with multiple arguments work.
+ if len(args) == 1:
+ [value] = args
+ if many and value and isinstance(value[0], tuple):
+ return cls(on_complete=lambda x: f(*x), value=value, name=name, **c_kwargs)
+ else:
+ return cls(on_complete=f, value=value, name=name, **c_kwargs)
+ else:
+ if many:
+ self, value = args
+
+ def call_self(inner_args):
+ if not isinstance(inner_args, tuple):
+ inner_args = (inner_args, )
+ return f(self, *inner_args)
+
+ return cls(on_complete=call_self, value=value, name=name, **c_kwargs)
+ else:
+ return cls(on_complete=lambda x: f(*x), value=args, name=name, **c_kwargs)
+
+
+ return wrapper
+ return decorator
+
+
+def async_completion(f):
+ # type: (Callable) -> Callable[..., AsyncCompletion]
+ """
+ See ./HACKING.rst for a how-to
+
+ :param f: wrapped function
+ """
+ return ssh_completion()(f)
+
+
+def async_map_completion(f):
+ # type: (Callable) -> Callable[..., AsyncCompletion]
+ """
+ See ./HACKING.rst for a how-to
+
+ :param f: wrapped function
+
+ kind of similar to
+
+ >>> def sync_map(f):
+ ... return lambda x: map(f, x)
+
+ """
+ return ssh_completion(many=True)(f)
+
+
+def trivial_completion(f):
+ # type: (Callable) -> Callable[..., orchestrator.Completion]
+ return ssh_completion(cls=orchestrator.Completion)(f)
+
+
+def trivial_result(val):
+ return AsyncCompletion(value=val, name='trivial_result')
+
+
+class CephadmOrchestrator(MgrModule, orchestrator.Orchestrator):
+
+ _STORE_HOST_PREFIX = "host"
+
+
+ instance = None
+ NATIVE_OPTIONS = [] # type: List[Any]
+ MODULE_OPTIONS = [
+ {
+ 'name': 'ssh_config_file',
+ 'type': 'str',
+ 'default': None,
+ 'desc': 'customized SSH config file to connect to managed hosts',
+ },
+ {
+ 'name': 'inventory_cache_timeout',
+ 'type': 'seconds',
+ 'default': 10 * 60,
+ 'desc': 'seconds to cache device inventory',
+ },
+ {
+ 'name': 'service_cache_timeout',
+ 'type': 'seconds',
+ 'default': 60,
+ 'desc': 'seconds to cache service (daemon) inventory',
+ },
+ {
+ 'name': 'mode',
+ 'type': 'str',
+ 'enum_allowed': ['root', 'cephadm-package'],
+ 'default': 'root',
+ 'desc': 'mode for remote execution of cephadm',
+ },
+ {
+ 'name': 'container_image_base',
+ 'default': 'ceph/ceph',
+ 'desc': 'Container image name, without the tag',
+ 'runtime': True,
+ },
+ ]
+
+ def __init__(self, *args, **kwargs):
+ super(CephadmOrchestrator, self).__init__(*args, **kwargs)
+ self._cluster_fsid = self.get('mon_map')['fsid']
+
+ self.config_notify()
+
+ path = self.get_ceph_option('cephadm_path')
+ try:
+ with open(path, 'r') as f:
+ self._cephadm = f.read()
+ except (IOError, TypeError) as e:
+ raise RuntimeError("unable to read cephadm at '%s': %s" % (
+ path, str(e)))
+
+ self._worker_pool = multiprocessing.pool.ThreadPool(1)
+
+ self._reconfig_ssh()
+
+ CephadmOrchestrator.instance = self
+ self.all_progress_references = list() # type: List[orchestrator.ProgressReference]
+
+ # load inventory
+ i = self.get_store('inventory')
+ if i:
+ self.inventory = json.loads(i)
+ else:
+ self.inventory = dict()
+ self.log.debug('Loaded inventory %s' % self.inventory)
+
+ # The values are cached by instance.
+ # cache is invalidated by
+ # 1. timeout
+ # 2. refresh parameter
+ self.inventory_cache = orchestrator.OutdatablePersistentDict(
+ self, self._STORE_HOST_PREFIX + '.devices')
+
+ self.service_cache = orchestrator.OutdatablePersistentDict(
+ self, self._STORE_HOST_PREFIX + '.services')
+
+ # ensure the host lists are in sync
+ for h in self.inventory.keys():
+ if h not in self.inventory_cache:
+ self.log.debug('adding inventory item for %s' % h)
+ self.inventory_cache[h] = orchestrator.OutdatableData()
+ if h not in self.service_cache:
+ self.log.debug('adding service item for %s' % h)
+ self.service_cache[h] = orchestrator.OutdatableData()
+ for h in self.inventory_cache:
+ if h not in self.inventory:
+ del self.inventory_cache[h]
+ for h in self.service_cache:
+ if h not in self.inventory:
+ del self.service_cache[h]
+
+ def shutdown(self):
+ self.log.error('shutdown')
+ self._worker_pool.close()
+ self._worker_pool.join()
+
+ def config_notify(self):
+ """
+ This method is called whenever one of our config options is changed.
+ """
+ for opt in self.MODULE_OPTIONS:
+ setattr(self,
+ opt['name'], # type: ignore
+ self.get_module_option(opt['name']) or opt['default']) # type: ignore
+ self.log.debug(' mgr option %s = %s',
+ opt['name'], getattr(self, opt['name'])) # type: ignore
+ for opt in self.NATIVE_OPTIONS:
+ setattr(self,
+ opt, # type: ignore
+ self.get_ceph_option(opt))
+ self.log.debug(' native option %s = %s', opt, getattr(self, opt)) # type: ignore
+
+ def get_unique_name(self, existing, prefix=None, forcename=None):
+ """
+ Generate a unique random service name
+ """
+ if forcename:
+ if len([d for d in existing if d.service_instance == forcename]):
+ raise RuntimeError('specified name %s already in use', forcename)
+ return forcename
+
+ while True:
+ if prefix:
+ name = prefix + '.'
+ else:
+ name = ''
+ name += ''.join(random.choice(string.ascii_lowercase)
+ for _ in range(6))
+ if len([d for d in existing if d.service_instance == name]):
+                self.log.debug('name %s exists, trying again', name)
+ continue
+ return name
+
+ def _save_inventory(self):
+ self.set_store('inventory', json.dumps(self.inventory))
+
+ def _reconfig_ssh(self):
+ temp_files = [] # type: list
+ ssh_options = [] # type: List[str]
+
+ # ssh_config
+ ssh_config_fname = self.ssh_config_file
+ ssh_config = self.get_store("ssh_config")
+ if ssh_config is not None or ssh_config_fname is None:
+ if not ssh_config:
+ ssh_config = DEFAULT_SSH_CONFIG
+ f = tempfile.NamedTemporaryFile(prefix='ceph-mgr-ssh-conf-')
+ os.fchmod(f.fileno(), 0o600)
+ f.write(ssh_config.encode('utf-8'))
+ f.flush() # make visible to other processes
+ temp_files += [f]
+ ssh_config_fname = f.name
+ if ssh_config_fname:
+ if not os.path.isfile(ssh_config_fname):
+ raise Exception("ssh_config \"{}\" does not exist".format(
+ ssh_config_fname))
+ ssh_options += ['-F', ssh_config_fname]
+
+ # identity
+ ssh_key = self.get_store("ssh_identity_key")
+ ssh_pub = self.get_store("ssh_identity_pub")
+ self.ssh_pub = ssh_pub
+ self.ssh_key = ssh_key
+ if ssh_key and ssh_pub:
+ tkey = tempfile.NamedTemporaryFile(prefix='ceph-mgr-ssh-identity-')
+ tkey.write(ssh_key.encode('utf-8'))
+ os.fchmod(tkey.fileno(), 0o600)
+ tkey.flush() # make visible to other processes
+ tpub = open(tkey.name + '.pub', 'w')
+ os.fchmod(tpub.fileno(), 0o600)
+ tpub.write(ssh_pub)
+ tpub.flush() # make visible to other processes
+ temp_files += [tkey, tpub]
+ ssh_options += ['-i', tkey.name]
+
+ self._temp_files = temp_files
+ if ssh_options:
+ self._ssh_options = ' '.join(ssh_options) # type: Optional[str]
+ else:
+ self._ssh_options = None
+ self.log.info('ssh_options %s' % ssh_options)
+
+ if self.mode == 'root':
+ self.ssh_user = 'root'
+ elif self.mode == 'cephadm-package':
+ self.ssh_user = 'cephadm'
+
+ @staticmethod
+ def can_run():
+ if remoto is not None:
+ return True, ""
+ else:
+ return False, "loading remoto library:{}".format(
+ remoto_import_error)
+
+ def available(self):
+ """
+ The cephadm orchestrator is always available.
+ """
+ return self.can_run()
+
+ def process(self, completions):
+ """
+ Does nothing, as completions are processed in another thread.
+ """
+ if completions:
+ self.log.info("process: completions={0}".format(orchestrator.pretty_print(completions)))
+
+ for p in completions:
+ p.finalize()
+
+ def _require_hosts(self, hosts):
+ """
+ Raise an error if any of the given hosts are unregistered.
+ """
+ if isinstance(hosts, six.string_types):
+ hosts = [hosts]
+ keys = self.inventory_cache.keys()
+ unregistered_hosts = set(hosts) - keys
+ if unregistered_hosts:
+ logger.warning('keys = {}'.format(keys))
+ raise RuntimeError("Host(s) {} not registered".format(
+ ", ".join(map(lambda h: "'{}'".format(h),
+ unregistered_hosts))))
+
+ @orchestrator._cli_write_command(
+ prefix='cephadm set-ssh-config',
+ desc='Set the ssh_config file (use -i <ssh_config>)')
+ def _set_ssh_config(self, inbuf=None):
+ """
+ Set an ssh_config file provided from stdin
+
+ TODO:
+ - validation
+ """
+ if inbuf is None or len(inbuf) == 0:
+ return -errno.EINVAL, "", "empty ssh config provided"
+ self.set_store("ssh_config", inbuf)
+ return 0, "", ""
+
+ @orchestrator._cli_write_command(
+ prefix='cephadm clear-ssh-config',
+ desc='Clear the ssh_config file')
+ def _clear_ssh_config(self):
+ """
+ Clear the ssh_config file provided from stdin
+ """
+ self.set_store("ssh_config", None)
+ self.ssh_config_tmp = None
+ return 0, "", ""
+
+ @orchestrator._cli_write_command(
+ 'cephadm generate-key',
+ desc='Generate a cluster SSH key (if not present)')
+ def _generate_key(self):
+ if not self.ssh_pub or not self.ssh_key:
+ self.log.info('Generating ssh key...')
+ tmp_dir = TemporaryDirectory()
+ path = tmp_dir.name + '/key'
+ try:
+ subprocess.call([
+ '/usr/bin/ssh-keygen',
+ '-C', 'ceph-%s' % self._cluster_fsid,
+ '-N', '',
+ '-f', path
+ ])
+ with open(path, 'r') as f:
+ secret = f.read()
+ with open(path + '.pub', 'r') as f:
+ pub = f.read()
+ finally:
+ os.unlink(path)
+ os.unlink(path + '.pub')
+ tmp_dir.cleanup()
+ self.set_store('ssh_identity_key', secret)
+ self.set_store('ssh_identity_pub', pub)
+ self._reconfig_ssh()
+ return 0, '', ''
+
+ @orchestrator._cli_write_command(
+ 'cephadm clear-key',
+ desc='Clear cluster SSH key')
+ def _clear_key(self):
+ self.set_store('ssh_identity_key', None)
+ self.set_store('ssh_identity_pub', None)
+ self._reconfig_ssh()
+ return 0, '', ''
+
+ @orchestrator._cli_read_command(
+ 'cephadm get-pub-key',
+ desc='Show SSH public key for connecting to cluster hosts')
+ def _get_pub_key(self):
+ if self.ssh_pub:
+ return 0, self.ssh_pub, ''
+ else:
+ return -errno.ENOENT, '', 'No cluster SSH key defined'
+
+ @orchestrator._cli_read_command(
+ 'cephadm get-user',
+ desc='Show user for SSHing to cluster hosts')
+ def _get_user(self):
+ return 0, self.ssh_user, ''
+
+ @orchestrator._cli_read_command(
+ 'cephadm check-host',
+ 'name=host,type=CephString',
+ 'Check whether we can access and manage a remote host')
+ def _check_host(self, host):
+ out, err, code = self._run_cephadm(host, '', 'check-host', [],
+ error_ok=True, no_fsid=True)
+ if code:
+ return 1, '', err
+ return 0, 'host ok', err
+
+ @orchestrator._cli_write_command(
+ 'cephadm prepare-host',
+ 'name=host,type=CephString',
+ 'Try to prepare a host for remote management')
+ def _prepare_host(self, host):
+ out, err, code = self._run_cephadm(host, '', 'prepare-host', [],
+ error_ok=True, no_fsid=True)
+ if code:
+ return 1, '', err
+ return 0, 'host ok', err
+
+ def _get_connection(self, host):
+ """
+ Setup a connection for running commands on remote host.
+ """
+ n = self.ssh_user + '@' + host
+ self.log.info("Opening connection to {} with ssh options '{}'".format(
+ n, self._ssh_options))
+ conn = remoto.Connection(
+ n,
+ logger=self.log.getChild(n),
+ ssh_options=self._ssh_options)
+
+ conn.import_module(remotes)
+
+ return conn
+
+ def _executable_path(self, conn, executable):
+ """
+ Remote validator that accepts a connection object to ensure that a certain
+ executable is available returning its full path if so.
+
+ Otherwise an exception with thorough details will be raised, informing the
+ user that the executable was not found.
+ """
+ executable_path = conn.remote_module.which(executable)
+ if not executable_path:
+ raise RuntimeError("Executable '{}' not found on host '{}'".format(
+ executable, conn.hostname))
+ self.log.info("Found executable '{}' at path '{}'".format(executable,
+ executable_path))
+ return executable_path
+
+ def _run_cephadm(self, host, entity, command, args,
+ stdin=None,
+ no_fsid=False,
+ error_ok=False,
+ image=None):
+ """
+ Run cephadm on the remote host with the given command + args
+ """
+ conn = self._get_connection(host)
+
+ try:
+ if not image:
+ # get container image
+ ret, image, err = self.mon_command({
+ 'prefix': 'config get',
+ 'who': _name_to_entity_name(entity),
+ 'key': 'container_image',
+ })
+ image = image.strip()
+ self.log.debug('%s container image %s' % (entity, image))
+
+ final_args = [
+ '--image', image,
+ command
+ ]
+ if not no_fsid:
+ final_args += ['--fsid', self._cluster_fsid]
+ final_args += args
+
+ if self.mode == 'root':
+ self.log.debug('args: %s' % final_args)
+ self.log.debug('stdin: %s' % stdin)
+ script = 'injected_argv = ' + json.dumps(final_args) + '\n'
+ if stdin:
+ script += 'injected_stdin = ' + json.dumps(stdin) + '\n'
+ script += self._cephadm
+ out, err, code = remoto.process.check(
+ conn,
+ ['/usr/bin/python', '-u'],
+ stdin=script.encode('utf-8'))
+ elif self.mode == 'cephadm-package':
+ out, err, code = remoto.process.check(
+ conn,
+ ['sudo', '/usr/bin/cephadm'] + final_args,
+ stdin=stdin)
+ self.log.debug('exit code %s out %s err %s' % (code, out, err))
+ if code and not error_ok:
+ raise RuntimeError(
+ 'cephadm exited with an error code: %d, stderr:%s' % (
+ code, '\n'.join(err)))
+ return out, err, code
+
+ except Exception as ex:
+ self.log.exception(ex)
+ raise
+
+ finally:
+ conn.exit()
+
+ def _get_hosts(self, wanted=None):
+ return self.inventory_cache.items_filtered(wanted)
+
+ @async_completion
+ def add_host(self, host):
+ """
+ Add a host to be managed by the orchestrator.
+
+ :param host: host name
+ """
+ self.inventory[host] = {}
+ self._save_inventory()
+ self.inventory_cache[host] = orchestrator.OutdatableData()
+ self.service_cache[host] = orchestrator.OutdatableData()
+ return "Added host '{}'".format(host)
+
+ @async_completion
+ def remove_host(self, host):
+ """
+ Remove a host from orchestrator management.
+
+ :param host: host name
+ """
+ del self.inventory[host]
+ self._save_inventory()
+ del self.inventory_cache[host]
+ del self.service_cache[host]
+ return "Removed host '{}'".format(host)
+
+ @trivial_completion
+ def get_hosts(self):
+ """
+ Return a list of hosts managed by the orchestrator.
+
+ Notes:
+ - skip async: manager reads from cache.
+
+ TODO:
+ - InventoryNode probably needs to be able to report labels
+ """
+ return [orchestrator.InventoryNode(host_name) for host_name in self.inventory_cache]
+
+ """
+ def add_host_label(self, host, label):
+ if host not in self.inventory:
+ raise OrchestratorError('host %s does not exist' % host)
+
+ @log_exceptions
+ def run(host, label):
+ if 'labels' not in self.inventory[host]:
+ self.inventory[host]['labels'] = list()
+ if label not in self.inventory[host]['labels']:
+ self.inventory[host]['labels'].append(label)
+ self._save_inventory()
+ return 'Added label %s to host %s' % (label, host)
+
+ return SSHWriteCompletion(
+ self._worker_pool.apply_async(run, (host, label)))
+
+ def remove_host_label(self, host, label):
+ if host not in self.inventory:
+ raise OrchestratorError('host %s does not exist' % host)
+
+ @log_exceptions
+ def run(host, label):
+ if 'labels' not in self.inventory[host]:
+ self.inventory[host]['labels'] = list()
+ if label in self.inventory[host]['labels']:
+ self.inventory[host]['labels'].remove(label)
+ self._save_inventory()
+ return 'Removed label %s to host %s' % (label, host)
+
+ return SSHWriteCompletion(
+ self._worker_pool.apply_async(run, (host, label)))
+ """
+
+ @async_map_completion
+ def _refresh_host_services(self, host):
+ out, err, code = self._run_cephadm(
+ host, 'mon', 'ls', [], no_fsid=True)
+ data = json.loads(''.join(out))
+ self.log.error('refreshed host %s services: %s' % (host, data))
+ self.service_cache[host] = orchestrator.OutdatableData(data)
+ return data
+
+ def _get_services(self,
+ service_type=None,
+ service_name=None,
+ service_id=None,
+ node_name=None,
+ refresh=False):
+ hosts = []
+ wait_for_args = []
+ in_cache = []
+ for host, host_info in self.service_cache.items_filtered():
+ hosts.append(host)
+ if host_info.outdated(self.service_cache_timeout) or refresh:
+ self.log.info("refresing stale services for '{}'".format(host))
+ wait_for_args.append((host,))
+ else:
+ self.log.debug('have recent services for %s: %s' % (
+ host, host_info.data))
+ in_cache.append(host_info.data)
+
+ def _get_services_result(results):
+ services = {}
+ for host, data in zip(hosts, results + in_cache):
+ services[host] = data
+
+ result = []
+ for host, ls in services.items():
+ for d in ls:
+ if not d['style'].startswith('cephadm'):
+ self.log.debug('ignoring non-cephadm on %s: %s' % (host, d))
+ continue
+ if d['fsid'] != self._cluster_fsid:
+ self.log.debug('ignoring foreign daemon on %s: %s' % (host, d))
+ continue
+ self.log.debug('including %s' % d)
+ sd = orchestrator.ServiceDescription()
+ sd.service_type = d['name'].split('.')[0]
+ if service_type and service_type != sd.service_type:
+ continue
+ if '.' in d['name']:
+ sd.service_instance = '.'.join(d['name'].split('.')[1:])
+ else:
+ sd.service_instance = host # e.g., crash
+ if service_id and service_id != sd.service_instance:
+ continue
+ if service_name and not sd.service_instance.startswith(service_name + '.'):
+ continue
+ sd.nodename = host
+ sd.container_id = d.get('container_id')
+ sd.container_image_name = d.get('container_image_name')
+ sd.container_image_id = d.get('container_image_id')
+ sd.version = d.get('version')
+ sd.status_desc = d['state']
+ sd.status = {
+ 'running': 1,
+ 'stopped': 0,
+ 'error': -1,
+ 'unknown': -1,
+ }[d['state']]
+ result.append(sd)
+ return result
+
+ return self._refresh_host_services(wait_for_args).then(
+ _get_services_result)
+
+
+ def describe_service(self, service_type=None, service_id=None,
+ node_name=None, refresh=False):
+ if service_type not in ("mds", "osd", "mgr", "mon", 'rgw', "nfs", None):
+ raise orchestrator.OrchestratorValidationError(
+ service_type + " unsupported")
+ result = self._get_services(service_type,
+ service_id=service_id,
+ node_name=node_name,
+ refresh=refresh)
+ return result
+
+ def service_action(self, action, service_type,
+ service_name=None,
+ service_id=None):
+ self.log.debug('service_action action %s type %s name %s id %s' % (
+ action, service_type, service_name, service_id))
+ if action == 'reload':
+ return trivial_result(["Reload is a no-op"])
+
+ def _proc_daemons(daemons):
+ args = []
+ for d in daemons:
+ args.append((d.service_type, d.service_instance,
+ d.nodename, action))
+ if not args:
+ if service_name:
+ n = service_name + '-*'
+ else:
+ n = service_id
+ raise orchestrator.OrchestratorError(
+ 'Unable to find %s.%s daemon(s)' % (
+ service_type, n))
+ return self._service_action(args)
+
+ return self._get_services(
+ service_type,
+ service_name=service_name,
+ service_id=service_id).then(_proc_daemons)
+
+ @async_map_completion
+ def _service_action(self, service_type, service_id, host, action):
+ if action == 'redeploy':
+ # recreate the systemd unit and then restart
+ if service_type == 'mon':
+ # get mon. key
+ ret, keyring, err = self.mon_command({
+ 'prefix': 'auth get',
+ 'entity': 'mon.',
+ })
+ else:
+ ret, keyring, err = self.mon_command({
+ 'prefix': 'auth get',
+ 'entity': '%s.%s' % (service_type, service_id),
+ })
+ return self._create_daemon(service_type, service_id, host,
+ keyring)
+
+ actions = {
+ 'start': ['reset-failed', 'start'],
+ 'stop': ['stop'],
+ 'restart': ['reset-failed', 'restart'],
+ }
+ name = '%s.%s' % (service_type, service_id)
+ for a in actions[action]:
+ out, err, code = self._run_cephadm(
+ host, name, 'unit',
+ ['--name', name, a],
+ error_ok=True)
+ self.service_cache.invalidate(host)
+ self.log.debug('_service_action code %s out %s' % (code, out))
+ return trivial_result("{} {} from host '{}'".format(action, name, host))
+
+ def get_inventory(self, node_filter=None, refresh=False):
+ """
+ Return the storage inventory of nodes matching the given filter.
+
+ :param node_filter: node filter
+
+ TODO:
+ - add filtering by label
+ """
+ if node_filter:
+ hosts = node_filter.nodes
+ self._require_hosts(hosts)
+ hosts = self._get_hosts(hosts)
+ else:
+ # this implies the returned hosts are registered
+ hosts = self._get_hosts()
+
+ @async_map_completion
+ def _get_inventory(host, host_info):
+ # type: (str, orchestrator.OutdatableData) -> orchestrator.InventoryNode
+
+ if host_info.outdated(self.inventory_cache_timeout) or refresh:
+ self.log.info("refresh stale inventory for '{}'".format(host))
+ out, err, code = self._run_cephadm(
+ host, 'osd',
+ 'ceph-volume',
+ ['--', 'inventory', '--format=json'])
+ data = json.loads(''.join(out))
+ host_info = orchestrator.OutdatableData(data)
+ self.inventory_cache[host] = host_info
+ else:
+ self.log.debug("reading cached inventory for '{}'".format(host))
+
+ devices = inventory.Devices.from_json(host_info.data)
+ return orchestrator.InventoryNode(host, devices)
+
+ return _get_inventory(hosts)
+
+ def blink_device_light(self, ident_fault, on, locs):
+ @async_map_completion
+ def blink(host, dev):
+ cmd = [
+ 'lsmcli',
+ 'local-disk-%s-led-%s' % (
+ ident_fault,
+ 'on' if on else 'off'),
+ '--path', '/dev/' + dev,
+ ]
+ out, err, code = self._run_cephadm(
+ host, 'osd', 'shell', ['--'] + cmd,
+ error_ok=True)
+ if code:
+ raise RuntimeError(
+ 'Unable to affect %s light for %s:%s. Command: %s' % (
+ ident_fault, host, dev, ' '.join(cmd)))
+ return "Set %s light for %s:%s %s" % (
+ ident_fault, host, dev, 'on' if on else 'off')
+
+ return blink(locs)
+
+ @async_completion
+ def _create_osd(self, all_hosts_, drive_group):
+ all_hosts = orchestrator.InventoryNode.get_host_names(all_hosts_)
+ assert len(drive_group.hosts(all_hosts)) == 1
+ assert len(drive_group.data_devices.paths) > 0
+ assert all(map(lambda p: isinstance(p, six.string_types),
+ drive_group.data_devices.paths))
+
+ host = drive_group.hosts(all_hosts)[0]
+ self._require_hosts(host)
+
+
+ # get bootstrap key
+ ret, keyring, err = self.mon_command({
+ 'prefix': 'auth get',
+ 'entity': 'client.bootstrap-osd',
+ })
+
+ # generate config
+ ret, config, err = self.mon_command({
+ "prefix": "config generate-minimal-conf",
+ })
+
+ j = json.dumps({
+ 'config': config,
+ 'keyring': keyring,
+ })
+
+ devices = drive_group.data_devices.paths
+ for device in devices:
+ out, err, code = self._run_cephadm(
+ host, 'osd', 'ceph-volume',
+ [
+ '--config-and-keyring', '-',
+ '--',
+ 'lvm', 'prepare',
+ "--cluster-fsid", self._cluster_fsid,
+ "--{}".format(drive_group.objectstore),
+ "--data", device,
+ ],
+ stdin=j)
+ self.log.debug('ceph-volume prepare: %s' % out)
+
+ # check result
+ out, err, code = self._run_cephadm(
+ host, 'osd', 'ceph-volume',
+ [
+ '--',
+ 'lvm', 'list',
+ '--format', 'json',
+ ])
+ self.log.debug('code %s out %s' % (code, out))
+ osds_elems = json.loads('\n'.join(out))
+ fsid = self._cluster_fsid
+ for osd_id, osds in osds_elems.items():
+ for osd in osds:
+ if osd['tags']['ceph.cluster_fsid'] != fsid:
+ self.log.debug('mismatched fsid, skipping %s' % osd)
+ continue
+ if len(list(set(devices) & set(osd['devices']))) == 0 and osd.get('lv_path') not in devices:
+ self.log.debug('mismatched devices, skipping %s' % osd)
+ continue
+
+ # create
+ ret, keyring, err = self.mon_command({
+ 'prefix': 'auth get',
+ 'entity': 'osd.%s' % str(osd_id),
+ })
+ self._create_daemon(
+ 'osd', str(osd_id), host, keyring,
+ extra_args=[
+ '--osd-fsid', osd['tags']['ceph.osd_fsid'],
+ ])
+
+ return "Created osd(s) on host '{}'".format(host)
+
+ def create_osds(self, drive_group):
+ """
+ Create a new osd.
+
+ The orchestrator CLI currently handles a narrow form of drive
+ specification defined by a single block device using bluestore.
+
+ :param drive_group: osd specification
+
+ TODO:
+ - support full drive_group specification
+ - support batch creation
+ """
+
+ return self.get_hosts().then(lambda hosts: self._create_osd(hosts, drive_group))
+
+ def remove_osds(self, name):
+ def _search(daemons):
+ args = [('osd.%s' % d.service_instance, d.nodename) for d in daemons]
+ if not args:
+ raise OrchestratorError('Unable to find osd.%s' % name)
+ return self._remove_daemon(args)
+ return self._get_services('osd', service_id=name).then(_search)
+
+ def _create_daemon(self, daemon_type, daemon_id, host, keyring,
+ extra_args=[]):
+ conn = self._get_connection(host)
+ try:
+ name = '%s.%s' % (daemon_type, daemon_id)
+
+ # generate config
+ ret, config, err = self.mon_command({
+ "prefix": "config generate-minimal-conf",
+ })
+
+ ret, crash_keyring, err = self.mon_command({
+ 'prefix': 'auth get-or-create',
+ 'entity': 'client.crash.%s' % host,
+ 'caps': ['mon', 'profile crash',
+ 'mgr', 'profile crash'],
+ })
+
+ j = json.dumps({
+ 'config': config,
+ 'keyring': keyring,
+ 'crash_keyring': crash_keyring,
+ })
+
+ out, err, code = self._run_cephadm(
+ host, name, 'deploy',
+ [
+ '--name', name,
+ '--config-and-keyrings', '-',
+ ] + extra_args,
+ stdin=j)
+ self.log.debug('create_daemon code %s out %s' % (code, out))
+ self.service_cache.invalidate(host)
+ return "(Re)deployed {} on host '{}'".format(name, host)
+
+ except Exception as e:
+ self.log.error("create_daemon({}): error: {}".format(host, e))
+ raise
+
+ finally:
+ self.log.info("create_daemon({}): finished".format(host))
+ conn.exit()
+
+ @async_map_completion
+ def _remove_daemon(self, name, host):
+ """
+ Remove a daemon
+ """
+ out, err, code = self._run_cephadm(
+ host, name, 'rm-daemon',
+ ['--name', name])
+ self.log.debug('_remove_daemon code %s out %s' % (code, out))
+ self.service_cache.invalidate(host)
+ return "Removed {} from host '{}'".format(name, host)
+
+ def _update_service(self, daemon_type, add_func, spec):
+ def ___update_service(daemons):
+ if len(daemons) > spec.count:
+ # remove some
+ to_remove = len(daemons) - spec.count
+ args = []
+ for d in daemons[0:to_remove]:
+ args.append(
+ ('%s.%s' % (d.service_type, d.service_instance), d.nodename)
+ )
+ return self._remove_daemon(args)
+ elif len(daemons) < spec.count:
+ # add some
+ spec.count -= len(daemons)
+ return add_func(spec)
+ return []
+ return self._get_services(daemon_type, service_name=spec.name).then(___update_service)
+
+ @async_map_completion
+ def _create_mon(self, host, network, name):
+ """
+ Create a new monitor on the given host.
+ """
+ self.log.info("create_mon({}:{}): starting".format(host, network))
+
+ # get mon. key
+ ret, keyring, err = self.mon_command({
+ 'prefix': 'auth get',
+ 'entity': 'mon.',
+ })
+
+ # infer whether this is a CIDR network, addrvec, or plain IP
+ if '/' in network:
+ extra_args = ['--mon-network', network]
+ elif network.startswith('[v') and network.endswith(']'):
+ extra_args = ['--mon-addrv', network]
+ elif ':' not in network:
+ extra_args = ['--mon-ip', network]
+ else:
+ raise RuntimeError('Must specify a CIDR network, ceph addrvec, or plain IP: \'%s\'' % network)
+
+ return self._create_daemon('mon', name or host, host, keyring,
+ extra_args=extra_args)
+
+ def update_mons(self, num, host_specs):
+ # type: (int, List[orchestrator.HostSpec]) -> orchestrator.Completion
+ """
+ Adjust the number of cluster monitors.
+ """
+ # current support limited to adding monitors.
+ mon_map = self.get("mon_map")
+ num_mons = len(mon_map["mons"])
+ if num == num_mons:
+ return orchestrator.Completion(value="The requested number of monitors exist.")
+ if num < num_mons:
+ raise NotImplementedError("Removing monitors is not supported.")
+
+ self.log.debug("Trying to update monitors on: {}".format(host_specs))
+ # check that all the hosts are registered
+ [self._require_hosts(host.hostname) for host in host_specs]
+
+ # current support requires a network to be specified
+ for host, network, _ in host_specs:
+ if not network:
+ raise RuntimeError("Host '{}' is missing a network spec".format(host))
+
+ def update_mons_with_daemons(daemons):
+ for _, _, name in host_specs:
+ if name and len([d for d in daemons if d.service_instance == name]):
+                    raise RuntimeError('name %s already exists' % name)
+
+ # explicit placement: enough hosts provided?
+ num_new_mons = num - num_mons
+ if len(host_specs) < num_new_mons:
+ raise RuntimeError("Error: {} hosts provided, expected {}".format(
+ len(host_specs), num_new_mons))
+
+ self.log.info("creating {} monitors on hosts: '{}'".format(
+ num_new_mons, ",".join(map(lambda h: ":".join(h), host_specs))))
+
+ # TODO: we may want to chain the creation of the monitors so they join
+ # the quorum one at a time.
+ return self._create_mon(host_specs)
+ return self._get_services('mon').then(update_mons_with_daemons)
+
+ @async_map_completion
+ def _create_mgr(self, host, name):
+ """
+ Create a new manager instance on a host.
+ """
+ self.log.info("create_mgr({}, mgr.{}): starting".format(host, name))
+
+ # get mgr. key
+ ret, keyring, err = self.mon_command({
+ 'prefix': 'auth get-or-create',
+ 'entity': 'mgr.%s' % name,
+ 'caps': ['mon', 'profile mgr',
+ 'osd', 'allow *',
+ 'mds', 'allow *'],
+ })
+
+ return self._create_daemon('mgr', name, host, keyring)
+
+ def update_mgrs(self, num, host_specs):
+ """
+ Adjust the number of cluster managers.
+ """
+ return self._get_services('mgr').then(lambda daemons: self._update_mgrs(num, host_specs, daemons))
+
+ def _update_mgrs(self, num, host_specs, daemons):
+ num_mgrs = len(daemons)
+ if num == num_mgrs:
+            return orchestrator.Completion(value="The requested number of managers already exists.")
+
+ self.log.debug("Trying to update managers on: {}".format(host_specs))
+ # check that all the hosts are registered
+ [self._require_hosts(host.hostname) for host in host_specs]
+
+ results = []
+ if num < num_mgrs:
+ num_to_remove = num_mgrs - num
+
+ # first try to remove unconnected mgr daemons that the
+ # cluster doesn't see
+ connected = []
+ mgr_map = self.get("mgr_map")
+ if mgr_map.get("active_name", {}):
+ connected.append(mgr_map.get('active_name', ''))
+ for standby in mgr_map.get('standbys', []):
+ connected.append(standby.get('name', ''))
+            to_remove_daemons = []
+            for d in daemons:
+                if d.service_instance not in connected:
+                    to_remove_daemons.append(('%s.%s' % (d.service_type, d.service_instance),
+                                              d.nodename))
+                    num_to_remove -= 1
+                    if num_to_remove == 0:
+                        break
+
+            # otherwise, remove *any* mgr
+            if num_to_remove > 0:
+                for d in daemons:
+                    to_remove_daemons.append(('%s.%s' % (d.service_type, d.service_instance), d.nodename))
+                    num_to_remove -= 1
+                    if num_to_remove == 0:
+                        break
+            return self._remove_daemon(to_remove_daemons)
+
+ else:
+ # we assume explicit placement by which there are the same number of
+ # hosts specified as the size of increase in number of daemons.
+ num_new_mgrs = num - num_mgrs
+ if len(host_specs) < num_new_mgrs:
+ raise RuntimeError(
+ "Error: {} hosts provided, expected {}".format(
+ len(host_specs), num_new_mgrs))
+
+            for host_spec in host_specs:
+                if host_spec.name and len([d for d in daemons if d.service_instance == host_spec.name]):
+                    raise RuntimeError('name %s already exists' % host_spec.name)
+
+ self.log.info("creating {} managers on hosts: '{}'".format(
+ num_new_mgrs, ",".join([spec.hostname for spec in host_specs])))
+
+ args = []
+ for host_spec in host_specs:
+ name = host_spec.name or self.get_unique_name(daemons)
+ host = host_spec.hostname
+ args.append((host, name))
+ return self._create_mgr(args)
+
+ def add_mds(self, spec):
+ if not spec.placement.nodes or len(spec.placement.nodes) < spec.count:
+ raise RuntimeError("must specify at least %d hosts" % spec.count)
+ return self._get_services('mds').then(lambda ds: self._add_mds(ds, spec))
+
+ def _add_mds(self, daemons, spec):
+ args = []
+ num_added = 0
+ for host, _, name in spec.placement.nodes:
+ if num_added >= spec.count:
+ break
+ mds_id = self.get_unique_name(daemons, spec.name, name)
+ self.log.debug('placing mds.%s on host %s' % (mds_id, host))
+ args.append((mds_id, host))
+ # add to daemon list so next name(s) will also be unique
+ sd = orchestrator.ServiceDescription()
+ sd.service_instance = mds_id
+ sd.service_type = 'mds'
+ sd.nodename = host
+ daemons.append(sd)
+ num_added += 1
+ return self._create_mds(args)
+
+ def update_mds(self, spec):
+ return self._update_service('mds', self.add_mds, spec)
+
+ @async_map_completion
+ def _create_mds(self, mds_id, host):
+        # get mds key
+ ret, keyring, err = self.mon_command({
+ 'prefix': 'auth get-or-create',
+ 'entity': 'mds.' + mds_id,
+ 'caps': ['mon', 'profile mds',
+ 'osd', 'allow rwx',
+ 'mds', 'allow'],
+ })
+ return self._create_daemon('mds', mds_id, host, keyring)
+
+ def remove_mds(self, name):
+        self.log.debug("Attempting to remove mds: {}".format(name))
+ def _remove_mds(daemons):
+ args = []
+ for d in daemons:
+ if d.service_instance == name or d.service_instance.startswith(name + '.'):
+ args.append(
+ ('%s.%s' % (d.service_type, d.service_instance), d.nodename)
+ )
+ if not args:
+ raise OrchestratorError('Unable to find mds.%s[-*] daemon(s)' % name)
+ return self._remove_daemon(args)
+ return self._get_services('mds').then(_remove_mds)
+
+ def add_rgw(self, spec):
+ if not spec.placement.nodes or len(spec.placement.nodes) < spec.count:
+ raise RuntimeError("must specify at least %d hosts" % spec.count)
+        # ensure rgw_realm and rgw_zone are set for these daemons
+ ret, out, err = self.mon_command({
+ 'prefix': 'config set',
+ 'who': 'client.rgw.' + spec.name,
+ 'name': 'rgw_zone',
+ 'value': spec.rgw_zone,
+ })
+ ret, out, err = self.mon_command({
+ 'prefix': 'config set',
+            'who': 'client.rgw.' + spec.name,
+ 'name': 'rgw_realm',
+ 'value': spec.rgw_realm,
+ })
+
+ def _add_rgw(daemons):
+ args = []
+ num_added = 0
+ for host, _, name in spec.placement.nodes:
+ if num_added >= spec.count:
+ break
+ rgw_id = self.get_unique_name(daemons, spec.name, name)
+ self.log.debug('placing rgw.%s on host %s' % (rgw_id, host))
+ args.append((rgw_id, host))
+ # add to daemon list so next name(s) will also be unique
+ sd = orchestrator.ServiceDescription()
+ sd.service_instance = rgw_id
+ sd.service_type = 'rgw'
+ sd.nodename = host
+ daemons.append(sd)
+ num_added += 1
+ return self._create_rgw(args)
+
+ return self._get_services('rgw').then(_add_rgw)
+
+ @async_map_completion
+ def _create_rgw(self, rgw_id, host):
+ ret, keyring, err = self.mon_command({
+ 'prefix': 'auth get-or-create',
+ 'entity': 'client.rgw.' + rgw_id,
+ 'caps': ['mon', 'allow rw',
+ 'mgr', 'allow rw',
+ 'osd', 'allow rwx'],
+ })
+ return self._create_daemon('rgw', rgw_id, host, keyring)
+
+ def remove_rgw(self, name):
+
+ def _remove_rgw(daemons):
+ args = []
+ for d in daemons:
+ if d.service_instance == name or d.service_instance.startswith(name + '.'):
+ args.append(('%s.%s' % (d.service_type, d.service_instance),
+ d.nodename))
+ if args:
+ return self._remove_daemon(args)
+ raise RuntimeError('Unable to find rgw.%s[-*] daemon(s)' % name)
+
+ return self._get_services('rgw').then(_remove_rgw)
+
+ def update_rgw(self, spec):
+ return self._update_service('rgw', self.add_rgw, spec)
+
+ def add_rbd_mirror(self, spec):
+ if not spec.placement.nodes or len(spec.placement.nodes) < spec.count:
+ raise RuntimeError("must specify at least %d hosts" % spec.count)
+ self.log.debug('nodes %s' % spec.placement.nodes)
+
+ def _add_rbd_mirror(daemons):
+ args = []
+ num_added = 0
+ for host, _, name in spec.placement.nodes:
+ if num_added >= spec.count:
+ break
+ daemon_id = self.get_unique_name(daemons, None, name)
+ self.log.debug('placing rbd-mirror.%s on host %s' % (daemon_id,
+ host))
+ args.append((daemon_id, host))
+
+ # add to daemon list so next name(s) will also be unique
+ sd = orchestrator.ServiceDescription()
+ sd.service_instance = daemon_id
+ sd.service_type = 'rbd-mirror'
+ sd.nodename = host
+ daemons.append(sd)
+ num_added += 1
+ return self._create_rbd_mirror(args)
+
+ return self._get_services('rbd-mirror').then(_add_rbd_mirror)
+
+ @async_map_completion
+ def _create_rbd_mirror(self, daemon_id, host):
+ ret, keyring, err = self.mon_command({
+ 'prefix': 'auth get-or-create',
+ 'entity': 'client.rbd-mirror.' + daemon_id,
+ 'caps': ['mon', 'profile rbd-mirror',
+ 'osd', 'profile rbd'],
+ })
+ return self._create_daemon('rbd-mirror', daemon_id, host, keyring)
+
+ def remove_rbd_mirror(self, name):
+ def _remove_rbd_mirror(daemons):
+ args = []
+ for d in daemons:
+ if not name or d.service_instance == name:
+ args.append(
+ ('%s.%s' % (d.service_type, d.service_instance),
+ d.nodename)
+ )
+ if not args and name:
+ raise RuntimeError('Unable to find rbd-mirror.%s daemon' % name)
+ return self._remove_daemon(args)
+
+ return self._get_services('rbd-mirror').then(_remove_rbd_mirror)
+
+ def update_rbd_mirror(self, spec):
+ return self._update_service('rbd-mirror', self.add_rbd_mirror, spec)
+
+ def _get_container_image_id(self, image_name):
+        # pick an arbitrary host (the first one in the inventory) to pull the image on...
+ host = None
+ for host_name in self.inventory_cache:
+ host = host_name
+ break
+ if not host:
+ raise OrchestratorError('no hosts defined')
+ self.log.debug('using host %s' % host)
+        out, err, code = self._run_cephadm(
+ host, None, 'pull', [],
+ image=image_name,
+ no_fsid=True)
+ return out[0]
+
+ def upgrade_check(self, image, version):
+ if version:
+ target = self.container_image_base + ':v' + version
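+            # e.g. container_image_base 'ceph/ceph' + version '15.2.0'
+            # yields the image name 'ceph/ceph:v15.2.0'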
+ elif image:
+ target = image
+ else:
+ raise OrchestratorError('must specify either image or version')
+ return self._get_services().then(lambda daemons: self._upgrade_check(target, daemons))
+
+ def _upgrade_check(self, target, services):
+ # get service state
+ target_id = self._get_container_image_id(target)
+ self.log.debug('Target image %s id %s' % (target, target_id))
+ r = {
+ 'target_image_name': target,
+ 'target_image_id': target_id,
+ 'needs_update': dict(),
+ 'up_to_date': list(),
+ }
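+        # the report ends up looking roughly like:
+        #   {'target_image_name': 'ceph/ceph:v15', 'target_image_id': '...',
+        #    'needs_update': {'osd.0': {'current_name': ..., 'current_id': ...}},
+        #    'up_to_date': ['mgr.x']}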
+ for s in services:
+ if target_id == s.container_image_id:
+ r['up_to_date'].append(s.name())
+ else:
+ r['needs_update'][s.name()] = {
+ 'current_name': s.container_image_name,
+ 'current_id': s.container_image_id,
+ }
+ return trivial_result(json.dumps(r, indent=4))
--- /dev/null
+# helper functions executed on remote hosts via remoto (borrowed from ceph-deploy)
+import os
+import errno
+import tempfile
+import shutil
+
+def safe_makedirs(path, uid=-1, gid=-1):
+ """ create path recursively if it doesn't exist """
+ try:
+ os.makedirs(path)
+ except OSError as e:
+ if e.errno == errno.EEXIST:
+ pass
+ else:
+ raise
+ else:
+ os.chown(path, uid, gid)
+
+def write_conf(path, conf):
+ if not os.path.exists(path):
+ dirpath = os.path.dirname(path)
+ if os.path.exists(dirpath):
+ with open(path, "w") as f:
+ f.write(conf)
+ os.chmod(path, 0o644)
+ else:
+ raise RuntimeError(
+ "{0} does not exist".format(dirpath))
+
+def write_keyring(path, key, overwrite=False, uid=-1, gid=-1):
+ dirname = os.path.dirname(path)
+ if not os.path.exists(dirname):
+ safe_makedirs(dirname, uid, gid)
+ if not overwrite and os.path.exists(path):
+ return
+ with open(path, "wb") as f:
+ f.write(key.encode('utf-8'))
+
+def create_mon_path(path, uid=-1, gid=-1):
+ """create the mon path if it does not exist"""
+ if not os.path.exists(path):
+ os.makedirs(path)
+        os.chown(path, uid, gid)
+
+def write_file(path, content, mode=0o644, directory=None, uid=-1, gid=-1):
+ if directory:
+ if path.startswith("/"):
+ path = path[1:]
+ path = os.path.join(directory, path)
+ if os.path.exists(path):
+ # Delete file in case we are changing its mode
+ os.unlink(path)
+ with os.fdopen(os.open(path, os.O_WRONLY | os.O_CREAT, mode), 'wb') as f:
+ f.write(content.encode('utf-8'))
+ os.chown(path, uid, gid)
+
+def path_getuid(path):
+ return os.stat(path).st_uid
+
+def path_getgid(path):
+ return os.stat(path).st_gid
+
+def which(executable):
+ """find the location of an executable"""
+ locations = (
+ '/usr/local/bin',
+ '/bin',
+ '/usr/bin',
+ '/usr/local/sbin',
+ '/usr/sbin',
+ '/sbin',
+ )
+
+ for location in locations:
+ executable_path = os.path.join(location, executable)
+ if os.path.exists(executable_path) and os.path.isfile(executable_path):
+ return executable_path
+
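+# remoto executes this module on the remote host via execnet: __name__ is then
+# '__channelexec__' and every item received over the channel is a Python
+# expression (e.g. "which('lvm')") that is evaluated here, with the result
+# sent back to the mgr.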
+if __name__ == '__channelexec__':
+ for item in channel: # type: ignore
+ channel.send(eval(item)) # type: ignore
--- /dev/null
+from contextlib import contextmanager
+
+import pytest
+
+from cephadm import CephadmOrchestrator
+from tests import mock
+
+
+def set_store(self, k, v):
+ if v is None:
+ del self._store[k]
+ else:
+ self._store[k] = v
+
+
+def get_store(self, k):
+ return self._store[k]
+
+
+def get_store_prefix(self, prefix):
+ return {
+ k: v for k, v in self._store.items()
+ if k.startswith(prefix)
+ }
+
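+# Returning __file__ makes file-path options (e.g. cephadm_path) resolve to a
+# real, readable file during tests.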
+def get_ceph_option(_, key):
+ return __file__
+
+@pytest.yield_fixture()
+def cephadm_module():
+ with mock.patch("cephadm.module.CephadmOrchestrator.get_ceph_option", get_ceph_option),\
+ mock.patch("cephadm.module.CephadmOrchestrator._configure_logging", lambda *args: None),\
+ mock.patch("cephadm.module.CephadmOrchestrator.set_store", set_store),\
+ mock.patch("cephadm.module.CephadmOrchestrator.get_store", get_store),\
+ mock.patch("cephadm.module.CephadmOrchestrator.get_store_prefix", get_store_prefix):
+ CephadmOrchestrator._register_commands('')
+        m = CephadmOrchestrator.__new__(CephadmOrchestrator)
+ m._root_logger = mock.MagicMock()
+ m._store = {
+ 'ssh_config': '',
+ 'ssh_identity_key': '',
+ 'ssh_identity_pub': '',
+ 'inventory': {},
+ }
+ m.__init__('cephadm', 0, 0)
+ yield m
--- /dev/null
+import json
+import time
+from contextlib import contextmanager
+
+from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection
+
+try:
+ from typing import Any
+except ImportError:
+ pass
+
+from orchestrator import ServiceDescription, raise_if_exception, Completion, InventoryNode, \
+ StatelessServiceSpec, PlacementSpec, RGWSpec, parse_host_specs
+from ..module import CephadmOrchestrator
+from tests import mock
+from .fixtures import cephadm_module
+
+
+"""
+TODOs:
+    There is really room for improvement here. I just quickly assembled these tests.
+    In general, everything should be tested in Teuthology as well. The reason for
+    also testing this here is the shorter development round-trip time.
+"""
+
+
+
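+# Stub factory for CephadmOrchestrator._run_cephadm: the returned function
+# always yields the canned output `ret` with empty stderr and exit code 0.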
+def _run_cephadm(ret):
+ def foo(*args, **kwargs):
+ return ret, '', 0
+ return foo
+
+def mon_command(*args, **kwargs):
+ return 0, '', ''
+
+
+class TestSSH(object):
+ def _wait(self, m, c):
+ # type: (CephadmOrchestrator, Completion) -> Any
+ m.process([c])
+ m.process([c])
+
+ for _ in range(30):
+ if c.is_finished:
+ raise_if_exception(c)
+ return c.result
+ time.sleep(0.1)
+        assert False, "timeout " + str(c._state)
+
+ @contextmanager
+ def _with_host(self, m, name):
+ self._wait(m, m.add_host(name))
+ yield
+ self._wait(m, m.remove_host(name))
+
+ def test_get_unique_name(self, cephadm_module):
+ existing = [
+ ServiceDescription(service_instance='mon.a')
+ ]
+ new_mon = cephadm_module.get_unique_name(existing, 'mon')
+ assert new_mon.startswith('mon.')
+ assert new_mon != 'mon.a'
+
+ def test_host(self, cephadm_module):
+ with self._with_host(cephadm_module, 'test'):
+ assert self._wait(cephadm_module, cephadm_module.get_hosts()) == [InventoryNode('test')]
+ c = cephadm_module.get_hosts()
+ assert self._wait(cephadm_module, c) == []
+
+ @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('[]'))
+ def test_service_ls(self, cephadm_module):
+ with self._with_host(cephadm_module, 'test'):
+ c = cephadm_module.describe_service()
+ assert self._wait(cephadm_module, c) == []
+
+ @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('[]'))
+ def test_device_ls(self, cephadm_module):
+ with self._with_host(cephadm_module, 'test'):
+ c = cephadm_module.get_inventory()
+ assert self._wait(cephadm_module, c) == [InventoryNode('test')]
+
+ @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('[]'))
+ @mock.patch("cephadm.module.CephadmOrchestrator.send_command")
+ @mock.patch("cephadm.module.CephadmOrchestrator.mon_command", mon_command)
+ @mock.patch("cephadm.module.CephadmOrchestrator._get_connection")
+ def test_mon_update(self, _send_command, _get_connection, cephadm_module):
+ with self._with_host(cephadm_module, 'test'):
+ c = cephadm_module.update_mons(1, [parse_host_specs('test:0.0.0.0=a')])
+ assert self._wait(cephadm_module, c) == ["(Re)deployed mon.a on host 'test'"]
+
+ @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('[]'))
+ @mock.patch("cephadm.module.CephadmOrchestrator.send_command")
+ @mock.patch("cephadm.module.CephadmOrchestrator.mon_command", mon_command)
+ @mock.patch("cephadm.module.CephadmOrchestrator._get_connection")
+ def test_mgr_update(self, _send_command, _get_connection, cephadm_module):
+ with self._with_host(cephadm_module, 'test'):
+ c = cephadm_module.update_mgrs(1, [parse_host_specs('test:0.0.0.0')])
+ [out] = self._wait(cephadm_module, c)
+ assert "(Re)deployed mgr." in out
+ assert " on host 'test'" in out
+
+ @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.module.CephadmOrchestrator.send_command")
+ @mock.patch("cephadm.module.CephadmOrchestrator.mon_command", mon_command)
+ @mock.patch("cephadm.module.CephadmOrchestrator._get_connection")
+ def test_create_osds(self, _send_command, _get_connection, cephadm_module):
+ with self._with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec('test', DeviceSelection(paths=['']))
+ c = cephadm_module.create_osds(dg)
+ assert self._wait(cephadm_module, c) == "Created osd(s) on host 'test'"
+
+ @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.module.CephadmOrchestrator.send_command")
+ @mock.patch("cephadm.module.CephadmOrchestrator.mon_command", mon_command)
+ @mock.patch("cephadm.module.CephadmOrchestrator._get_connection")
+ def test_mds(self, _send_command, _get_connection, cephadm_module):
+ with self._with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(nodes=['test'])
+ c = cephadm_module.add_mds(StatelessServiceSpec('name', ps))
+ [out] = self._wait(cephadm_module, c)
+ assert "(Re)deployed mds.name." in out
+ assert " on host 'test'" in out
+
+ @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.module.CephadmOrchestrator.send_command")
+ @mock.patch("cephadm.module.CephadmOrchestrator.mon_command", mon_command)
+ @mock.patch("cephadm.module.CephadmOrchestrator._get_connection")
+ def test_rgw(self, _send_command, _get_connection, cephadm_module):
+ with self._with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(nodes=['test'])
+ c = cephadm_module.add_rgw(RGWSpec('realm', 'zone', ps))
+ [out] = self._wait(cephadm_module, c)
+ assert "(Re)deployed rgw.realm.zone." in out
+ assert " on host 'test'" in out
+
+ @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm(
+ json.dumps([
+ dict(
+ name='rgw.myrgw.foobar',
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ version='version',
+ state='running',
+ )
+ ])
+ ))
+ def test_remove_rgw(self, cephadm_module):
+ cephadm_module._cluster_fsid = "fsid"
+ with self._with_host(cephadm_module, 'test'):
+ c = cephadm_module.remove_rgw('myrgw')
+ out = self._wait(cephadm_module, c)
+ assert out == ["Removed rgw.myrgw.foobar from host 'test'"]
+
+ @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.module.CephadmOrchestrator.send_command")
+ @mock.patch("cephadm.module.CephadmOrchestrator.mon_command", mon_command)
+ @mock.patch("cephadm.module.CephadmOrchestrator._get_connection")
+ def test_rbd_mirror(self, _send_command, _get_connection, cephadm_module):
+ with self._with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(nodes=['test'])
+ c = cephadm_module.add_rbd_mirror(StatelessServiceSpec('name', ps))
+ [out] = self._wait(cephadm_module, c)
+ assert "(Re)deployed rbd-mirror." in out
+ assert " on host 'test'" in out
+
+ @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.module.CephadmOrchestrator.send_command")
+ @mock.patch("cephadm.module.CephadmOrchestrator.mon_command", mon_command)
+ @mock.patch("cephadm.module.CephadmOrchestrator._get_connection")
+ def test_blink_device_light(self, _send_command, _get_connection, cephadm_module):
+ with self._with_host(cephadm_module, 'test'):
+ c = cephadm_module.blink_device_light('ident', True, [('test', '')])
+ assert self._wait(cephadm_module, c) == ['Set ident light for test: on']
--- /dev/null
+import sys
+import time
+
+
+try:
+ from typing import Any
+except ImportError:
+ pass
+
+import pytest
+
+
+from orchestrator import raise_if_exception, Completion
+from .fixtures import cephadm_module
+from ..module import trivial_completion, async_completion, async_map_completion, CephadmOrchestrator
+
+
+class TestCompletion(object):
+ def _wait(self, m, c):
+ # type: (CephadmOrchestrator, Completion) -> Any
+ m.process([c])
+ m.process([c])
+
+ for _ in range(30):
+ if c.is_finished:
+ raise_if_exception(c)
+ return c.result
+ time.sleep(0.1)
+        assert False, "timeout " + str(c._state)
+
+ def test_trivial(self, cephadm_module):
+ @trivial_completion
+ def run(x):
+ return x+1
+ assert self._wait(cephadm_module, run(1)) == 2
+
+ @pytest.mark.parametrize("input", [
+ ((1, ), ),
+ ((1, 2), ),
+ (("hallo", ), ),
+ (("hallo", "foo"), ),
+ ])
+ def test_async(self, input, cephadm_module):
+ @async_completion
+ def run(*args):
+ return str(args)
+
+ assert self._wait(cephadm_module, run(*input)) == str(input)
+
+ @pytest.mark.parametrize("input,expected", [
+ ([], []),
+ ([1], ["(1,)"]),
+ (["hallo"], ["('hallo',)"]),
+ ("hi", ["('h',)", "('i',)"]),
+ (list(range(5)), [str((x, )) for x in range(5)]),
+ ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]),
+ ])
+ def test_async_map(self, input, expected, cephadm_module):
+ @async_map_completion
+ def run(*args):
+ return str(args)
+
+ c = run(input)
+ self._wait(cephadm_module, c)
+ assert c.result == expected
+
+ def test_async_self(self, cephadm_module):
+ class Run(object):
+ def __init__(self):
+ self.attr = 1
+
+ @async_completion
+ def run(self, x):
+ assert self.attr == 1
+ return x + 1
+
+ assert self._wait(cephadm_module, Run().run(1)) == 2
+
+ @pytest.mark.parametrize("input,expected", [
+ ([], []),
+ ([1], ["(1,)"]),
+ (["hallo"], ["('hallo',)"]),
+ ("hi", ["('h',)", "('i',)"]),
+ (list(range(5)), [str((x, )) for x in range(5)]),
+ ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]),
+ ])
+ def test_async_map_self(self, input, expected, cephadm_module):
+ class Run(object):
+ def __init__(self):
+ self.attr = 1
+
+ @async_map_completion
+ def run(self, *args):
+ assert self.attr == 1
+ return str(args)
+
+ c = Run().run(input)
+ self._wait(cephadm_module, c)
+ assert c.result == expected
+
+ def test_then1(self, cephadm_module):
+ @async_map_completion
+ def run(x):
+ return x+1
+
+ assert self._wait(cephadm_module, run([1,2]).then(str)) == '[2, 3]'
+
+ def test_then2(self, cephadm_module):
+ @async_map_completion
+ def run(x):
+ time.sleep(0.1)
+ return x+1
+
+ @async_completion
+ def async_str(results):
+ return str(results)
+
+ c = run([1,2]).then(async_str)
+
+ self._wait(cephadm_module, c)
+ assert c.result == '[2, 3]'
+
+ def test_then3(self, cephadm_module):
+ @async_map_completion
+ def run(x):
+ time.sleep(0.1)
+ return x+1
+
+ def async_str(results):
+ return async_completion(str)(results)
+
+ c = run([1,2]).then(async_str)
+
+ self._wait(cephadm_module, c)
+ assert c.result == '[2, 3]'
+
+ def test_then4(self, cephadm_module):
+ @async_map_completion
+ def run(x):
+ time.sleep(0.1)
+ return x+1
+
+ def async_str(results):
+ return async_completion(str)(results).then(lambda x: x + "hello")
+
+ c = run([1,2]).then(async_str)
+
+ self._wait(cephadm_module, c)
+ assert c.result == '[2, 3]hello'
+
+ @pytest.mark.skip(reason="see limitation of async_map_completion")
+ def test_then5(self, cephadm_module):
+ @async_map_completion
+ def run(x):
+ time.sleep(0.1)
+ return async_completion(str)(x+1)
+
+ c = run([1,2])
+
+ self._wait(cephadm_module, c)
+ assert c.result == "['2', '3']"
+
+ def test_raise(self, cephadm_module):
+ @async_completion
+ def run(x):
+ raise ZeroDivisionError()
+
+ with pytest.raises(ZeroDivisionError):
+ self._wait(cephadm_module, run(1))
'type': 'str',
'default': None,
'desc': 'Orchestrator backend',
- 'enum_allowed': ['ssh', 'rook', 'ansible', 'deepsea',
+ 'enum_allowed': ['cephadm', 'rook', 'ansible', 'deepsea',
'test_orchestrator'],
'runtime': True,
},
+++ /dev/null
-.vagrant
-ssh-config
+++ /dev/null
-Development
-===========
-
-
-There are multiple ways to set up a development environment for the SSH orchestrator.
-In the following I'll use the `vstart` method.
-
-1) Make sure remoto is installed (0.35 or newer)
-
-2) Use vstart to spin up a cluster
-
-
-::
-
- # ../src/vstart.sh -n --ssh
-
-*Note that when you specify `--ssh` you have to have passwordless ssh access to localhost*
-
-It will add your ~/.ssh/id_rsa and ~/.ssh/id_rsa.pub to `mgr/ssh/ssh_identity_{key, pub}`
-and add your $HOSTNAME to the list of known hosts.
-
-This will also enable the ssh mgr module and enable it as the orchestrator backend.
-
-*Optional:*
-
-While the above is sufficient for most operations, you may want to add a second host to the mix.
-There is `Vagrantfile` for creating a minimal cluster in `src/pybind/mgr/ssh/`.
-
-If you wish to extend the one-node-localhost cluster to i.e. test more sophisticated OSD deployments you can follow the next steps:
-
-From within the `src/pybind/mgr/ssh` directory.
-
-
-1) Spawn VMs
-
-::
-
- # vagrant up
-
-This will spawn three machines.
-mon0, mgr0, osd0
-
-NUM_DAEMONS can be used to increase the number of VMs created. (defaults to 1)
-
-If will also come with the necessary packages preinstalled as well as your ~/.ssh/id_rsa.pub key
-injected. (to users root and vagrant; the SSH-orchestrator currently connects as root)
-
-
-2) Update the ssh-config
-
-The SSH-orchestrator needs to understand how to connect to the new node. Most likely the VM isn't reachable with the default settings used:
-
-```
-Host *
-User root
-StrictHostKeyChecking no
-```
-
-You want to adjust this by retrieving an adapted ssh_config from Vagrant.
-
-::
-
- # vagrant ssh-config > ssh-config
-
-
-Now set the newly created config for Ceph.
-
-::
-
- # ceph ssh set-ssh-config -i <path_to_ssh_conf>
-
-
-3) Add the new host
-
-Add the newly created host(s) to the inventory.
-
-::
-
-
- # ceph orchestrator host add <host>
-
-
-4) Verify the inventory
-
-::
-
- # ceph orchestrator host ls
-
-
-You should see the hostname in the list.
-
-Understanding ``AsyncCompletion``
-=================================
-
-How can I store temporary variables?
-------------------------------------
-
-Let's imagine you want to write code similar to
-
-.. code:: python
-
- hosts = self.get_hosts()
- inventory = self.get_inventory(hosts)
- return self._create_osd(hosts, drive_group, inventory)
-
-That won't work, as ``get_hosts`` and ``get_inventory`` return objects
-of type ``AsyncCompletion``.
-
-Now let's imaging a Python 3 world, where we can use ``async`` and
-``await``. Then we actually can write this like so:
-
-.. code:: python
-
- hosts = await self.get_hosts()
- inventory = await self.get_inventory(hosts)
- return self._create_osd(hosts, drive_group, inventory)
-
-Let's use a simple example to make this clear:
-
-.. code:: python
-
- val = await func_1()
- return func_2(val)
-
-As we're not yet in Python 3, we need to do write ``await`` manually by
-calling ``orchestrator.Completion.then()``:
-
-.. code:: python
-
- func_1().then(lambda val: func_2(val))
-
- # or
- func_1().then(func_2)
-
-Now let's desugar the original example:
-
-.. code:: python
-
- hosts = await self.get_hosts()
- inventory = await self.get_inventory(hosts)
- return self._create_osd(hosts, drive_group, inventory)
-
-Now let's replace one ``async`` at a time:
-
-.. code:: python
-
- hosts = await self.get_hosts()
- return self.get_inventory(hosts).then(lambda inventory:
- self._create_osd(hosts, drive_group, inventory))
-
-Then finally:
-
-.. code:: python
-
- self.get_hosts().then(lambda hosts:
- self.get_inventory(hosts).then(lambda inventory:
- self._create_osd(hosts,
- drive_group, inventory)))
-
-This also works without lambdas:
-
-.. code:: python
-
- def call_inventory(hosts):
- def call_create(inventory)
- return self._create_osd(hosts, drive_group, inventory)
-
- return self.get_inventory(hosts).then(call_create)
-
- self.get_hosts(call_inventory)
-
-We should add support for ``await`` as soon as we're on Python 3.
-
-I want to call my function for every host!
-------------------------------------------
-
-Imagine you have a function that looks like so:
-
-.. code:: python
-
- @async_completion
- def deploy_stuff(name, node):
- ...
-
-And you want to call ``deploy_stuff`` like so:
-
-.. code:: python
-
- return [deploy_stuff(name, node) for node in nodes]
-
-This won't work as expected. The number of ``AsyncCompletion`` objects
-created should be ``O(1)``. But there is a solution:
-``@async_map_completion``
-
-.. code:: python
-
- @async_map_completion
- def deploy_stuff(name, node):
- ...
-
- return deploy_stuff([(name, node) for node in nodes])
-
-This way, we're only creating one ``AsyncCompletion`` object. Note that
-you should not create new ``AsyncCompletion`` within ``deploy_stuff``, as
-we're then no longer have ``O(1)`` completions:
-
-.. code:: python
-
- @async_completion
- def other_async_function():
- ...
-
- @async_map_completion
- def deploy_stuff(name, node):
- return other_async_function() # wrong!
-
-Why do we need this?
---------------------
-
-I've tried to look into making Completions composable by being able to
-call one completion from another completion. I.e. making them re-usable
-using Promises E.g.:
-
-.. code:: python
-
- >>> return self.get_hosts().then(self._create_osd)
-
-where ``get_hosts`` returns a Completion of list of hosts and
-``_create_osd`` takes a list of hosts.
-
-The concept behind this is to store the computation steps explicit and
-then explicitly evaluate the chain:
-
-.. code:: python
-
- p = Completion(on_complete=lambda x: x*2).then(on_complete=lambda x: str(x))
- p.finalize(2)
- assert p.result = "4"
-
-or graphically:
-
-::
-
- +---------------+ +-----------------+
- | | then | |
- | lambda x: x*x | +--> | lambda x: str(x)|
- | | | |
- +---------------+ +-----------------+
+++ /dev/null
-# vi: set ft=ruby :
-
-NUM_DAEMONS = ENV["NUM_DAEMONS"] ? ENV["NUM_DAEMONS"].to_i : 1
-
-Vagrant.configure("2") do |config|
- config.vm.synced_folder ".", "/vagrant", disabled: true
- config.vm.network "private_network", type: "dhcp"
- config.vm.box = "centos/7"
-
- (0..NUM_DAEMONS - 1).each do |i|
- config.vm.define "mon#{i}" do |mon|
- mon.vm.hostname = "mon#{i}"
- end
- config.vm.define "mgr#{i}" do |mgr|
- mgr.vm.hostname = "mgr#{i}"
- end
- config.vm.define "osd#{i}" do |osd|
- osd.vm.hostname = "osd#{i}"
- osd.vm.provider :libvirt do |libvirt|
- libvirt.storage :file, :size => '5G'
- libvirt.storage :file, :size => '5G'
- end
- end
- end
-
- config.vm.provision "file", source: "~/.ssh/id_rsa.pub", destination: "~/.ssh/id_rsa.pub"
- config.vm.provision "shell", inline: <<-SHELL
- cat /home/vagrant/.ssh/id_rsa.pub >> /home/vagrant/.ssh/authorized_keys
- sudo cp -r /home/vagrant/.ssh /root/.ssh
- SHELL
-
- config.vm.provision "shell", inline: <<-SHELL
- sudo yum install -y yum-utils
- sudo yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
- sudo rpm --import 'https://download.ceph.com/keys/release.asc'
- curl -L https://shaman.ceph.com/api/repos/ceph/master/latest/centos/7/repo/ | sudo tee /etc/yum.repos.d/shaman.repo
- sudo yum install -y python36 podman ceph
- sudo ln -s /usr/bin/python36 /usr/bin/python3 || true
- SHELL
-end
+++ /dev/null
-import os
-
-if 'UNITTEST' in os.environ:
- import tests
-
-from .module import SSHOrchestrator
\ No newline at end of file
+++ /dev/null
-[ceph]
-name=Ceph packages for $basearch
-baseurl=https://download.ceph.com/rpm-mimic/el7/$basearch
-enabled=1
-priority=2
-gpgcheck=1
-gpgkey=https://download.ceph.com/keys/release.asc
-
-[ceph-noarch]
-name=Ceph noarch packages
-baseurl=https://download.ceph.com/rpm-mimic/el7/noarch
-enabled=1
-priority=2
-gpgcheck=1
-gpgkey=https://download.ceph.com/keys/release.asc
-
-[ceph-source]
-name=Ceph source packages
-baseurl=https://download.ceph.com/rpm-mimic/el7/SRPMS
-enabled=0
-priority=2
-gpgcheck=1
-gpgkey=https://download.ceph.com/keys/release.asc
+++ /dev/null
-import json
-import errno
-import logging
-from functools import wraps
-
-import string
-try:
- from typing import List, Dict, Optional, Callable, TypeVar, Type, Any
-except ImportError:
- pass # just for type checking
-
-
-import six
-import os
-import random
-import tempfile
-import multiprocessing.pool
-import shutil
-import subprocess
-
-from ceph.deployment import inventory
-from mgr_module import MgrModule
-import orchestrator
-from orchestrator import OrchestratorError
-
-from . import remotes
-
-try:
- import remoto
- import remoto.process
-except ImportError as e:
- remoto = None
- remoto_import_error = str(e)
-
-try:
- from typing import List
-except ImportError:
- pass
-
-logger = logging.getLogger(__name__)
-
-DEFAULT_SSH_CONFIG = ('Host *\n'
- 'User root\n'
- 'StrictHostKeyChecking no\n'
- 'UserKnownHostsFile /dev/null\n')
-
-# for py2 compat
-try:
- from tempfile import TemporaryDirectory # py3
-except ImportError:
- # define a minimal (but sufficient) equivalent for <= py 3.2
- class TemporaryDirectory(object): # type: ignore
- def __init__(self):
- self.name = tempfile.mkdtemp()
-
- def __enter__(self):
- if not self.name:
- self.name = tempfile.mkdtemp()
- return self.name
-
- def cleanup(self):
- shutil.rmtree(self.name)
-
- def __exit__(self, exc_type, exc_value, traceback):
- self.cleanup()
-
-
-# high-level TODO:
-# - bring over some of the protections from ceph-deploy that guard against
-# multiple bootstrapping / initialization
-
-def _name_to_entity_name(name):
- """
- Map from service names to ceph entity names (as seen in config)
- """
- if name.startswith('rgw.') or name.startswith('rbd-mirror'):
- return 'client.' + name
- else:
- return name
-
-
-class AsyncCompletion(orchestrator.Completion):
- def __init__(self,
- _first_promise=None, # type: Optional[orchestrator.Completion]
- value=orchestrator._Promise.NO_RESULT, # type: Any
- on_complete=None, # type: Optional[Callable]
- name=None, # type: Optional[str]
- many=False, # type: bool
- ):
-
- assert SSHOrchestrator.instance is not None
- self.many = many
- if name is None and on_complete is not None:
- name = on_complete.__name__
- super(AsyncCompletion, self).__init__(_first_promise, value, on_complete, name)
-
- @property
- def _progress_reference(self):
- # type: () -> Optional[orchestrator.ProgressReference]
- if hasattr(self._on_complete_, 'progress_id'): # type: ignore
- return self._on_complete_ # type: ignore
- return None
-
- @property
- def _on_complete(self):
- # type: () -> Optional[Callable]
- if self._on_complete_ is None:
- return None
-
- def callback(result):
- try:
- self._on_complete_ = None
- self._finalize(result)
- except Exception as e:
- self.fail(e)
-
- def error_callback(e):
- self.fail(e)
-
- if six.PY3:
- _callback = self._on_complete_
- else:
- def _callback(*args, **kwargs):
- # Py2 only: _worker_pool doesn't call error_callback
- try:
- return self._on_complete_(*args, **kwargs)
- except Exception as e:
- self.fail(e)
-
- def run(value):
- assert SSHOrchestrator.instance
- if self.many:
- if not value:
- logger.info('calling map_async without values')
- callback([])
- if six.PY3:
- SSHOrchestrator.instance._worker_pool.map_async(_callback, value,
- callback=callback,
- error_callback=error_callback)
- else:
- SSHOrchestrator.instance._worker_pool.map_async(_callback, value,
- callback=callback)
- else:
- if six.PY3:
- SSHOrchestrator.instance._worker_pool.apply_async(_callback, (value,),
- callback=callback, error_callback=error_callback)
- else:
- SSHOrchestrator.instance._worker_pool.apply_async(_callback, (value,),
- callback=callback)
- return self.ASYNC_RESULT
-
- return run
-
- @_on_complete.setter
- def _on_complete(self, inner):
- # type: (Callable) -> None
- self._on_complete_ = inner
-
-
-def ssh_completion(cls=AsyncCompletion, **c_kwargs):
- # type: (Type[orchestrator.Completion], Any) -> Callable
- """
- See ./HACKING.rst for a how-to
- """
- def decorator(f):
- @wraps(f)
- def wrapper(*args):
-
- name = f.__name__
- many = c_kwargs.get('many', False)
-
- # Some weired logic to make calling functions with multiple arguments work.
- if len(args) == 1:
- [value] = args
- if many and value and isinstance(value[0], tuple):
- return cls(on_complete=lambda x: f(*x), value=value, name=name, **c_kwargs)
- else:
- return cls(on_complete=f, value=value, name=name, **c_kwargs)
- else:
- if many:
- self, value = args
-
- def call_self(inner_args):
- if not isinstance(inner_args, tuple):
- inner_args = (inner_args, )
- return f(self, *inner_args)
-
- return cls(on_complete=call_self, value=value, name=name, **c_kwargs)
- else:
- return cls(on_complete=lambda x: f(*x), value=args, name=name, **c_kwargs)
-
-
- return wrapper
- return decorator
-
-
-def async_completion(f):
- # type: (Callable) -> Callable[..., AsyncCompletion]
- """
- See ./HACKING.rst for a how-to
-
- :param f: wrapped function
- """
- return ssh_completion()(f)
-
-
-def async_map_completion(f):
- # type: (Callable) -> Callable[..., AsyncCompletion]
- """
- See ./HACKING.rst for a how-to
-
- :param f: wrapped function
-
- kind of similar to
-
- >>> def sync_map(f):
- ... return lambda x: map(f, x)
-
- """
- return ssh_completion(many=True)(f)
-
-
-def trivial_completion(f):
- # type: (Callable) -> Callable[..., orchestrator.Completion]
- return ssh_completion(cls=orchestrator.Completion)(f)
-
-
-def trivial_result(val):
- return AsyncCompletion(value=val, name='trivial_result')
-
-
-class SSHOrchestrator(MgrModule, orchestrator.Orchestrator):
-
- _STORE_HOST_PREFIX = "host"
-
-
- instance = None
- NATIVE_OPTIONS = [] # type: List[Any]
- MODULE_OPTIONS = [
- {
- 'name': 'ssh_config_file',
- 'type': 'str',
- 'default': None,
- 'desc': 'customized SSH config file to connect to managed hosts',
- },
- {
- 'name': 'inventory_cache_timeout',
- 'type': 'seconds',
- 'default': 10 * 60,
- 'desc': 'seconds to cache device inventory',
- },
- {
- 'name': 'service_cache_timeout',
- 'type': 'seconds',
- 'default': 60,
- 'desc': 'seconds to cache service (daemon) inventory',
- },
- {
- 'name': 'mode',
- 'type': 'str',
- 'enum_allowed': ['root', 'cephadm-package'],
- 'default': 'root',
- 'desc': 'mode for remote execution of cephadm',
- },
- {
- 'name': 'container_image_base',
- 'default': 'ceph/ceph',
- 'desc': 'Container image name, without the tag',
- 'runtime': True,
- },
- ]
-
- def __init__(self, *args, **kwargs):
- super(SSHOrchestrator, self).__init__(*args, **kwargs)
- self._cluster_fsid = self.get('mon_map')['fsid']
-
- self.config_notify()
-
- path = self.get_ceph_option('cephadm_path')
- try:
- with open(path, 'r') as f:
- self._cephadm = f.read()
- except (IOError, TypeError) as e:
- raise RuntimeError("unable to read cephadm at '%s': %s" % (
- path, str(e)))
-
- self._worker_pool = multiprocessing.pool.ThreadPool(1)
-
- self._reconfig_ssh()
-
- SSHOrchestrator.instance = self
- self.all_progress_references = list() # type: List[orchestrator.ProgressReference]
-
- # load inventory
- i = self.get_store('inventory')
- if i:
- self.inventory = json.loads(i)
- else:
- self.inventory = dict()
- self.log.debug('Loaded inventory %s' % self.inventory)
-
- # The values are cached by instance.
- # cache is invalidated by
- # 1. timeout
- # 2. refresh parameter
- self.inventory_cache = orchestrator.OutdatablePersistentDict(
- self, self._STORE_HOST_PREFIX + '.devices')
-
- self.service_cache = orchestrator.OutdatablePersistentDict(
- self, self._STORE_HOST_PREFIX + '.services')
-
- # ensure the host lists are in sync
- for h in self.inventory.keys():
- if h not in self.inventory_cache:
- self.log.debug('adding inventory item for %s' % h)
- self.inventory_cache[h] = orchestrator.OutdatableData()
- if h not in self.service_cache:
- self.log.debug('adding service item for %s' % h)
- self.service_cache[h] = orchestrator.OutdatableData()
- for h in self.inventory_cache:
- if h not in self.inventory:
- del self.inventory_cache[h]
- for h in self.service_cache:
- if h not in self.inventory:
- del self.service_cache[h]
-
- def shutdown(self):
- self.log.error('ssh: shutdown')
- self._worker_pool.close()
- self._worker_pool.join()
-
- def config_notify(self):
- """
- This method is called whenever one of our config options is changed.
- """
- for opt in self.MODULE_OPTIONS:
- setattr(self,
- opt['name'], # type: ignore
- self.get_module_option(opt['name']) or opt['default']) # type: ignore
- self.log.debug(' mgr option %s = %s',
- opt['name'], getattr(self, opt['name'])) # type: ignore
- for opt in self.NATIVE_OPTIONS:
- setattr(self,
- opt, # type: ignore
- self.get_ceph_option(opt))
- self.log.debug(' native option %s = %s', opt, getattr(self, opt)) # type: ignore
-
- def get_unique_name(self, existing, prefix=None, forcename=None):
- """
- Generate a unique random service name
- """
- if forcename:
- if len([d for d in existing if d.service_instance == forcename]):
- raise RuntimeError('specified name %s already in use', forcename)
- return forcename
-
- while True:
- if prefix:
- name = prefix + '.'
- else:
- name = ''
- name += ''.join(random.choice(string.ascii_lowercase)
- for _ in range(6))
- if len([d for d in existing if d.service_instance == name]):
- self.log('name %s exists, trying again', name)
- continue
- return name
-
- def _save_inventory(self):
- self.set_store('inventory', json.dumps(self.inventory))
-
- def _reconfig_ssh(self):
- temp_files = [] # type: list
- ssh_options = [] # type: List[str]
-
- # ssh_config
- ssh_config_fname = self.ssh_config_file
- ssh_config = self.get_store("ssh_config")
- if ssh_config is not None or ssh_config_fname is None:
- if not ssh_config:
- ssh_config = DEFAULT_SSH_CONFIG
- f = tempfile.NamedTemporaryFile(prefix='ceph-mgr-ssh-conf-')
- os.fchmod(f.fileno(), 0o600)
- f.write(ssh_config.encode('utf-8'))
- f.flush() # make visible to other processes
- temp_files += [f]
- ssh_config_fname = f.name
- if ssh_config_fname:
- if not os.path.isfile(ssh_config_fname):
- raise Exception("ssh_config \"{}\" does not exist".format(
- ssh_config_fname))
- ssh_options += ['-F', ssh_config_fname]
-
- # identity
- ssh_key = self.get_store("ssh_identity_key")
- ssh_pub = self.get_store("ssh_identity_pub")
- self.ssh_pub = ssh_pub
- self.ssh_key = ssh_key
- if ssh_key and ssh_pub:
- tkey = tempfile.NamedTemporaryFile(prefix='ceph-mgr-ssh-identity-')
- tkey.write(ssh_key.encode('utf-8'))
- os.fchmod(tkey.fileno(), 0o600)
- tkey.flush() # make visible to other processes
- tpub = open(tkey.name + '.pub', 'w')
- os.fchmod(tpub.fileno(), 0o600)
- tpub.write(ssh_pub)
- tpub.flush() # make visible to other processes
- temp_files += [tkey, tpub]
- ssh_options += ['-i', tkey.name]
-
- self._temp_files = temp_files
- if ssh_options:
- self._ssh_options = ' '.join(ssh_options) # type: Optional[str]
- else:
- self._ssh_options = None
- self.log.info('ssh_options %s' % ssh_options)
-
- if self.mode == 'root':
- self.ssh_user = 'root'
- elif self.mode == 'cephadm-package':
- self.ssh_user = 'cephadm'
-
- @staticmethod
- def can_run():
- if remoto is not None:
- return True, ""
- else:
- return False, "loading remoto library:{}".format(
- remoto_import_error)
-
- def available(self):
- """
- The SSH orchestrator is always available.
- """
- return self.can_run()
-
- def process(self, completions):
- """
- Does nothing, as completions are processed in another thread.
- """
- if completions:
- self.log.info("process: completions={0}".format(orchestrator.pretty_print(completions)))
-
- for p in completions:
- p.finalize()
-
- def _require_hosts(self, hosts):
- """
- Raise an error if any of the given hosts are unregistered.
- """
- if isinstance(hosts, six.string_types):
- hosts = [hosts]
- keys = self.inventory_cache.keys()
- unregistered_hosts = set(hosts) - keys
- if unregistered_hosts:
- logger.warning('keys = {}'.format(keys))
- raise RuntimeError("Host(s) {} not registered".format(
- ", ".join(map(lambda h: "'{}'".format(h),
- unregistered_hosts))))
-
- @orchestrator._cli_write_command(
- prefix='ssh set-ssh-config',
- desc='Set the ssh_config file (use -i <ssh_config>)')
- def _set_ssh_config(self, inbuf=None):
- """
- Set an ssh_config file provided from stdin
-
- TODO:
- - validation
- """
- if inbuf is None or len(inbuf) == 0:
- return -errno.EINVAL, "", "empty ssh config provided"
- self.set_store("ssh_config", inbuf)
- return 0, "", ""
-
- @orchestrator._cli_write_command(
- prefix='ssh clear-ssh-config',
- desc='Clear the ssh_config file')
- def _clear_ssh_config(self):
- """
- Clear the ssh_config file provided from stdin
- """
- self.set_store("ssh_config", None)
- self.ssh_config_tmp = None
- return 0, "", ""
-
- @orchestrator._cli_write_command(
- 'ssh generate-key',
- desc='Generate a cluster SSH key (if not present)')
- def _generate_key(self):
- if not self.ssh_pub or not self.ssh_key:
- self.log.info('Generating ssh key...')
- tmp_dir = TemporaryDirectory()
- path = tmp_dir.name + '/key'
- try:
- subprocess.call([
- '/usr/bin/ssh-keygen',
- '-C', 'ceph-%s' % self._cluster_fsid,
- '-N', '',
- '-f', path
- ])
- with open(path, 'r') as f:
- secret = f.read()
- with open(path + '.pub', 'r') as f:
- pub = f.read()
- finally:
- os.unlink(path)
- os.unlink(path + '.pub')
- tmp_dir.cleanup()
- self.set_store('ssh_identity_key', secret)
- self.set_store('ssh_identity_pub', pub)
- self._reconfig_ssh()
- return 0, '', ''
-
- @orchestrator._cli_write_command(
- 'ssh clear-key',
- desc='Clear cluster SSH key')
- def _clear_key(self):
- self.set_store('ssh_identity_key', None)
- self.set_store('ssh_identity_pub', None)
- self._reconfig_ssh()
- return 0, '', ''
-
- @orchestrator._cli_read_command(
- 'ssh get-pub-key',
- desc='Show SSH public key for connecting to cluster hosts')
- def _get_pub_key(self):
- if self.ssh_pub:
- return 0, self.ssh_pub, ''
- else:
- return -errno.ENOENT, '', 'No cluster SSH key defined'
-
- @orchestrator._cli_read_command(
- 'ssh get-user',
- desc='Show user for SSHing to cluster hosts')
- def _get_user(self):
- return 0, self.ssh_user, ''
-
- @orchestrator._cli_read_command(
- 'ssh check-host',
- 'name=host,type=CephString',
- 'Check whether we can access and manage a remote host')
- def _check_host(self, host):
- out, err, code = self._run_cephadm(host, '', 'check-host', [],
- error_ok=True, no_fsid=True)
- if code:
- return 1, '', err
- return 0, 'host ok', err
-
- @orchestrator._cli_write_command(
- 'ssh prepare-host',
- 'name=host,type=CephString',
- 'Try to prepare a host for remote management')
- def _prepare_host(self, host):
- out, err, code = self._run_cephadm(host, '', 'prepare-host', [],
- error_ok=True, no_fsid=True)
- if code:
- return 1, '', err
- return 0, 'host ok', err
-
- def _get_connection(self, host):
- """
- Setup a connection for running commands on remote host.
- """
- n = self.ssh_user + '@' + host
- self.log.info("Opening connection to {} with ssh options '{}'".format(
- n, self._ssh_options))
- conn = remoto.Connection(
- n,
- logger=self.log.getChild(n),
- ssh_options=self._ssh_options)
-
- conn.import_module(remotes)
-
- return conn
-
- def _executable_path(self, conn, executable):
- """
- Remote validator that accepts a connection object to ensure that a certain
- executable is available returning its full path if so.
-
- Otherwise an exception with thorough details will be raised, informing the
- user that the executable was not found.
- """
- executable_path = conn.remote_module.which(executable)
- if not executable_path:
- raise RuntimeError("Executable '{}' not found on host '{}'".format(
- executable, conn.hostname))
- self.log.info("Found executable '{}' at path '{}'".format(executable,
- executable_path))
- return executable_path
-
- def _run_cephadm(self, host, entity, command, args,
- stdin=None,
- no_fsid=False,
- error_ok=False,
- image=None):
- """
- Run cephadm on the remote host with the given command + args
- """
- conn = self._get_connection(host)
-
- try:
- if not image:
- # get container image
- ret, image, err = self.mon_command({
- 'prefix': 'config get',
- 'who': _name_to_entity_name(entity),
- 'key': 'container_image',
- })
- image = image.strip()
- self.log.debug('%s container image %s' % (entity, image))
-
- final_args = [
- '--image', image,
- command
- ]
- if not no_fsid:
- final_args += ['--fsid', self._cluster_fsid]
- final_args += args
-
- if self.mode == 'root':
- self.log.debug('args: %s' % final_args)
- self.log.debug('stdin: %s' % stdin)
- script = 'injected_argv = ' + json.dumps(final_args) + '\n'
- if stdin:
- script += 'injected_stdin = ' + json.dumps(stdin) + '\n'
- script += self._cephadm
- out, err, code = remoto.process.check(
- conn,
- ['/usr/bin/python', '-u'],
- stdin=script.encode('utf-8'))
- elif self.mode == 'cephadm-package':
- out, err, code = remoto.process.check(
- conn,
- ['sudo', '/usr/bin/cephadm'] + final_args,
- stdin=stdin)
- self.log.debug('exit code %s out %s err %s' % (code, out, err))
- if code and not error_ok:
- raise RuntimeError(
- 'cephadm exited with an error code: %d, stderr:%s' % (
- code, '\n'.join(err)))
- return out, err, code
-
- except Exception as ex:
- self.log.exception(ex)
- raise
-
- finally:
- conn.exit()
-
- def _get_hosts(self, wanted=None):
- return self.inventory_cache.items_filtered(wanted)
-
- @async_completion
- def add_host(self, host):
- """
- Add a host to be managed by the orchestrator.
-
- :param host: host name
- """
- self.inventory[host] = {}
- self._save_inventory()
- self.inventory_cache[host] = orchestrator.OutdatableData()
- self.service_cache[host] = orchestrator.OutdatableData()
- return "Added host '{}'".format(host)
-
- @async_completion
- def remove_host(self, host):
- """
- Remove a host from orchestrator management.
-
- :param host: host name
- """
- del self.inventory[host]
- self._save_inventory()
- del self.inventory_cache[host]
- del self.service_cache[host]
- return "Removed host '{}'".format(host)
-
- @trivial_completion
- def get_hosts(self):
- """
- Return a list of hosts managed by the orchestrator.
-
- Notes:
- - skip async: manager reads from cache.
-
- TODO:
- - InventoryNode probably needs to be able to report labels
- """
- return [orchestrator.InventoryNode(host_name) for host_name in self.inventory_cache]
-
- """
- def add_host_label(self, host, label):
- if host not in self.inventory:
- raise OrchestratorError('host %s does not exist' % host)
-
- @log_exceptions
- def run(host, label):
- if 'labels' not in self.inventory[host]:
- self.inventory[host]['labels'] = list()
- if label not in self.inventory[host]['labels']:
- self.inventory[host]['labels'].append(label)
- self._save_inventory()
- return 'Added label %s to host %s' % (label, host)
-
- return SSHWriteCompletion(
- self._worker_pool.apply_async(run, (host, label)))
-
- def remove_host_label(self, host, label):
- if host not in self.inventory:
- raise OrchestratorError('host %s does not exist' % host)
-
- @log_exceptions
- def run(host, label):
- if 'labels' not in self.inventory[host]:
- self.inventory[host]['labels'] = list()
- if label in self.inventory[host]['labels']:
- self.inventory[host]['labels'].remove(label)
- self._save_inventory()
- return 'Removed label %s to host %s' % (label, host)
-
- return SSHWriteCompletion(
- self._worker_pool.apply_async(run, (host, label)))
- """
-
- @async_map_completion
- def _refresh_host_services(self, host):
- out, err, code = self._run_cephadm(
- host, 'mon', 'ls', [], no_fsid=True)
- data = json.loads(''.join(out))
- self.log.error('refreshed host %s services: %s' % (host, data))
- self.service_cache[host] = orchestrator.OutdatableData(data)
- return data
-
- def _get_services(self,
- service_type=None,
- service_name=None,
- service_id=None,
- node_name=None,
- refresh=False):
- hosts = []
- wait_for_args = []
- in_cache = []
- for host, host_info in self.service_cache.items_filtered():
- hosts.append(host)
- if host_info.outdated(self.service_cache_timeout) or refresh:
- self.log.info("refresing stale services for '{}'".format(host))
- wait_for_args.append((host,))
- else:
- self.log.debug('have recent services for %s: %s' % (
- host, host_info.data))
- in_cache.append(host_info.data)
-
- def _get_services_result(results):
- services = {}
- for host, data in zip(hosts, results + in_cache):
- services[host] = data
-
- result = []
- for host, ls in services.items():
- for d in ls:
- if not d['style'].startswith('cephadm'):
- self.log.debug('ignoring non-cephadm on %s: %s' % (host, d))
- continue
- if d['fsid'] != self._cluster_fsid:
- self.log.debug('ignoring foreign daemon on %s: %s' % (host, d))
- continue
- self.log.debug('including %s' % d)
- sd = orchestrator.ServiceDescription()
- sd.service_type = d['name'].split('.')[0]
- if service_type and service_type != sd.service_type:
- continue
- if '.' in d['name']:
- sd.service_instance = '.'.join(d['name'].split('.')[1:])
- else:
- sd.service_instance = host # e.g., crash
- if service_id and service_id != sd.service_instance:
- continue
- if service_name and not sd.service_instance.startswith(service_name + '.'):
- continue
- sd.nodename = host
- sd.container_id = d.get('container_id')
- sd.container_image_name = d.get('container_image_name')
- sd.container_image_id = d.get('container_image_id')
- sd.version = d.get('version')
- sd.status_desc = d['state']
- sd.status = {
- 'running': 1,
- 'stopped': 0,
- 'error': -1,
- 'unknown': -1,
- }[d['state']]
- result.append(sd)
- return result
-
- return self._refresh_host_services(wait_for_args).then(
- _get_services_result)
-
-
- def describe_service(self, service_type=None, service_id=None,
- node_name=None, refresh=False):
- if service_type not in ("mds", "osd", "mgr", "mon", 'rgw', "nfs", None):
- raise orchestrator.OrchestratorValidationError(
- service_type + " unsupported")
- result = self._get_services(service_type,
- service_id=service_id,
- node_name=node_name,
- refresh=refresh)
- return result
-
- def service_action(self, action, service_type,
- service_name=None,
- service_id=None):
- self.log.debug('service_action action %s type %s name %s id %s' % (
- action, service_type, service_name, service_id))
- if action == 'reload':
- return trivial_result(["Reload is a no-op"])
-
- def _proc_daemons(daemons):
- args = []
- for d in daemons:
- args.append((d.service_type, d.service_instance,
- d.nodename, action))
- if not args:
- if service_name:
- n = service_name + '-*'
- else:
- n = service_id
- raise orchestrator.OrchestratorError(
- 'Unable to find %s.%s daemon(s)' % (
- service_type, n))
- return self._service_action(args)
-
- return self._get_services(
- service_type,
- service_name=service_name,
- service_id=service_id).then(_proc_daemons)
-
- @async_map_completion
- def _service_action(self, service_type, service_id, host, action):
- if action == 'redeploy':
- # recreate the systemd unit and then restart
- if service_type == 'mon':
- # get mon. key
- ret, keyring, err = self.mon_command({
- 'prefix': 'auth get',
- 'entity': 'mon.',
- })
- else:
- ret, keyring, err = self.mon_command({
- 'prefix': 'auth get',
- 'entity': '%s.%s' % (service_type, service_id),
- })
- return self._create_daemon(service_type, service_id, host,
- keyring)
-
- actions = {
- 'start': ['reset-failed', 'start'],
- 'stop': ['stop'],
- 'restart': ['reset-failed', 'restart'],
- }
- name = '%s.%s' % (service_type, service_id)
- for a in actions[action]:
- out, err, code = self._run_cephadm(
- host, name, 'unit',
- ['--name', name, a],
- error_ok=True)
- self.service_cache.invalidate(host)
- self.log.debug('_service_action code %s out %s' % (code, out))
- return trivial_result("{} {} from host '{}'".format(action, name, host))
-
- def get_inventory(self, node_filter=None, refresh=False):
- """
- Return the storage inventory of nodes matching the given filter.
-
- :param node_filter: node filter
-
- TODO:
- - add filtering by label
- """
- if node_filter:
- hosts = node_filter.nodes
- self._require_hosts(hosts)
- hosts = self._get_hosts(hosts)
- else:
- # this implies the returned hosts are registered
- hosts = self._get_hosts()
-
- @async_map_completion
- def _get_inventory(host, host_info):
- # type: (str, orchestrator.OutdatableData) -> orchestrator.InventoryNode
-
- if host_info.outdated(self.inventory_cache_timeout) or refresh:
- self.log.info("refresh stale inventory for '{}'".format(host))
- out, err, code = self._run_cephadm(
- host, 'osd',
- 'ceph-volume',
- ['--', 'inventory', '--format=json'])
- data = json.loads(''.join(out))
- host_info = orchestrator.OutdatableData(data)
- self.inventory_cache[host] = host_info
- else:
- self.log.debug("reading cached inventory for '{}'".format(host))
-
- devices = inventory.Devices.from_json(host_info.data)
- return orchestrator.InventoryNode(host, devices)
-
- return _get_inventory(hosts)
-
- def blink_device_light(self, ident_fault, on, locs):
- @async_map_completion
- def blink(host, dev):
- cmd = [
- 'lsmcli',
- 'local-disk-%s-led-%s' % (
- ident_fault,
- 'on' if on else 'off'),
- '--path', '/dev/' + dev,
- ]
- out, err, code = self._run_cephadm(
- host, 'osd', 'shell', ['--'] + cmd,
- error_ok=True)
- if code:
- raise RuntimeError(
- 'Unable to affect %s light for %s:%s. Command: %s' % (
- ident_fault, host, dev, ' '.join(cmd)))
- return "Set %s light for %s:%s %s" % (
- ident_fault, host, dev, 'on' if on else 'off')
-
- return blink(locs)
-
- @async_completion
- def _create_osd(self, all_hosts_, drive_group):
- all_hosts = orchestrator.InventoryNode.get_host_names(all_hosts_)
- assert len(drive_group.hosts(all_hosts)) == 1
- assert len(drive_group.data_devices.paths) > 0
- assert all(map(lambda p: isinstance(p, six.string_types),
- drive_group.data_devices.paths))
-
- host = drive_group.hosts(all_hosts)[0]
- self._require_hosts(host)
-
-
- # get bootstrap key
- ret, keyring, err = self.mon_command({
- 'prefix': 'auth get',
- 'entity': 'client.bootstrap-osd',
- })
-
- # generate config
- ret, config, err = self.mon_command({
- "prefix": "config generate-minimal-conf",
- })
-
- j = json.dumps({
- 'config': config,
- 'keyring': keyring,
- })
-
- devices = drive_group.data_devices.paths
- for device in devices:
- out, err, code = self._run_cephadm(
- host, 'osd', 'ceph-volume',
- [
- '--config-and-keyring', '-',
- '--',
- 'lvm', 'prepare',
- "--cluster-fsid", self._cluster_fsid,
- "--{}".format(drive_group.objectstore),
- "--data", device,
- ],
- stdin=j)
- self.log.debug('ceph-volume prepare: %s' % out)
-
- # check result
- out, err, code = self._run_cephadm(
- host, 'osd', 'ceph-volume',
- [
- '--',
- 'lvm', 'list',
- '--format', 'json',
- ])
- self.log.debug('code %s out %s' % (code, out))
- osds_elems = json.loads('\n'.join(out))
- fsid = self._cluster_fsid
- for osd_id, osds in osds_elems.items():
- for osd in osds:
- if osd['tags']['ceph.cluster_fsid'] != fsid:
- self.log.debug('mismatched fsid, skipping %s' % osd)
- continue
- if len(list(set(devices) & set(osd['devices']))) == 0 and osd.get('lv_path') not in devices:
- self.log.debug('mismatched devices, skipping %s' % osd)
- continue
-
- # create
- ret, keyring, err = self.mon_command({
- 'prefix': 'auth get',
- 'entity': 'osd.%s' % str(osd_id),
- })
- self._create_daemon(
- 'osd', str(osd_id), host, keyring,
- extra_args=[
- '--osd-fsid', osd['tags']['ceph.osd_fsid'],
- ])
-
- return "Created osd(s) on host '{}'".format(host)
-
- def create_osds(self, drive_group):
- """
- Create a new osd.
-
- The orchestrator CLI currently handles a narrow form of drive
- specification defined by a single block device using bluestore.
-
- :param drive_group: osd specification
-
- TODO:
- - support full drive_group specification
- - support batch creation
- """
-
- return self.get_hosts().then(lambda hosts: self._create_osd(hosts, drive_group))
-
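For reference, the narrow drive specification described above can be constructed the same way the unit tests toward the end of this change do; a minimal sketch, with placeholder host and device names:

from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection

# Minimal drive group of the narrow form create_osds handles: one host
# pattern and an explicit list of data device paths (values are placeholders).
dg = DriveGroupSpec('node1', DeviceSelection(paths=['/dev/vdb']))
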
- def remove_osds(self, name):
- def _search(daemons):
- args = [('osd.%s' % d.service_instance, d.nodename) for d in daemons]
- if not args:
- raise OrchestratorError('Unable to find osd.%s' % name)
- return self._remove_daemon(args)
- return self._get_services('osd', service_id=name).then(_search)
-
- def _create_daemon(self, daemon_type, daemon_id, host, keyring,
- extra_args=[]):
- conn = self._get_connection(host)
- try:
- name = '%s.%s' % (daemon_type, daemon_id)
-
- # generate config
- ret, config, err = self.mon_command({
- "prefix": "config generate-minimal-conf",
- })
-
- ret, crash_keyring, err = self.mon_command({
- 'prefix': 'auth get-or-create',
- 'entity': 'client.crash.%s' % host,
- 'caps': ['mon', 'profile crash',
- 'mgr', 'profile crash'],
- })
-
- j = json.dumps({
- 'config': config,
- 'keyring': keyring,
- 'crash_keyring': crash_keyring,
- })
-
- out, err, code = self._run_cephadm(
- host, name, 'deploy',
- [
- '--name', name,
- '--config-and-keyrings', '-',
- ] + extra_args,
- stdin=j)
- self.log.debug('create_daemon code %s out %s' % (code, out))
- self.service_cache.invalidate(host)
- return "(Re)deployed {} on host '{}'".format(name, host)
-
- except Exception as e:
- self.log.error("create_daemon({}): error: {}".format(host, e))
- raise
-
- finally:
- self.log.info("create_daemon({}): finished".format(host))
- conn.exit()
-
- @async_map_completion
- def _remove_daemon(self, name, host):
- """
- Remove a daemon
- """
- out, err, code = self._run_cephadm(
- host, name, 'rm-daemon',
- ['--name', name])
- self.log.debug('_remove_daemon code %s out %s' % (code, out))
- self.service_cache.invalidate(host)
- return "Removed {} from host '{}'".format(name, host)
-
- def _update_service(self, daemon_type, add_func, spec):
- def ___update_service(daemons):
- if len(daemons) > spec.count:
- # remove some
- to_remove = len(daemons) - spec.count
- args = []
- for d in daemons[0:to_remove]:
- args.append(
- ('%s.%s' % (d.service_type, d.service_instance), d.nodename)
- )
- return self._remove_daemon(args)
- elif len(daemons) < spec.count:
- # add some
- spec.count -= len(daemons)
- return add_func(spec)
- return []
- return self._get_services(daemon_type, service_name=spec.name).then(___update_service)
-
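The scaling decision made by _update_service above reduces to comparing the current daemon count with spec.count; a standalone sketch of that comparison (scale_action is a hypothetical helper, not part of the module):

def scale_action(current, target):
    # Mirror of the count comparison in _update_service: remove the surplus,
    # hand the shortfall to the add function, or do nothing.
    if current > target:
        return ('remove', current - target)
    if current < target:
        return ('add', target - current)
    return ('noop', 0)

assert scale_action(3, 1) == ('remove', 2)
assert scale_action(1, 3) == ('add', 2)
assert scale_action(2, 2) == ('noop', 0)
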
- @async_map_completion
- def _create_mon(self, host, network, name):
- """
- Create a new monitor on the given host.
- """
- self.log.info("create_mon({}:{}): starting".format(host, network))
-
- # get mon. key
- ret, keyring, err = self.mon_command({
- 'prefix': 'auth get',
- 'entity': 'mon.',
- })
-
- # infer whether this is a CIDR network, addrvec, or plain IP
- if '/' in network:
- extra_args = ['--mon-network', network]
- elif network.startswith('[v') and network.endswith(']'):
- extra_args = ['--mon-addrv', network]
- elif ':' not in network:
- extra_args = ['--mon-ip', network]
- else:
- raise RuntimeError('Must specify a CIDR network, ceph addrvec, or plain IP: \'%s\'' % network)
-
- return self._create_daemon('mon', name or host, host, keyring,
- extra_args=extra_args)
-
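The address-form inference in _create_mon maps the network argument onto a single cephadm flag; a minimal standalone sketch of the same dispatch (mon_network_args is a hypothetical name used here only for illustration):

def mon_network_args(network):
    # CIDR network, ceph addrvec, or plain IP, in that order of checks.
    if '/' in network:
        return ['--mon-network', network]
    if network.startswith('[v') and network.endswith(']'):
        return ['--mon-addrv', network]
    if ':' not in network:
        return ['--mon-ip', network]
    raise ValueError('expected CIDR, addrvec, or plain IP: %r' % network)

assert mon_network_args('10.0.0.0/24') == ['--mon-network', '10.0.0.0/24']
assert mon_network_args('10.0.0.5') == ['--mon-ip', '10.0.0.5']
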
- def update_mons(self, num, host_specs):
- # type: (int, List[orchestrator.HostSpec]) -> orchestrator.Completion
- """
- Adjust the number of cluster monitors.
- """
- # current support limited to adding monitors.
- mon_map = self.get("mon_map")
- num_mons = len(mon_map["mons"])
- if num == num_mons:
- return orchestrator.Completion(value="The requested number of monitors already exists.")
- if num < num_mons:
- raise NotImplementedError("Removing monitors is not supported.")
-
- self.log.debug("Trying to update monitors on: {}".format(host_specs))
- # check that all the hosts are registered
- [self._require_hosts(host.hostname) for host in host_specs]
-
- # current support requires a network to be specified
- for host, network, _ in host_specs:
- if not network:
- raise RuntimeError("Host '{}' is missing a network spec".format(host))
-
- def update_mons_with_daemons(daemons):
- for _, _, name in host_specs:
- if name and len([d for d in daemons if d.service_instance == name]):
- raise RuntimeError('name %s already exists' % name)
-
- # explicit placement: enough hosts provided?
- num_new_mons = num - num_mons
- if len(host_specs) < num_new_mons:
- raise RuntimeError("Error: {} hosts provided, expected {}".format(
- len(host_specs), num_new_mons))
-
- self.log.info("creating {} monitors on hosts: '{}'".format(
- num_new_mons, ",".join(map(lambda h: ":".join(h), host_specs))))
-
- # TODO: we may want to chain the creation of the monitors so they join
- # the quorum one at a time.
- return self._create_mon(host_specs)
- return self._get_services('mon').then(update_mons_with_daemons)
-
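update_mons takes host specs in the host:network=name form parsed by orchestrator.parse_host_specs, as exercised by the unit tests near the end of this change; a small sketch with placeholder values:

from orchestrator import parse_host_specs

# Host 'mon-host-1' gets a monitor named 'a' bound to the 10.1.0.0/24
# network; all values here are placeholders.
spec = parse_host_specs('mon-host-1:10.1.0.0/24=a')
print(spec.hostname)
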
- @async_map_completion
- def _create_mgr(self, host, name):
- """
- Create a new manager instance on a host.
- """
- self.log.info("create_mgr({}, mgr.{}): starting".format(host, name))
-
- # get mgr. key
- ret, keyring, err = self.mon_command({
- 'prefix': 'auth get-or-create',
- 'entity': 'mgr.%s' % name,
- 'caps': ['mon', 'profile mgr',
- 'osd', 'allow *',
- 'mds', 'allow *'],
- })
-
- return self._create_daemon('mgr', name, host, keyring)
-
- def update_mgrs(self, num, host_specs):
- """
- Adjust the number of cluster managers.
- """
- return self._get_services('mgr').then(lambda daemons: self._update_mgrs(num, host_specs, daemons))
-
- def _update_mgrs(self, num, host_specs, daemons):
- num_mgrs = len(daemons)
- if num == num_mgrs:
- return orchestrator.Completion(value="The requested number of managers already exists.")
-
- self.log.debug("Trying to update managers on: {}".format(host_specs))
- # check that all the hosts are registered
- [self._require_hosts(host.hostname) for host in host_specs]
-
- results = []
- if num < num_mgrs:
- num_to_remove = num_mgrs - num
-
- # first try to remove unconnected mgr daemons that the
- # cluster doesn't see
- connected = []
- mgr_map = self.get("mgr_map")
- if mgr_map.get("active_name", {}):
- connected.append(mgr_map.get('active_name', ''))
- for standby in mgr_map.get('standbys', []):
- connected.append(standby.get('name', ''))
- to_remove_daemons = []
- for d in daemons:
- if d.service_instance not in connected:
- to_remove_daemons.append(('%s.%s' % (d.service_type, d.service_instance),
- d.nodename))
- num_to_remove -= 1
- if num_to_remove == 0:
- break
-
- # otherwise, remove *any* mgr
- if num_to_remove > 0:
- for d in daemons:
- to_remove_daemons.append(('%s.%s' % (d.service_type, d.service_instance), d.nodename))
- num_to_remove -= 1
- if num_to_remove == 0:
- break
- return self._remove_daemon(to_remove_daemons)
-
- else:
- # we assume explicit placement by which there are the same number of
- # hosts specified as the size of increase in number of daemons.
- num_new_mgrs = num - num_mgrs
- if len(host_specs) < num_new_mgrs:
- raise RuntimeError(
- "Error: {} hosts provided, expected {}".format(
- len(host_specs), num_new_mgrs))
-
- for host_spec in host_specs:
- if host_spec.name and len([d for d in daemons if d.service_instance == host_spec.name]):
- raise RuntimeError('name %s already exists' % host_spec.name)
-
- self.log.info("creating {} managers on hosts: '{}'".format(
- num_new_mgrs, ",".join([spec.hostname for spec in host_specs])))
-
- args = []
- for host_spec in host_specs:
- name = host_spec.name or self.get_unique_name(daemons)
- host = host_spec.hostname
- args.append((host, name))
- return self._create_mgr(args)
-
- def add_mds(self, spec):
- if not spec.placement.nodes or len(spec.placement.nodes) < spec.count:
- raise RuntimeError("must specify at least %d hosts" % spec.count)
- return self._get_services('mds').then(lambda ds: self._add_mds(ds, spec))
-
- def _add_mds(self, daemons, spec):
- args = []
- num_added = 0
- for host, _, name in spec.placement.nodes:
- if num_added >= spec.count:
- break
- mds_id = self.get_unique_name(daemons, spec.name, name)
- self.log.debug('placing mds.%s on host %s' % (mds_id, host))
- args.append((mds_id, host))
- # add to daemon list so next name(s) will also be unique
- sd = orchestrator.ServiceDescription()
- sd.service_instance = mds_id
- sd.service_type = 'mds'
- sd.nodename = host
- daemons.append(sd)
- num_added += 1
- return self._create_mds(args)
-
- def update_mds(self, spec):
- return self._update_service('mds', self.add_mds, spec)
-
- @async_map_completion
- def _create_mds(self, mds_id, host):
- # get mgr. key
- ret, keyring, err = self.mon_command({
- 'prefix': 'auth get-or-create',
- 'entity': 'mds.' + mds_id,
- 'caps': ['mon', 'profile mds',
- 'osd', 'allow rwx',
- 'mds', 'allow'],
- })
- return self._create_daemon('mds', mds_id, host, keyring)
-
- def remove_mds(self, name):
- self.log.debug("Attempting to remove volume: {}".format(name))
- def _remove_mds(daemons):
- args = []
- for d in daemons:
- if d.service_instance == name or d.service_instance.startswith(name + '.'):
- args.append(
- ('%s.%s' % (d.service_type, d.service_instance), d.nodename)
- )
- if not args:
- raise OrchestratorError('Unable to find mds.%s[-*] daemon(s)' % name)
- return self._remove_daemon(args)
- return self._get_services('mds').then(_remove_mds)
-
- def add_rgw(self, spec):
- if not spec.placement.nodes or len(spec.placement.nodes) < spec.count:
- raise RuntimeError("must specify at least %d hosts" % spec.count)
- # ensure rgw_realm and rgw_zone are set for these daemons
- ret, out, err = self.mon_command({
- 'prefix': 'config set',
- 'who': 'client.rgw.' + spec.name,
- 'name': 'rgw_zone',
- 'value': spec.rgw_zone,
- })
- ret, out, err = self.mon_command({
- 'prefix': 'config set',
- 'who': 'client.rgw.' + spec.name,
- 'name': 'rgw_realm',
- 'value': spec.rgw_realm,
- })
-
- def _add_rgw(daemons):
- args = []
- num_added = 0
- for host, _, name in spec.placement.nodes:
- if num_added >= spec.count:
- break
- rgw_id = self.get_unique_name(daemons, spec.name, name)
- self.log.debug('placing rgw.%s on host %s' % (rgw_id, host))
- args.append((rgw_id, host))
- # add to daemon list so next name(s) will also be unique
- sd = orchestrator.ServiceDescription()
- sd.service_instance = rgw_id
- sd.service_type = 'rgw'
- sd.nodename = host
- daemons.append(sd)
- num_added += 1
- return self._create_rgw(args)
-
- return self._get_services('rgw').then(_add_rgw)
-
- @async_map_completion
- def _create_rgw(self, rgw_id, host):
- ret, keyring, err = self.mon_command({
- 'prefix': 'auth get-or-create',
- 'entity': 'client.rgw.' + rgw_id,
- 'caps': ['mon', 'allow rw',
- 'mgr', 'allow rw',
- 'osd', 'allow rwx'],
- })
- return self._create_daemon('rgw', rgw_id, host, keyring)
-
- def remove_rgw(self, name):
-
- def _remove_rgw(daemons):
- args = []
- for d in daemons:
- if d.service_instance == name or d.service_instance.startswith(name + '.'):
- args.append(('%s.%s' % (d.service_type, d.service_instance),
- d.nodename))
- if args:
- return self._remove_daemon(args)
- raise RuntimeError('Unable to find rgw.%s[-*] daemon(s)' % name)
-
- return self._get_services('rgw').then(_remove_rgw)
-
- def update_rgw(self, spec):
- return self._update_service('rgw', self.add_rgw, spec)
-
- def add_rbd_mirror(self, spec):
- if not spec.placement.nodes or len(spec.placement.nodes) < spec.count:
- raise RuntimeError("must specify at least %d hosts" % spec.count)
- self.log.debug('nodes %s' % spec.placement.nodes)
-
- def _add_rbd_mirror(daemons):
- args = []
- num_added = 0
- for host, _, name in spec.placement.nodes:
- if num_added >= spec.count:
- break
- daemon_id = self.get_unique_name(daemons, None, name)
- self.log.debug('placing rbd-mirror.%s on host %s' % (daemon_id,
- host))
- args.append((daemon_id, host))
-
- # add to daemon list so next name(s) will also be unique
- sd = orchestrator.ServiceDescription()
- sd.service_instance = daemon_id
- sd.service_type = 'rbd-mirror'
- sd.nodename = host
- daemons.append(sd)
- num_added += 1
- return self._create_rbd_mirror(args)
-
- return self._get_services('rbd-mirror').then(_add_rbd_mirror)
-
- @async_map_completion
- def _create_rbd_mirror(self, daemon_id, host):
- ret, keyring, err = self.mon_command({
- 'prefix': 'auth get-or-create',
- 'entity': 'client.rbd-mirror.' + daemon_id,
- 'caps': ['mon', 'profile rbd-mirror',
- 'osd', 'profile rbd'],
- })
- return self._create_daemon('rbd-mirror', daemon_id, host, keyring)
-
- def remove_rbd_mirror(self, name):
- def _remove_rbd_mirror(daemons):
- args = []
- for d in daemons:
- if not name or d.service_instance == name:
- args.append(
- ('%s.%s' % (d.service_type, d.service_instance),
- d.nodename)
- )
- if not args and name:
- raise RuntimeError('Unable to find rbd-mirror.%s daemon' % name)
- return self._remove_daemon(args)
-
- return self._get_services('rbd-mirror').then(_remove_rbd_mirror)
-
- def update_rbd_mirror(self, spec):
- return self._update_service('rbd-mirror', self.add_rbd_mirror, spec)
-
- def _get_container_image_id(self, image_name):
- # pick a random host...
- host = None
- for host_name in self.inventory_cache:
- host = host_name
- break
- if not host:
- raise OrchestratorError('no hosts defined')
- self.log.debug('using host %s' % host)
- out, err, code = self._run_cephadm(
- host, None, 'pull', [],
- image=image_name,
- no_fsid=True)
- return out[0]
-
- def upgrade_check(self, image, version):
- if version:
- target = self.container_image_base + ':v' + version
- elif image:
- target = image
- else:
- raise OrchestratorError('must specify either image or version')
- return self._get_services().then(lambda daemons: self._upgrade_check(target, daemons))
-
- def _upgrade_check(self, target, services):
- # get service state
- target_id = self._get_container_image_id(target)
- self.log.debug('Target image %s id %s' % (target, target_id))
- r = {
- 'target_image_name': target,
- 'target_image_id': target_id,
- 'needs_update': dict(),
- 'up_to_date': list(),
- }
- for s in services:
- if target_id == s.container_image_id:
- r['up_to_date'].append(s.name())
- else:
- r['needs_update'][s.name()] = {
- 'current_name': s.container_image_name,
- 'current_id': s.container_image_id,
- }
- return trivial_result(json.dumps(r, indent=4))
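The JSON report assembled by _upgrade_check above has a fixed shape; a standalone sketch that builds the same structure from hypothetical (name, image_name, image_id) tuples:

import json

def upgrade_report(target_name, target_id, daemons):
    # daemons: iterable of (daemon_name, container_image_name, container_image_id)
    r = {
        'target_image_name': target_name,
        'target_image_id': target_id,
        'needs_update': {},
        'up_to_date': [],
    }
    for name, image_name, image_id in daemons:
        if image_id == target_id:
            r['up_to_date'].append(name)
        else:
            r['needs_update'][name] = {
                'current_name': image_name,
                'current_id': image_id,
            }
    return json.dumps(r, indent=4)

print(upgrade_report('ceph/ceph:v15', 'sha-target',
                     [('mgr.x', 'ceph/ceph:v14', 'sha-old')]))
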
+++ /dev/null
-# ceph-deploy ftw
-import os
-import errno
-import tempfile
-import shutil
-
-def safe_makedirs(path, uid=-1, gid=-1):
- """ create path recursively if it doesn't exist """
- try:
- os.makedirs(path)
- except OSError as e:
- if e.errno == errno.EEXIST:
- pass
- else:
- raise
- else:
- os.chown(path, uid, gid)
-
-def write_conf(path, conf):
- if not os.path.exists(path):
- dirpath = os.path.dirname(path)
- if os.path.exists(dirpath):
- with open(path, "w") as f:
- f.write(conf)
- os.chmod(path, 0o644)
- else:
- raise RuntimeError(
- "{0} does not exist".format(dirpath))
-
-def write_keyring(path, key, overwrite=False, uid=-1, gid=-1):
- dirname = os.path.dirname(path)
- if not os.path.exists(dirname):
- safe_makedirs(dirname, uid, gid)
- if not overwrite and os.path.exists(path):
- return
- with open(path, "wb") as f:
- f.write(key.encode('utf-8'))
-
-def create_mon_path(path, uid=-1, gid=-1):
- """create the mon path if it does not exist"""
- if not os.path.exists(path):
- os.makedirs(path)
- os.chown(path, uid, gid);
-
-def write_file(path, content, mode=0o644, directory=None, uid=-1, gid=-1):
- if directory:
- if path.startswith("/"):
- path = path[1:]
- path = os.path.join(directory, path)
- if os.path.exists(path):
- # Delete file in case we are changing its mode
- os.unlink(path)
- with os.fdopen(os.open(path, os.O_WRONLY | os.O_CREAT, mode), 'wb') as f:
- f.write(content.encode('utf-8'))
- os.chown(path, uid, gid)
-
-def path_getuid(path):
- return os.stat(path).st_uid
-
-def path_getgid(path):
- return os.stat(path).st_gid
-
-def which(executable):
- """find the location of an executable"""
- locations = (
- '/usr/local/bin',
- '/bin',
- '/usr/bin',
- '/usr/local/sbin',
- '/usr/sbin',
- '/sbin',
- )
-
- for location in locations:
- executable_path = os.path.join(location, executable)
- if os.path.exists(executable_path) and os.path.isfile(executable_path):
- return executable_path
-
-if __name__ == '__channelexec__':
- for item in channel: # type: ignore
- channel.send(eval(item)) # type: ignore
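These helpers normally run on the remote node via the execnet __channelexec__ hook above, but they can also be exercised locally; a quick sketch assuming safe_makedirs and write_keyring from this file are in scope, with a placeholder keyring:

import os
import tempfile

# Assumes safe_makedirs() and write_keyring() defined above are importable
# or pasted into the same session; the key content is a placeholder.
tmp = tempfile.mkdtemp()
path = os.path.join(tmp, 'bootstrap-osd', 'ceph.keyring')
safe_makedirs(os.path.dirname(path))
write_keyring(path, '[client.bootstrap-osd]\n\tkey = <placeholder>\n')
with open(path) as f:
    print(f.read())
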
+++ /dev/null
-from contextlib import contextmanager
-
-import pytest
-
-from ssh import SSHOrchestrator
-from tests import mock
-
-
-def set_store(self, k, v):
- if v is None:
- del self._store[k]
- else:
- self._store[k] = v
-
-
-def get_store(self, k):
- return self._store[k]
-
-
-def get_store_prefix(self, prefix):
- return {
- k: v for k, v in self._store.items()
- if k.startswith(prefix)
- }
-
-def get_ceph_option(_, key):
- return __file__
-
-@pytest.yield_fixture()
-def ssh_module():
- with mock.patch("ssh.module.SSHOrchestrator.get_ceph_option", get_ceph_option),\
- mock.patch("ssh.module.SSHOrchestrator._configure_logging", lambda *args: None),\
- mock.patch("ssh.module.SSHOrchestrator.set_store", set_store),\
- mock.patch("ssh.module.SSHOrchestrator.get_store", get_store),\
- mock.patch("ssh.module.SSHOrchestrator.get_store_prefix", get_store_prefix):
- SSHOrchestrator._register_commands('')
- m = SSHOrchestrator.__new__ (SSHOrchestrator)
- m._root_logger = mock.MagicMock()
- m._store = {
- 'ssh_config': '',
- 'ssh_identity_key': '',
- 'ssh_identity_pub': '',
- 'inventory': {},
- }
- m.__init__('ssh', 0, 0)
- yield m
+++ /dev/null
-import sys
-import time
-
-
-try:
- from typing import Any
-except ImportError:
- pass
-
-import pytest
-
-
-from orchestrator import raise_if_exception, Completion
-from .fixtures import ssh_module
-from ..module import trivial_completion, async_completion, async_map_completion, SSHOrchestrator
-
-
-class TestCompletion(object):
- def _wait(self, m, c):
- # type: (SSHOrchestrator, Completion) -> Any
- m.process([c])
- m.process([c])
-
- for _ in range(30):
- if c.is_finished:
- raise_if_exception(c)
- return c.result
- time.sleep(0.1)
- assert False, "timeout" + str(c._state)
-
- def test_trivial(self, ssh_module):
- @trivial_completion
- def run(x):
- return x+1
- assert self._wait(ssh_module, run(1)) == 2
-
- @pytest.mark.parametrize("input", [
- ((1, ), ),
- ((1, 2), ),
- (("hallo", ), ),
- (("hallo", "foo"), ),
- ])
- def test_async(self, input, ssh_module):
- @async_completion
- def run(*args):
- return str(args)
-
- assert self._wait(ssh_module, run(*input)) == str(input)
-
- @pytest.mark.parametrize("input,expected", [
- ([], []),
- ([1], ["(1,)"]),
- (["hallo"], ["('hallo',)"]),
- ("hi", ["('h',)", "('i',)"]),
- (list(range(5)), [str((x, )) for x in range(5)]),
- ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]),
- ])
- def test_async_map(self, input, expected, ssh_module):
- @async_map_completion
- def run(*args):
- return str(args)
-
- c = run(input)
- self._wait(ssh_module, c)
- assert c.result == expected
-
- def test_async_self(self, ssh_module):
- class Run(object):
- def __init__(self):
- self.attr = 1
-
- @async_completion
- def run(self, x):
- assert self.attr == 1
- return x + 1
-
- assert self._wait(ssh_module, Run().run(1)) == 2
-
- @pytest.mark.parametrize("input,expected", [
- ([], []),
- ([1], ["(1,)"]),
- (["hallo"], ["('hallo',)"]),
- ("hi", ["('h',)", "('i',)"]),
- (list(range(5)), [str((x, )) for x in range(5)]),
- ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]),
- ])
- def test_async_map_self(self, input, expected, ssh_module):
- class Run(object):
- def __init__(self):
- self.attr = 1
-
- @async_map_completion
- def run(self, *args):
- assert self.attr == 1
- return str(args)
-
- c = Run().run(input)
- self._wait(ssh_module, c)
- assert c.result == expected
-
- def test_then1(self, ssh_module):
- @async_map_completion
- def run(x):
- return x+1
-
- assert self._wait(ssh_module, run([1,2]).then(str)) == '[2, 3]'
-
- def test_then2(self, ssh_module):
- @async_map_completion
- def run(x):
- time.sleep(0.1)
- return x+1
-
- @async_completion
- def async_str(results):
- return str(results)
-
- c = run([1,2]).then(async_str)
-
- self._wait(ssh_module, c)
- assert c.result == '[2, 3]'
-
- def test_then3(self, ssh_module):
- @async_map_completion
- def run(x):
- time.sleep(0.1)
- return x+1
-
- def async_str(results):
- return async_completion(str)(results)
-
- c = run([1,2]).then(async_str)
-
- self._wait(ssh_module, c)
- assert c.result == '[2, 3]'
-
- def test_then4(self, ssh_module):
- @async_map_completion
- def run(x):
- time.sleep(0.1)
- return x+1
-
- def async_str(results):
- return async_completion(str)(results).then(lambda x: x + "hello")
-
- c = run([1,2]).then(async_str)
-
- self._wait(ssh_module, c)
- assert c.result == '[2, 3]hello'
-
- @pytest.mark.skip(reason="see limitation of async_map_completion")
- def test_then5(self, ssh_module):
- @async_map_completion
- def run(x):
- time.sleep(0.1)
- return async_completion(str)(x+1)
-
- c = run([1,2])
-
- self._wait(ssh_module, c)
- assert c.result == "['2', '3']"
-
- def test_raise(self, ssh_module):
- @async_completion
- def run(x):
- raise ZeroDivisionError()
-
- with pytest.raises(ZeroDivisionError):
- self._wait(ssh_module, run(1))
-
+++ /dev/null
-import json
-import time
-from contextlib import contextmanager
-
-from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection
-
-try:
- from typing import Any
-except ImportError:
- pass
-
-from orchestrator import ServiceDescription, raise_if_exception, Completion, InventoryNode, \
- StatelessServiceSpec, PlacementSpec, RGWSpec, parse_host_specs
-from ..module import SSHOrchestrator
-from tests import mock
-from .fixtures import ssh_module
-
-
-"""
-TODOs:
- There is really room for improvement here. I just quickly assembled these tests.
- In general, everything should be tested in Teuthology as well. The reason for
- also testing this here is the development roundtrip time.
-"""
-
-
-
-def _run_cephadm(ret):
- def foo(*args, **kwargs):
- return ret, '', 0
- return foo
-
-def mon_command(*args, **kwargs):
- return 0, '', ''
-
-
-class TestSSH(object):
- def _wait(self, m, c):
- # type: (SSHOrchestrator, Completion) -> Any
- m.process([c])
- m.process([c])
-
- for _ in range(30):
- if c.is_finished:
- raise_if_exception(c)
- return c.result
- time.sleep(0.1)
- assert False, "timeout" + str(c._state)
-
- @contextmanager
- def _with_host(self, m, name):
- self._wait(m, m.add_host(name))
- yield
- self._wait(m, m.remove_host(name))
-
- def test_get_unique_name(self, ssh_module):
- existing = [
- ServiceDescription(service_instance='mon.a')
- ]
- new_mon = ssh_module.get_unique_name(existing, 'mon')
- assert new_mon.startswith('mon.')
- assert new_mon != 'mon.a'
-
- def test_host(self, ssh_module):
- with self._with_host(ssh_module, 'test'):
- assert self._wait(ssh_module, ssh_module.get_hosts()) == [InventoryNode('test')]
- c = ssh_module.get_hosts()
- assert self._wait(ssh_module, c) == []
-
- @mock.patch("ssh.module.SSHOrchestrator._run_cephadm", _run_cephadm('[]'))
- def test_service_ls(self, ssh_module):
- with self._with_host(ssh_module, 'test'):
- c = ssh_module.describe_service()
- assert self._wait(ssh_module, c) == []
-
- @mock.patch("ssh.module.SSHOrchestrator._run_cephadm", _run_cephadm('[]'))
- def test_device_ls(self, ssh_module):
- with self._with_host(ssh_module, 'test'):
- c = ssh_module.get_inventory()
- assert self._wait(ssh_module, c) == [InventoryNode('test')]
-
- @mock.patch("ssh.module.SSHOrchestrator._run_cephadm", _run_cephadm('[]'))
- @mock.patch("ssh.module.SSHOrchestrator.send_command")
- @mock.patch("ssh.module.SSHOrchestrator.mon_command", mon_command)
- @mock.patch("ssh.module.SSHOrchestrator._get_connection")
- def test_mon_update(self, _send_command, _get_connection, ssh_module):
- with self._with_host(ssh_module, 'test'):
- c = ssh_module.update_mons(1, [parse_host_specs('test:0.0.0.0=a')])
- assert self._wait(ssh_module, c) == ["(Re)deployed mon.a on host 'test'"]
-
- @mock.patch("ssh.module.SSHOrchestrator._run_cephadm", _run_cephadm('[]'))
- @mock.patch("ssh.module.SSHOrchestrator.send_command")
- @mock.patch("ssh.module.SSHOrchestrator.mon_command", mon_command)
- @mock.patch("ssh.module.SSHOrchestrator._get_connection")
- def test_mgr_update(self, _send_command, _get_connection, ssh_module):
- with self._with_host(ssh_module, 'test'):
- c = ssh_module.update_mgrs(1, [parse_host_specs('test:0.0.0.0')])
- [out] = self._wait(ssh_module, c)
- assert "(Re)deployed mgr." in out
- assert " on host 'test'" in out
-
- @mock.patch("ssh.module.SSHOrchestrator._run_cephadm", _run_cephadm('{}'))
- @mock.patch("ssh.module.SSHOrchestrator.send_command")
- @mock.patch("ssh.module.SSHOrchestrator.mon_command", mon_command)
- @mock.patch("ssh.module.SSHOrchestrator._get_connection")
- def test_create_osds(self, _send_command, _get_connection, ssh_module):
- with self._with_host(ssh_module, 'test'):
- dg = DriveGroupSpec('test', DeviceSelection(paths=['']))
- c = ssh_module.create_osds(dg)
- assert self._wait(ssh_module, c) == "Created osd(s) on host 'test'"
-
- @mock.patch("ssh.module.SSHOrchestrator._run_cephadm", _run_cephadm('{}'))
- @mock.patch("ssh.module.SSHOrchestrator.send_command")
- @mock.patch("ssh.module.SSHOrchestrator.mon_command", mon_command)
- @mock.patch("ssh.module.SSHOrchestrator._get_connection")
- def test_mds(self, _send_command, _get_connection, ssh_module):
- with self._with_host(ssh_module, 'test'):
- ps = PlacementSpec(nodes=['test'])
- c = ssh_module.add_mds(StatelessServiceSpec('name', ps))
- [out] = self._wait(ssh_module, c)
- assert "(Re)deployed mds.name." in out
- assert " on host 'test'" in out
-
- @mock.patch("ssh.module.SSHOrchestrator._run_cephadm", _run_cephadm('{}'))
- @mock.patch("ssh.module.SSHOrchestrator.send_command")
- @mock.patch("ssh.module.SSHOrchestrator.mon_command", mon_command)
- @mock.patch("ssh.module.SSHOrchestrator._get_connection")
- def test_rgw(self, _send_command, _get_connection, ssh_module):
- with self._with_host(ssh_module, 'test'):
- ps = PlacementSpec(nodes=['test'])
- c = ssh_module.add_rgw(RGWSpec('realm', 'zone', ps))
- [out] = self._wait(ssh_module, c)
- assert "(Re)deployed rgw.realm.zone." in out
- assert " on host 'test'" in out
-
- @mock.patch("ssh.module.SSHOrchestrator._run_cephadm", _run_cephadm(
- json.dumps([
- dict(
- name='rgw.myrgw.foobar',
- style='cephadm',
- fsid='fsid',
- container_id='container_id',
- version='version',
- state='running',
- )
- ])
- ))
- def test_remove_rgw(self, ssh_module):
- ssh_module._cluster_fsid = "fsid"
- with self._with_host(ssh_module, 'test'):
- c = ssh_module.remove_rgw('myrgw')
- out = self._wait(ssh_module, c)
- assert out == ["Removed rgw.myrgw.foobar from host 'test'"]
-
- @mock.patch("ssh.module.SSHOrchestrator._run_cephadm", _run_cephadm('{}'))
- @mock.patch("ssh.module.SSHOrchestrator.send_command")
- @mock.patch("ssh.module.SSHOrchestrator.mon_command", mon_command)
- @mock.patch("ssh.module.SSHOrchestrator._get_connection")
- def test_rbd_mirror(self, _send_command, _get_connection, ssh_module):
- with self._with_host(ssh_module, 'test'):
- ps = PlacementSpec(nodes=['test'])
- c = ssh_module.add_rbd_mirror(StatelessServiceSpec('name', ps))
- [out] = self._wait(ssh_module, c)
- assert "(Re)deployed rbd-mirror." in out
- assert " on host 'test'" in out
-
- @mock.patch("ssh.module.SSHOrchestrator._run_cephadm", _run_cephadm('{}'))
- @mock.patch("ssh.module.SSHOrchestrator.send_command")
- @mock.patch("ssh.module.SSHOrchestrator.mon_command", mon_command)
- @mock.patch("ssh.module.SSHOrchestrator._get_connection")
- def test_blink_device_light(self, _send_command, _get_connection, ssh_module):
- with self._with_host(ssh_module, 'test'):
- c = ssh_module.blink_device_light('ident', True, [('test', '')])
- assert self._wait(ssh_module, c) == ['Set ident light for test: on']
-
-
[testenv]
setenv = UNITTEST = true
deps = -r requirements.txt
-commands = pytest -v --cov --cov-append --cov-report=term --doctest-modules {posargs:mgr_util.py tests/ ssh/}
+commands = pytest -v --cov --cov-append --cov-report=term --doctest-modules {posargs:mgr_util.py tests/ cephadm/}
[testenv:mypy]
basepython = python3
deps =
-r requirements.txt
mypy
-commands = mypy --config-file=../../mypy.ini orchestrator.py ssh/module.py rook/module.py
\ No newline at end of file
+commands = mypy --config-file=../../mypy.ini orchestrator.py cephadm/module.py rook/module.py
\ No newline at end of file
--- /dev/null
+def bootstrap_cluster():
+ create_mon()
+ create_mgr()
+
+
+def create_mon():
+ pass
+
+
+def create_mgr():
+ pass
+++ /dev/null
-def bootstrap_cluster():
- create_mon()
- create_mgr()
-
-
-def create_mon():
- pass
-
-
-def create_mgr():
- pass
smallmds=0
short=0
ec=0
-ssh=0
+cephadm=0
parallel=true
hitset=""
overwrite_conf=1
usage=$usage"\t--osd-args: specify any extra osd specific options\n"
usage=$usage"\t--bluestore-devs: comma-separated list of blockdevs to use for bluestore\n"
usage=$usage"\t--inc-osd: append some more osds into existing vcluster\n"
-usage=$usage"\t--ssh: enable ssh orchestrator with ~/.ssh/id_rsa[.pub]\n"
+usage=$usage"\t--cephadm: enable cephadm orchestrator with ~/.ssh/id_rsa[.pub]\n"
usage=$usage"\t--no-parallel: dont start all OSDs in parallel\n"
usage_exit() {
--msgr21 )
msgr="21"
;;
- --ssh )
- ssh=1
+ --cephadm )
+ cephadm=1
;;
--no-parallel )
parallel=false
fi
fi
- if [ "$ssh" -eq 1 ]; then
- debug echo Enabling ssh orchestrator
+ if [ "$cephadm" -eq 1 ]; then
+ debug echo Enabling cephadm orchestrator
- ceph_adm config-key set mgr/ssh/ssh_identity_key -i ~/.ssh/id_rsa
- ceph_adm config-key set mgr/ssh/ssh_identity_pub -i ~/.ssh/id_rsa.pub
+ ceph_adm config-key set mgr/cephadm/ssh_identity_key -i ~/.ssh/id_rsa
+ ceph_adm config-key set mgr/cephadm/ssh_identity_pub -i ~/.ssh/id_rsa.pub
- ceph_adm mgr module enable ssh
- ceph_adm orchestrator set backend ssh
+ ceph_adm mgr module enable cephadm
+ ceph_adm orchestrator set backend cephadm
ceph_adm orchestrator host add $HOSTNAME
fi
}