]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/orchestrator: Add interface and tests to device lights
authorSebastian Wagner <sebastian.wagner@suse.com>
Tue, 5 Mar 2019 14:46:34 +0000 (15:46 +0100)
committerSebastian Wagner <sebastian.wagner@suse.com>
Tue, 5 Nov 2019 12:02:29 +0000 (13:02 +0100)
Signed-off-by: Sebastian Wagner <sebastian.wagner@suse.com>
doc/mgr/orchestrator_cli.rst
doc/mgr/orchestrator_modules.rst
qa/tasks/mgr/test_orchestrator_cli.py
src/pybind/mgr/orchestrator.py
src/pybind/mgr/orchestrator_cli/module.py
src/pybind/mgr/test_orchestrator/module.py

index 1d8212eb03dc49028dde1c73c828ecd5bad8435d..e7af17de4c42ed0084341e50e2c2b97619f32a48 100644 (file)
@@ -182,10 +182,19 @@ Example::
     ^^^^^^^^^^^^^^^^^^^
     ::
 
-        ceph orchestrator device ident-on <host> <devname>
-        ceph orchestrator device ident-off <host> <devname>
-        ceph orchestrator device fault-on <host> <devname>
-        ceph orchestrator device fault-off <host> <devname>
+        ceph orchestrator device ident-on <dev_id>
+        ceph orchestrator device ident-on <dev_name> <host>
+        ceph orchestrator device fault-on <dev_id>
+        ceph orchestrator device fault-on <dev_name> <host>
+
+        ceph orchestrator device ident-off <dev_id> [--force=true]
+        ceph orchestrator device ident-off <dev_name> <host> [--force=true]
+        ceph orchestrator device fault-off <dev_id> [--force=true]
+        ceph orchestrator device fault-off <dev_name> <host> [--force=true]
+
+    where ``dev_id`` is the device id as listed in ``osd metadata``,
+    ``dev_name`` is the name of the device on the system and ``host`` is the host as
+    returned by ``orchestrator host ls``.
 
         ceph orchestrator osd ident-on {primary,journal,db,wal,all} <osd-id>
         ceph orchestrator osd ident-off {primary,journal,db,wal,all} <osd-id>
index b3c7c1b23717065aec60f4e7bdb4bb95ccced55a..c68b6dd8ed3578a67a3cb0a83a4e733324c08a96 100644 (file)
@@ -260,6 +260,9 @@ OSD management
 
 .. py:currentmodule:: orchestrator
 
+.. automethod:: Orchestrator.blink_device_light
+.. autoclass:: DeviceLightLoc
+
 .. _orchestrator-osd-replace:
 
 OSD Replacement
index cbb2190d0307dae699561c5a62cee9cb2fc2315f..9a9c4b3b39c3fedb6f88d3ce446c8deb73a5ab3f 100644 (file)
@@ -14,8 +14,11 @@ log = logging.getLogger(__name__)
 class TestOrchestratorCli(MgrTestCase):
     MGRS_REQUIRED = 1
 
+    def _cmd(self, module, *args):
+        """Pass ``module`` followed by ``args`` to ``raw_cluster_cmd`` and return its result."""
+        return self.mgr_cluster.mon_manager.raw_cluster_cmd(module, *args)
+
     def _orch_cmd(self, *args):
-        return self.mgr_cluster.mon_manager.raw_cluster_cmd("orchestrator", *args)
+        """Run an ``orchestrator ...`` command through :meth:`_cmd`."""
+        return self._cmd("orchestrator", *args)
 
     def _progress_cmd(self, *args):
         return self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", *args)
@@ -93,6 +96,21 @@ class TestOrchestratorCli(MgrTestCase):
         with self.assertRaises(CommandFailedError):
             self._orch_cmd("osd", "create", "notfound:device")
 
+    def test_blink_device_light(self):
+        """
+        Switch the ident and the fault light of one device on and off again
+        and check that ``device ls-lights`` reflects every change.
+        """
+        def _ls_lights(what):
+            # Device ids currently listed under ``what`` ("ident" or "fault").
+            return json.loads(self._cmd("device", "ls-lights"))[what]
+
+        metadata = json.loads(self._cmd("osd", "metadata"))
+        # Only entries of the form ``<name>=<id>`` are usable; split once
+        # and keep the id part.
+        pairs = [osd["device_ids"].split('=') for osd in metadata]
+        dev_ids = [pair[1] for pair in pairs if len(pair) == 2]
+        # Fail with a readable message instead of an IndexError when the
+        # cluster under test exposes no such device.
+        self.assertTrue(dev_ids, "no OSD reports a '<name>=<id>' entry in device_ids")
+        dev_id = dev_ids[0]
+
+        for t in ["ident", "fault"]:
+            self.assertNotIn(dev_id, _ls_lights(t))
+            self._cmd("device", t + "-light-on", dev_id)
+            self.assertIn(dev_id, _ls_lights(t))
+            self._cmd("device", t + "-light-off", dev_id)
+            self.assertNotIn(dev_id, _ls_lights(t))
+
     def test_mds_add(self):
         self._orch_cmd("mds", "add", "service_name")
 
index c7e3e69d5629b8d332ba8d6c4579cfd7fd98c2ee..ea0a650667970d8ebbfe76b09d75375e76458047 100644 (file)
@@ -4,7 +4,6 @@ ceph-mgr orchestrator interface
 
 Please see the ceph-mgr module developer's guide for more information.
 """
-import copy
 import sys
 import time
 import fnmatch
@@ -449,6 +448,17 @@ class Orchestrator(object):
         """
         raise NotImplementedError()
 
+    def blink_device_light(self, ident_fault, on, locations):
+        # type: (str, bool, List[DeviceLightLoc]) -> WriteCompletion
+        """
+        Instructs the orchestrator to enable or disable either the ident or the fault LED.
+
+        :param ident_fault: either ``"ident"`` or ``"fault"``
+        :param on: ``True`` = on, ``False`` = off.
+        :param locations: the devices whose LED is to be switched.
+            See :class:`orchestrator.DeviceLightLoc`
+        :return: a ``WriteCompletion``, as declared in the type comment
+        :raises NotImplementedError: always, in this base implementation
+        """
+        raise NotImplementedError()
+
     def update_mgrs(self, num, hosts):
         # type: (int, List[str]) -> WriteCompletion
         """
@@ -944,6 +954,19 @@ class InventoryNode(object):
         return [cls(item[0], devs(item[1].data)) for item in hosts]
 
 
+class DeviceLightLoc(namedtuple('DeviceLightLoc', ['host', 'dev'])):
+    """
+    Describes a specific device on a specific host. Used for enabling or disabling LEDs
+    on devices.
+
+    host: hostname as in :func:`orchestrator.Orchestrator.get_hosts`
+
+    dev: device_id, e.g. ``ABC1234DEF567-1R1234_ABC8DE0Q``.
+       See ``ceph osd metadata | jq '.[].device_ids'``
+
+    NOTE(review): the orchestrator CLI builds these tuples from the
+    ``location`` entries of the mgr ``devices`` map; confirm that ``dev``
+    there really is a device id and not a device name.
+    """
+    # No per-instance attributes beyond the two tuple fields.
+    __slots__ = ()
+
+
 def _mk_orch_methods(cls):
     # Needs to be defined outside of for.
     # Otherwise meth is always bound to last key
index b7fe15c0aec7ab2f9e8296d2619f4ea37b6141c6..a009104c0cb6b961d16d90fd7f941b50e0a7b9ed 100644 (file)
@@ -1,14 +1,14 @@
 import errno
 import json
+from functools import wraps
 
 from prettytable import PrettyTable
 
 try:
-    from typing import Dict, List
+    from typing import List, Set
 except ImportError:
     pass  # just for type checking.
 
-from functools import wraps
 
 from ceph.deployment.drive_group import DriveGroupSpec, DriveGroupValidationError, \
     DeviceSelection
@@ -48,9 +48,8 @@ class OrchestratorCli(orchestrator.OrchestratorClientMixin, MgrModule):
 
     def __init__(self, *args, **kwargs):
         super(OrchestratorCli, self).__init__(*args, **kwargs)
-        self.ident = set([])
-        self.fault = set([])
-        self.devs = {}
+        self.ident = set()  # type: Set[str]
+        self.fault = set()  # type: Set[str]
         self._load()
         self._refresh_health()
 
@@ -60,7 +59,7 @@ class OrchestratorCli(orchestrator.OrchestratorClientMixin, MgrModule):
             decoded = json.loads(active)
             self.ident = set(decoded.get('ident', []))
             self.fault = set(decoded.get('fault', []))
-        self.log.debug('ident %s, fault %s' % (self.ident, self.fault))
+        self.log.debug('ident {}, fault {}'.format(self.ident, self.fault))
 
     def _save(self):
         encoded = json.dumps({
@@ -76,90 +75,102 @@ class OrchestratorCli(orchestrator.OrchestratorClientMixin, MgrModule):
                 'severity': 'warning',
                 'summary': '%d devices have ident light turned on' % len(
                     self.ident),
-                'detail': ['%s ident light enabled' % d for d in self.ident]
+                'detail': ['{} ident light enabled'.format(d) for d in self.ident]
             }
         if self.fault:
             h['DEVICE_FAULT_ON'] = {
                 'severity': 'warning',
                 'summary': '%d devices have fault light turned on' % len(
                     self.fault),
-                'detail': ['%s fault light enabled' % d for d in self.fault]
+                'detail': ['{} fault light enabled'.format(d) for d in self.ident]
             }
         self.set_health_checks(h)
 
-    def _get_devices(self):
-        d = self.get('devices')
-        dm = {}
-        for i in d['devices']:
-            dm[i['devid']] = i['location']
-        return dm
+    def _get_device_locations(self, dev_id):
+        # type: (str) -> List[orchestrator.DeviceLightLoc]
+        """
+        Return one ``DeviceLightLoc`` per location recorded for ``dev_id``.
+
+        :param dev_id: compared against the ``devid`` of every entry in
+            ``self.get('devices')['devices']``
+        :return: the locations of all matching entries; an empty list
+            (never ``None``) when no entry matches
+        """
+        # Flatten in a single pass instead of concatenating lists with sum().
+        return [
+            orchestrator.DeviceLightLoc(**loc)
+            for dev in self.get('devices')['devices']
+            if dev['devid'] == dev_id
+            for loc in dev['location']
+        ]
 
-    @CLIReadCommand(prefix='device ls-lights',
-                    desc='List currently active device indicator lights')
-    def _command_ls(self):
+    @_read_cli(prefix='device ls-lights',
+               desc='List currently active device indicator lights')
+    def _device_ls(self):
+        """Return the ``ident`` and ``fault`` device sets as a JSON object on stdout."""
         return HandleCommandResult(
             stdout=json.dumps({
                 'ident': list(self.ident),
                 'fault': list(self.fault)
                 }, indent=4))
 
-    @CLIWriteCommand(prefix='device fault-light-on',
-                     args='name=devid,type=CephString',
-                     desc='Enable device *fault* light')
-    def _command_fault_on(self, devid):
-        self.log.debug('fault-on %s' % devid)
-        devs = self._get_devices()
-        if devid not in devs:
-            return HandleCommandResult(stderr='device %s not found' % devid,
+    def light_on(self, fault_ident, devid):
+        # type: (str, str) -> HandleCommandResult
+        """
+        Record the light of ``devid`` as enabled and ask the orchestrator
+        to switch it on.
+
+        :param fault_ident: ``"fault"`` or ``"ident"``; also the name of the
+            attribute holding the set of enabled devices
+        :param devid: device id to look up via ``_get_device_locations``
+        :return: ``-ENOENT`` with a message on stderr when no location is
+            known for ``devid``; otherwise the stringified completion result
+        """
+        assert fault_ident in ("fault", "ident")
+        locs = self._get_device_locations(devid)
+        # _get_device_locations() always returns a list; an unknown device
+        # shows up as an empty list, not as None.
+        if not locs:
+            return HandleCommandResult(stderr='device {} not found'.format(devid),
                                        retval=-errno.ENOENT)
-        self.fault.add(devid)
+
+        getattr(self, fault_ident).add(devid)
         self._save()
         self._refresh_health()
-        #self.remote('orchestrator', '_device_fault_on', devs[devid])
-        return HandleCommandResult(stdout='')
-
-    @CLIWriteCommand(prefix='device ident-light-on',
-                     args='name=devid,type=CephString',
-                     desc='Enable device *ident* light')
-    def _command_ident_on(self, devid):
-        self.log.debug('ident-on %s' % devid)
-        devs = self._get_devices()
-        if devid not in devs:
-            return HandleCommandResult(stderr='device %s not found' % devid,
+        completion = self.blink_device_light(fault_ident, True, locs)
+        self._orchestrator_wait([completion])
+        return HandleCommandResult(stdout=str(completion.result))
+
+    def light_off(self, fault_ident, devid, force):
+        # type: (str, str, bool) -> HandleCommandResult
+        """
+        Ask the orchestrator to switch the light of ``devid`` off and drop
+        the device from the set of enabled lights.
+
+        :param fault_ident: ``"fault"`` or ``"ident"``; also the name of the
+            attribute holding the set of enabled devices
+        :param devid: device id to look up via ``_get_device_locations``
+        :param force: also drop the device from the set when the light
+            could not be switched off (unknown device, orchestrator error)
+        :return: ``-ENOENT`` with a message on stderr when no location is
+            known for ``devid``; otherwise the stringified completion result
+        :raises Exception: whatever the orchestrator call raises, re-raised
+            after the optional forced clean-up
+        """
+        assert fault_ident in ("fault", "ident")
+
+        def _forget():
+            # Drop devid from the persisted set and update the health checks.
+            active = getattr(self, fault_ident)
+            if devid in active:
+                active.remove(devid)
+                self._save()
+                self._refresh_health()
+
+        locs = self._get_device_locations(devid)
+        # _get_device_locations() always returns a list; an unknown device
+        # shows up as an empty list, not as None.
+        if not locs:
+            # The device is no longer known, so there is nothing to switch;
+            # force still clears the stale record.
+            if force:
+                _forget()
+            return HandleCommandResult(stderr='device {} not found'.format(devid),
                                        retval=-errno.ENOENT)
-        self.ident.add(devid)
-        self._save()
-        self._refresh_health()
-        #self.remote('orchestrator', '_device_ident_on', devs[devid])
-        return HandleCommandResult(stdout='')
-
-    @CLIWriteCommand(prefix='device fault-light-off',
-                     args='name=devid,type=CephString name=force,type=CephBool,req=false',
-                     desc='Disable device *fault* light')
-    def _command_fault_off(self, devid, force=False):
-        self.log.debug('fault-off %s' % devid)
-        devs = self._get_devices()
-#        if devid in devs:
-#            self.remote('orchestrator', '_device_fault_off', devs[devid])
-        if devid in self.fault:
-            self.fault.remove(devid)
-            self._save()
-            self._refresh_health()
-        return HandleCommandResult(stdout='')
-
-    @CLIWriteCommand(prefix='device ident-light-off',
-                     args='name=devid,type=CephString name=force,type=CephBool,req=false',
-                     desc='Disable device *ident* light')
-    def _command_ident_off(self, devid, force=False):
-        self.log.debug('ident-off %s' % devid)
-        devs = self._get_devices()
-#        if devid in devs:
-#            self.remote('orchestrator', '_device_ident_off', devs[devid])
-        if devid in self.ident:
-            self.ident.remove(devid)
-            self._save()
-            self._refresh_health()
-        return HandleCommandResult(stdout='')
+
+        try:
+            completion = self.blink_device_light(fault_ident, False, locs)
+            self._orchestrator_wait([completion])
+        except Exception:
+            # There are several reasons the orchestrator call might fail:
+            # 1. the device no longer exists
+            # 2. the device is no longer known to Ceph
+            # 3. the host is not reachable
+            if force:
+                _forget()
+            raise
+
+        _forget()
+        return HandleCommandResult(stdout=str(completion.result))
+
+
+    @_write_cli(prefix='device fault-light-on',
+                cmd_args='name=devid,type=CephString',
+                desc='Enable device *fault* light')
+    def _device_fault_on(self, devid):
+        """Handle ``device fault-light-on`` by calling :meth:`light_on` with ``'fault'``."""
+        return self.light_on('fault', devid)
+
+    @_write_cli(prefix='device ident-light-on',
+                cmd_args='name=devid,type=CephString',
+                desc='Enable device *ident* light')
+    def _device_ident_on(self, devid):
+        """Handle ``device ident-light-on`` by calling :meth:`light_on` with ``'ident'``."""
+        return self.light_on('ident', devid)
+
+
+    @_write_cli(prefix='device fault-light-off',
+                cmd_args='name=devid,type=CephString '
+                         'name=force,type=CephBool,req=false',
+                desc='Disable device *fault* light')
+    def _device_fault_off(self, devid, force=False):
+        """Handle ``device fault-light-off`` by calling :meth:`light_off` with ``'fault'``."""
+        return self.light_off('fault', devid, force)
+
+    @_write_cli(prefix='device ident-light-off',
+                cmd_args='name=devid,type=CephString '
+                         'name=force,type=CephBool,req=false',
+                desc='Disable device *ident* light')
+    def _device_ident_off(self, devid, force=False):
+        """Handle ``device ident-light-off`` by calling :meth:`light_off` with ``'ident'``."""
+        return self.light_off('ident', devid, force)
 
     def _select_orchestrator(self):
         return self.get_module_option("orchestrator")
index 92ead4657b6a0e22b29377746d805d779630d439..cb097c5f2816ad71ff102c0e93c6ad2e4e14e38b 100644 (file)
@@ -246,6 +246,12 @@ class TestOrchestrator(MgrModule, orchestrator.Orchestrator):
     def remove_osds(self, osd_ids, destroy=False):
         assert isinstance(osd_ids, list)
 
+    @deferred_write("blink_device_light")
+    def blink_device_light(self, ident_fault, on, locations):
+        """
+        Test implementation: only checks its arguments and returns an
+        empty string; no light is actually switched.
+        """
+        known_lights = ("ident", "fault")
+        assert ident_fault in known_lights
+        assert len(locations) > 0
+        return ''
+
     @deferred_write("service_action")
     def service_action(self, action, service_type, service_name=None, service_id=None):
         pass