From b14ae26a698020a154e8ea6a360cf149e2ef7fa9 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Tue, 5 Mar 2019 15:46:34 +0100 Subject: [PATCH] mgr/orchestrator: Add interface and tests to device lights Signed-off-by: Sebastian Wagner --- doc/mgr/orchestrator_cli.rst | 17 ++- doc/mgr/orchestrator_modules.rst | 3 + qa/tasks/mgr/test_orchestrator_cli.py | 20 ++- src/pybind/mgr/orchestrator.py | 25 +++- src/pybind/mgr/orchestrator_cli/module.py | 151 +++++++++++---------- src/pybind/mgr/test_orchestrator/module.py | 6 + 6 files changed, 146 insertions(+), 76 deletions(-) diff --git a/doc/mgr/orchestrator_cli.rst b/doc/mgr/orchestrator_cli.rst index 1d8212eb03dc4..e7af17de4c42e 100644 --- a/doc/mgr/orchestrator_cli.rst +++ b/doc/mgr/orchestrator_cli.rst @@ -182,10 +182,19 @@ Example:: ^^^^^^^^^^^^^^^^^^^ :: - ceph orchestrator device ident-on - ceph orchestrator device ident-off - ceph orchestrator device fault-on - ceph orchestrator device fault-off + ceph orchestrator device ident-on <dev_id> + ceph orchestrator device ident-on <dev_name> <host> + ceph orchestrator device fault-on <dev_id> + ceph orchestrator device fault-on <dev_name> <host> + + ceph orchestrator device ident-off <dev_id> [--force=true] + ceph orchestrator device ident-off <dev_name> <host> [--force=true] + ceph orchestrator device fault-off <dev_id> [--force=true] + ceph orchestrator device fault-off <dev_name> <host> [--force=true] + + where ``dev_id`` is the device id as listed in ``osd metadata``, + ``dev_name`` is the name of the device on the system and ``host`` is the host as + returned by ``orchestrator host ls`` ceph orchestrator osd ident-on {primary,journal,db,wal,all} ceph orchestrator osd ident-off {primary,journal,db,wal,all} diff --git a/doc/mgr/orchestrator_modules.rst b/doc/mgr/orchestrator_modules.rst index b3c7c1b237170..c68b6dd8ed357 100644 --- a/doc/mgr/orchestrator_modules.rst +++ b/doc/mgr/orchestrator_modules.rst @@ -260,6 +260,9 @@ OSD management .. py:currentmodule:: orchestrator +.. automethod:: Orchestrator.blink_device_light +.. autoclass:: DeviceLightLoc + .. 
_orchestrator-osd-replace: OSD Replacement diff --git a/qa/tasks/mgr/test_orchestrator_cli.py b/qa/tasks/mgr/test_orchestrator_cli.py index cbb2190d0307d..9a9c4b3b39c3f 100644 --- a/qa/tasks/mgr/test_orchestrator_cli.py +++ b/qa/tasks/mgr/test_orchestrator_cli.py @@ -14,8 +14,11 @@ log = logging.getLogger(__name__) class TestOrchestratorCli(MgrTestCase): MGRS_REQUIRED = 1 + def _cmd(self, module, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd(module, *args) + def _orch_cmd(self, *args): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("orchestrator", *args) + return self._cmd("orchestrator", *args) def _progress_cmd(self, *args): return self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", *args) @@ -93,6 +96,21 @@ class TestOrchestratorCli(MgrTestCase): with self.assertRaises(CommandFailedError): self._orch_cmd("osd", "create", "notfound:device") + def test_blink_device_light(self): + def _ls_lights(what): + return json.loads(self._cmd("device", "ls-lights"))[what] + + metadata = json.loads(self._cmd("osd", "metadata")) + dev_name_ids = [osd["device_ids"] for osd in metadata] + _, dev_id = [d.split('=') for d in dev_name_ids if len(d.split('=')) == 2][0] + + for t in ["ident", "fault"]: + self.assertNotIn(dev_id, _ls_lights(t)) + self._cmd("device", t + "-light-on", dev_id) + self.assertIn(dev_id, _ls_lights(t)) + self._cmd("device", t + "-light-off", dev_id) + self.assertNotIn(dev_id, _ls_lights(t)) + def test_mds_add(self): self._orch_cmd("mds", "add", "service_name") diff --git a/src/pybind/mgr/orchestrator.py b/src/pybind/mgr/orchestrator.py index c7e3e69d5629b..ea0a650667970 100644 --- a/src/pybind/mgr/orchestrator.py +++ b/src/pybind/mgr/orchestrator.py @@ -4,7 +4,6 @@ ceph-mgr orchestrator interface Please see the ceph-mgr module developer's guide for more information. 
""" -import copy import sys import time import fnmatch @@ -449,6 +448,17 @@ class Orchestrator(object): """ raise NotImplementedError() + def blink_device_light(self, ident_fault, on, locations): + # type: (str, bool, List[DeviceLightLoc]) -> WriteCompletion + """ + Instructs the orchestrator to enable or disable either the ident or the fault LED. + + :param ident_fault: either ``"ident"`` or ``"fault"`` + :param on: ``True`` = on. + :param locations: See :class:`orchestrator.DeviceLightLoc` + """ + raise NotImplementedError() + def update_mgrs(self, num, hosts): # type: (int, List[str]) -> WriteCompletion """ @@ -944,6 +954,19 @@ class InventoryNode(object): return [cls(item[0], devs(item[1].data)) for item in hosts] +class DeviceLightLoc(namedtuple('DeviceLightLoc', ['host', 'dev'])): + """ + Describes a specific device on a specific host. Used for enabling or disabling LEDs + on devices. + + hostname as in :func:`orchestrator.Orchestrator.get_hosts` + + device_id: e.g. ``ABC1234DEF567-1R1234_ABC8DE0Q``. + See ``ceph osd metadata | jq '.[].device_ids'`` + """ + __slots__ = () + + def _mk_orch_methods(cls): # Needs to be defined outside of for. # Otherwise meth is always bound to last key diff --git a/src/pybind/mgr/orchestrator_cli/module.py b/src/pybind/mgr/orchestrator_cli/module.py index b7fe15c0aec7a..a009104c0cb6b 100644 --- a/src/pybind/mgr/orchestrator_cli/module.py +++ b/src/pybind/mgr/orchestrator_cli/module.py @@ -1,14 +1,14 @@ import errno import json +from functools import wraps from prettytable import PrettyTable try: - from typing import Dict, List + from typing import List, Set except ImportError: pass # just for type checking. 
-from functools import wraps from ceph.deployment.drive_group import DriveGroupSpec, DriveGroupValidationError, \ DeviceSelection @@ -48,9 +48,8 @@ class OrchestratorCli(orchestrator.OrchestratorClientMixin, MgrModule): def __init__(self, *args, **kwargs): super(OrchestratorCli, self).__init__(*args, **kwargs) - self.ident = set([]) - self.fault = set([]) - self.devs = {} + self.ident = set() # type: Set[str] + self.fault = set() # type: Set[str] self._load() self._refresh_health() @@ -60,7 +59,7 @@ class OrchestratorCli(orchestrator.OrchestratorClientMixin, MgrModule): decoded = json.loads(active) self.ident = set(decoded.get('ident', [])) self.fault = set(decoded.get('fault', [])) - self.log.debug('ident %s, fault %s' % (self.ident, self.fault)) + self.log.debug('ident {}, fault {}'.format(self.ident, self.fault)) def _save(self): encoded = json.dumps({ @@ -76,90 +75,102 @@ class OrchestratorCli(orchestrator.OrchestratorClientMixin, MgrModule): 'severity': 'warning', 'summary': '%d devices have ident light turned on' % len( self.ident), - 'detail': ['%s ident light enabled' % d for d in self.ident] + 'detail': ['{} ident light enabled'.format(d) for d in self.ident] } if self.fault: h['DEVICE_FAULT_ON'] = { 'severity': 'warning', 'summary': '%d devices have fault light turned on' % len( self.fault), - 'detail': ['%s fault light enabled' % d for d in self.fault] + 'detail': ['{} fault light enabled'.format(d) for d in self.fault] } self.set_health_checks(h) - def _get_devices(self): - d = self.get('devices') - dm = {} - for i in d['devices']: - dm[i['devid']] = i['location'] - return dm + def _get_device_locations(self, dev_id): + # type: (str) -> List[orchestrator.DeviceLightLoc] + locs = [d['location'] for d in self.get('devices')['devices'] if d['devid'] == dev_id] + return [orchestrator.DeviceLightLoc(**l) for l in sum(locs, [])] - @CLIReadCommand(prefix='device ls-lights', - desc='List currently active device indicator lights') - def _command_ls(self): + 
@_read_cli(prefix='device ls-lights', + desc='List currently active device indicator lights') + def _device_ls(self): return HandleCommandResult( stdout=json.dumps({ 'ident': list(self.ident), 'fault': list(self.fault) }, indent=4)) - @CLIWriteCommand(prefix='device fault-light-on', - args='name=devid,type=CephString', - desc='Enable device *fault* light') - def _command_fault_on(self, devid): - self.log.debug('fault-on %s' % devid) - devs = self._get_devices() - if devid not in devs: - return HandleCommandResult(stderr='device %s not found' % devid, + def light_on(self, fault_ident, devid): + # type: (str, str) -> HandleCommandResult + assert fault_ident in ("fault", "ident") + locs = self._get_device_locations(devid) + if not locs: + return HandleCommandResult(stderr='device {} not found'.format(devid), retval=-errno.ENOENT) - self.fault.add(devid) + + getattr(self, fault_ident).add(devid) self._save() self._refresh_health() - #self.remote('orchestrator', '_device_fault_on', devs[devid]) - return HandleCommandResult(stdout='') - - @CLIWriteCommand(prefix='device ident-light-on', - args='name=devid,type=CephString', - desc='Enable device *ident* light') - def _command_ident_on(self, devid): - self.log.debug('ident-on %s' % devid) - devs = self._get_devices() - if devid not in devs: - return HandleCommandResult(stderr='device %s not found' % devid, + completion = self.blink_device_light(fault_ident, True, locs) + self._orchestrator_wait([completion]) + return HandleCommandResult(stdout=str(completion.result)) + + def light_off(self, fault_ident, devid, force): + # type: (str, str, bool) -> HandleCommandResult + assert fault_ident in ("fault", "ident") + locs = self._get_device_locations(devid) + if not locs and not force: + return HandleCommandResult(stderr='device {} not found'.format(devid), retval=-errno.ENOENT) - self.ident.add(devid) - self._save() - self._refresh_health() - #self.remote('orchestrator', '_device_ident_on', devs[devid]) - return 
HandleCommandResult(stdout='') - - @CLIWriteCommand(prefix='device fault-light-off', - args='name=devid,type=CephString name=force,type=CephBool,req=false', - desc='Disable device *fault* light') - def _command_fault_off(self, devid, force=False): - self.log.debug('fault-off %s' % devid) - devs = self._get_devices() -# if devid in devs: -# self.remote('orchestrator', '_device_fault_off', devs[devid]) - if devid in self.fault: - self.fault.remove(devid) - self._save() - self._refresh_health() - return HandleCommandResult(stdout='') - - @CLIWriteCommand(prefix='device ident-light-off', - args='name=devid,type=CephString name=force,type=CephBool,req=false', - desc='Disable device *ident* light') - def _command_ident_off(self, devid, force=False): - self.log.debug('ident-off %s' % devid) - devs = self._get_devices() -# if devid in devs: -# self.remote('orchestrator', '_device_ident_off', devs[devid]) - if devid in self.ident: - self.ident.remove(devid) - self._save() - self._refresh_health() - return HandleCommandResult(stdout='') + + try: + completion = self.blink_device_light(fault_ident, False, locs) + self._orchestrator_wait([completion]) + + if devid in getattr(self, fault_ident): + getattr(self, fault_ident).remove(devid) + self._save() + self._refresh_health() + return HandleCommandResult(stdout=str(completion.result)) + + except Exception: + # There are several reasons the try: block might fail: + # 1. the device no longer exists + # 2. the device is no longer known to Ceph + # 3. 
the host is not reachable + if force and devid in getattr(self, fault_ident): + getattr(self, fault_ident).remove(devid) + self._save() + self._refresh_health() + raise + + + @_write_cli(prefix='device fault-light-on', + cmd_args='name=devid,type=CephString', + desc='Enable device *fault* light') + def _device_fault_on(self, devid): + return self.light_on('fault', devid) + + @_write_cli(prefix='device ident-light-on', + cmd_args='name=devid,type=CephString', + desc='Enable device *ident* light') + def _device_ident_on(self, devid): + return self.light_on('ident', devid) + + + @_write_cli(prefix='device fault-light-off', + cmd_args='name=devid,type=CephString ' + 'name=force,type=CephBool,req=false', + desc='Disable device *fault* light') + def _device_fault_off(self, devid, force=False): + return self.light_off('fault', devid, force) + + @_write_cli(prefix='device ident-light-off', + cmd_args='name=devid,type=CephString ' + 'name=force,type=CephBool,req=false', + desc='Disable device *ident* light') + def _device_ident_off(self, devid, force=False): + return self.light_off('ident', devid, force) def _select_orchestrator(self): return self.get_module_option("orchestrator") diff --git a/src/pybind/mgr/test_orchestrator/module.py b/src/pybind/mgr/test_orchestrator/module.py index 92ead4657b6a0..cb097c5f2816a 100644 --- a/src/pybind/mgr/test_orchestrator/module.py +++ b/src/pybind/mgr/test_orchestrator/module.py @@ -246,6 +246,12 @@ class TestOrchestrator(MgrModule, orchestrator.Orchestrator): def remove_osds(self, osd_ids, destroy=False): assert isinstance(osd_ids, list) + @deferred_write("blink_device_light") + def blink_device_light(self, ident_fault, on, locations): + assert ident_fault in ("ident", "fault") + assert len(locations) + return '' + @deferred_write("service_action") def service_action(self, action, service_type, service_name=None, service_id=None): pass -- 2.39.5