From e71466cc49050a99ef7919fc2e1f7afb0b4a52c7 Mon Sep 17 00:00:00 2001 From: Patrick Nawracay Date: Thu, 2 Aug 2018 16:51:36 +0200 Subject: [PATCH] mgr/dashboard: Add support for managing individual OSD settings (backend) Add options to mark OSDs in/out/down/reweight/lost/remove/destroy/create Fixes: http://tracker.ceph.com/issues/24270 Signed-off-by: Patrick Nawracay --- qa/suites/rados/mgr/tasks/dashboard.yaml | 3 + qa/tasks/mgr/dashboard/test_osd.py | 58 ++++++++++ src/pybind/mgr/dashboard/controllers/osd.py | 105 +++++++++++++++++- .../mgr/dashboard/services/ceph_service.py | 3 +- 4 files changed, 165 insertions(+), 4 deletions(-) diff --git a/qa/suites/rados/mgr/tasks/dashboard.yaml b/qa/suites/rados/mgr/tasks/dashboard.yaml index 8a134aad22a8b..4304052611b72 100644 --- a/qa/suites/rados/mgr/tasks/dashboard.yaml +++ b/qa/suites/rados/mgr/tasks/dashboard.yaml @@ -18,7 +18,10 @@ tasks: - \(MDS_DAMAGE\) - \(MDS_ALL_DOWN\) - \(MDS_UP_LESS_THAN_MAX\) + - \(OSD_DOWN\) + - \(OSD_HOST_DOWN\) - pauserd,pausewr flag\(s\) set + - Monitor daemon marked osd\.[[:digit:]]+ down, but it is still running - rgw: [client.0] - cephfs_test_runner: fail_on_skip: false diff --git a/qa/tasks/mgr/dashboard/test_osd.py b/qa/tasks/mgr/dashboard/test_osd.py index f4f0b0ea86ce6..6e22399a9821f 100644 --- a/qa/tasks/mgr/dashboard/test_osd.py +++ b/qa/tasks/mgr/dashboard/test_osd.py @@ -3,6 +3,7 @@ from __future__ import absolute_import import json +from time import sleep from .helper import DashboardTestCase, JObj, JAny, JList, JLeaf, JTuple @@ -11,6 +12,9 @@ class OsdTest(DashboardTestCase): AUTH_ROLES = ['cluster-manager'] + def tearDown(self): + self._post('/api/osd/0/mark_in') + @DashboardTestCase.RunAs('test', 'test', ['block-manager']) def test_access_permissions(self): self._get('/api/osd') @@ -51,6 +55,60 @@ class OsdTest(DashboardTestCase): self._post('/api/osd/0/scrub?deep=True') self.assertStatus(200) + def test_mark_out_and_in(self): + self._post('/api/osd/0/mark_out') + self.assertStatus(200) + + self._post('/api/osd/0/mark_in') + self.assertStatus(200) + + def test_mark_down(self): + self._post('/api/osd/0/mark_down') + self.assertStatus(200) + + def test_reweight(self): + self._post('/api/osd/0/reweight', {'weight': 0.4}) + self.assertStatus(200) + + def get_reweight_value(): + self._get('/api/osd/0') + response = self.jsonBody() + if 'osd_map' in response and 'weight' in response['osd_map']: + return round(response['osd_map']['weight'], 1) + self.wait_until_equal(get_reweight_value, 0.4, 10) + self.assertStatus(200) + + # Undo + self._post('/api/osd/0/reweight', {'weight': 1}) + + def test_create_lost_destroy_remove(self): + # Create + self._post('/api/osd', { + 'uuid': 'f860ca2e-757d-48ce-b74a-87052cad563f', + 'svc_id': 5 + }) + self.assertStatus(201) + # Lost + self._post('/api/osd/5/mark_lost') + self.assertStatus(200) + # Destroy + self._post('/api/osd/5/destroy') + self.assertStatus(200) + # Remove + self._post('/api/osd/5/remove') + self.assertStatus(200) + + def test_safe_to_destroy(self): + self._get('/api/osd/5/safe_to_destroy') + self.assertStatus(200) + self.assertJsonBody({'safe-to-destroy': True}) + + def get_destroy_status(): + self._get('/api/osd/0/safe_to_destroy') + return self.jsonBody()['safe-to-destroy'] + self.wait_until_equal(get_destroy_status, False, 10) + self.assertStatus(200) + class OsdFlagsTest(DashboardTestCase): def __init__(self, *args, **kwargs): diff --git a/src/pybind/mgr/dashboard/controllers/osd.py b/src/pybind/mgr/dashboard/controllers/osd.py index 43cd6c8ffba39..33f1b222ce083 100644 --- a/src/pybind/mgr/dashboard/controllers/osd.py +++ b/src/pybind/mgr/dashboard/controllers/osd.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import - +import re from . import ApiController, RESTController, UpdatePermission from .. import mgr, logger from ..security import Scope -from ..services.ceph_service import CephService +from ..services.ceph_service import CephService, SendCommandError from ..services.exception import handle_send_command_error from ..tools import str_to_bool @@ -64,6 +64,107 @@ class Osd(RESTController): api_scrub = "osd deep-scrub" if str_to_bool(deep) else "osd scrub" CephService.send_command("mon", api_scrub, who=svc_id) + @RESTController.Resource('POST') + def mark_out(self, svc_id): + CephService.send_command('mon', 'osd out', ids=[svc_id]) + + @RESTController.Resource('POST') + def mark_in(self, svc_id): + CephService.send_command('mon', 'osd in', ids=[svc_id]) + + @RESTController.Resource('POST') + def mark_down(self, svc_id): + CephService.send_command('mon', 'osd down', ids=[svc_id]) + + @RESTController.Resource('POST') + def reweight(self, svc_id, weight): + """ + Reweights the OSD temporarily. + + Note that ‘ceph osd reweight’ is not a persistent setting. When an OSD + gets marked out, the osd weight will be set to 0. When it gets marked + in again, the weight will be changed to 1. + + Because of this ‘ceph osd reweight’ is a temporary solution. You should + only use it to keep your cluster running while you’re ordering more + hardware. + + - Craig Lewis (http://lists.ceph.com/pipermail/ceph-users-ceph.com/2014-June/040967.html) + """ + CephService.send_command( + 'mon', + 'osd reweight', + id=int(svc_id), + weight=float(weight)) + + @RESTController.Resource('POST') + def mark_lost(self, svc_id): + """ + Note: osd must be marked `down` before marking lost. + """ + CephService.send_command( + 'mon', + 'osd lost', + id=int(svc_id), + sure='--yes-i-really-mean-it') + + def create(self, uuid=None, svc_id=None): + """ + :param uuid: Will be set automatically if the OSD starts up. + :param id: The ID is only used if a valid uuid is given. + :return: + """ + result = CephService.send_command( + 'mon', 'osd create', id=svc_id, uuid=uuid) + return { + 'result': result, + 'svc_id': svc_id, + 'uuid': uuid, + } + + @RESTController.Resource('POST') + def remove(self, svc_id): + """ + Note: osd must be marked `down` before removal. + """ + CephService.send_command('mon', 'osd rm', ids=[svc_id]) + + @RESTController.Resource('POST') + def destroy(self, svc_id): + """ + Mark osd as being destroyed. Keeps the ID intact (allowing reuse), but + removes cephx keys, config-key data and lockbox keys, rendering data + permanently unreadable. + + The osd must be marked down before being destroyed. + """ + CephService.send_command( + 'mon', 'osd destroy-actual', id=int(svc_id), sure='--yes-i-really-mean-it') + + @RESTController.Resource('GET') + def safe_to_destroy(self, svc_id): + """ + :type svc_id: int|[int] + """ + if not isinstance(svc_id, list): + svc_id = [svc_id] + svc_id = list(map(str, svc_id)) + try: + CephService.send_command( + 'mon', 'osd safe-to-destroy', ids=svc_id, target=('mgr', '')) + return {'safe-to-destroy': True} + except SendCommandError as e: + match = re.match( + r'OSD\(s\) (\d+) have (\d+) pgs currently mapped to them', + e.message) + if match: + return { + 'message': e.message, + 'safe-to-destroy': False + } + else: + raise e + @ApiController('/osd/flags', Scope.OSD) class OsdFlagsController(RESTController): diff --git a/src/pybind/mgr/dashboard/services/ceph_service.py b/src/pybind/mgr/dashboard/services/ceph_service.py index 174a6ef61c654..aacc241ea1957 100644 --- a/src/pybind/mgr/dashboard/services/ceph_service.py +++ b/src/pybind/mgr/dashboard/services/ceph_service.py @@ -153,8 +153,7 @@ class CephService(object): "prefix": prefix, "format": "json", } - argdict.update({k: v for k, v in kwargs.items() if v}) - + argdict.update({k: v for k, v in kwargs.items() if v is not None}) result = CommandResult("") mgr.send_command(result, srv_type, srv_spec, json.dumps(argdict), "") r, outb, outs = result.wait() -- 2.39.5