From f023ec3c3a2cc642d3cde80dfe30312f1d8998c6 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 26 Jul 2022 11:56:57 +0200 Subject: [PATCH] mgr/dashboard: display real health in rbd mirroring pools Signed-off-by: Pere Diaz Bou (cherry picked from commit 273286d1a661eda0020bffb2c2a384b4e8dcd36c) --- .../dashboard/controllers/rbd_mirroring.py | 116 +++++++++++------- .../mgr/dashboard/tests/test_rbd_mirroring.py | 16 ++- 2 files changed, 84 insertions(+), 48 deletions(-) diff --git a/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py b/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py index 17ef0b88b2a36..738f2e8b2fce8 100644 --- a/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py +++ b/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py @@ -3,6 +3,7 @@ import json import logging import re +from enum import IntEnum from functools import partial from typing import NamedTuple, Optional, no_type_check @@ -25,7 +26,21 @@ from . import APIDoc, APIRouter, BaseController, CreatePermission, Endpoint, \ logger = logging.getLogger('controllers.rbd_mirror') +class MirrorHealth(IntEnum): + # RBD defined mirroring health states in src/tools/rbd/action/MirrorPool.cc where the order + # is relevant. 
+ MIRROR_HEALTH_OK = 0 + MIRROR_HEALTH_UNKNOWN = 1 + MIRROR_HEALTH_WARNING = 2 + MIRROR_HEALTH_ERROR = 3 + + # extra states for the dashboard + MIRROR_HEALTH_DISABLED = 4 + MIRROR_HEALTH_INFO = 5 + # pylint: disable=not-callable + + def handle_rbd_mirror_error(): def composed_decorator(func): func = handle_rados_error('rbd-mirroring')(func) @@ -79,28 +94,24 @@ def get_daemons(): def get_daemon_health(daemon): health = { - 'health_color': 'info', - 'health': 'Unknown' + 'health': MirrorHealth.MIRROR_HEALTH_UNKNOWN } for _, pool_data in daemon['status'].items(): - if (health['health'] != 'error' + if (health['health'] != MirrorHealth.MIRROR_HEALTH_ERROR and [k for k, v in pool_data.get('callouts', {}).items() if v['level'] == 'error']): health = { - 'health_color': 'error', - 'health': 'Error' + 'health': MirrorHealth.MIRROR_HEALTH_ERROR } - elif (health['health'] != 'error' + elif (health['health'] != MirrorHealth.MIRROR_HEALTH_ERROR and [k for k, v in pool_data.get('callouts', {}).items() if v['level'] == 'warning']): health = { - 'health_color': 'warning', - 'health': 'Warning' + 'health': MirrorHealth.MIRROR_HEALTH_WARNING } - elif health['health_color'] == 'info': + elif health['health'] == MirrorHealth.MIRROR_HEALTH_INFO: health = { - 'health_color': 'success', - 'health': 'OK' + 'health': MirrorHealth.MIRROR_HEALTH_OK } return health @@ -113,45 +124,48 @@ def get_pools(daemons): # pylint: disable=R0912, R0915 return pool_stats +def transform_mirror_health(stat): + health = 'OK' + health_color = 'success' + if stat['health'] == MirrorHealth.MIRROR_HEALTH_ERROR: + health = 'Error' + health_color = 'error' + elif stat['health'] == MirrorHealth.MIRROR_HEALTH_WARNING: + health = 'Warning' + health_color = 'warning' + elif stat['health'] == MirrorHealth.MIRROR_HEALTH_UNKNOWN: + health = 'Unknown' + health_color = 'warning' + elif stat['health'] == MirrorHealth.MIRROR_HEALTH_OK: + health = 'OK' + health_color = 'success' + elif stat['health'] == 
MirrorHealth.MIRROR_HEALTH_DISABLED: + health = 'Disabled' + health_color = 'info' + stat['health'] = health + stat['health_color'] = health_color + + def _update_pool_stats(daemons, pool_stats): _update_pool_stats_with_daemons(daemons, pool_stats) - for _, stats in pool_stats.items(): - if stats['mirror_mode'] == 'disabled': - continue - if stats.get('health', None) is None: - # daemon doesn't know about pool - stats['health_color'] = 'error' - stats['health'] = 'Error' - elif stats.get('leader_id', None) is None: - # no daemons are managing the pool as leader instance - stats['health_color'] = 'warning' - stats['health'] = 'Warning' + for pool_stat in pool_stats.values(): + transform_mirror_health(pool_stat) def _update_pool_stats_with_daemons(daemons, pool_stats): for daemon in daemons: for _, pool_data in daemon['status'].items(): - stats = pool_stats.get(pool_data['name'], None) # type: ignore - if stats is None: + pool_stat = pool_stats.get(pool_data['name'], None) # type: ignore + if pool_stat is None: continue if pool_data.get('leader', False): # leader instance stores image counts - stats['leader_id'] = daemon['metadata']['instance_id'] - stats['image_local_count'] = pool_data.get('image_local_count', 0) - stats['image_remote_count'] = pool_data.get('image_remote_count', 0) - - if (stats.get('health_color', '') != 'error' - and pool_data.get('image_error_count', 0) > 0): - stats['health_color'] = 'error' - stats['health'] = 'Error' - elif (stats.get('health_color', '') != 'error' - and pool_data.get('image_warning_count', 0) > 0): - stats['health_color'] = 'warning' - stats['health'] = 'Warning' - elif stats.get('health', None) is None: - stats['health_color'] = 'success' - stats['health'] = 'OK' + pool_stat['leader_id'] = daemon['metadata']['instance_id'] + pool_stat['image_local_count'] = pool_data.get('image_local_count', 0) + pool_stat['image_remote_count'] = pool_data.get('image_remote_count', 0) + + pool_stat['health'] = max(pool_stat['health'], 
daemon['health']) def _get_pool_stats(pool_names): @@ -176,16 +190,27 @@ def _get_pool_stats(pool_names): stats = {} if mirror_mode == rbd.RBD_MIRROR_MODE_DISABLED: mirror_mode = "disabled" - stats['health_color'] = "info" - stats['health'] = "Disabled" + stats['health'] = MirrorHealth.MIRROR_HEALTH_DISABLED elif mirror_mode == rbd.RBD_MIRROR_MODE_IMAGE: mirror_mode = "image" elif mirror_mode == rbd.RBD_MIRROR_MODE_POOL: mirror_mode = "pool" else: mirror_mode = "unknown" - stats['health_color'] = "warning" - stats['health'] = "Warning" + + if mirror_mode != "disabled": + # In case of a pool being enabled we will infer the health like the RBD cli tool does + # in src/tools/rbd/action/MirrorPool.cc::execute_status + mirror_image_health: MirrorHealth = MirrorHealth.MIRROR_HEALTH_OK + for status, _ in rbdctx.mirror_image_status_summary(ioctx): + if (mirror_image_health < MirrorHealth.MIRROR_HEALTH_WARNING + and status != rbd.MIRROR_IMAGE_STATUS_STATE_REPLAYING + and status != rbd.MIRROR_IMAGE_STATUS_STATE_STOPPED): + mirror_image_health = MirrorHealth.MIRROR_HEALTH_WARNING + if (mirror_image_health < MirrorHealth.MIRROR_HEALTH_ERROR + and status == rbd.MIRROR_IMAGE_STATUS_STATE_ERROR): + mirror_image_health = MirrorHealth.MIRROR_HEALTH_ERROR + stats['health'] = mirror_image_health pool_stats[pool_name] = dict(stats, **{ 'mirror_mode': mirror_mode, @@ -197,10 +222,13 @@ def _get_pool_stats(pool_names): @ViewCache() def get_daemons_and_pools(): # pylint: disable=R0915 daemons = get_daemons() - return { + daemons_and_pools = { 'daemons': daemons, 'pools': get_pools(daemons) } + for daemon in daemons: + transform_mirror_health(daemon) + return daemons_and_pools class ReplayingData(NamedTuple): diff --git a/src/pybind/mgr/dashboard/tests/test_rbd_mirroring.py b/src/pybind/mgr/dashboard/tests/test_rbd_mirroring.py index 60571d8e5543f..1d4dac3062bc1 100644 --- a/src/pybind/mgr/dashboard/tests/test_rbd_mirroring.py +++ b/src/pybind/mgr/dashboard/tests/test_rbd_mirroring.py @@ 
-108,14 +108,15 @@ class GetDaemonAndPoolsTest(unittest.TestCase): mock_rbd_instance = mock_rbd.return_value mock_rbd_instance.mirror_peer_list.return_value = [] test_cases = self._get_pool_test_cases() - for new_status, mirror_mode, expected_output in test_cases: + for new_status, pool_mirror_mode, images_summary, expected_output in test_cases: _status[1].update(new_status) daemon_status = { 'json': json.dumps(_status) } mgr.get_daemon_status.return_value = daemon_status daemons = get_daemons() - mock_rbd_instance.mirror_mode_get.return_value = mirror_mode + mock_rbd_instance.mirror_mode_get.return_value = pool_mirror_mode + mock_rbd_instance.mirror_image_status_summary.return_value = images_summary res = get_pools(daemons) for k, v in expected_output.items(): self.assertTrue(v == res['rbd'][k]) @@ -123,11 +124,16 @@ class GetDaemonAndPoolsTest(unittest.TestCase): def _get_pool_test_cases(self): test_cases = [ + # 1. Daemon status + # 2. Pool mirror mode + # 3. Image health summary + # 4. Pool health output ( { 'image_error_count': 7, }, rbd.RBD_MIRROR_MODE_IMAGE, + [(rbd.MIRROR_IMAGE_STATUS_STATE_UNKNOWN, None)], { 'health_color': 'warning', 'health': 'Warning' @@ -137,7 +143,8 @@ class GetDaemonAndPoolsTest(unittest.TestCase): { 'image_error_count': 7, }, - rbd.RBD_MIRROR_MODE_DISABLED, + rbd.RBD_MIRROR_MODE_POOL, + [(rbd.MIRROR_IMAGE_STATUS_STATE_ERROR, None)], { 'health_color': 'error', 'health': 'Error' @@ -150,6 +157,7 @@ class GetDaemonAndPoolsTest(unittest.TestCase): 'leader_id': 1 }, rbd.RBD_MIRROR_MODE_DISABLED, + [], { 'health_color': 'info', 'health': 'Disabled' @@ -280,7 +288,7 @@ class RbdMirroringSummaryControllerTest(ControllerTestCase): self.assertStatus(200) summary = self.json_body()['rbd_mirroring'] - self.assertEqual(summary, {'errors': 0, 'warnings': 1}) + self.assertEqual(summary, {'errors': 0, 'warnings': 2}) class RbdMirroringStatusControllerTest(ControllerTestCase): -- 2.39.5