mgr/dashboard: display real health in rbd mirroring pools

author Pere Diaz Bou <pdiazbou@redhat.com>

Tue, 26 Jul 2022 09:56:57 +0000 (11:56 +0200)

committer Pere Diaz Bou <pdiazbou@redhat.com>

Wed, 14 Dec 2022 11:55:08 +0000 (12:55 +0100)
author Pere Diaz Bou <pdiazbou@redhat.com>
Tue, 26 Jul 2022 09:56:57 +0000 (11:56 +0200)
committer Pere Diaz Bou <pdiazbou@redhat.com>
Wed, 14 Dec 2022 11:55:08 +0000 (12:55 +0100)
diff --git a/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py b/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py

index 17ef0b88b2a362e6fa9b94dd4cedb100fcfcbc31..738f2e8b2fce87e26a547ed27c1890727cc854f5 100644 (file)
--- a/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py
+++ b/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py
@@ -3,6 +3,7 @@
  import json
  import logging
  import re
+from enum import IntEnum
  from functools import partial
  from typing import NamedTuple, Optional, no_type_check
  
@@ -25,7 +26,21 @@ from . import APIDoc, APIRouter, BaseController, CreatePermission, Endpoint, \
  logger = logging.getLogger('controllers.rbd_mirror')
  
  
+class MirrorHealth(IntEnum):
+    # RBD defined mirroring health states in src/tools/rbd/action/MirrorPool.cc where the order
+    # is relevant.
+    MIRROR_HEALTH_OK = 0
+    MIRROR_HEALTH_UNKNOWN = 1
+    MIRROR_HEALTH_WARNING = 2
+    MIRROR_HEALTH_ERROR = 3
+
+    # extra states for the dashboard
+    MIRROR_HEALTH_DISABLED = 4
+    MIRROR_HEALTH_INFO = 5
+
  # pylint: disable=not-callable
+
+
  def handle_rbd_mirror_error():
      def composed_decorator(func):
          func = handle_rados_error('rbd-mirroring')(func)
@@ -79,28 +94,24 @@ def get_daemons():
  
  def get_daemon_health(daemon):
      health = {
-        'health_color': 'info',
-        'health': 'Unknown'
+        'health': MirrorHealth.MIRROR_HEALTH_UNKNOWN
      }
      for _, pool_data in daemon['status'].items():
-        if (health['health'] != 'error'
+        if (health['health'] != MirrorHealth.MIRROR_HEALTH_ERROR
                  and [k for k, v in pool_data.get('callouts', {}).items()
                       if v['level'] == 'error']):
              health = {
-                'health_color': 'error',
-                'health': 'Error'
+                'health': MirrorHealth.MIRROR_HEALTH_ERROR
              }
-        elif (health['health'] != 'error'
+        elif (health['health'] != MirrorHealth.MIRROR_HEALTH_ERROR
                  and [k for k, v in pool_data.get('callouts', {}).items()
                       if v['level'] == 'warning']):
              health = {
-                'health_color': 'warning',
-                'health': 'Warning'
+                'health': MirrorHealth.MIRROR_HEALTH_WARNING
              }
-        elif health['health_color'] == 'info':
+        elif health['health'] == MirrorHealth.MIRROR_HEALTH_INFO:
              health = {
-                'health_color': 'success',
-                'health': 'OK'
+                'health': MirrorHealth.MIRROR_HEALTH_OK
              }
      return health
  
@@ -113,45 +124,48 @@ def get_pools(daemons):  # pylint: disable=R0912, R0915
      return pool_stats
  
  
+def transform_mirror_health(stat):
+    health = 'OK'
+    health_color = 'success'
+    if stat['health'] == MirrorHealth.MIRROR_HEALTH_ERROR:
+        health = 'Error'
+        health_color = 'error'
+    elif stat['health'] == MirrorHealth.MIRROR_HEALTH_WARNING:
+        health = 'Warning'
+        health_color = 'warning'
+    elif stat['health'] == MirrorHealth.MIRROR_HEALTH_UNKNOWN:
+        health = 'Unknown'
+        health_color = 'warning'
+    elif stat['health'] == MirrorHealth.MIRROR_HEALTH_OK:
+        health = 'OK'
+        health_color = 'success'
+    elif stat['health'] == MirrorHealth.MIRROR_HEALTH_DISABLED:
+        health = 'Disabled'
+        health_color = 'info'
+    stat['health'] = health
+    stat['health_color'] = health_color
+
+
  def _update_pool_stats(daemons, pool_stats):
      _update_pool_stats_with_daemons(daemons, pool_stats)
-    for _, stats in pool_stats.items():
-        if stats['mirror_mode'] == 'disabled':
-            continue
-        if stats.get('health', None) is None:
-            # daemon doesn't know about pool
-            stats['health_color'] = 'error'
-            stats['health'] = 'Error'
-        elif stats.get('leader_id', None) is None:
-            # no daemons are managing the pool as leader instance
-            stats['health_color'] = 'warning'
-            stats['health'] = 'Warning'
+    for pool_stat in pool_stats.values():
+        transform_mirror_health(pool_stat)
  
  
  def _update_pool_stats_with_daemons(daemons, pool_stats):
      for daemon in daemons:
          for _, pool_data in daemon['status'].items():
-            stats = pool_stats.get(pool_data['name'], None)  # type: ignore
-            if stats is None:
+            pool_stat = pool_stats.get(pool_data['name'], None)  # type: ignore
+            if pool_stat is None:
                  continue
  
              if pool_data.get('leader', False):
                  # leader instance stores image counts
-                stats['leader_id'] = daemon['metadata']['instance_id']
-                stats['image_local_count'] = pool_data.get('image_local_count', 0)
-                stats['image_remote_count'] = pool_data.get('image_remote_count', 0)
-
-            if (stats.get('health_color', '') != 'error'
-                    and pool_data.get('image_error_count', 0) > 0):
-                stats['health_color'] = 'error'
-                stats['health'] = 'Error'
-            elif (stats.get('health_color', '') != 'error'
-                    and pool_data.get('image_warning_count', 0) > 0):
-                stats['health_color'] = 'warning'
-                stats['health'] = 'Warning'
-            elif stats.get('health', None) is None:
-                stats['health_color'] = 'success'
-                stats['health'] = 'OK'
+                pool_stat['leader_id'] = daemon['metadata']['instance_id']
+                pool_stat['image_local_count'] = pool_data.get('image_local_count', 0)
+                pool_stat['image_remote_count'] = pool_data.get('image_remote_count', 0)
+
+            pool_stat['health'] = max(pool_stat['health'], daemon['health'])
  
  
  def _get_pool_stats(pool_names):
@@ -176,16 +190,27 @@ def _get_pool_stats(pool_names):
          stats = {}
          if mirror_mode == rbd.RBD_MIRROR_MODE_DISABLED:
              mirror_mode = "disabled"
-            stats['health_color'] = "info"
-            stats['health'] = "Disabled"
+            stats['health'] = MirrorHealth.MIRROR_HEALTH_DISABLED
          elif mirror_mode == rbd.RBD_MIRROR_MODE_IMAGE:
              mirror_mode = "image"
          elif mirror_mode == rbd.RBD_MIRROR_MODE_POOL:
              mirror_mode = "pool"
          else:
              mirror_mode = "unknown"
-            stats['health_color'] = "warning"
-            stats['health'] = "Warning"
+
+        if mirror_mode != "disabled":
+            # When pool mirroring is enabled, infer the health the same way the RBD CLI tool does
+            # in src/tools/rbd/action/MirrorPool.cc::execute_status
+            mirror_image_health: MirrorHealth = MirrorHealth.MIRROR_HEALTH_OK
+            for status, _ in rbdctx.mirror_image_status_summary(ioctx):
+                if (mirror_image_health < MirrorHealth.MIRROR_HEALTH_WARNING
+                    and status != rbd.MIRROR_IMAGE_STATUS_STATE_REPLAYING
+                        and status != rbd.MIRROR_IMAGE_STATUS_STATE_STOPPED):
+                    mirror_image_health = MirrorHealth.MIRROR_HEALTH_WARNING
+                if (mirror_image_health < MirrorHealth.MIRROR_HEALTH_ERROR
+                        and status == rbd.MIRROR_IMAGE_STATUS_STATE_ERROR):
+                    mirror_image_health = MirrorHealth.MIRROR_HEALTH_ERROR
+            stats['health'] = mirror_image_health
  
          pool_stats[pool_name] = dict(stats, **{
              'mirror_mode': mirror_mode,
@@ -197,10 +222,13 @@ def _get_pool_stats(pool_names):
  @ViewCache()
  def get_daemons_and_pools():  # pylint: disable=R0915
      daemons = get_daemons()
-    return {
+    daemons_and_pools = {
          'daemons': daemons,
          'pools': get_pools(daemons)
      }
+    for daemon in daemons:
+        transform_mirror_health(daemon)
+    return daemons_and_pools
  
  
  class ReplayingData(NamedTuple):
diff --git a/src/pybind/mgr/dashboard/tests/test_rbd_mirroring.py b/src/pybind/mgr/dashboard/tests/test_rbd_mirroring.py

index 60571d8e5543f2ec4a9be4c62a516babff2702d2..1d4dac3062bc17acfd34bc36230485725501535b 100644 (file)
--- a/src/pybind/mgr/dashboard/tests/test_rbd_mirroring.py
+++ b/src/pybind/mgr/dashboard/tests/test_rbd_mirroring.py
@@ -108,14 +108,15 @@ class GetDaemonAndPoolsTest(unittest.TestCase):
          mock_rbd_instance = mock_rbd.return_value
          mock_rbd_instance.mirror_peer_list.return_value = []
          test_cases = self._get_pool_test_cases()
-        for new_status, mirror_mode, expected_output in test_cases:
+        for new_status, pool_mirror_mode, images_summary, expected_output in test_cases:
              _status[1].update(new_status)
              daemon_status = {
                  'json': json.dumps(_status)
              }
              mgr.get_daemon_status.return_value = daemon_status
              daemons = get_daemons()
-            mock_rbd_instance.mirror_mode_get.return_value = mirror_mode
+            mock_rbd_instance.mirror_mode_get.return_value = pool_mirror_mode
+            mock_rbd_instance.mirror_image_status_summary.return_value = images_summary
              res = get_pools(daemons)
              for k, v in expected_output.items():
                  self.assertTrue(v == res['rbd'][k])
@@ -123,11 +124,16 @@ class GetDaemonAndPoolsTest(unittest.TestCase):
  
      def _get_pool_test_cases(self):
          test_cases = [
+            # 1. daemon status
+            # 2. Pool mirror mode
+            # 3. Image health summary
+            # 4. Pool health output
              (
                  {
                      'image_error_count': 7,
                  },
                  rbd.RBD_MIRROR_MODE_IMAGE,
+                [(rbd.MIRROR_IMAGE_STATUS_STATE_UNKNOWN, None)],
                  {
                      'health_color': 'warning',
                      'health': 'Warning'
@@ -137,7 +143,8 @@ class GetDaemonAndPoolsTest(unittest.TestCase):
                  {
                      'image_error_count': 7,
                  },
-                rbd.RBD_MIRROR_MODE_DISABLED,
+                rbd.RBD_MIRROR_MODE_POOL,
+                [(rbd.MIRROR_IMAGE_STATUS_STATE_ERROR, None)],
                  {
                      'health_color': 'error',
                      'health': 'Error'
@@ -150,6 +157,7 @@ class GetDaemonAndPoolsTest(unittest.TestCase):
                      'leader_id': 1
                  },
                  rbd.RBD_MIRROR_MODE_DISABLED,
+                [],
                  {
                      'health_color': 'info',
                      'health': 'Disabled'
@@ -280,7 +288,7 @@ class RbdMirroringSummaryControllerTest(ControllerTestCase):
          self.assertStatus(200)
  
          summary = self.json_body()['rbd_mirroring']
-        self.assertEqual(summary, {'errors': 0, 'warnings': 1})
+        self.assertEqual(summary, {'errors': 0, 'warnings': 2})
  
  
  class RbdMirroringStatusControllerTest(ControllerTestCase):
author	Pere Diaz Bou <pdiazbou@redhat.com>
	Tue, 26 Jul 2022 09:56:57 +0000 (11:56 +0200)
committer	Pere Diaz Bou <pdiazbou@redhat.com>
	Wed, 14 Dec 2022 11:55:08 +0000 (12:55 +0100)
src/pybind/mgr/dashboard/controllers/rbd_mirroring.py		patch \| blob \| history
src/pybind/mgr/dashboard/tests/test_rbd_mirroring.py		patch \| blob \| history