From f023ec3c3a2cc642d3cde80dfe30312f1d8998c6 Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou <pdiazbou@redhat.com>
Date: Tue, 26 Jul 2022 11:56:57 +0200
Subject: [PATCH] mgr/dashboard: display real health in rbd mirroring pools

Signed-off-by: Pere Diaz Bou <pdiazbou@redhat.com>
(cherry picked from commit 273286d1a661eda0020bffb2c2a384b4e8dcd36c)
---
 .../dashboard/controllers/rbd_mirroring.py    | 116 +++++++++++-------
 .../mgr/dashboard/tests/test_rbd_mirroring.py |  16 ++-
 2 files changed, 84 insertions(+), 48 deletions(-)

diff --git a/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py b/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py
index 17ef0b88b2a36..738f2e8b2fce8 100644
--- a/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py
+++ b/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py
@@ -3,6 +3,7 @@
 import json
 import logging
 import re
+from enum import IntEnum
 from functools import partial
 from typing import NamedTuple, Optional, no_type_check
 
@@ -25,7 +26,21 @@ from . import APIDoc, APIRouter, BaseController, CreatePermission, Endpoint, \
 logger = logging.getLogger('controllers.rbd_mirror')
 
 
+class MirrorHealth(IntEnum):
+    # RBD mirroring health states as defined in src/tools/rbd/action/MirrorPool.cc; the numeric
+    # order is relevant because the states are compared with each other.
+    MIRROR_HEALTH_OK = 0
+    MIRROR_HEALTH_UNKNOWN = 1
+    MIRROR_HEALTH_WARNING = 2
+    MIRROR_HEALTH_ERROR = 3
+
+    # extra states for the dashboard
+    MIRROR_HEALTH_DISABLED = 4
+    MIRROR_HEALTH_INFO = 5
+
 # pylint: disable=not-callable
+
+
 def handle_rbd_mirror_error():
     def composed_decorator(func):
         func = handle_rados_error('rbd-mirroring')(func)
@@ -79,28 +94,24 @@ def get_daemons():
 
 def get_daemon_health(daemon):
     health = {
-        'health_color': 'info',
-        'health': 'Unknown'
+        'health': MirrorHealth.MIRROR_HEALTH_UNKNOWN
     }
     for _, pool_data in daemon['status'].items():
-        if (health['health'] != 'error'
+        if (health['health'] != MirrorHealth.MIRROR_HEALTH_ERROR
                 and [k for k, v in pool_data.get('callouts', {}).items()
                      if v['level'] == 'error']):
             health = {
-                'health_color': 'error',
-                'health': 'Error'
+                'health': MirrorHealth.MIRROR_HEALTH_ERROR
             }
-        elif (health['health'] != 'error'
+        elif (health['health'] != MirrorHealth.MIRROR_HEALTH_ERROR
                 and [k for k, v in pool_data.get('callouts', {}).items()
                      if v['level'] == 'warning']):
             health = {
-                'health_color': 'warning',
-                'health': 'Warning'
+                'health': MirrorHealth.MIRROR_HEALTH_WARNING
             }
-        elif health['health_color'] == 'info':
+        elif health['health'] == MirrorHealth.MIRROR_HEALTH_INFO:
             health = {
-                'health_color': 'success',
-                'health': 'OK'
+                'health': MirrorHealth.MIRROR_HEALTH_OK
             }
     return health
 
@@ -113,45 +124,48 @@ def get_pools(daemons):  # pylint: disable=R0912, R0915
     return pool_stats
 
 
+def transform_mirror_health(stat):
+    health = 'OK'
+    health_color = 'success'
+    if stat['health'] == MirrorHealth.MIRROR_HEALTH_ERROR:
+        health = 'Error'
+        health_color = 'error'
+    elif stat['health'] == MirrorHealth.MIRROR_HEALTH_WARNING:
+        health = 'Warning'
+        health_color = 'warning'
+    elif stat['health'] == MirrorHealth.MIRROR_HEALTH_UNKNOWN:
+        health = 'Unknown'
+        health_color = 'warning'
+    elif stat['health'] == MirrorHealth.MIRROR_HEALTH_OK:
+        health = 'OK'
+        health_color = 'success'
+    elif stat['health'] == MirrorHealth.MIRROR_HEALTH_DISABLED:
+        health = 'Disabled'
+        health_color = 'info'
+    stat['health'] = health
+    stat['health_color'] = health_color
+
+
 def _update_pool_stats(daemons, pool_stats):
     _update_pool_stats_with_daemons(daemons, pool_stats)
-    for _, stats in pool_stats.items():
-        if stats['mirror_mode'] == 'disabled':
-            continue
-        if stats.get('health', None) is None:
-            # daemon doesn't know about pool
-            stats['health_color'] = 'error'
-            stats['health'] = 'Error'
-        elif stats.get('leader_id', None) is None:
-            # no daemons are managing the pool as leader instance
-            stats['health_color'] = 'warning'
-            stats['health'] = 'Warning'
+    for pool_stat in pool_stats.values():
+        transform_mirror_health(pool_stat)
 
 
 def _update_pool_stats_with_daemons(daemons, pool_stats):
     for daemon in daemons:
         for _, pool_data in daemon['status'].items():
-            stats = pool_stats.get(pool_data['name'], None)  # type: ignore
-            if stats is None:
+            pool_stat = pool_stats.get(pool_data['name'], None)  # type: ignore
+            if pool_stat is None:
                 continue
 
             if pool_data.get('leader', False):
                 # leader instance stores image counts
-                stats['leader_id'] = daemon['metadata']['instance_id']
-                stats['image_local_count'] = pool_data.get('image_local_count', 0)
-                stats['image_remote_count'] = pool_data.get('image_remote_count', 0)
-
-            if (stats.get('health_color', '') != 'error'
-                    and pool_data.get('image_error_count', 0) > 0):
-                stats['health_color'] = 'error'
-                stats['health'] = 'Error'
-            elif (stats.get('health_color', '') != 'error'
-                    and pool_data.get('image_warning_count', 0) > 0):
-                stats['health_color'] = 'warning'
-                stats['health'] = 'Warning'
-            elif stats.get('health', None) is None:
-                stats['health_color'] = 'success'
-                stats['health'] = 'OK'
+                pool_stat['leader_id'] = daemon['metadata']['instance_id']
+                pool_stat['image_local_count'] = pool_data.get('image_local_count', 0)
+                pool_stat['image_remote_count'] = pool_data.get('image_remote_count', 0)
+
+            pool_stat['health'] = max(pool_stat['health'], daemon['health'])
 
 
 def _get_pool_stats(pool_names):
@@ -176,16 +190,27 @@ def _get_pool_stats(pool_names):
         stats = {}
         if mirror_mode == rbd.RBD_MIRROR_MODE_DISABLED:
             mirror_mode = "disabled"
-            stats['health_color'] = "info"
-            stats['health'] = "Disabled"
+            stats['health'] = MirrorHealth.MIRROR_HEALTH_DISABLED
         elif mirror_mode == rbd.RBD_MIRROR_MODE_IMAGE:
             mirror_mode = "image"
         elif mirror_mode == rbd.RBD_MIRROR_MODE_POOL:
             mirror_mode = "pool"
         else:
             mirror_mode = "unknown"
-            stats['health_color'] = "warning"
-            stats['health'] = "Warning"
+
+        if mirror_mode != "disabled":
+            # When mirroring is enabled on the pool, infer its health the same way the RBD CLI tool
+            # does in src/tools/rbd/action/MirrorPool.cc::execute_status
+            mirror_image_health: MirrorHealth = MirrorHealth.MIRROR_HEALTH_OK
+            for status, _ in rbdctx.mirror_image_status_summary(ioctx):
+                if (mirror_image_health < MirrorHealth.MIRROR_HEALTH_WARNING
+                    and status != rbd.MIRROR_IMAGE_STATUS_STATE_REPLAYING
+                        and status != rbd.MIRROR_IMAGE_STATUS_STATE_STOPPED):
+                    mirror_image_health = MirrorHealth.MIRROR_HEALTH_WARNING
+                if (mirror_image_health < MirrorHealth.MIRROR_HEALTH_ERROR
+                        and status == rbd.MIRROR_IMAGE_STATUS_STATE_ERROR):
+                    mirror_image_health = MirrorHealth.MIRROR_HEALTH_ERROR
+            stats['health'] = mirror_image_health
 
         pool_stats[pool_name] = dict(stats, **{
             'mirror_mode': mirror_mode,
@@ -197,10 +222,13 @@ def _get_pool_stats(pool_names):
 @ViewCache()
 def get_daemons_and_pools():  # pylint: disable=R0915
     daemons = get_daemons()
-    return {
+    daemons_and_pools = {
         'daemons': daemons,
         'pools': get_pools(daemons)
     }
+    for daemon in daemons:
+        transform_mirror_health(daemon)
+    return daemons_and_pools
 
 
 class ReplayingData(NamedTuple):
diff --git a/src/pybind/mgr/dashboard/tests/test_rbd_mirroring.py b/src/pybind/mgr/dashboard/tests/test_rbd_mirroring.py
index 60571d8e5543f..1d4dac3062bc1 100644
--- a/src/pybind/mgr/dashboard/tests/test_rbd_mirroring.py
+++ b/src/pybind/mgr/dashboard/tests/test_rbd_mirroring.py
@@ -108,14 +108,15 @@ class GetDaemonAndPoolsTest(unittest.TestCase):
         mock_rbd_instance = mock_rbd.return_value
         mock_rbd_instance.mirror_peer_list.return_value = []
         test_cases = self._get_pool_test_cases()
-        for new_status, mirror_mode, expected_output in test_cases:
+        for new_status, pool_mirror_mode, images_summary, expected_output in test_cases:
             _status[1].update(new_status)
             daemon_status = {
                 'json': json.dumps(_status)
             }
             mgr.get_daemon_status.return_value = daemon_status
             daemons = get_daemons()
-            mock_rbd_instance.mirror_mode_get.return_value = mirror_mode
+            mock_rbd_instance.mirror_mode_get.return_value = pool_mirror_mode
+            mock_rbd_instance.mirror_image_status_summary.return_value = images_summary
             res = get_pools(daemons)
             for k, v in expected_output.items():
                 self.assertTrue(v == res['rbd'][k])
@@ -123,11 +124,16 @@ class GetDaemonAndPoolsTest(unittest.TestCase):
 
     def _get_pool_test_cases(self):
         test_cases = [
+            # 1. daemon status
+            # 2. Pool mirror mode
+            # 3. Image health summary
+            # 4. Pool health output
             (
                 {
                     'image_error_count': 7,
                 },
                 rbd.RBD_MIRROR_MODE_IMAGE,
+                [(rbd.MIRROR_IMAGE_STATUS_STATE_UNKNOWN, None)],
                 {
                     'health_color': 'warning',
                     'health': 'Warning'
@@ -137,7 +143,8 @@ class GetDaemonAndPoolsTest(unittest.TestCase):
                 {
                     'image_error_count': 7,
                 },
-                rbd.RBD_MIRROR_MODE_DISABLED,
+                rbd.RBD_MIRROR_MODE_POOL,
+                [(rbd.MIRROR_IMAGE_STATUS_STATE_ERROR, None)],
                 {
                     'health_color': 'error',
                     'health': 'Error'
@@ -150,6 +157,7 @@ class GetDaemonAndPoolsTest(unittest.TestCase):
                     'leader_id': 1
                 },
                 rbd.RBD_MIRROR_MODE_DISABLED,
+                [],
                 {
                     'health_color': 'info',
                     'health': 'Disabled'
@@ -280,7 +288,7 @@ class RbdMirroringSummaryControllerTest(ControllerTestCase):
         self.assertStatus(200)
 
         summary = self.json_body()['rbd_mirroring']
-        self.assertEqual(summary, {'errors': 0, 'warnings': 1})
+        self.assertEqual(summary, {'errors': 0, 'warnings': 2})
 
 
 class RbdMirroringStatusControllerTest(ControllerTestCase):
-- 
2.39.5