git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/dashboard: fix KeyError exception in HardwareService.get_summary() 63721/head
author: Guillaume Abrioux <gabrioux@ibm.com>
Wed, 4 Jun 2025 14:23:43 +0000 (16:23 +0200)
committer: Guillaume Abrioux <gabrioux@ibm.com>
Wed, 4 Jun 2025 19:16:02 +0000 (21:16 +0200)
Typical error:

```
[dashboard ERROR exception] Internal Server Error
Traceback (most recent call last):
  File "/usr/share/ceph/mgr/dashboard/services/exception.py", line 48, in dashboard_exception_handler
    return handler(*args, **kwargs)
  File "/lib/python3.9/site-packages/cherrypy/_cpdispatch.py", line 54, in __call__
    return self.callable(*self.args, **self.kwargs)
  File "/usr/share/ceph/mgr/dashboard/controllers/_base_controller.py", line 263, in inner
    ret = func(*args, **kwargs)
  File "/usr/share/ceph/mgr/dashboard/controllers/_rest_controller.py", line 193, in wrapper
    return func(*vpath, **params)
  File "/usr/share/ceph/mgr/dashboard/controllers/hardware.py", line 21, in summary
    return HardwareService.get_summary(categories, hostname)
  File "/usr/share/ceph/mgr/dashboard/services/hardware.py", line 33, in get_summary
    'ok': sum(item['status']['health'] == 'OK' for items in data.values()
  File "/usr/share/ceph/mgr/dashboard/services/hardware.py", line 33, in <genexpr>
    'ok': sum(item['status']['health'] == 'OK' for items in data.values()
KeyError: 'status'
```

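The recent change from commit `fbcdf571ca1` introduced this regression:
`HardwareService.get_summary()` indexed `item['status']['health']` on
entries that have no `status` key. The summary now walks one more nesting
level (host -> system -> component) and reads the health with `.get()`
when counting healthy components.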

Fixes: https://tracker.ceph.com/issues/71558
Signed-off-by: Guillaume Abrioux <gabrioux@ibm.com>
src/pybind/mgr/dashboard/services/hardware.py

index df2266443388d9f3f40d7b905248e028d9ce3cf4..31054ab4cc7af4a125e9e5b7e7359910c5fdeb36 100644 (file)
@@ -23,25 +23,41 @@ class HardwareService(object):
             }
         }
 
+        def count_ok(data: dict) -> int:
+            return sum(
+                component.get("status", {}).get("health") == "OK"
+                for node in data.values()
+                for system in node.values()
+                for component in system.values()
+            )
+
+        def count_total(data: dict) -> int:
+            return sum(
+                len(component)
+                for system in data.values()
+                for component in system.values()
+            )
+
         categories = HardwareService.validate_categories(categories)
 
         orch_hardware_instance = OrchClient.instance().hardware
         for category in categories:
             data = orch_hardware_instance.common(category, hostname)
             category_total = {
-                'total': sum(len(items) for items in data.values()),
-                'ok': sum(item['status']['health'] == 'OK' for items in data.values()
-                          for item in items.values()),
+                'total': count_total(data),
+                'ok': count_ok(data),
                 'error': 0
             }
 
-            for host, items in data.items():
+            for host, systems in data.items():
                 output['host'].setdefault(host, {'flawed': False})
                 if not output['host'][host]['flawed']:
-                    output['host'][host]['flawed'] = any(
-                        item['status']['health'] != 'OK' for item in items.values())
+                    for system in systems.values():
+                        if any(dimm['status']['health'] != 'OK' for dimm in system.values()):
+                            output['host'][host]['flawed'] = True
+                            break
 
-            category_total['error'] = category_total['total'] - category_total['ok']
+            category_total['error'] = max(0, category_total['total'] - category_total['ok'])
             output['total']['category'].setdefault(category, {})
             output['total']['category'][category] = category_total