]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: safe status/health access in node-proxy agent and inventory
authorGuillaume Abrioux <gabrioux@ibm.com>
Thu, 29 Jan 2026 12:14:40 +0000 (13:14 +0100)
committerGuillaume Abrioux <gabrioux@ibm.com>
Thu, 5 Feb 2026 09:09:20 +0000 (10:09 +0100)
This adds helpers in NodeProxyEndpoint and NodeProxyCache to safely
read status.health and status.state.

In NodeProxyEndpoint, methods _get_health_value() and _get_state_value()
are used in get_nok_members() to avoid KeyError on malformed data.

In NodeProxyCache, _get_health_value(), _has_health_value(),
_is_error_status(), and _is_unknown_status() are used in fullreport()
and when filtering 'non ok' members instead of accessing
status['status']['health'] inline.

Fixes: https://tracker.ceph.com/issues/74749
Signed-off-by: Guillaume Abrioux <gabrioux@ibm.com>
src/pybind/mgr/cephadm/agent.py
src/pybind/mgr/cephadm/inventory.py

index cec4ab0ab81a50cad7d404d9cb77eeba97d85265..0c429ae158891cf52e08657f3c1aa3aac0d717b9 100644 (file)
@@ -186,6 +186,22 @@ class NodeProxyEndpoint:
         except AttributeError:
             raise cherrypy.HTTPError(400, 'Malformed data received.')
 
+    def _get_health_value(self, member_data: Any) -> str:
+        if not isinstance(member_data, dict):
+            return ''
+        status = member_data.get('status', {})
+        if not isinstance(status, dict):
+            return ''
+        return status.get('health', '').lower()
+
+    def _get_state_value(self, member_data: Any) -> str:
+        if not isinstance(member_data, dict):
+            return ''
+        status = member_data.get('status', {})
+        if not isinstance(status, dict):
+            return ''
+        return status.get('state', '')
+
     # TODO(guits): refactor this
     # TODO(guits): use self.node_proxy.get_critical_from_host() ?
     def get_nok_members(self,
@@ -206,9 +222,10 @@ class NodeProxyEndpoint:
 
         for sys_id in data.keys():
             for member in data[sys_id].keys():
-                _status = data[sys_id][member]['status']['health'].lower()
-                if _status.lower() != 'ok':
-                    state = data[sys_id][member]['status']['state']
+                member_data = data[sys_id][member]
+                _status = self._get_health_value(member_data)
+                if _status and _status != 'ok':
+                    state = self._get_state_value(member_data)
                     _member = dict(
                         sys_id=sys_id,
                         member=member,
index c4a99fbe88e41effcb651a74f7158e83220ce791..c151a6db9af17f3a93a9768cdc34a71e1a511494 100644 (file)
@@ -1657,6 +1657,20 @@ class NodeProxyCache:
         self.keyrings[host] = key
         self.mgr.set_store(f'{NODE_PROXY_CACHE_PREFIX}/keyrings', json.dumps(self.keyrings))
 
+    def _get_health_value(self, status: Any) -> str:
+        if not isinstance(status, dict):
+            return ''
+        return status.get('status', {}).get('health', '').lower()
+
+    def _has_health_value(self, statuses: ValuesView, health_value: str) -> bool:
+        return any([self._get_health_value(status) == health_value for status in statuses])
+
+    def _is_error_status(self, statuses: ValuesView) -> bool:
+        return self._has_health_value(statuses, 'error')
+
+    def _is_unknown_status(self, statuses: ValuesView) -> bool:
+        return self._has_health_value(statuses, 'unknown') and not self._is_error_status(statuses)
+
     def fullreport(self, **kw: Any) -> Dict[str, Any]:
         """
         Retrieves the full report for the specified hostname.
@@ -1694,12 +1708,6 @@ class NodeProxyCache:
         hostname = kw.get('hostname')
         hosts = [hostname] if hostname else self.data.keys()
 
-        def is_unknown(statuses: ValuesView) -> bool:
-            return any([status['status']['health'].lower() == 'unknown' for status in statuses]) and not is_error(statuses)
-
-        def is_error(statuses: ValuesView) -> bool:
-            return any([status['status']['health'].lower() == 'error' for status in statuses])
-
         _result: Dict[str, Any] = {}
 
         for host in hosts:
@@ -1711,9 +1719,9 @@ class NodeProxyCache:
                 _sys_id_res: List[str] = []
                 for element in details.values():
                     values = element.values()
-                    if is_error(values):
+                    if self._is_error_status(values):
                         state = 'error'
-                    elif is_unknown(values) or not values:
+                    elif self._is_unknown_status(values) or not values:
                         state = 'unknown'
                     else:
                         state = 'ok'
@@ -1788,7 +1796,7 @@ class NodeProxyCache:
                     if component_name == 'memory':
                         data_member['status']['health'] = 'critical'
                         data_member['status']['state'] = 'errors detected'
-                    if data_member['status']['health'].lower() != 'ok':
+                    if self._get_health_value(data_member) != 'ok':
                         results[component_name][member] = data_member
         return results