]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: safe status/health access in node-proxy agent and inventory
authorGuillaume Abrioux <gabrioux@ibm.com>
Thu, 29 Jan 2026 12:14:40 +0000 (13:14 +0100)
committerGuillaume Abrioux <gabrioux@ibm.com>
Tue, 10 Feb 2026 15:02:39 +0000 (16:02 +0100)
This adds helpers in NodeProxyEndpoint and NodeProxyCache to safely
read status.health and status.state.

In NodeProxyEndpoint, methods _get_health_value() and _get_state_value()
are used in get_nok_members() to avoid KeyError on malformed data.

In NodeProxyCache, _get_health_value(), _has_health_value(),
_is_error_status(), and _is_unknown_status() are used in fullreport()
and when filtering 'non ok' members instead of accessing
status['status']['health'] inline.

Fixes: https://tracker.ceph.com/issues/74749
Signed-off-by: Guillaume Abrioux <gabrioux@ibm.com>
src/pybind/mgr/cephadm/agent.py
src/pybind/mgr/cephadm/inventory.py
src/pybind/mgr/cephadm/utils.py

index cec4ab0ab81a50cad7d404d9cb77eeba97d85265..8722bac4649adafd5f8256d28eec32641409d9a3 100644 (file)
@@ -25,6 +25,7 @@ import tempfile
 from cephadm.services.service_registry import service_registry
 from cephadm.services.cephadmservice import CephadmAgent
 from cephadm.tlsobject_types import TLSCredentials
+from cephadm.utils import get_node_proxy_status_value
 
 from urllib.error import HTTPError, URLError
 from typing import Any, Dict, List, Set, TYPE_CHECKING, Optional, MutableMapping, IO
@@ -186,6 +187,12 @@ class NodeProxyEndpoint:
         except AttributeError:
             raise cherrypy.HTTPError(400, 'Malformed data received.')
 
+    def _get_health_value(self, member_data: Any) -> str:
+        return get_node_proxy_status_value(member_data, 'health', lower=True)
+
+    def _get_state_value(self, member_data: Any) -> str:
+        return get_node_proxy_status_value(member_data, 'state')
+
     # TODO(guits): refactor this
     # TODO(guits): use self.node_proxy.get_critical_from_host() ?
     def get_nok_members(self,
@@ -206,9 +213,10 @@ class NodeProxyEndpoint:
 
         for sys_id in data.keys():
             for member in data[sys_id].keys():
-                _status = data[sys_id][member]['status']['health'].lower()
-                if _status.lower() != 'ok':
-                    state = data[sys_id][member]['status']['state']
+                member_data = data[sys_id][member]
+                _status = self._get_health_value(member_data)
+                if _status and _status != 'ok':
+                    state = self._get_state_value(member_data)
                     _member = dict(
                         sys_id=sys_id,
                         member=member,
index c4a99fbe88e41effcb651a74f7158e83220ce791..ab37954372a0f53736afe721e18b7db045380558 100644 (file)
@@ -26,7 +26,7 @@ from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent, service
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
 from mgr_util import parse_combined_pem_file
 
-from .utils import resolve_ip, SpecialHostLabels
+from .utils import get_node_proxy_status_value, resolve_ip, SpecialHostLabels
 from .migrations import queue_migrate_nfs_spec, queue_migrate_rgw_spec
 
 if TYPE_CHECKING:
@@ -1657,6 +1657,18 @@ class NodeProxyCache:
         self.keyrings[host] = key
         self.mgr.set_store(f'{NODE_PROXY_CACHE_PREFIX}/keyrings', json.dumps(self.keyrings))
 
+    def _get_health_value(self, status: Any) -> str:
+        return get_node_proxy_status_value(status, 'health', lower=True)
+
+    def _has_health_value(self, statuses: ValuesView, health_value: str) -> bool:
+        return any([self._get_health_value(status) == health_value for status in statuses])
+
+    def _is_error_status(self, statuses: ValuesView) -> bool:
+        return self._has_health_value(statuses, 'error')
+
+    def _is_unknown_status(self, statuses: ValuesView) -> bool:
+        return self._has_health_value(statuses, 'unknown') and not self._is_error_status(statuses)
+
     def fullreport(self, **kw: Any) -> Dict[str, Any]:
         """
         Retrieves the full report for the specified hostname.
@@ -1694,12 +1706,6 @@ class NodeProxyCache:
         hostname = kw.get('hostname')
         hosts = [hostname] if hostname else self.data.keys()
 
-        def is_unknown(statuses: ValuesView) -> bool:
-            return any([status['status']['health'].lower() == 'unknown' for status in statuses]) and not is_error(statuses)
-
-        def is_error(statuses: ValuesView) -> bool:
-            return any([status['status']['health'].lower() == 'error' for status in statuses])
-
         _result: Dict[str, Any] = {}
 
         for host in hosts:
@@ -1711,9 +1717,9 @@ class NodeProxyCache:
                 _sys_id_res: List[str] = []
                 for element in details.values():
                     values = element.values()
-                    if is_error(values):
+                    if self._is_error_status(values):
                         state = 'error'
-                    elif is_unknown(values) or not values:
+                    elif self._is_unknown_status(values) or not values:
                         state = 'unknown'
                     else:
                         state = 'ok'
@@ -1788,7 +1794,7 @@ class NodeProxyCache:
                     if component_name == 'memory':
                         data_member['status']['health'] = 'critical'
                         data_member['status']['state'] = 'errors detected'
-                    if data_member['status']['health'].lower() != 'ok':
+                    if self._get_health_value(data_member) != 'ok':
                         results[component_name][member] = data_member
         return results
 
index 9ef394e86dc3f644100a4d86fb4cb14fc83a2aa6..805ca953e918bd5796a76612745035c2a1f2c866 100644 (file)
@@ -162,3 +162,15 @@ def md5_hash(input_value: str) -> str:
     input_str = str(input_value).encode('utf-8')
     hash_object = hashlib.md5(input_str)
     return hash_object.hexdigest()
+
+
+def get_node_proxy_status_value(data: Any, key: str, lower: bool = False) -> str:
+    if not isinstance(data, dict):
+        return ''
+    status = data.get('status', {})
+    if not isinstance(status, dict):
+        return ''
+    value = status.get(key, '')
+    if not isinstance(value, str):
+        return ''
+    return value.lower() if lower else value