git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/node-proxy: fix "ceph orch hardware status --category criticals"
author Guillaume Abrioux <gabrioux@ibm.com>
Tue, 10 Feb 2026 14:59:55 +0000 (15:59 +0100)
committer Guillaume Abrioux <gabrioux@ibm.com>
Wed, 18 Feb 2026 08:52:38 +0000 (09:52 +0100)
The criticals path was using the wrong data shape.
node-proxy sends status as:

  component -> sys_id -> member

but the code assumed:

  sys_id -> component -> member

This fixes get_critical_from_host() and _criticals_table() to iterate
in the correct order and build the criticals result with the right
nesting.

Fixes: https://tracker.ceph.com/issues/74749
Signed-off-by: Guillaume Abrioux <gabrioux@ibm.com>
src/pybind/mgr/cephadm/agent.py
src/pybind/mgr/cephadm/inventory.py
src/pybind/mgr/orchestrator/module.py

index 8722bac4649adafd5f8256d28eec32641409d9a3..6f52ad47514b89a0169faa01cc28a706272a4131 100644 (file)
@@ -214,16 +214,17 @@ class NodeProxyEndpoint:
         for sys_id in data.keys():
             for member in data[sys_id].keys():
                 member_data = data[sys_id][member]
+                if member == 'firmwares':
+                    continue
                 _status = self._get_health_value(member_data)
                 if _status and _status != 'ok':
                     state = self._get_state_value(member_data)
-                    _member = dict(
-                        sys_id=sys_id,
-                        member=member,
-                        status=_status,
-                        state=state
-                    )
-                    nok_members.append(_member)
+                    nok_members.append({
+                        'sys_id': sys_id,
+                        'member': member,
+                        'status': _status,
+                        'state': state
+                    })
 
         return nok_members
 
index ab37954372a0f53736afe721e18b7db045380558..37e346845c3d68eec9f439a9eeb8f69c9dfe08c4 100644 (file)
@@ -1783,19 +1783,19 @@ class NodeProxyCache:
 
     def get_critical_from_host(self, hostname: str) -> Dict[str, Any]:
         results: Dict[str, Any] = {}
-        for sys_id, component in self.data[hostname]['status'].items():
-            for component_name, data_component in component.items():
-                if component_name not in results.keys():
-                    results[component_name] = {}
-                for member, data_member in data_component.items():
-                    if component_name == 'power':
-                        data_member['status']['health'] = 'critical'
-                        data_member['status']['state'] = 'unplugged'
-                    if component_name == 'memory':
-                        data_member['status']['health'] = 'critical'
-                        data_member['status']['state'] = 'errors detected'
-                    if self._get_health_value(data_member) != 'ok':
-                        results[component_name][member] = data_member
+
+        for component, component_data in self.data[hostname]['status'].items():
+            for sys_id, data_sys in component_data.items():
+                if sys_id not in results.keys():
+                    results[sys_id] = {}
+                if component not in results[sys_id].keys():
+                    results[sys_id][component] = {}
+                for member_name, member_data in data_sys.items():
+                    _health = self._get_health_value(member_data)
+                    if _health and _health != 'ok':
+                        if member_name not in results.keys():
+                            results[sys_id][component][member_name] = {}
+                        results[sys_id][component][member_name] = member_data
         return results
 
     def criticals(self, **kw: Any) -> Dict[str, Any]:
index 5fc2fce63fae5501964671fd3f3264a62d2bff69..a00b195614ed13abe2fcf55a006aa49305a130b3 100644 (file)
@@ -522,7 +522,7 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule):
             'summary': ['HOST', 'SN', 'STORAGE', 'CPU', 'NET', 'MEMORY', 'POWER', 'FANS'],
             'fullreport': [],
             'firmwares': ['HOST', 'COMPONENT', 'NAME', 'DATE', 'VERSION', 'STATUS'],
-            'criticals': ['HOST', 'COMPONENT', 'NAME', 'STATUS', 'STATE'],
+            'criticals': ['HOST', 'SYS_ID', 'COMPONENT', 'NAME', 'STATUS', 'STATE'],
             'memory': ['HOST', 'SYS_ID', 'NAME', 'STATUS', 'STATE'],
             'storage': ['HOST', 'SYS_ID', 'NAME', 'MODEL', 'SIZE', 'PROTOCOL', 'SN', 'STATUS', 'STATE'],
             'processors': ['HOST', 'SYS_ID', 'NAME', 'MODEL', 'CORES', 'THREADS', 'STATUS', 'STATE'],
@@ -587,14 +587,18 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule):
     def _criticals_table(self, hostname: Optional[str], table: PrettyTable, format: Format) -> str:
         completion = self.node_proxy_criticals(hostname=hostname)
         data = raise_if_exception(completion)
-        # data = self.node_proxy_criticals(hostname=hostname)
+
         if format == Format.json:
             return json.dumps(data)
         for host, host_details in data.items():
-            for component, component_details in host_details.items():
-                for member, member_details in component_details.items():
-                    description = member_details.get('description') or member_details.get('name')
-                    table.add_row((host, component, description, member_details['status']['health'], member_details['status']['state']))
+            for sys_id, components in host_details.items():
+                for component, component_details in components.items():
+                    for _, member_details in component_details.items():
+                        description = member_details.get('description') or member_details.get('name') or member_details.get('id')
+                        status = member_details.get('status') or {}
+                        health = status.get('health', 'N/A')
+                        state = status.get('state', 'N/A')
+                        table.add_row((host, sys_id, component, description, health, state))
         return table.get_string()
 
     def _common_table(self, category: str, hostname: Optional[str], table: PrettyTable, format: Format) -> str: