git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
node-proxy: implement criticals endpoint
author: Guillaume Abrioux <gabrioux@ibm.com>
Mon, 30 Oct 2023 15:51:56 +0000 (15:51 +0000)
committer: Guillaume Abrioux <gabrioux@ibm.com>
Thu, 25 Jan 2024 15:15:42 +0000 (15:15 +0000)
This adds the required changes in order to implement the endpoint
'/criticals'.

The goal of this endpoint is to provide a report of all critical statuses
for either a given host or all hosts across the cluster.

Signed-off-by: Guillaume Abrioux <gabrioux@ibm.com>
(cherry picked from commit ae791f8721027a9a508c7cd27e85f86f6fe7c492)

src/pybind/mgr/cephadm/agent.py
src/pybind/mgr/cephadm/inventory.py

index fb45d01f46bcfc0d5e8c42bc18351921da6f4147..39b0cb3d0ef882c7810c2b91ad8a8b81bca77fc6 100644 (file)
@@ -142,23 +142,16 @@ class NodeProxy:
         except AttributeError:
             raise cherrypy.HTTPError(400, 'Malformed data received.')
 
+    # TODO(guits): refactor this
+    # TODO(guits): use self.node_proxy.get_critical_from_host() ?
     def get_nok_members(self,
-                        component: str,
                         data: Dict[str, Any]) -> List[Dict[str, str]]:
         nok_members: List[Dict[str, str]] = []
 
         for member in data.keys():
-            # Force a fake error for testing purpose
-            if component == 'storage':
-                _status = 'critical'
-                state = "[Fake error] device is faulty."
-            elif component == 'power':
-                _status = 'critical'
-                state = "[Fake error] power supply unplugged."
-            else:
-                _status = data[member]['status']['health'].lower()
+            _status = data[member]['status']['health'].lower()
             if _status.lower() != 'ok':
-                state = data[member]['status']['state']
+                state = data[member]['status']['state']
                 _member = dict(
                     member=member,
                     status=_status,
@@ -179,13 +172,14 @@ class NodeProxy:
         }
 
         for component in data['patch']['status'].keys():
-            self.mgr.remove_health_warning(mapping[component])
-            nok_members = self.get_nok_members(component, data['patch']['status'][component])
+            alert_name = f"HARDWARE_{component.upper()}"
+            self.mgr.remove_health_warning(alert_name)
+            nok_members = self.get_nok_members(data['patch']['status'][component])
 
             if nok_members:
                 count = len(nok_members)
                 self.mgr.set_health_warning(
-                    mapping[component],
+                    alert_name,
                     summary=f'{count} {component} member{"s" if count > 1 else ""} {"are" if count > 1 else "is"} not ok',
                     count=count,
                     detail=[f"{member['member']} is {member['status']}: {member['state']}" for member in nok_members],
@@ -285,7 +279,7 @@ class NodeProxy:
     @cherrypy.tools.allow(methods=['GET'])
     @cherrypy.tools.json_out()
     def criticals(self, **kw: Any) -> Dict[str, Any]:
-        return self.mgr.node_proxy.criticals()
+        return self.mgr.node_proxy.criticals(**kw)
 
     @cherrypy.expose
     @cherrypy.tools.allow(methods=['GET'])
index 401d675885c5d32de5610f31c73ab2b0896451c6..31029f0a7ee51da47056e1b31f0c0caa98dfdaa2 100644 (file)
@@ -1499,8 +1499,43 @@ class NodeProxyCache:
 
         return {host: self.data[host]['firmwares'] for host in hosts}
 
-    def criticals(self, **kw):
-        return {}
+    def get_critical_from_host(self, hostname: str) -> Dict[str, Any]:  # returns {component: {member: member_data}} for members whose health is not 'ok'
+        results: Dict[str, Any] = {}
+        for component, data_component in self.data[hostname]['status'].items():  # assumes hostname is already a key of self.data -- TODO confirm against callers
+            if component not in results.keys():
+                results[component] = {}
+            for member, data_member in data_component.items():
+                if component == 'power':  # NOTE(review): every power member is unconditionally forced to 'critical'; looks like leftover test data -- confirm
+                    data_member['status']['health'] = 'critical'  # assigns into the object reached through self.data, not into a copy
+                    data_member['status']['state'] = 'unplugged'
+                if component == 'memory':  # NOTE(review): same unconditional override for every memory member -- confirm
+                    data_member['status']['health'] = 'critical'
+                    data_member['status']['state'] = 'errors detected'
+                if data_member['status']['health'].lower() != 'ok':  # case-insensitive; any value other than 'ok' is reported, not only 'critical'
+                    results[component][member] = data_member
+        return results
+
+    def criticals(self, **kw: Any) -> Dict[str, Any]:
+        """
+        Retrieves critical information for a specific hostname or all hosts.
+
+        If a 'hostname' is provided in the keyword arguments, retrieves critical
+        information for that specific host. Otherwise, retrieves critical
+        information for every host present in self.data.
+
+        :param kw: Keyword arguments, including 'hostname' if specified.
+        :type kw: dict
+
+        :return: A dictionary mapping each host to its get_critical_from_host() result.
+        :rtype: Dict[str, Any]
+        """
+        hostname = kw.get('hostname')
+        results: Dict[str, Any] = {}
+
+        hosts = [hostname] if hostname else self.data.keys()  # a missing or empty 'hostname' selects every key of self.data
+        for host in hosts:
+            results[host] = self.get_critical_from_host(host)
+        return results
 
 
 class AgentCache():