git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
node-proxy: split redfishdell class
author Guillaume Abrioux <gabrioux@ibm.com>
Wed, 27 Sep 2023 08:27:28 +0000 (08:27 +0000)
committer Guillaume Abrioux <gabrioux@ibm.com>
Thu, 25 Jan 2024 14:54:53 +0000 (14:54 +0000)
This refactor splits the redfishdell class in order
to collect power and thermal details from the redfish API.

'power' and 'thermal' details differ in several respects:

- not available at the same endpoint,
- data structure is different.

For these two reasons, let's split that class.

Signed-off-by: Guillaume Abrioux <gabrioux@ibm.com>
(cherry picked from commit db0172186a753d57c357a5396378d1158e3167e3)

src/cephadm/cephadmlib/node_proxy/baseredfishsystem.py [new file with mode: 0644]
src/cephadm/cephadmlib/node_proxy/redfish_dell.py
src/cephadm/cephadmlib/node_proxy/redfish_system.py [deleted file]
src/cephadm/cephadmlib/node_proxy/redfishdellchassis.py [new file with mode: 0644]
src/cephadm/cephadmlib/node_proxy/redfishdellsystem.py [new file with mode: 0644]
src/pybind/mgr/cephadm/agent.py

diff --git a/src/cephadm/cephadmlib/node_proxy/baseredfishsystem.py b/src/cephadm/cephadmlib/node_proxy/baseredfishsystem.py
new file mode 100644 (file)
index 0000000..2e1d908
--- /dev/null
@@ -0,0 +1,139 @@
+from .basesystem import BaseSystem
+from .redfish_client import RedFishClient
+from threading import Thread, Lock
+from time import sleep
+from .util import Logger, retry
+from typing import Dict, Any, List
+
+
+class BaseRedfishSystem(BaseSystem):
+    def __init__(self, **kw: Any) -> None:
+        super().__init__(**kw)
+        self.log = Logger(__name__)
+        self.host: str = kw['host']
+        self.username: str = kw['username']
+        self.password: str = kw['password']
+        # move the following line (class attribute?)
+        self.client = RedFishClient(host=self.host, username=self.username, password=self.password)
+        self.log.logger.info(f"redfish system initialization, host: {self.host}, user: {self.username}")
+
+        self.run: bool = False
+        self.thread: Thread
+        self.data_ready: bool = False
+        self.previous_data: Dict = {}
+        self.lock: Lock = Lock()
+        self.data: Dict[str, Dict[str, Any]] = {}
+        self._system: Dict[str, Dict[str, Any]] = {}
+        self.start_client()
+
+    def start_client(self) -> None:
+        if not self.client:
+            self.client = RedFishClient(host=self.host, username=self.username, password=self.password)
+        self.client.login()
+
+    def start_update_loop(self) -> None:
+        self.run = True
+        self.thread = Thread(target=self.update)
+        self.thread.start()
+
+    def stop_update_loop(self) -> None:
+        self.run = False
+        self.thread.join()
+
+    def update(self) -> None:
+        #  this loop can have:
+        #  - caching logic
+        try:
+            while self.run:
+                self.log.logger.debug("waiting for a lock.")
+                self.lock.acquire()
+                self.log.logger.debug("lock acquired.")
+                try:
+                    self._update_system()
+                    # following calls in theory can be done in parallel
+                    self._update_metadata()
+                    self._update_memory()
+                    self._update_power()
+                    self._update_fans()
+                    self._update_network()
+                    self._update_processors()
+                    self._update_storage()
+                    self.data_ready = True
+                    sleep(5)
+                finally:
+                    self.lock.release()
+                    self.log.logger.debug("lock released.")
+        # Catching 'Exception' is probably not a good idea (devel only)
+        except Exception as e:
+            self.log.logger.error(f"Error detected, logging out from redfish api.\n{e}")
+            self.client.logout()
+            raise
+
+    def flush(self) -> None:
+        self.log.logger.info("Acquiring lock to flush data.")
+        self.lock.acquire()
+        self.log.logger.info("Lock acquired, flushing data.")
+        self._system = {}
+        self.previous_data = {}
+        self.log.logger.info("Data flushed.")
+        self.data_ready = False
+        self.log.logger.info("Data marked as not ready.")
+        self.lock.release()
+        self.log.logger.info("Lock released.")
+
+    @retry(retries=10, delay=2)
+    def _get_path(self, path: str) -> Dict:
+        result = self.client.get_path(path)
+        if result is None:
+            self.log.logger.error(f"The client reported an error when getting path: {path}")
+            raise RuntimeError(f"Could not get path: {path}")
+        return result
+
+    def get_members(self, path: str) -> List:
+        _path = self._system[path]['@odata.id']
+        data = self._get_path(_path)
+        return [self._get_path(member['@odata.id']) for member in data['Members']]
+
+    def build_data(self,
+                   fields: List,
+                   path: str) -> Dict[str, Dict[str, Dict]]:
+        raise NotImplementedError()
+
+    # def _update_system(self) -> None:
+    #     raise NotImplementedError()
+
+    def get_system(self) -> Dict[str, Dict[str, Dict]]:
+        result = {
+            'storage': self.get_storage(),
+            'processors': self.get_processors(),
+            'network': self.get_network(),
+            'memory': self.get_memory(),
+            'power': self.get_power(),
+            'fans': self.get_fans()
+        }
+        return result
+
+    def _update_system(self) -> None:
+        redfish_system = self.client.get_path(self.system_endpoint)
+        self._system = {**redfish_system, **self._system}
+
+    def _update_metadata(self) -> None:
+        raise NotImplementedError()
+
+    def _update_memory(self) -> None:
+        raise NotImplementedError()
+
+    def _update_power(self) -> None:
+        raise NotImplementedError()
+
+    def _update_fans(self) -> None:
+        raise NotImplementedError()
+
+    def _update_network(self) -> None:
+        raise NotImplementedError()
+
+    def _update_processors(self) -> None:
+        raise NotImplementedError()
+
+    def _update_storage(self) -> None:
+        raise NotImplementedError()
index 796c989f8834d67b131d9f630ec38b3ce3a52e19..f6a01664629c968d0f79e5d9677c17b05140051c 100644 (file)
@@ -1,63 +1,14 @@
-from .redfish_system import RedfishSystem
-from .util import Logger, normalize_dict, to_snake_case
-from typing import Dict, Any
+from .redfishdellchassis import RedfishDellChassis
+from .redfishdellsystem import RedfishDellSystem
+from .util import Logger
+from typing import Any
 
 
-class RedfishDell(RedfishSystem):
+class RedfishDell(RedfishDellSystem, RedfishDellChassis):
     def __init__(self, **kw: Any) -> None:
-        self.log = Logger(__name__)
         if kw.get('system_endpoint') is None:
             kw['system_endpoint'] = '/Systems/System.Embedded.1'
+        if kw.get('chassis_endpoint') is None:
+            kw['chassis_endpoint'] = '/Chassis/System.Embedded.1'
         super().__init__(**kw)
-
-    def _update_network(self) -> None:
-        fields = ['Description', 'Name', 'SpeedMbps', 'Status']
-        self.log.logger.info("Updating network")
-        self._system['network'] = self.build_data(fields, 'EthernetInterfaces')
-
-    def _update_processors(self) -> None:
-        fields = ['Description',
-                  'TotalCores',
-                  'TotalThreads',
-                  'ProcessorType',
-                  'Model',
-                  'Status',
-                  'Manufacturer']
-        self.log.logger.info("Updating processors")
-        self._system['processors'] = self.build_data(fields, 'Processors')
-
-    def _update_storage(self) -> None:
-        fields = ['Description',
-                  'CapacityBytes',
-                  'Model', 'Protocol',
-                  'SerialNumber', 'Status',
-                  'PhysicalLocation']
-        entities = self.get_members('Storage')
-        self.log.logger.info("Updating storage")
-        result: Dict[str, Dict[str, Dict]] = dict()
-        for entity in entities:
-            for drive in entity['Drives']:
-                drive_path = drive['@odata.id']
-                drive_info = self._get_path(drive_path)
-                drive_id = drive_info['Id']
-                result[drive_id] = dict()
-                for field in fields:
-                    result[drive_id][to_snake_case(field)] = drive_info[field]
-                    result[drive_id]['entity'] = entity['Id']
-        self._system['storage'] = normalize_dict(result)
-
-    def _update_metadata(self) -> None:
-        self.log.logger.info("Updating metadata")
-        pass
-
-    def _update_memory(self) -> None:
-        fields = ['Description',
-                  'MemoryDeviceType',
-                  'CapacityMiB',
-                  'Status']
-        self.log.logger.info("Updating memory")
-        self._system['memory'] = self.build_data(fields, 'Memory')
-
-    def _update_power(self) -> None:
-        self.log.logger.info("Updating power")
-        pass
+        self.log = Logger(__name__)
diff --git a/src/cephadm/cephadmlib/node_proxy/redfish_system.py b/src/cephadm/cephadmlib/node_proxy/redfish_system.py
deleted file mode 100644 (file)
index 95c8296..0000000
+++ /dev/null
@@ -1,160 +0,0 @@
-from .basesystem import BaseSystem
-from .redfish_client import RedFishClient
-from threading import Thread, Lock
-from time import sleep
-from .util import Logger, retry, normalize_dict, to_snake_case
-from typing import Dict, Any, List
-
-
-class RedfishSystem(BaseSystem):
-    def __init__(self, **kw: Any) -> None:
-        super().__init__(**kw)
-        self.log = Logger(__name__)
-        self.host: str = kw['host']
-        self.username: str = kw['username']
-        self.password: str = kw['password']
-        self.system_endpoint = kw.get('system_endpoint', '/Systems/1')
-        self.client = RedFishClient(host=self.host, username=self.username, password=self.password)
-        self.log.logger.info(f"redfish system initialization, host: {self.host}, user: {self.username}")
-
-        self._system: Dict[str, Dict[str, Any]] = {}
-        self.run: bool = False
-        self.thread: Thread
-        self.start_client()
-        self.data_ready: bool = False
-        self.previous_data: Dict = {}
-        self.lock: Lock = Lock()
-
-    @retry(retries=10, delay=2)
-    def _get_path(self, path: str) -> Dict:
-        result = self.client.get_path(path)
-        if result is None:
-            self.log.logger.error(f"The client reported an error when getting path: {path}")
-            raise RuntimeError(f"Could not get path: {path}")
-        return result
-
-    def get_members(self, path: str) -> List:
-        _path = self._system[path]['@odata.id']
-        data = self._get_path(_path)
-        return [self._get_path(member['@odata.id']) for member in data['Members']]
-
-    def build_data(self,
-                   fields: List,
-                   path: str) -> Dict[str, Dict[str, Dict]]:
-        result: Dict[str, Dict[str, Dict]] = dict()
-        for member_info in self.get_members(path):
-            member_id = member_info['Id']
-            result[member_id] = dict()
-            for field in fields:
-                try:
-                    result[member_id][to_snake_case(field)] = member_info[field]
-                except KeyError:
-                    self.log.logger.warning(f"Could not find field: {field} in member_info: {member_info}")
-
-        return normalize_dict(result)
-
-    def start_client(self) -> None:
-        if not self.client:
-            self.client = RedFishClient(host=self.host, username=self.username, password=self.password)
-        self.client.login()
-
-    def get_system(self) -> Dict[str, Dict[str, Dict]]:
-        result = {
-            'storage': self.get_storage(),
-            'processors': self.get_processors(),
-            'network': self.get_network(),
-            'memory': self.get_memory(),
-        }
-        return result
-
-    def get_status(self) -> Dict[str, Dict[str, Dict]]:
-        return self._system['status']
-
-    def get_metadata(self) -> Dict[str, Dict[str, Dict]]:
-        return self._system['metadata']
-
-    def get_memory(self) -> Dict[str, Dict[str, Dict]]:
-        return self._system['memory']
-
-    def get_power(self) -> Dict[str, Dict[str, Dict]]:
-        return self._system['power']
-
-    def get_processors(self) -> Dict[str, Dict[str, Dict]]:
-        return self._system['processors']
-
-    def get_network(self) -> Dict[str, Dict[str, Dict]]:
-        return self._system['network']
-
-    def get_storage(self) -> Dict[str, Dict[str, Dict]]:
-        return self._system['storage']
-
-    def _update_system(self) -> None:
-        redfish_system = self.client.get_path(self.system_endpoint)
-        self._system = {**redfish_system, **self._system}
-
-    def _update_metadata(self) -> None:
-        raise NotImplementedError()
-
-    def _update_memory(self) -> None:
-        raise NotImplementedError()
-
-    def _update_power(self) -> None:
-        raise NotImplementedError()
-
-    def _update_network(self) -> None:
-        raise NotImplementedError()
-
-    def _update_processors(self) -> None:
-        raise NotImplementedError()
-
-    def _update_storage(self) -> None:
-        raise NotImplementedError()
-
-    def start_update_loop(self) -> None:
-        self.run = True
-        self.thread = Thread(target=self.update)
-        self.thread.start()
-
-    def stop_update_loop(self) -> None:
-        self.run = False
-        self.thread.join()
-
-    def update(self) -> None:
-        #  this loop can have:
-        #  - caching logic
-        try:
-            while self.run:
-                self.log.logger.debug("waiting for a lock.")
-                self.lock.acquire()
-                self.log.logger.debug("lock acquired.")
-                try:
-                    self._update_system()
-                    # following calls in theory can be done in parallel
-                    self._update_metadata()
-                    self._update_memory()
-                    self._update_power()
-                    self._update_network()
-                    self._update_processors()
-                    self._update_storage()
-                    self.data_ready = True
-                    sleep(5)
-                finally:
-                    self.lock.release()
-                    self.log.logger.debug("lock released.")
-        # Catching 'Exception' is probably not a good idea (devel only)
-        except Exception as e:
-            self.log.logger.error(f"Error detected, logging out from redfish api.\n{e}")
-            self.client.logout()
-            raise
-
-    def flush(self) -> None:
-        self.log.logger.info("Acquiring lock to flush data.")
-        self.lock.acquire()
-        self.log.logger.info("Lock acquired, flushing data.")
-        self._system = {}
-        self.previous_data = {}
-        self.log.logger.info("Data flushed.")
-        self.data_ready = False
-        self.log.logger.info("Data marked as not ready.")
-        self.lock.release()
-        self.log.logger.info("Lock released.")
diff --git a/src/cephadm/cephadmlib/node_proxy/redfishdellchassis.py b/src/cephadm/cephadmlib/node_proxy/redfishdellchassis.py
new file mode 100644 (file)
index 0000000..39610dc
--- /dev/null
@@ -0,0 +1,67 @@
+from .baseredfishsystem import BaseRedfishSystem
+from .redfish_client import RedFishClient
+from threading import Thread, Lock
+from time import sleep
+from .util import Logger, retry, normalize_dict, to_snake_case
+from typing import Dict, Any, List, Union
+
+
+class RedfishDellChassis(BaseRedfishSystem):
+    def __init__(self, **kw: Any) -> None:
+        self.chassis_endpoint = kw.get('chassis_endpoint', '/Chassis/System.Embedded.1')
+        super().__init__(**kw)
+        self.log = Logger(__name__)
+        self.log.logger.info(f"{__name__} initialization.")
+
+    def get_power(self) -> Dict[str, Dict[str, Dict]]:
+        return self._system['power']
+
+    def get_fans(self) -> Dict[str, Dict[str, Dict]]:
+        return self._system['fans']
+
+    def get_chassis(self) -> Dict[str, Dict[str, Dict]]:
+        result = {
+            'power': self.get_power(),
+            'fans': self.get_fans()
+        }
+        return result
+
+    def _update_power(self) -> None:
+        fields = {
+            "PowerSupplies": [
+                "Name",
+                "Model",
+                "Manufacturer",
+                "Status"
+            ]
+        }
+        self.log.logger.info("Updating powersupplies")
+        self._system['power'] = self.build_chassis_data(fields, 'Power')
+
+    def _update_fans(self) -> None:
+        fields = {
+            "Fans": [
+                "Name",
+                "PhysicalContext",
+                "Status"
+            ],
+        }
+        self.log.logger.info("Updating fans")
+        self._system['fans'] = self.build_chassis_data(fields, 'Thermal')
+
+    def build_chassis_data(self,
+                   fields: Dict[str, List[str]],
+                   path: str) -> Dict[str, Dict[str, Dict]]:
+        result: Dict[str, Dict[str, Dict]] = dict()
+        data = self._get_path(f"{self.chassis_endpoint}/{path}")
+
+        for elt, _fields in fields.items():
+            for member_elt in data[elt]:
+                _id = member_elt['MemberId']
+                result[_id] = dict()
+                for field in _fields:
+                    try:
+                        result[_id][to_snake_case(field)] = member_elt[field]
+                    except KeyError:
+                        self.log.logger.warning(f"Could not find field: {field} in data: {data[elt]}")
+        return normalize_dict(result)
diff --git a/src/cephadm/cephadmlib/node_proxy/redfishdellsystem.py b/src/cephadm/cephadmlib/node_proxy/redfishdellsystem.py
new file mode 100644 (file)
index 0000000..de9756f
--- /dev/null
@@ -0,0 +1,95 @@
+from .baseredfishsystem import BaseRedfishSystem
+from .util import Logger, normalize_dict, to_snake_case
+from typing import Dict, Any, List
+
+
+class RedfishDellSystem(BaseRedfishSystem):
+    def __init__(self, **kw: Any) -> None:  # accepts optional kw['system_endpoint']
+        self.system_endpoint = kw.get('system_endpoint', '/Systems/System.Embedded.1')
+        super().__init__(**kw)
+        self.log = Logger(__name__)
+
+    def build_system_data(self,
+                   fields: List,
+                   path: str) -> Dict[str, Dict[str, Dict]]:
+        result: Dict[str, Dict[str, Dict]] = dict()
+        for member_info in self.get_members(path):
+            member_id = member_info['Id']
+            result[member_id] = dict()
+            for field in fields:
+                try:
+                    result[member_id][to_snake_case(field)] = member_info[field]
+                except KeyError:
+                    self.log.logger.warning(f"Could not find field: {field} in member_info: {member_info}")
+
+        return normalize_dict(result)
+
+    def get_status(self) -> Dict[str, Dict[str, Dict]]:
+        return self._system['status']
+
+    def get_metadata(self) -> Dict[str, Dict[str, Dict]]:
+        return self._system['metadata']
+
+    def get_memory(self) -> Dict[str, Dict[str, Dict]]:
+        return self._system['memory']
+
+    def get_processors(self) -> Dict[str, Dict[str, Dict]]:
+        return self._system['processors']
+
+    def get_network(self) -> Dict[str, Dict[str, Dict]]:
+        return self._system['network']
+
+    def get_storage(self) -> Dict[str, Dict[str, Dict]]:
+        return self._system['storage']
+
+    # def _update_system(self) -> None:
+    #     redfish_system = self.client.get_path(self.system_endpoint)
+    #     self._system = {**redfish_system, **self._system}
+
+    def _update_network(self) -> None:
+        fields = ['Description', 'Name', 'SpeedMbps', 'Status']
+        self.log.logger.info("Updating network")
+        self._system['network'] = self.build_system_data(fields, 'EthernetInterfaces')
+
+    def _update_processors(self) -> None:
+        fields = ['Description',
+                  'TotalCores',
+                  'TotalThreads',
+                  'ProcessorType',
+                  'Model',
+                  'Status',
+                  'Manufacturer']
+        self.log.logger.info("Updating processors")
+        self._system['processors'] = self.build_system_data(fields, 'Processors')
+
+    def _update_storage(self) -> None:
+        fields = ['Description',
+                  'CapacityBytes',
+                  'Model', 'Protocol',
+                  'SerialNumber', 'Status',
+                  'PhysicalLocation']
+        entities = self.get_members('Storage')
+        self.log.logger.info("Updating storage")
+        result: Dict[str, Dict[str, Dict]] = dict()
+        for entity in entities:
+            for drive in entity['Drives']:
+                drive_path = drive['@odata.id']
+                drive_info = self._get_path(drive_path)
+                drive_id = drive_info['Id']
+                result[drive_id] = dict()
+                for field in fields:
+                    result[drive_id][to_snake_case(field)] = drive_info[field]
+                    result[drive_id]['entity'] = entity['Id']
+        self._system['storage'] = normalize_dict(result)
+
+    def _update_metadata(self) -> None:
+        self.log.logger.info("Updating metadata")
+        pass
+
+    def _update_memory(self) -> None:
+        fields = ['Description',
+                  'MemoryDeviceType',
+                  'CapacityMiB',
+                  'Status']
+        self.log.logger.info("Updating memory")
+        self._system['memory'] = self.build_system_data(fields, 'Memory')
index d3376d23588902fbbd5a677cb3402501aa02d2fd..697f097e1434851012390f5e0eb678bfc4159fb1 100644 (file)
@@ -147,7 +147,10 @@ class NodeProxy:
             # Force a fake error for testing purpose
             if component == 'storage':
                 _status = 'critical'
-                state = "Fake error"
+                state = "[Fake error] device is faulty."
+            elif component == 'power':
+                _status = 'critical'
+                state = "[Fake error] power supply unplugged."
             else:
                 _status = data[component][member]['status']['health'].lower()
             if _status.lower() != 'ok':
@@ -167,6 +170,8 @@ class NodeProxy:
             'memory': 'NODE_PROXY_MEMORY',
             'processors': 'NODE_PROXY_PROCESSORS',
             'network': 'NODE_PROXY_NETWORK',
+            'power': 'NODE_PROXY_POWER',
+            'fans': 'NODE_PROXY_FANS'
         }
 
         for component in data['data'].keys():