From 83964cf27ea8fb61ec753dc3aed62a10bc8082f4 Mon Sep 17 00:00:00 2001 From: Guillaume Abrioux Date: Wed, 27 Sep 2023 08:27:28 +0000 Subject: [PATCH] node-proxy: split redfishdell class This refactoring splits the redfishdell class in order to collect power and thermal details from the redfish API. 'power' and 'thermal' details differ in several respects: - they are not available at the same endpoint, - their data structures are different. For these two reasons, let's split that class. Signed-off-by: Guillaume Abrioux (cherry picked from commit db0172186a753d57c357a5396378d1158e3167e3) --- ...redfish_system.py => baseredfishsystem.py} | 149 ++++++++---------- .../cephadmlib/node_proxy/redfish_dell.py | 65 +------- .../node_proxy/redfishdellchassis.py | 67 ++++++++ .../node_proxy/redfishdellsystem.py | 95 +++++++++++ src/pybind/mgr/cephadm/agent.py | 7 +- 5 files changed, 240 insertions(+), 143 deletions(-) rename src/cephadm/cephadmlib/node_proxy/{redfish_system.py => baseredfishsystem.py} (76%) create mode 100644 src/cephadm/cephadmlib/node_proxy/redfishdellchassis.py create mode 100644 src/cephadm/cephadmlib/node_proxy/redfishdellsystem.py diff --git a/src/cephadm/cephadmlib/node_proxy/redfish_system.py b/src/cephadm/cephadmlib/node_proxy/baseredfishsystem.py similarity index 76% rename from src/cephadm/cephadmlib/node_proxy/redfish_system.py rename to src/cephadm/cephadmlib/node_proxy/baseredfishsystem.py index 95c82960eae1c..2e1d9087d7978 100644 --- a/src/cephadm/cephadmlib/node_proxy/redfish_system.py +++ b/src/cephadm/cephadmlib/node_proxy/baseredfishsystem.py @@ -2,114 +2,35 @@ from .basesystem import BaseSystem from .redfish_client import RedFishClient from threading import Thread, Lock from time import sleep -from .util import Logger, retry, normalize_dict, to_snake_case +from .util import Logger, retry from typing import Dict, Any, List -class RedfishSystem(BaseSystem): +class BaseRedfishSystem(BaseSystem): def __init__(self, **kw: Any) -> None: super().__init__(**kw) 
self.log = Logger(__name__) self.host: str = kw['host'] self.username: str = kw['username'] self.password: str = kw['password'] - self.system_endpoint = kw.get('system_endpoint', '/Systems/1') + # move the following line (class attribute?) self.client = RedFishClient(host=self.host, username=self.username, password=self.password) self.log.logger.info(f"redfish system initialization, host: {self.host}, user: {self.username}") - self._system: Dict[str, Dict[str, Any]] = {} self.run: bool = False self.thread: Thread - self.start_client() self.data_ready: bool = False self.previous_data: Dict = {} self.lock: Lock = Lock() - - @retry(retries=10, delay=2) - def _get_path(self, path: str) -> Dict: - result = self.client.get_path(path) - if result is None: - self.log.logger.error(f"The client reported an error when getting path: {path}") - raise RuntimeError(f"Could not get path: {path}") - return result - - def get_members(self, path: str) -> List: - _path = self._system[path]['@odata.id'] - data = self._get_path(_path) - return [self._get_path(member['@odata.id']) for member in data['Members']] - - def build_data(self, - fields: List, - path: str) -> Dict[str, Dict[str, Dict]]: - result: Dict[str, Dict[str, Dict]] = dict() - for member_info in self.get_members(path): - member_id = member_info['Id'] - result[member_id] = dict() - for field in fields: - try: - result[member_id][to_snake_case(field)] = member_info[field] - except KeyError: - self.log.logger.warning(f"Could not find field: {field} in member_info: {member_info}") - - return normalize_dict(result) + self.data: Dict[str, Dict[str, Any]] = {} + self._system: Dict[str, Dict[str, Any]] = {} + self.start_client() def start_client(self) -> None: if not self.client: self.client = RedFishClient(host=self.host, username=self.username, password=self.password) self.client.login() - def get_system(self) -> Dict[str, Dict[str, Dict]]: - result = { - 'storage': self.get_storage(), - 'processors': self.get_processors(), - 
'network': self.get_network(), - 'memory': self.get_memory(), - } - return result - - def get_status(self) -> Dict[str, Dict[str, Dict]]: - return self._system['status'] - - def get_metadata(self) -> Dict[str, Dict[str, Dict]]: - return self._system['metadata'] - - def get_memory(self) -> Dict[str, Dict[str, Dict]]: - return self._system['memory'] - - def get_power(self) -> Dict[str, Dict[str, Dict]]: - return self._system['power'] - - def get_processors(self) -> Dict[str, Dict[str, Dict]]: - return self._system['processors'] - - def get_network(self) -> Dict[str, Dict[str, Dict]]: - return self._system['network'] - - def get_storage(self) -> Dict[str, Dict[str, Dict]]: - return self._system['storage'] - - def _update_system(self) -> None: - redfish_system = self.client.get_path(self.system_endpoint) - self._system = {**redfish_system, **self._system} - - def _update_metadata(self) -> None: - raise NotImplementedError() - - def _update_memory(self) -> None: - raise NotImplementedError() - - def _update_power(self) -> None: - raise NotImplementedError() - - def _update_network(self) -> None: - raise NotImplementedError() - - def _update_processors(self) -> None: - raise NotImplementedError() - - def _update_storage(self) -> None: - raise NotImplementedError() - def start_update_loop(self) -> None: self.run = True self.thread = Thread(target=self.update) @@ -133,6 +54,7 @@ class RedfishSystem(BaseSystem): self._update_metadata() self._update_memory() self._update_power() + self._update_fans() self._update_network() self._update_processors() self._update_storage() @@ -158,3 +80,60 @@ class RedfishSystem(BaseSystem): self.log.logger.info("Data marked as not ready.") self.lock.release() self.log.logger.info("Lock released.") + + @retry(retries=10, delay=2) + def _get_path(self, path: str) -> Dict: + result = self.client.get_path(path) + if result is None: + self.log.logger.error(f"The client reported an error when getting path: {path}") + raise RuntimeError(f"Could not 
get path: {path}") + return result + + def get_members(self, path: str) -> List: + _path = self._system[path]['@odata.id'] + data = self._get_path(_path) + return [self._get_path(member['@odata.id']) for member in data['Members']] + + def build_data(self, + fields: List, + path: str) -> Dict[str, Dict[str, Dict]]: + raise NotImplementedError() + + # def _update_system(self) -> None: + # raise NotImplementedError() + + def get_system(self) -> Dict[str, Dict[str, Dict]]: + result = { + 'storage': self.get_storage(), + 'processors': self.get_processors(), + 'network': self.get_network(), + 'memory': self.get_memory(), + 'power': self.get_power(), + 'fans': self.get_fans() + } + return result + + def _update_system(self) -> None: + redfish_system = self.client.get_path(self.system_endpoint) + self._system = {**redfish_system, **self._system} + + def _update_metadata(self) -> None: + raise NotImplementedError() + + def _update_memory(self) -> None: + raise NotImplementedError() + + def _update_power(self) -> None: + raise NotImplementedError() + + def _update_fans(self) -> None: + raise NotImplementedError() + + def _update_network(self) -> None: + raise NotImplementedError() + + def _update_processors(self) -> None: + raise NotImplementedError() + + def _update_storage(self) -> None: + raise NotImplementedError() diff --git a/src/cephadm/cephadmlib/node_proxy/redfish_dell.py b/src/cephadm/cephadmlib/node_proxy/redfish_dell.py index 796c989f8834d..f6a01664629c9 100644 --- a/src/cephadm/cephadmlib/node_proxy/redfish_dell.py +++ b/src/cephadm/cephadmlib/node_proxy/redfish_dell.py @@ -1,63 +1,14 @@ -from .redfish_system import RedfishSystem -from .util import Logger, normalize_dict, to_snake_case -from typing import Dict, Any +from .redfishdellchassis import RedfishDellChassis +from .redfishdellsystem import RedfishDellSystem +from .util import Logger +from typing import Any -class RedfishDell(RedfishSystem): +class RedfishDell(RedfishDellSystem, RedfishDellChassis): def 
__init__(self, **kw: Any) -> None: - self.log = Logger(__name__) if kw.get('system_endpoint') is None: kw['system_endpoint'] = '/Systems/System.Embedded.1' + if kw.get('chassis_endpoint') is None: + kw['chassis_endpoint'] = '/Chassis/System.Embedded.1' super().__init__(**kw) - - def _update_network(self) -> None: - fields = ['Description', 'Name', 'SpeedMbps', 'Status'] - self.log.logger.info("Updating network") - self._system['network'] = self.build_data(fields, 'EthernetInterfaces') - - def _update_processors(self) -> None: - fields = ['Description', - 'TotalCores', - 'TotalThreads', - 'ProcessorType', - 'Model', - 'Status', - 'Manufacturer'] - self.log.logger.info("Updating processors") - self._system['processors'] = self.build_data(fields, 'Processors') - - def _update_storage(self) -> None: - fields = ['Description', - 'CapacityBytes', - 'Model', 'Protocol', - 'SerialNumber', 'Status', - 'PhysicalLocation'] - entities = self.get_members('Storage') - self.log.logger.info("Updating storage") - result: Dict[str, Dict[str, Dict]] = dict() - for entity in entities: - for drive in entity['Drives']: - drive_path = drive['@odata.id'] - drive_info = self._get_path(drive_path) - drive_id = drive_info['Id'] - result[drive_id] = dict() - for field in fields: - result[drive_id][to_snake_case(field)] = drive_info[field] - result[drive_id]['entity'] = entity['Id'] - self._system['storage'] = normalize_dict(result) - - def _update_metadata(self) -> None: - self.log.logger.info("Updating metadata") - pass - - def _update_memory(self) -> None: - fields = ['Description', - 'MemoryDeviceType', - 'CapacityMiB', - 'Status'] - self.log.logger.info("Updating memory") - self._system['memory'] = self.build_data(fields, 'Memory') - - def _update_power(self) -> None: - self.log.logger.info("Updating power") - pass + self.log = Logger(__name__) diff --git a/src/cephadm/cephadmlib/node_proxy/redfishdellchassis.py b/src/cephadm/cephadmlib/node_proxy/redfishdellchassis.py new file mode 
100644 index 0000000000000..39610dc744562 --- /dev/null +++ b/src/cephadm/cephadmlib/node_proxy/redfishdellchassis.py @@ -0,0 +1,67 @@ +from .baseredfishsystem import BaseRedfishSystem +from .redfish_client import RedFishClient +from threading import Thread, Lock +from time import sleep +from .util import Logger, retry, normalize_dict, to_snake_case +from typing import Dict, Any, List, Union + + +class RedfishDellChassis(BaseRedfishSystem): + def __init__(self, **kw: Any) -> None: + self.chassis_endpoint = kw.get('chassis_endpoint', '/Chassis/System.Embedded.1') + super().__init__(**kw) + self.log = Logger(__name__) + self.log.logger.info(f"{__name__} initialization.") + + def get_power(self) -> Dict[str, Dict[str, Dict]]: + return self._system['power'] + + def get_fans(self) -> Dict[str, Dict[str, Dict]]: + return self._system['fans'] + + def get_chassis(self) -> Dict[str, Dict[str, Dict]]: + result = { + 'power': self.get_power(), + 'fans': self.get_fans() + } + return result + + def _update_power(self) -> None: + fields = { + "PowerSupplies": [ + "Name", + "Model", + "Manufacturer", + "Status" + ] + } + self.log.logger.info("Updating powersupplies") + self._system['power'] = self.build_chassis_data(fields, 'Power') + + def _update_fans(self) -> None: + fields = { + "Fans": [ + "Name", + "PhysicalContext", + "Status" + ], + } + self.log.logger.info("Updating fans") + self._system['fans'] = self.build_chassis_data(fields, 'Thermal') + + def build_chassis_data(self, + fields: Dict[str, List[str]], + path: str) -> Dict[str, Dict[str, Dict]]: + result: Dict[str, Dict[str, Dict]] = dict() + data = self._get_path(f"{self.chassis_endpoint}/{path}") + + for elt, _fields in fields.items(): + for member_elt in data[elt]: + _id = member_elt['MemberId'] + result[_id] = dict() + for field in _fields: + try: + result[_id][to_snake_case(field)] = member_elt[field] + except KeyError: + self.log.logger.warning(f"Could not find field: {field} in data: {data[elt]}") + return 
normalize_dict(result) diff --git a/src/cephadm/cephadmlib/node_proxy/redfishdellsystem.py b/src/cephadm/cephadmlib/node_proxy/redfishdellsystem.py new file mode 100644 index 0000000000000..de9756fe79fe8 --- /dev/null +++ b/src/cephadm/cephadmlib/node_proxy/redfishdellsystem.py @@ -0,0 +1,95 @@ +from .baseredfishsystem import BaseRedfishSystem +from .util import Logger, normalize_dict, to_snake_case +from typing import Dict, Any, List + + +class RedfishDellSystem(BaseRedfishSystem): + def __init__(self, **kw: Any) -> None: + self.system_endpoint = kw.get('system_endpoint', '/Systems/System.Embedded.1') + super().__init__(**kw) + self.log = Logger(__name__) + + def build_system_data(self, + fields: List, + path: str) -> Dict[str, Dict[str, Dict]]: + result: Dict[str, Dict[str, Dict]] = dict() + for member_info in self.get_members(path): + member_id = member_info['Id'] + result[member_id] = dict() + for field in fields: + try: + result[member_id][to_snake_case(field)] = member_info[field] + except KeyError: + self.log.logger.warning(f"Could not find field: {field} in member_info: {member_info}") + + return normalize_dict(result) + + def get_status(self) -> Dict[str, Dict[str, Dict]]: + return self._system['status'] + + def get_metadata(self) -> Dict[str, Dict[str, Dict]]: + return self._system['metadata'] + + def get_memory(self) -> Dict[str, Dict[str, Dict]]: + return self._system['memory'] + + def get_processors(self) -> Dict[str, Dict[str, Dict]]: + return self._system['processors'] + + def get_network(self) -> Dict[str, Dict[str, Dict]]: + return self._system['network'] + + def get_storage(self) -> Dict[str, Dict[str, Dict]]: + return self._system['storage'] + + # def _update_system(self) -> None: + # redfish_system = self.client.get_path(self.system_endpoint) + # self._system = {**redfish_system, **self._system} + + def _update_network(self) -> None: + fields = ['Description', 'Name', 'SpeedMbps', 'Status'] + self.log.logger.info("Updating network") + 
self._system['network'] = self.build_system_data(fields, 'EthernetInterfaces') + + def _update_processors(self) -> None: + fields = ['Description', + 'TotalCores', + 'TotalThreads', + 'ProcessorType', + 'Model', + 'Status', + 'Manufacturer'] + self.log.logger.info("Updating processors") + self._system['processors'] = self.build_system_data(fields, 'Processors') + + def _update_storage(self) -> None: + fields = ['Description', + 'CapacityBytes', + 'Model', 'Protocol', + 'SerialNumber', 'Status', + 'PhysicalLocation'] + entities = self.get_members('Storage') + self.log.logger.info("Updating storage") + result: Dict[str, Dict[str, Dict]] = dict() + for entity in entities: + for drive in entity['Drives']: + drive_path = drive['@odata.id'] + drive_info = self._get_path(drive_path) + drive_id = drive_info['Id'] + result[drive_id] = dict() + for field in fields: + result[drive_id][to_snake_case(field)] = drive_info[field] + result[drive_id]['entity'] = entity['Id'] + self._system['storage'] = normalize_dict(result) + + def _update_metadata(self) -> None: + self.log.logger.info("Updating metadata") + pass + + def _update_memory(self) -> None: + fields = ['Description', + 'MemoryDeviceType', + 'CapacityMiB', + 'Status'] + self.log.logger.info("Updating memory") + self._system['memory'] = self.build_system_data(fields, 'Memory') diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py index d3376d2358890..697f097e14348 100644 --- a/src/pybind/mgr/cephadm/agent.py +++ b/src/pybind/mgr/cephadm/agent.py @@ -147,7 +147,10 @@ class NodeProxy: # Force a fake error for testing purpose if component == 'storage': _status = 'critical' - state = "Fake error" + state = "[Fake error] device is faulty." + elif component == 'power': + _status = 'critical' + state = "[Fake error] power supply unplugged." 
else: _status = data[component][member]['status']['health'].lower() if _status.lower() != 'ok': @@ -167,6 +170,8 @@ class NodeProxy: 'memory': 'NODE_PROXY_MEMORY', 'processors': 'NODE_PROXY_PROCESSORS', 'network': 'NODE_PROXY_NETWORK', + 'power': 'NODE_PROXY_POWER', + 'fans': 'NODE_PROXY_FANS' } for component in data['data'].keys(): -- 2.39.5