From d43452d57f4342a8d0bf0b38e138e92945ba4eb6 Mon Sep 17 00:00:00 2001 From: Guillaume Abrioux Date: Fri, 6 Oct 2023 13:55:21 +0000 Subject: [PATCH] cephadm/node-proxy: logging issues / error handling refactor - fix multiple logging issue because of new handler added each time `Logger` is called - do not propagate to parent (root) logger: as it makes it log the messages too - implement a new method `is_logged()` in `RedFishClient` - refactor the logic regarding caught errors in `RedFishClient` Signed-off-by: Guillaume Abrioux --- .../cephadmlib/node_proxy/baseclient.py | 2 +- .../node_proxy/baseredfishsystem.py | 56 +++++++-------- .../cephadmlib/node_proxy/redfish_client.py | 72 ++++++++++++------- src/cephadm/cephadmlib/node_proxy/server.py | 16 +++-- src/cephadm/cephadmlib/node_proxy/util.py | 2 + 5 files changed, 88 insertions(+), 60 deletions(-) diff --git a/src/cephadm/cephadmlib/node_proxy/baseclient.py b/src/cephadm/cephadmlib/node_proxy/baseclient.py index 1415bc3fe1942..6b46561486d5d 100644 --- a/src/cephadm/cephadmlib/node_proxy/baseclient.py +++ b/src/cephadm/cephadmlib/node_proxy/baseclient.py @@ -10,7 +10,7 @@ class BaseClient: self.username = username self.password = password - def login(self) -> Dict[str, Any]: + def login(self) -> None: raise NotImplementedError() def logout(self) -> Dict[str, Any]: diff --git a/src/cephadm/cephadmlib/node_proxy/baseredfishsystem.py b/src/cephadm/cephadmlib/node_proxy/baseredfishsystem.py index 2e1d9087d7978..b61d63a5bcc35 100644 --- a/src/cephadm/cephadmlib/node_proxy/baseredfishsystem.py +++ b/src/cephadm/cephadmlib/node_proxy/baseredfishsystem.py @@ -27,9 +27,8 @@ class BaseRedfishSystem(BaseSystem): self.start_client() def start_client(self) -> None: - if not self.client: - self.client = RedFishClient(host=self.host, username=self.username, password=self.password) self.client.login() + self.start_update_loop() def start_update_loop(self) -> None: self.run = True @@ -43,31 +42,29 @@ class BaseRedfishSystem(BaseSystem): def update(self) -> None: # this loop can have: # - caching logic - try: - while self.run: - self.log.logger.debug("waiting for a lock.") - self.lock.acquire() - self.log.logger.debug("lock acquired.") - try: - self._update_system() - # following calls in theory can be done in parallel - self._update_metadata() - self._update_memory() - self._update_power() - self._update_fans() - self._update_network() - self._update_processors() - self._update_storage() - self.data_ready = True - sleep(5) - finally: - self.lock.release() - self.log.logger.debug("lock released.") - # Catching 'Exception' is probably not a good idea (devel only) - except Exception as e: - self.log.logger.error(f"Error detected, logging out from redfish api.\n{e}") - self.client.logout() - raise + while self.run: + self.log.logger.debug("waiting for a lock.") + self.lock.acquire() + self.log.logger.debug("lock acquired.") + try: + self._update_system() + # following calls in theory can be done in parallel + self._update_metadata() + self._update_memory() + self._update_power() + self._update_fans() + self._update_network() + self._update_processors() + self._update_storage() + self.data_ready = True + sleep(5) + except RuntimeError as e: + self.run = False + self.log.logger.error(f"Error detected, trying to gracefully log out from redfish api.\n{e}") + self.client.logout() + finally: + self.lock.release() + self.log.logger.debug("lock released.") def flush(self) -> None: self.log.logger.info("Acquiring lock to flush data.") @@ -83,7 +80,10 @@ class BaseRedfishSystem(BaseSystem): @retry(retries=10, delay=2) def _get_path(self, path: str) -> Dict: - result = self.client.get_path(path) + try: + result = self.client.get_path(path) + except RuntimeError: + raise if result is None: self.log.logger.error(f"The client reported an error when getting path: {path}") raise RuntimeError(f"Could not get path: {path}") diff --git a/src/cephadm/cephadmlib/node_proxy/redfish_client.py b/src/cephadm/cephadmlib/node_proxy/redfish_client.py index 437e4be131a4e..439ad750a4656 100644 --- a/src/cephadm/cephadmlib/node_proxy/redfish_client.py +++ b/src/cephadm/cephadmlib/node_proxy/redfish_client.py @@ -11,6 +11,7 @@ from typing import Dict, Any, Tuple, Optional class RedFishClient(BaseClient): PREFIX = '/redfish/v1/' + def __init__(self, host: str = "", port: str = "443", @@ -23,27 +24,49 @@ class RedFishClient(BaseClient): self.token: Dict[str, str] = {} self.location: str = '' - def login(self) -> Dict[str, Any]: - self.log.logger.info(f"Logging in to {self.host} as '{self.username}'") - idrac_credentials = json.dumps({"UserName": self.username, "Password": self.password}) - headers = {"Content-Type": "application/json"} + def login(self) -> None: + if not self.is_logged_in(): + self.log.logger.info("Logging in to " + f"{self.host} as '{self.username}'") + idrac_credentials = json.dumps({"UserName": self.username, + "Password": self.password}) + headers = {"Content-Type": "application/json"} + + try: + _headers, _data, _status_code = self.query(data=idrac_credentials, + headers=headers, + endpoint='/redfish/v1/SessionService/Sessions/') + if _status_code != 201: + self.log.logger.error(f"Can't log in to {self.host} as '{self.username}': {_status_code}") + raise RuntimeError + except URLError as e: + msg = f"Can't log in to {self.host} as '{self.username}': {e}" + self.log.logger.error(msg) + raise RuntimeError + self.token = {"X-Auth-Token": _headers['X-Auth-Token']} + self.location = _headers['Location'] + def is_logged_in(self) -> bool: + self.log.logger.debug(f"Checking token validity for {self.host}") + if not self.location or not self.token.get('X-Auth-Token'): + self.log.logger.debug(f"No token found for {self.host}.") + return False + headers = {"X-Auth-Token": self.token['X-Auth-Token']} try: - _headers, _data = self.query(data=idrac_credentials, - headers=headers, - endpoint='/redfish/v1/SessionService/Sessions/') + _headers, _data, _status_code = self.query(headers=headers, + endpoint=self.location) except URLError as e: - self.log.logger.error(f"Can't log in to {self.host} as '{self.username}'.\n{e}") - return {} - self.token = {"X-Auth-Token": _headers['X-Auth-Token']} - self.location = _headers['Location'] - - return json.loads(_data) + self.log.logger.error("Can't check token " + f"validity for {self.host}: {e}") + raise RuntimeError + return _status_code == 200 def logout(self) -> Dict[str, Any]: try: - _, _data = self.query(method='DELETE', headers=self.token, endpoint=self.location) - except URLError as e: + _, _data, _status_code = self.query(method='DELETE', + headers=self.token, + endpoint=self.location) + except URLError: self.log.logger.error(f"Can't log out from {self.host}") return {} @@ -55,18 +78,20 @@ class RedFishClient(BaseClient): if self.PREFIX not in path: path = f"{self.PREFIX}{path}" try: - _, result = self.query(headers=self.token, endpoint=path) + _, result, _status_code = self.query(headers=self.token, + endpoint=path) result_json = json.loads(result) return result_json except URLError as e: self.log.logger.error(f"Can't get path {path}:\n{e}") - return {} + raise RuntimeError def query(self, data: Optional[str] = None, headers: Dict[str, str] = {}, method: Optional[str] = None, - endpoint: str = '') -> Tuple[Dict[str, str], str]: + endpoint: str = '', + timeout: int = 10) -> Tuple[Dict[str, str], str, int]: url = f'{self.host}{endpoint}' # ssl_ctx = ssl.create_default_context() @@ -76,14 +101,11 @@ class RedFishClient(BaseClient): _data = bytes(data, 'ascii') if data else None try: req = Request(url, _data, headers=headers, method=method) - send_time = time.monotonic() - with urlopen(req, context=ssl_ctx) as response: + with urlopen(req, context=ssl_ctx, timeout=timeout) as response: response_str = response.read() response_headers = response.headers - response_json = json.loads(response_str) - total_request_time = datetime.timedelta(seconds=(time.monotonic() - send_time)).total_seconds() - except Exception as e: - self.log.logger.error(f"{e}") + except URLError as e: + self.log.logger.debug(f"{e}") raise - return response_headers, response_str + return response_headers, response_str, response.status diff --git a/src/cephadm/cephadmlib/node_proxy/server.py b/src/cephadm/cephadmlib/node_proxy/server.py index 78d1e7065a963..9f7ed6c1cb6a2 100644 --- a/src/cephadm/cephadmlib/node_proxy/server.py +++ b/src/cephadm/cephadmlib/node_proxy/server.py @@ -122,7 +122,7 @@ class Start: def POST(self) -> str: self.backend.start_client() - self.backend.start_update_loop() + # self.backend.start_update_loop() self.reporter.run() return 'node-proxy daemon started' @@ -218,11 +218,15 @@ def main(host: str = '', # create the redfish system and the obsever log.logger.info("Server initialization...") - system = RedfishDell(host=host, - username=username, - password=password, - system_endpoint='/Systems/System.Embedded.1', - config=config) + try: + system = RedfishDell(host=host, + username=username, + password=password, + system_endpoint='/Systems/System.Embedded.1', + config=config) + except RuntimeError: + log.logger.error(f"Can't initialize the redfish system.") + reporter_agent = Reporter(system, data, f"https://{mgr_target_ip}:{mgr_target_port}/node-proxy/data") cherrypy.config.update({ 'node_proxy': config, diff --git a/src/cephadm/cephadmlib/node_proxy/util.py b/src/cephadm/cephadmlib/node_proxy/util.py index 98c1a7d36712e..40b01f0c7640f 100644 --- a/src/cephadm/cephadmlib/node_proxy/util.py +++ b/src/cephadm/cephadmlib/node_proxy/util.py @@ -23,7 +23,9 @@ class Logger: handler.setLevel(self.level) fmt = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') handler.setFormatter(fmt) + logger.handlers.clear() logger.addHandler(handler) + logger.propagate = False return logger -- 2.39.5