]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
cephadm/node-proxy: logging issues / error handling refactor
authorGuillaume Abrioux <gabrioux@ibm.com>
Fri, 6 Oct 2023 13:55:21 +0000 (13:55 +0000)
committerGuillaume Abrioux <gabrioux@ibm.com>
Thu, 25 Jan 2024 15:09:07 +0000 (15:09 +0000)
- fix multiple logging issue because of new handler
  added each time `Logger` is called
- do not propagate to parent (root) logger: as it makes it log the messages too
- implement a new method `is_logged()` in `RedFishClient`
- refactor the logic regarding caught errors in `RedFishClient`

Signed-off-by: Guillaume Abrioux <gabrioux@ibm.com>
(cherry picked from commit d43452d57f4342a8d0bf0b38e138e92945ba4eb6)

src/cephadm/cephadmlib/node_proxy/baseclient.py
src/cephadm/cephadmlib/node_proxy/baseredfishsystem.py
src/cephadm/cephadmlib/node_proxy/redfish_client.py
src/cephadm/cephadmlib/node_proxy/server.py
src/cephadm/cephadmlib/node_proxy/util.py

index 1415bc3fe19428c6ceccb3fa9ee49e8df03a8c78..6b46561486d5d8a0ced9d19744985d22f5ed0c50 100644 (file)
@@ -10,7 +10,7 @@ class BaseClient:
         self.username = username
         self.password = password
 
-    def login(self) -> Dict[str, Any]:
+    def login(self) -> None:
         raise NotImplementedError()
 
     def logout(self) -> Dict[str, Any]:
index 2e1d9087d797802008106febd4eb67150b428bf3..b61d63a5bcc35a74839d6c96d1ef800ad4913091 100644 (file)
@@ -27,9 +27,8 @@ class BaseRedfishSystem(BaseSystem):
         self.start_client()
 
     def start_client(self) -> None:
-        if not self.client:
-            self.client = RedFishClient(host=self.host, username=self.username, password=self.password)
         self.client.login()
+        self.start_update_loop()
 
     def start_update_loop(self) -> None:
         self.run = True
@@ -43,31 +42,29 @@ class BaseRedfishSystem(BaseSystem):
     def update(self) -> None:
         #  this loop can have:
         #  - caching logic
-        try:
-            while self.run:
-                self.log.logger.debug("waiting for a lock.")
-                self.lock.acquire()
-                self.log.logger.debug("lock acquired.")
-                try:
-                    self._update_system()
-                    # following calls in theory can be done in parallel
-                    self._update_metadata()
-                    self._update_memory()
-                    self._update_power()
-                    self._update_fans()
-                    self._update_network()
-                    self._update_processors()
-                    self._update_storage()
-                    self.data_ready = True
-                    sleep(5)
-                finally:
-                    self.lock.release()
-                    self.log.logger.debug("lock released.")
-        # Catching 'Exception' is probably not a good idea (devel only)
-        except Exception as e:
-            self.log.logger.error(f"Error detected, logging out from redfish api.\n{e}")
-            self.client.logout()
-            raise
+        while self.run:
+            self.log.logger.debug("waiting for a lock.")
+            self.lock.acquire()
+            self.log.logger.debug("lock acquired.")
+            try:
+                self._update_system()
+                # following calls in theory can be done in parallel
+                self._update_metadata()
+                self._update_memory()
+                self._update_power()
+                self._update_fans()
+                self._update_network()
+                self._update_processors()
+                self._update_storage()
+                self.data_ready = True
+                sleep(5)
+            except RuntimeError as e:
+                self.run = False
+                self.log.logger.error(f"Error detected, trying to gracefully log out from redfish api.\n{e}")
+                self.client.logout()
+            finally:
+                self.lock.release()
+                self.log.logger.debug("lock released.")
 
     def flush(self) -> None:
         self.log.logger.info("Acquiring lock to flush data.")
@@ -83,7 +80,10 @@ class BaseRedfishSystem(BaseSystem):
 
     @retry(retries=10, delay=2)
     def _get_path(self, path: str) -> Dict:
-        result = self.client.get_path(path)
+        try:
+            result = self.client.get_path(path)
+        except RuntimeError:
+            raise
         if result is None:
             self.log.logger.error(f"The client reported an error when getting path: {path}")
             raise RuntimeError(f"Could not get path: {path}")
index 437e4be131a4e3f4889ecfb85c07132a4df00431..439ad750a4656710e63e1721acb3a0aca9e7b9ed 100644 (file)
@@ -11,6 +11,7 @@ from typing import Dict, Any, Tuple, Optional
 
 class RedFishClient(BaseClient):
     PREFIX = '/redfish/v1/'
+
     def __init__(self,
                  host: str = "",
                  port: str = "443",
@@ -23,27 +24,49 @@ class RedFishClient(BaseClient):
         self.token: Dict[str, str] = {}
         self.location: str = ''
 
-    def login(self) -> Dict[str, Any]:
-        self.log.logger.info(f"Logging in to {self.host} as '{self.username}'")
-        idrac_credentials = json.dumps({"UserName": self.username, "Password": self.password})
-        headers = {"Content-Type": "application/json"}
+    def login(self) -> None:
+        if not self.is_logged_in():
+            self.log.logger.info("Logging in to "
+                                 f"{self.host} as '{self.username}'")
+            idrac_credentials = json.dumps({"UserName": self.username,
+                                            "Password": self.password})
+            headers = {"Content-Type": "application/json"}
+
+            try:
+                _headers, _data, _status_code = self.query(data=idrac_credentials,
+                                                           headers=headers,
+                                                           endpoint='/redfish/v1/SessionService/Sessions/')
+                if _status_code != 201:
+                    self.log.logger.error(f"Can't log in to {self.host} as '{self.username}': {_status_code}")
+                    raise RuntimeError
+            except URLError as e:
+                msg = f"Can't log in to {self.host} as '{self.username}': {e}"
+                self.log.logger.error(msg)
+                raise RuntimeError
+            self.token = {"X-Auth-Token": _headers['X-Auth-Token']}
+            self.location = _headers['Location']
 
+    def is_logged_in(self) -> bool:
+        self.log.logger.debug(f"Checking token validity for {self.host}")
+        if not self.location or not self.token.get('X-Auth-Token'):
+            self.log.logger.debug(f"No token found for {self.host}.")
+            return False
+        headers = {"X-Auth-Token": self.token['X-Auth-Token']}
         try:
-            _headers, _data = self.query(data=idrac_credentials,
-                                         headers=headers,
-                                         endpoint='/redfish/v1/SessionService/Sessions/')
+            _headers, _data, _status_code = self.query(headers=headers,
+                                                       endpoint=self.location)
         except URLError as e:
-            self.log.logger.error(f"Can't log in to {self.host} as '{self.username}'.\n{e}")
-            return {}
-        self.token = {"X-Auth-Token": _headers['X-Auth-Token']}
-        self.location = _headers['Location']
-
-        return json.loads(_data)
+            self.log.logger.error("Can't check token "
+                                  f"validity for {self.host}: {e}")
+            raise RuntimeError
+        return _status_code == 200
 
     def logout(self) -> Dict[str, Any]:
         try:
-            _, _data = self.query(method='DELETE', headers=self.token, endpoint=self.location)
-        except URLError as e:
+            _, _data, _status_code = self.query(method='DELETE',
+                                                headers=self.token,
+                                                endpoint=self.location)
+        except URLError:
             self.log.logger.error(f"Can't log out from {self.host}")
             return {}
 
@@ -55,18 +78,20 @@ class RedFishClient(BaseClient):
         if self.PREFIX not in path:
             path = f"{self.PREFIX}{path}"
         try:
-            _, result = self.query(headers=self.token, endpoint=path)
+            _, result, _status_code = self.query(headers=self.token,
+                                                 endpoint=path)
             result_json = json.loads(result)
             return result_json
         except URLError as e:
             self.log.logger.error(f"Can't get path {path}:\n{e}")
-            return {}
+            raise RuntimeError
 
     def query(self,
               data: Optional[str] = None,
               headers: Dict[str, str] = {},
               method: Optional[str] = None,
-              endpoint: str = '') -> Tuple[Dict[str, str], str]:
+              endpoint: str = '',
+              timeout: int = 10) -> Tuple[Dict[str, str], str, int]:
         url = f'{self.host}{endpoint}'
 
         # ssl_ctx = ssl.create_default_context()
@@ -76,14 +101,11 @@ class RedFishClient(BaseClient):
         _data = bytes(data, 'ascii') if data else None
         try:
             req = Request(url, _data, headers=headers, method=method)
-            send_time = time.monotonic()
-            with urlopen(req, context=ssl_ctx) as response:
+            with urlopen(req, context=ssl_ctx, timeout=timeout) as response:
                 response_str = response.read()
                 response_headers = response.headers
-                response_json = json.loads(response_str)
-                total_request_time = datetime.timedelta(seconds=(time.monotonic() - send_time)).total_seconds()
-        except Exception as e:
-            self.log.logger.error(f"{e}")
+        except URLError as e:
+            self.log.logger.debug(f"{e}")
             raise
 
-        return response_headers, response_str
+        return response_headers, response_str, response.status
index 78d1e7065a963955025a4ab6497ba4a776747937..9f7ed6c1cb6a21d89a9bad661c6b541e8f1f3bbd 100644 (file)
@@ -122,7 +122,7 @@ class Start:
 
     def POST(self) -> str:
         self.backend.start_client()
-        self.backend.start_update_loop()
+        self.backend.start_update_loop()
         self.reporter.run()
         return 'node-proxy daemon started'
 
@@ -218,11 +218,15 @@ def main(host: str = '',
 
     # create the redfish system and the obsever
     log.logger.info("Server initialization...")
-    system = RedfishDell(host=host,
-                         username=username,
-                         password=password,
-                         system_endpoint='/Systems/System.Embedded.1',
-                         config=config)
+    try:
+        system = RedfishDell(host=host,
+                            username=username,
+                            password=password,
+                            system_endpoint='/Systems/System.Embedded.1',
+                            config=config)
+    except RuntimeError:
+        log.logger.error(f"Can't initialize the redfish system.")
+
     reporter_agent = Reporter(system, data, f"https://{mgr_target_ip}:{mgr_target_port}/node-proxy/data")
     cherrypy.config.update({
         'node_proxy': config,
index 98c1a7d36712eed635e1f15cdf397606f55e1d7f..40b01f0c7640f32a052bbb89b07107cb0b1e7697 100644 (file)
@@ -23,7 +23,9 @@ class Logger:
         handler.setLevel(self.level)
         fmt = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
         handler.setFormatter(fmt)
+        logger.handlers.clear()
         logger.addHandler(handler)
+        logger.propagate = False
 
         return logger