git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
node-proxy: add a lock mechanism
author Guillaume Abrioux <gabrioux@ibm.com>
Mon, 22 May 2023 12:20:54 +0000 (14:20 +0200)
committer Guillaume Abrioux <gabrioux@ibm.com>
Thu, 25 Jan 2024 14:43:29 +0000 (14:43 +0000)
The loop in the reporter agent has to wait until all the data has been
collected before checking it and pushing it to the ceph-mgr (if needed).
The idea is to use the lock mechanism offered by Python's threading
module.

Signed-off-by: Guillaume Abrioux <gabrioux@ibm.com>
src/cephadm/node-proxy/redfish_system.py
src/cephadm/node-proxy/reporter.py

index 94c6682774395aa5dd10002019172bb557db0de8..7c6ebe6de734536e5118aabd7d48fa804e93ce01 100644 (file)
@@ -1,6 +1,6 @@
 from system import System
 from redfish_client import RedFishClient
-from threading import Thread
+from threading import Thread, Lock
 from time import sleep
 from util import logger
 
@@ -19,6 +19,9 @@ class RedfishSystem(System):
         self.run = False
         self.thread = None
         self.start_client()
+        self.data_ready = False
+        self.previous_data = {}
+        self.lock = Lock()
 
     def start_client(self):
         log.info(f"redfish system initialization, host: {self.host}, user: {self.username}")
@@ -85,15 +88,23 @@ class RedfishSystem(System):
         #  - caching logic
         try:
             while self.run:
-                self._update_system()
-                # following calls in theory can be done in parallel
-                self._update_metadata()
-                self._update_memory()
-                self._update_power()
-                self._update_network()
-                self._update_processors()
-                self._update_storage()
-                sleep(3)
+                log.debug("waiting for a lock.")
+                self.lock.acquire()
+                log.debug("lock acquired.")
+                try:
+                    self._update_system()
+                    # following calls in theory can be done in parallel
+                    self._update_metadata()
+                    self._update_memory()
+                    self._update_power()
+                    self._update_network()
+                    self._update_processors()
+                    self._update_storage()
+                    self.data_ready = True
+                    sleep(5)
+                finally:
+                    self.lock.release()
+                    log.debug("lock released.")
         # Catching 'Exception' is probably not a good idea (devel only)
         except Exception as e:
             log.error(f"Error detected, logging out from redfish api.\n{e}")
index 3942f38cb5dfcf64a53a3cdb61a048e2a49ee279..ec27e36e3db35e2ee954a30864cdbd020175e627 100644 (file)
@@ -23,6 +23,19 @@ class Reporter:
             # scenario probably we should just send the sub-parts
             # that have changed to minimize the traffic in
             # dense clusters
-            d = self.system.get_system()
-            requests.post(self.observer_url, json=d)
-            time.sleep(10)
+            if self.system.data_ready:
+                log.debug("waiting for a lock.")
+                try:
+                    self.system.lock.acquire()
+                    log.debug("lock acquired.")
+                    if not self.system.get_system() == self.system.previous_data:
+                        self.system.previous_data = self.system.get_system()
+                        log.info('data has changed since last iteration.')
+                        d = self.system.get_system()
+                        requests.post(f"{self.observer_url}/fake_endpoint", json=d)
+                    else:
+                        log.info('no diff, not sending data to the mgr.')
+                finally:
+                    self.system.lock.release()
+                    log.debug("lock released.")
+            time.sleep(20)