From 8fb4cb806d056f00a5331609c93519d22d7a248d Mon Sep 17 00:00:00 2001 From: Guillaume Abrioux Date: Mon, 22 May 2023 14:20:54 +0200 Subject: [PATCH] node-proxy: add a lock mechanism The loop in the reporter agent has to wait until all the data has been collected before checking it and pushing it to the ceph-mgr (if needed). The idea is to use the lock mechanism offered by Python's threading module. Signed-off-by: Guillaume Abrioux (cherry picked from commit fe03bf3676ee2b351a0155491bc5eb4bb7b3d1a3) --- src/cephadm/node-proxy/redfish_system.py | 31 ++++++++++++++++-------- src/cephadm/node-proxy/reporter.py | 19 ++++++++++++--- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/src/cephadm/node-proxy/redfish_system.py b/src/cephadm/node-proxy/redfish_system.py index 94c66827743..7c6ebe6de73 100644 --- a/src/cephadm/node-proxy/redfish_system.py +++ b/src/cephadm/node-proxy/redfish_system.py @@ -1,6 +1,6 @@ from system import System from redfish_client import RedFishClient -from threading import Thread +from threading import Thread, Lock from time import sleep from util import logger @@ -19,6 +19,9 @@ class RedfishSystem(System): self.run = False self.thread = None self.start_client() + self.data_ready = False + self.previous_data = {} + self.lock = Lock() def start_client(self): log.info(f"redfish system initialization, host: {self.host}, user: {self.username}") @@ -85,15 +88,23 @@ class RedfishSystem(System): # - caching logic try: while self.run: - self._update_system() - # following calls in theory can be done in parallel - self._update_metadata() - self._update_memory() - self._update_power() - self._update_network() - self._update_processors() - self._update_storage() - sleep(3) + log.debug("waiting for a lock.") + self.lock.acquire() + log.debug("lock acquired.") + try: + self._update_system() + # following calls in theory can be done in parallel + self._update_metadata() + self._update_memory() + self._update_power() + self._update_network() +
self._update_processors() + self._update_storage() + self.data_ready = True + sleep(5) + finally: + self.lock.release() + log.debug("lock released.") # Catching 'Exception' is probably not a good idea (devel only) except Exception as e: log.error(f"Error detected, logging out from redfish api.\n{e}") diff --git a/src/cephadm/node-proxy/reporter.py b/src/cephadm/node-proxy/reporter.py index 3942f38cb5d..ec27e36e3db 100644 --- a/src/cephadm/node-proxy/reporter.py +++ b/src/cephadm/node-proxy/reporter.py @@ -23,6 +23,19 @@ class Reporter: # scenario probably we should just send the sub-parts # that have changed to minimize the traffic in # dense clusters - d = self.system.get_system() - requests.post(self.observer_url, json=d) - time.sleep(10) + if self.system.data_ready: + log.debug("waiting for a lock.") + try: + self.system.lock.acquire() + log.debug("lock acquired.") + if not self.system.get_system() == self.system.previous_data: + self.system.previous_data = self.system.get_system() + log.info('data has changed since last iteration.') + d = self.system.get_system() + requests.post(f"{self.observer_url}/fake_endpoint", json=d) + else: + log.info('no diff, not sending data to the mgr.') + finally: + self.system.lock.release() + log.debug("lock released.") + time.sleep(20) -- 2.39.5