From 9bbe87159a75a75196cc691f897ebe920dbd6864 Mon Sep 17 00:00:00 2001
From: Guillaume Abrioux
Date: Wed, 7 Jun 2023 14:23:57 +0200
Subject: [PATCH] node-proxy: catch RequestException in reporter

This catches the requests.exceptions.RequestException exception in the
reporter agent so we can better handle the case where it can't reach
the endpoint when trying to send the collected data.

Before this change, if for some reason the refreshed data couldn't be
sent to the endpoint, it wouldn't have retried because
`self.system.previous_data` was overwritten anyway.

Signed-off-by: Guillaume Abrioux
(cherry picked from commit 6d9198519d7b0d51e00d785d7be1f06e2e7509e3)
---
 src/cephadm/node-proxy/reporter.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/src/cephadm/node-proxy/reporter.py b/src/cephadm/node-proxy/reporter.py
index a454b8a5bb740..c20195b535d4d 100644
--- a/src/cephadm/node-proxy/reporter.py
+++ b/src/cephadm/node-proxy/reporter.py
@@ -28,17 +28,21 @@ class Reporter:
             # dense clusters
             if self.system.data_ready:
                 log.debug("waiting for a lock.")
-                try:
-                    self.system.lock.acquire()
-                    log.debug("lock acquired.")
-                    if not self.system.get_system() == self.system.previous_data:
-                        self.system.previous_data = self.system.get_system()
-                        log.info('data has changed since last iteration.')
-                        d = self.system.get_system()
+                self.system.lock.acquire()
+                log.debug("lock acquired.")
+                if not self.system.get_system() == self.system.previous_data:
+                    log.info('data has changed since last iteration.')
+                    d = self.system.get_system()
+                    try:
                         requests.post(f"{self.observer_url}/fake_endpoint", json=d)
+                    except requests.exceptions.RequestException as e:
+                        log.error(f"The reporter couldn't send data to the mgr: {e}")
+                        # Need to add a new parameter 'max_retries' to the reporter if it can't
+                        # send the data for more than x times, maybe the daemon should stop altogether
                     else:
-                        log.info('no diff, not sending data to the mgr.')
-                finally:
-                    self.system.lock.release()
-                    log.debug("lock released.")
-            time.sleep(20)
+                        self.system.previous_data = self.system.get_system()
+                else:
+                    log.info('no diff, not sending data to the mgr.')
+                self.system.lock.release()
+                log.debug("lock released.")
+            time.sleep(5)
-- 
2.39.5