From: Guillaume Abrioux
Date: Wed, 7 Jun 2023 12:23:57 +0000 (+0200)
Subject: node-proxy: catch RequestException in reporter
X-Git-Tag: testing/wip-pdonnell-testing-20240430.123648-reef-debug~291^2~98
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=9bbe87159a75a75196cc691f897ebe920dbd6864;p=ceph-ci.git

node-proxy: catch RequestException in reporter

This catches the requests.exceptions.RequestException exception
in the reporter agent so we can better handle the case where it
can't reach the endpoint when trying to send the collected data.

Before this change, if for some reason the refreshed data couldn't be
sent to the endpoint, it wouldn't have retried because
`self.system.previous_data` was overwritten anyway.

Signed-off-by: Guillaume Abrioux
(cherry picked from commit 6d9198519d7b0d51e00d785d7be1f06e2e7509e3)
---

diff --git a/src/cephadm/node-proxy/reporter.py b/src/cephadm/node-proxy/reporter.py
index a454b8a5bb7..c20195b535d 100644
--- a/src/cephadm/node-proxy/reporter.py
+++ b/src/cephadm/node-proxy/reporter.py
@@ -28,17 +28,21 @@ class Reporter:
             # dense clusters
             if self.system.data_ready:
                 log.debug("waiting for a lock.")
-                try:
-                    self.system.lock.acquire()
-                    log.debug("lock acquired.")
-                    if not self.system.get_system() == self.system.previous_data:
-                        self.system.previous_data = self.system.get_system()
-                        log.info('data has changed since last iteration.')
-                        d = self.system.get_system()
+                self.system.lock.acquire()
+                log.debug("lock acquired.")
+                if not self.system.get_system() == self.system.previous_data:
+                    log.info('data has changed since last iteration.')
+                    d = self.system.get_system()
+                    try:
                         requests.post(f"{self.observer_url}/fake_endpoint", json=d)
+                    except requests.exceptions.RequestException as e:
+                        log.error(f"The reporter couldn't send data to the mgr: {e}")
+                        # Need to add a new parameter 'max_retries' to the reporter if it can't
+                        # send the data for more than x times, maybe the daemon should stop altogether
                     else:
-                        log.info('no diff, not sending data to the mgr.')
-                finally:
-                    self.system.lock.release()
-                    log.debug("lock released.")
-            time.sleep(20)
+                        self.system.previous_data = self.system.get_system()
+                else:
+                    log.info('no diff, not sending data to the mgr.')
+                self.system.lock.release()
+                log.debug("lock released.")
+            time.sleep(5)