From: Boris Ranto
Date: Wed, 25 Nov 2020 09:32:35 +0000 (+0100)
Subject: mgr/prometheus: Log collection issues
X-Git-Tag: v14.2.17~45^2~2^2~2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f0a80514c97dc19b9c0f0d2cb01da39a708e7296;p=ceph.git

mgr/prometheus: Log collection issues

Log any issues encountered during the data collection and continue to
collect the data anyway (after a sleep).

Signed-off-by: Boris Ranto
(cherry picked from commit 28a5c13bf993679e3098d73df27ded249f34dc99)
---

diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py
index e58a9b8df2b..02157f215e3 100644
--- a/src/pybind/mgr/prometheus/module.py
+++ b/src/pybind/mgr/prometheus/module.py
@@ -190,7 +190,15 @@ class MetricCollectionThread(threading.Thread):
             self.mod.log.debug('collecting cache in thread')
             if self.mod.have_mon_connection():
                 start_time = time.time()
-                data = self.mod.collect()
+
+                try:
+                    data = self.mod.collect()
+                except Exception as e:
+                    # Log any issues encountered during the data collection and continue
+                    self.mod.log.exception("failed to collect metrics:")
+                    time.sleep(self.mod.scrape_interval)
+                    continue
+
                 duration = time.time() - start_time
                 sleep_time = self.mod.scrape_interval - duration