From dbd6da650293da1feae02252268983fccd8b7935 Mon Sep 17 00:00:00 2001 From: Boris Ranto Date: Wed, 25 Nov 2020 10:32:35 +0100 Subject: [PATCH] mgr/prometheus: Log collection issues Log any issues encountered during the data collection and continue to collect the data anyway (after a sleep). Signed-off-by: Boris Ranto (cherry picked from commit 28a5c13bf993679e3098d73df27ded249f34dc99) --- src/pybind/mgr/prometheus/module.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index c420bd763f993..5008fcc8ece02 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -197,7 +197,15 @@ class MetricCollectionThread(threading.Thread): self.mod.log.debug('collecting cache in thread') if self.mod.have_mon_connection(): start_time = time.time() - data = self.mod.collect() + + try: + data = self.mod.collect() + except Exception as e: + # Log any issues encountered during the data collection and continue + self.mod.log.exception("failed to collect metrics:") + time.sleep(self.mod.scrape_interval) + continue + duration = time.time() - start_time self.mod.log.debug('collecting cache in thread done') -- 2.39.5