From: Boris Ranto
Date: Wed, 25 Nov 2020 09:32:35 +0000 (+0100)
Subject: mgr/prometheus: Log collection issues
X-Git-Tag: v16.1.0~451^2~1
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=28a5c13bf993679e3098d73df27ded249f34dc99;p=ceph.git

mgr/prometheus: Log collection issues

Log any issues encountered during the data collection and continue to
collect the data anyway (after a sleep).

Signed-off-by: Boris Ranto
---

diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py
index 763a65ce1f22..c5ec065853b2 100644
--- a/src/pybind/mgr/prometheus/module.py
+++ b/src/pybind/mgr/prometheus/module.py
@@ -196,7 +196,15 @@ class MetricCollectionThread(threading.Thread):
             self.mod.log.debug('collecting cache in thread')
             if self.mod.have_mon_connection():
                 start_time = time.time()
-                data = self.mod.collect()
+
+                try:
+                    data = self.mod.collect()
+                except Exception as e:
+                    # Log any issues encountered during the data collection and continue
+                    self.mod.log.exception("failed to collect metrics:")
+                    time.sleep(self.mod.scrape_interval)
+                    continue
+
                 duration = time.time() - start_time
                 self.mod.log.debug('collecting cache in thread done')