From: Kefu Chai Date: Sat, 28 Nov 2020 08:15:30 +0000 (+0800) Subject: Merge pull request #38277 from b-ranto/wip-prom-fixes X-Git-Tag: v16.1.0~451 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=fd3c0f905b4ddc20dd0f60ffb1ac5c52da940845;p=ceph.git Merge pull request #38277 from b-ranto/wip-prom-fixes mgr/prometheus: Make module more stable Reviewed-by: Mykola Golub Reviewed-by: Sebastian Wagner Reviewed-by: Kefu Chai --- fd3c0f905b4ddc20dd0f60ffb1ac5c52da940845 diff --cc src/pybind/mgr/prometheus/module.py index c122ec7e4c5e,359578e5415b..bfac1e5ab29d --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@@ -195,10 -197,19 +197,18 @@@ class MetricCollectionThread(threading. self.mod.log.debug('collecting cache in thread') if self.mod.have_mon_connection(): start_time = time.time() - data = self.mod.collect() + + try: + data = self.mod.collect() + except Exception as e: + # Log any issues encountered during the data collection and continue + self.mod.log.exception("failed to collect metrics:") + self.event.wait(self.mod.scrape_interval) + continue + duration = time.time() - start_time - self.mod.log.debug('collecting cache in thread done') - + sleep_time = self.mod.scrape_interval - duration if sleep_time < 0: self.mod.log.warning(