git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/prometheus: Log collection issues
author: Boris Ranto <branto@redhat.com>
Wed, 25 Nov 2020 09:32:35 +0000 (10:32 +0100)
committer: Boris Ranto <branto@redhat.com>
Thu, 28 Jan 2021 14:54:24 +0000 (15:54 +0100)
Log any issues encountered during the data collection and continue to
collect the data anyway (after a sleep).

Signed-off-by: Boris Ranto <branto@redhat.com>
(cherry picked from commit 28a5c13bf993679e3098d73df27ded249f34dc99)

src/pybind/mgr/prometheus/module.py

index c420bd763f993630d510e5f3963bc21550da3b08..5008fcc8ece023c5144609c8473d72ac1b58aada 100644 (file)
@@ -197,7 +197,15 @@ class MetricCollectionThread(threading.Thread):
             self.mod.log.debug('collecting cache in thread')
             if self.mod.have_mon_connection():
                 start_time = time.time()
-                data = self.mod.collect()
+
+                try:
+                    data = self.mod.collect()
+                except Exception as e:
+                    # Log any issues encountered during the data collection and continue
+                    self.mod.log.exception("failed to collect metrics:")
+                    time.sleep(self.mod.scrape_interval)
+                    continue
+
                 duration = time.time() - start_time
                 self.mod.log.debug('collecting cache in thread done')