mgr/prometheus: Clean up collection thread

author Boris Ranto <branto@redhat.com>

Wed, 25 Nov 2020 09:25:49 +0000 (10:25 +0100)

committer Boris Ranto <branto@redhat.com>

Mon, 30 Nov 2020 10:28:08 +0000 (11:28 +0100)
author Boris Ranto <branto@redhat.com>
Wed, 25 Nov 2020 09:25:49 +0000 (10:25 +0100)
committer Boris Ranto <branto@redhat.com>
Mon, 30 Nov 2020 10:28:08 +0000 (11:28 +0100)
diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py

index 25b36c331c6ae19d27aabeded683cfe2bf509797..3f2bafd8138dfca62c294413693fd10264ef7403 100644 (file)
--- a/src/pybind/mgr/prometheus/module.py
+++ b/src/pybind/mgr/prometheus/module.py
@@ -178,42 +178,46 @@ class Metric(object):
  
  
  class MetricCollectionThread(threading.Thread):
-    def __init__(self):
+    def __init__(self, module):
+        # type: (Module) -> None
+        self.mod = module
+        self.active = True
          super(MetricCollectionThread, self).__init__(target=self.collect)
  
-    @staticmethod
-    def collect():
-        inst = _global_instance
-        inst.log.info('starting metric collection thread')
-        while True:
-            if inst.have_mon_connection():
+    def collect(self):
+        self.mod.log.info('starting metric collection thread')
+        while self.active:
+            self.mod.log.debug('collecting cache in thread')
+            if self.mod.have_mon_connection():
                  start_time = time.time()
-                data = inst.collect()
+                data = self.mod.collect()
                  duration = time.time() - start_time
                  
-                sleep_time = inst.scrape_interval - duration
+                sleep_time = self.mod.scrape_interval - duration
                  if sleep_time < 0:
-                    inst.log.warning(
+                    self.mod.log.warning(
                          'Collecting data took more time than configured scrape interval. '
                          'This possibly results in stale data. Please check the '
                          '`stale_cache_strategy` configuration option. '
                          'Collecting data took {:.2f} seconds but scrape interval is configured '
                          'to be {:.0f} seconds.'.format(
                              duration,
-                            inst.scrape_interval,
+                            self.mod.scrape_interval,
                          )
                      )
                      sleep_time = 0
  
-                with inst.collect_lock:
-                    inst.collect_cache = data
-                    inst.collect_time = duration
+                with self.mod.collect_lock:
+                    self.mod.collect_cache = data
+                    self.mod.collect_time = duration
  
                  time.sleep(sleep_time)
              else:
-                inst.log.error('No MON connection')
-                time.sleep(inst.scrape_interval)
+                self.mod.log.error('No MON connection')
+                time.sleep(self.mod.scrape_interval)
  
+    def stop(self):
+        self.active = False
  
  class Module(MgrModule):
      COMMANDS = [
@@ -265,7 +269,7 @@ class Module(MgrModule):
          }
          global _global_instance
          _global_instance = self
-        MetricCollectionThread().start()
+        self.metrics_thread = MetricCollectionThread(_global_instance)
  
      def _setup_static_metrics(self):
          metrics = {}
@@ -1169,6 +1173,8 @@ class Module(MgrModule):
              (server_addr, server_port)
          )
  
+        self.metrics_thread.start()
+
          # Publish the URI that others may use to access the service we're
          # about to start serving
          self.set_uri('http://{0}:{1}/'.format(
@@ -1188,9 +1194,13 @@ class Module(MgrModule):
          # wait for the shutdown event
          self.shutdown_event.wait()
          self.shutdown_event.clear()
+        # tell metrics collection thread to stop collecting new metrics
+        self.metrics_thread.stop()
          cherrypy.engine.stop()
          self.log.info('Engine stopped.')
          self.shutdown_rbd_stats()
+        # wait for the metrics collection thread to stop
+        self.metrics_thread.join()
  
      def shutdown(self):
          self.log.info('Stopping engine...')
author	Boris Ranto <branto@redhat.com>
	Wed, 25 Nov 2020 09:25:49 +0000 (10:25 +0100)
committer	Boris Ranto <branto@redhat.com>
	Mon, 30 Nov 2020 10:28:08 +0000 (11:28 +0100)