From 9b718e17b75f07e5022619c825a342829d3d0aa9 Mon Sep 17 00:00:00 2001
From: Boris Ranto
Date: Wed, 25 Nov 2020 10:25:49 +0100
Subject: [PATCH] mgr/prometheus: Clean up collection thread

We need to clean up the metrics collection thread.

Signed-off-by: Boris Ranto
(cherry picked from commit 03fcaccafc877d10a894b1c39af5547f172c1ed3)

Conflicts:
    prometheus/module.py: Pass _global_instance as an argument to
    MetricCollectionThread, collect can't be a static function anymore
---
 src/pybind/mgr/prometheus/module.py | 44 ++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py
index 25b36c331c6ae..3f2bafd8138df 100644
--- a/src/pybind/mgr/prometheus/module.py
+++ b/src/pybind/mgr/prometheus/module.py
@@ -178,42 +178,46 @@ class Metric(object):
 
 
 class MetricCollectionThread(threading.Thread):
-    def __init__(self):
+    def __init__(self, module):
+        # type: (Module) -> None
+        self.mod = module
+        self.active = True
         super(MetricCollectionThread, self).__init__(target=self.collect)
 
-    @staticmethod
-    def collect():
-        inst = _global_instance
-        inst.log.info('starting metric collection thread')
-        while True:
-            if inst.have_mon_connection():
+    def collect(self):
+        self.mod.log.info('starting metric collection thread')
+        while self.active:
+            self.mod.log.debug('collecting cache in thread')
+            if self.mod.have_mon_connection():
                 start_time = time.time()
-                data = inst.collect()
+                data = self.mod.collect()
                 duration = time.time() - start_time
 
-                sleep_time = inst.scrape_interval - duration
+                sleep_time = self.mod.scrape_interval - duration
                 if sleep_time < 0:
-                    inst.log.warning(
+                    self.mod.log.warning(
                         'Collecting data took more time than configured scrape interval. '
                         'This possibly results in stale data. Please check the '
                         '`stale_cache_strategy` configuration option. '
                         'Collecting data took {:.2f} seconds but scrape interval is configured '
                         'to be {:.0f} seconds.'.format(
                             duration,
-                            inst.scrape_interval,
+                            self.mod.scrape_interval,
                         )
                     )
                     sleep_time = 0
 
-                with inst.collect_lock:
-                    inst.collect_cache = data
-                    inst.collect_time = duration
+                with self.mod.collect_lock:
+                    self.mod.collect_cache = data
+                    self.mod.collect_time = duration
 
                 time.sleep(sleep_time)
             else:
-                inst.log.error('No MON connection')
-                time.sleep(inst.scrape_interval)
+                self.mod.log.error('No MON connection')
+                time.sleep(self.mod.scrape_interval)
 
+    def stop(self):
+        self.active = False
 
 class Module(MgrModule):
     COMMANDS = [
@@ -265,7 +269,7 @@ class Module(MgrModule):
         }
         global _global_instance
         _global_instance = self
-        MetricCollectionThread().start()
+        self.metrics_thread = MetricCollectionThread(_global_instance)
 
     def _setup_static_metrics(self):
         metrics = {}
@@ -1169,6 +1173,8 @@ class Module(MgrModule):
             (server_addr, server_port)
         )
 
+        self.metrics_thread.start()
+
         # Publish the URI that others may use to access the service we're
         # about to start serving
         self.set_uri('http://{0}:{1}/'.format(
@@ -1188,9 +1194,13 @@ class Module(MgrModule):
         # wait for the shutdown event
         self.shutdown_event.wait()
         self.shutdown_event.clear()
+        # tell metrics collection thread to stop collecting new metrics
+        self.metrics_thread.stop()
         cherrypy.engine.stop()
         self.log.info('Engine stopped.')
         self.shutdown_rbd_stats()
+        # wait for the metrics collection thread to stop
+        self.metrics_thread.join()
 
     def shutdown(self):
         self.log.info('Stopping engine...')
-- 
2.39.5