From 5da1018373c54c61a074f1199fe18cd97e07edfb Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Wed, 15 Aug 2018 11:27:43 -0500
Subject: [PATCH] mgr/devicehealth: scrape at scrape_interval intervals

Also, align the timing to the interval to avoid drift, and restructure
the loop so that we do the work immediately on startup without an
initial sleep.

Signed-off-by: Sage Weil
---
 src/pybind/mgr/devicehealth/module.py | 50 ++++++++++++++++++++-------
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/src/pybind/mgr/devicehealth/module.py b/src/pybind/mgr/devicehealth/module.py
index c2fd43717de71..f75969b96bab5 100644
--- a/src/pybind/mgr/devicehealth/module.py
+++ b/src/pybind/mgr/devicehealth/module.py
@@ -149,27 +149,51 @@ class Module(MgrModule):
 
     def serve(self):
         self.log.info("Starting")
+
+        last_scrape = None
+        ls = self.get_store('last_scrape')
+        if ls:
+            try:
+                last_scrape = datetime.strptime(ls, TIME_FORMAT)
+            except ValueError as e:
+                pass
+        self.log.debug('Last scrape %s', last_scrape)
+
         while self.run:
             self.refresh_config()
 
+            if self.enable_monitoring:
+                self.log.debug('Running')
+                self.check_health()
+
+                now = datetime.utcnow()
+                if not last_scrape:
+                    next_scrape = now
+                else:
+                    # align to scrape interval
+                    scrape_frequency = int(self.scrape_frequency) or 86400
+                    seconds = (last_scrape - datetime.utcfromtimestamp(0)).total_seconds()
+                    seconds -= seconds % scrape_frequency
+                    seconds += scrape_frequency
+                    next_scrape = datetime.utcfromtimestamp(seconds)
+                if last_scrape:
+                    self.log.debug('Last scrape %s, next scrape due %s',
+                                   last_scrape.strftime(TIME_FORMAT),
+                                   next_scrape.strftime(TIME_FORMAT))
+                else:
+                    self.log.debug('Last scrape never, next scrape due %s',
+                                   next_scrape.strftime(TIME_FORMAT))
+                if now >= next_scrape:
+                    self.scrape_all()
+                    last_scrape = now
+                    self.set_store('last_scrape', last_scrape.strftime(TIME_FORMAT))
+
+            # sleep
             sleep_interval = int(self.sleep_interval) or 60
-
             self.log.debug('Sleeping for %d seconds', sleep_interval)
             ret = self.event.wait(sleep_interval)
             self.event.clear()
 
-            # in case 'wait' was interrupted, it could mean config was changed
-            # by the user; go back and read config vars
-            if ret:
-                continue
-
-            self.log.debug('Waking up [%s]',
-                           "active" if self.enable_monitoring else "inactive")
-            if not self.enable_monitoring:
-                continue
-            self.log.debug('Running')
-            self.check_health()
-
     def shutdown(self):
         self.log.info('Stopping')
         self.run = False
-- 
2.39.5