From: Zack Cerza Date: Tue, 13 Jun 2023 23:49:48 +0000 (-0600) Subject: exporter: Restart every 24h X-Git-Tag: 1.2.0~96^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=35fc1b03dee1f5d7a8d1c25b9a9beabc48ad1665;p=teuthology.git exporter: Restart every 24h A design limitation of prometheus-client's multiprocessing mode is that each process creates files to store its own metrics; the exporter then has to read each file, even if the process which created it is dead. This results in request latency growing over time, to the point of multiple seconds when the file count gets into the thousands. This eventually results in prometheus failing to fetch, leaving gaps in our data. We can work around this by restarting at a regular interval; 24h seems like a fine place to start. Signed-off-by: Zack Cerza --- diff --git a/teuthology/dispatcher/__init__.py b/teuthology/dispatcher/__init__.py index 4ec6fc90b..3f46a74d4 100644 --- a/teuthology/dispatcher/__init__.py +++ b/teuthology/dispatcher/__init__.py @@ -39,7 +39,7 @@ def sentinel(path): return file_mtime > start_time -def restart(): +def restart(log=log): log.info('Restarting...') args = sys.argv[:] args.insert(0, sys.executable) diff --git a/teuthology/exporter.py b/teuthology/exporter.py index b5986de14..b688d4d75 100644 --- a/teuthology/exporter.py +++ b/teuthology/exporter.py @@ -47,6 +47,7 @@ class TeuthologyExporter: JobProcesses(), Nodes(), ] + self._created_time = time.perf_counter() def start(self): start_http_server(self.port, registry=registry) @@ -63,6 +64,8 @@ class TeuthologyExporter: while True: try: before = time.perf_counter() + if before - self._created_time > 24 * 60 * 60: + self.restart() try: self.update() except Exception: @@ -79,6 +82,11 @@ class TeuthologyExporter: log.info("Stopping.") raise SystemExit + def restart(self): + # Use the dispatcher's restart function - note that by using this here, + # it restarts the exporter, *not* the dispatcher. + return teuthology.dispatcher.restart(log=log) + class TeuthologyMetric: def __init__(self):