git-server-git.apps.pok.os.sepia.ceph.com Git - teuthology.git/commitdiff
dispatcher: Use SIGTERM to stop 2173/head
author Zack Cerza <zack@cerza.org>
Wed, 29 Apr 2026 17:42:56 +0000 (11:42 -0600)
committer Zack Cerza <zack@cerza.org>
Wed, 13 May 2026 22:09:38 +0000 (16:09 -0600)
The sentinel file method was a "quick hack" that lasted years

Signed-off-by: Zack Cerza <zack@cerza.org>
systemd/teuthology-dispatcher@.service
teuthology/dispatcher/__init__.py
teuthology/dispatcher/test/test_dispatcher.py

index ca4deb3dce258dcd0318307e3b8deab7cca8fdab..d169871db56bf972b447d60ec38812980e0be026 100644 (file)
@@ -13,7 +13,6 @@ ExecStart=/home/teuthworker/src/git.ceph.com_git_teuthology_main/virtualenv/bin/
     --archive-dir /home/teuthworker/archive \
     --tube %i \
     --log-dir /home/teuthworker/archive/worker_logs
-ExecStop=touch /tmp/teuthology-stop-dispatcher
 Restart=on-failure
 TimeoutStopSec=infinity
 KillMode=process
index 6580444580981ebcbb05df0add3a7e84c2c084af..d4d36dde4f921c5ed8190899fdbda5ab3cb18e35 100644 (file)
@@ -2,6 +2,7 @@ import datetime
 import logging
 import os
 import psutil
+import signal
 import subprocess
 import sys
 import yaml
@@ -32,29 +33,8 @@ from teuthology.util.time import parse_timestamp
 from teuthology import safepath
 
 log = logging.getLogger(__name__)
-start_time = datetime.datetime.now(datetime.timezone.utc)
-restart_file_path = '/tmp/teuthology-restart-dispatcher'
-stop_file_path = '/tmp/teuthology-stop-dispatcher'
 
-
-def sentinel(path):
-    if not os.path.exists(path):
-        return False
-    file_mtime = datetime.datetime.fromtimestamp(
-        os.path.getmtime(path),
-        datetime.timezone.utc,
-    )
-    return file_mtime > start_time
-
-
-def restart(log=log):
-    log.info('Restarting...')
-    args = sys.argv[:]
-    args.insert(0, sys.executable)
-    os.execv(sys.executable, args)
-
-
-def stop():
+def stop(_sig, _frame) -> None:
     log.info('Stopping...')
     sys.exit(0)
 
@@ -72,6 +52,7 @@ def load_config(archive_dir=None):
 
 
 def main(args):
+    signal.signal(signal.SIGTERM, stop)
     archive_dir = args.archive_dir or teuth_config.archive_base
 
     # Refuse to start more than one dispatcher per machine type
@@ -105,11 +86,6 @@ def main(args):
 
     while keep_running:
         try:
-            if sentinel(restart_file_path):
-                restart()
-            elif sentinel(stop_file_path):
-                stop()
-
             load_config()
             for proc in list(job_procs):
                 rc = proc.poll()
index afb2a965b1e586ba1cfed06e5a427b15fb83c2d1..cf35aff595e5efdd1db0f8474681e6322c832f33 100644 (file)
@@ -18,28 +18,6 @@ class TestDispatcher(object):
         self.ctx.log_dir = str(tmp_path / "log/dir")
         self.ctx.tube = 'tube'
 
-    @patch("os.path.exists")
-    def test_restart_file_path_doesnt_exist(self, m_exists):
-        m_exists.return_value = False
-        result = dispatcher.sentinel(dispatcher.restart_file_path)
-        assert not result
-
-    @patch("os.path.getmtime")
-    @patch("os.path.exists")
-    def test_needs_restart(self, m_exists, m_getmtime):
-        m_exists.return_value = True
-        now = datetime.datetime.now(datetime.timezone.utc)
-        m_getmtime.return_value = (now + datetime.timedelta(days=1)).timestamp()
-        assert dispatcher.sentinel(dispatcher.restart_file_path)
-
-    @patch("os.path.getmtime")
-    @patch("os.path.exists")
-    def test_does_not_need_restart(self, m_exists, m_getmtime):
-        m_exists.return_value = True
-        now = datetime.datetime.now(datetime.timezone.utc)
-        m_getmtime.return_value = (now - datetime.timedelta(days=1)).timestamp()
-        assert not dispatcher.sentinel(dispatcher.restart_file_path)
-
     @patch("teuthology.repo_utils.ls_remote")
     @patch("os.path.isdir")
     @patch("teuthology.repo_utils.fetch_teuthology")