git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
kill: Don't unlock nodes if killing procs fails 1899/head
author Zack Cerza <zack@redhat.com>
Fri, 10 Nov 2023 22:24:21 +0000 (15:24 -0700)
committer Zack Cerza <zack@redhat.com>
Thu, 23 Nov 2023 00:44:48 +0000 (17:44 -0700)
... so that we don't unlock nodes while their jobs are running.

Signed-off-by: Zack Cerza <zack@redhat.com>
teuthology/kill.py

index 2df126d9a856371651a651d13a89ea4fdb4855ce..1ba4ea0ad57a92e464d11a786fbe750842d35a3f 100755 (executable)
@@ -70,7 +70,8 @@ def kill_run(run_name, archive_base=None, owner=None, machine_type=None,
     if not preserve_queue:
         remove_beanstalk_jobs(run_name, machine_type)
         remove_paddles_jobs(run_name)
-    kill_processes(run_name, run_info.get('pids'))
+    if kill_processes(run_name, run_info.get('pids')):
+        return
     if owner is not None:
         targets = find_targets(run_name, owner)
         nuke_targets(targets, owner)
@@ -85,7 +86,8 @@ def kill_job(run_name, job_id, archive_base=None, owner=None, skip_nuke=False):
                 "I could not figure out the owner of the requested job. "
                 "Please pass --owner <owner>.")
         owner = job_info['owner']
-    kill_processes(run_name, [job_info.get('pid')])
+    if kill_processes(run_name, [job_info.get('pid')]):
+        return
     if 'machine_type' in job_info:
         teuthology.exporter.JobResults.record(
             job_info["machine_type"],
@@ -187,6 +189,7 @@ def kill_processes(run_name, pids=None):
     if len(to_kill) == 0:
         log.info("No teuthology processes running")
     else:
+        survivors = []
         log.info("Killing Pids: " + str(to_kill))
         may_need_sudo = \
             psutil.Process(int(pid)).username() != getpass.getuser()
@@ -200,7 +203,13 @@ def kill_processes(run_name, pids=None):
             # Don't attempt to use sudo if it's not necessary
             if use_sudo:
                 args = ['sudo', '-n'] + args
-            subprocess.call(args)
+            try:
+                subprocess.check_call(args)
+            except subprocess.CalledProcessError:
+                survivors.append(pid)
+    if survivors:
+        log.error(f"Failed to kill PIDs: {survivors}")
+    return survivors
 
 
 def process_matches_run(pid, run_name):