Avoid race condition in find_stale_locks()

author Zack Cerza <zack@redhat.com>

Fri, 13 Feb 2015 19:25:19 +0000 (12:25 -0700)

committer Zack Cerza <zack@redhat.com>

Fri, 13 Feb 2015 20:49:03 +0000 (13:49 -0700)
author Zack Cerza <zack@redhat.com>
Fri, 13 Feb 2015 19:25:19 +0000 (12:25 -0700)
committer Zack Cerza <zack@redhat.com>
Fri, 13 Feb 2015 20:49:03 +0000 (13:49 -0700)
diff --git a/teuthology/lock.py b/teuthology/lock.py

index 3e011873a17c1e6a981fbaff27f52deedb65c29c..0ac26c323774e424fbc8a4641c428081ab0a34ee 100644 (file)
--- a/teuthology/lock.py
+++ b/teuthology/lock.py
@@ -504,26 +504,24 @@ def find_stale_locks(owner=None):
          nodes = [node for node in nodes if node['locked_by'] == owner]
      nodes = filter(might_be_stale, nodes)
  
-    # What jobs are currently running?
-    url = os.path.join(config.results_server, 'runs', 'status', 'running', '')
-    resp = requests.get(url)
-    running_runs = resp.json()
-    running_jobs = []
-    for run in running_runs:
-        url = os.path.join(run['href'][0], 'jobs',
-                           '?status=running&fields=name,job_id,targets,href')
-        resp = requests.get(url)
-        jobs = resp.json()
-        running_jobs.extend(jobs)
-
-    def node_matches_job(node, job):
+    def node_job_is_active(node, cache):
          """
-        Is or was this node used by this job?
+        Is this node's job active (e.g. running or waiting)?
+
+        :param node:  The node dict as returned from the lock server
+        :param cache: A set() used for caching results
+        :returns:     True or False
          """
-        job_str = '/'.join((job['name'], job['job_id']))
-        if node['description'].endswith(job_str):
+        description = node['description']
+        if description in cache:
              return True
-        elif job['targets'] and node['name'] in job['targets'].keys():
+        (name, job_id) = description.split('/')[-2:]
+        url = os.path.join(config.results_server, 'runs', name, 'jobs', job_id,
+                           '')
+        resp = requests.get(url)
+        job_info = resp.json()
+        if job_info['status'] in ('running', 'waiting'):
+            cache.add(description)
              return True
          return False
  
@@ -531,13 +529,9 @@ def find_stale_locks(owner=None):
      # Here we build the list of of nodes that are locked, for a job (as opposed
      # to being locked manually for random monkeying), where the job is not
      # running
+    active_jobs = set()
      for node in nodes:
-        matched = False
-        for job in running_jobs:
-            if node_matches_job(node, job):
-                matched = True
-                break
-        if matched:
+        if node_job_is_active(node, active_jobs):
              continue
          result.append(node)
      return result
author	Zack Cerza <zack@redhat.com>
	Fri, 13 Feb 2015 19:25:19 +0000 (12:25 -0700)
committer	Zack Cerza <zack@redhat.com>
	Fri, 13 Feb 2015 20:49:03 +0000 (13:49 -0700)