git-server-git.apps.pok.os.sepia.ceph.com Git - teuthology.git/commitdiff
locker: try to make up for apache timeouts
author Josh Durgin <josh.durgin@inktank.com>
Fri, 29 Mar 2013 23:33:49 +0000 (16:33 -0700)
committer Josh Durgin <josh.durgin@inktank.com>
Fri, 29 Mar 2013 23:34:15 +0000 (16:34 -0700)
If the lock request succeeds in updating the db, but the client gets a
timeout from apache, they can now try again and get back the machines
they just locked.

Only automatic runs have a description set when locking several
machines, so this does not affect users of teuthology-lock
--lock-many, where no description can be set in the same request.

Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
teuthology/locker/api.py

index 80e89764088220025f4327a1bc510a461548d6b5..e1f11c63ed2ddcfca5a7c6ad9675bdcaddaab185 100644 (file)
@@ -87,12 +87,34 @@ class Lock:
             raise web.BadRequest()
 
         tries = 0
+        check_existing = True
         while True:
             try:
                 # transaction will be rolled back if an exception is raised
                 with DB.transaction():
-                    results = list(DB.select('machine', machinetype, what='name, sshpubkey',
-                                             where='locked = false AND up = true AND type =$machinetype',
+                    if desc is not None and check_existing:
+                        # if a description is provided, treat it as a
+                        # key for locking in case the same run locked
+                        # machines in the db successfully before, but
+                        # the web server reported failure to it
+                        # because the request took too long. Only try
+                        # this once per request.
+                        check_existing = False
+                        results = list(DB.select('machine',
+                                                 machinetype, desc, user,
+                                                 what='name, sshpubkey',
+                                                 where='locked = true AND up = true AND type = $machinetype AND description = $desc AND locked_by = $user',
+                                                 limit=num))
+                        if len(results) == num:
+                            name_keys = {}
+                            for row in results:
+                                name_keys[row.name] = row.sshpubkey
+                            print 'reusing machines', name_keys.keys()
+                            break
+
+                    results = list(DB.select('machine', machinetype,
+                                             what='name, sshpubkey',
+                                             where='locked = false AND up = true AND type = $machinetype',
                                              limit=num))
                     if len(results) < num:
                         raise web.HTTPError(status='503 Service Unavailable')