git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
dispatcher: Do not time out when locking machines
author Zack Cerza <zack@redhat.com>
Tue, 28 Feb 2023 00:39:25 +0000 (17:39 -0700)
committer Zack Cerza <zack@redhat.com>
Tue, 28 Feb 2023 00:39:27 +0000 (17:39 -0700)
Signed-off-by: Zack Cerza <zack@redhat.com>
teuthology/dispatcher/__init__.py
teuthology/lock/ops.py
teuthology/lock/query.py

index 273f747e278ed30a3a44e7514e90a40501a6e15f..9aea132dde6a02457726f96a241d3d2d45a597cb 100644 (file)
@@ -220,8 +220,13 @@ def find_dispatcher_processes(machine_type):
 def lock_machines(job_config):
     report.try_push_job_info(job_config, dict(status='running'))
     fake_ctx = supervisor.create_fake_context(job_config, block=True)
-    block_and_lock_machines(fake_ctx, len(job_config['roles']),
-                            job_config['machine_type'], reimage=False)
+    block_and_lock_machines(
+        fake_ctx,
+        len(job_config['roles']),
+        job_config['machine_type'],
+        tries=-1,
+        reimage=False,
+    )
     job_config = fake_ctx.config
     return job_config
 
index b0c7d8033fbd9c58d5ebf8b3ffa3a162975af40d..5ab995ad7c679c4927df8d722460676c2f5a1314 100644 (file)
@@ -327,7 +327,7 @@ def reimage_machines(ctx, machines, machine_type):
     return reimaged
 
 
-def block_and_lock_machines(ctx, total_requested, machine_type, reimage=True):
+def block_and_lock_machines(ctx, total_requested, machine_type, reimage=True, tries=10):
     # It's OK for os_type and os_version to be None here.  If we're trying
     # to lock a bare metal machine, we'll take whatever is available.  If
     # we want a vps, defaults will be provided by misc.get_distro and
@@ -347,8 +347,13 @@ def block_and_lock_machines(ctx, total_requested, machine_type, reimage=True):
     requested = total_requested
     while True:
         # get a candidate list of machines
-        machines = query.list_locks(machine_type=machine_type, up=True,
-                                    locked=False, count=requested + reserved)
+        machines = query.list_locks(
+            machine_type=machine_type,
+            up=True,
+            locked=False,
+            count=requested + reserved,
+            tries=tries,
+        )
         if machines is None:
             if ctx.block:
                 log.error('Error listing machines, trying again')
index bb1044c2b343032acf43a3ee899079bcebe75914..9fd09d9abe6efb5da37af1d81e66e12e77c1add8 100644 (file)
@@ -51,7 +51,7 @@ def is_vm(name=None, status=None):
     return status.get('is_vm', False)
 
 
-def list_locks(keyed_by_name=False, **kwargs):
+def list_locks(keyed_by_name=False, tries=10, **kwargs):
     uri = os.path.join(config.lock_server, 'nodes', '')
     for key, value in kwargs.items():
         if kwargs[key] is False:
@@ -63,7 +63,11 @@ def list_locks(keyed_by_name=False, **kwargs):
             kwargs['machine_type'] = kwargs['machine_type'].replace(',','|')
         uri += '?' + urlencode(kwargs)
     with safe_while(
-            sleep=1, increment=0.5, action='list_locks') as proceed:
+            sleep=1,
+            increment=0.5,
+            tries=-1,
+            action='list_locks'
+    ) as proceed:
         while proceed():
             try:
                 response = requests.get(uri)