git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
Move internal.lock_machines to its own module
author Zack Cerza <zack@redhat.com>
Thu, 17 Mar 2016 20:27:09 +0000 (14:27 -0600)
committer Zack Cerza <zack@redhat.com>
Tue, 1 Nov 2016 22:46:54 +0000 (16:46 -0600)
Signed-off-by: Zack Cerza <zack@redhat.com>
teuthology/task/internal/__init__.py
teuthology/task/internal/lock_machines.py [new file with mode: 0644]
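
This commit extracts the lock_machines context-manager task from
teuthology/task/internal/__init__.py into its own module,
teuthology/task/internal/lock_machines.py, and switches the remaining imports
in __init__.py from relative to absolute form. A minimal sketch of referencing
the task from its new location (this assumes teuthology is importable and that
__init__.py does not re-export the symbol, which this diff does not show):

    # hypothetical usage sketch, not part of the commit
    from teuthology.task.internal.lock_machines import lock_machines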

index 092a1cf3e3de309057ad7c8823d2c92fcc1e0192..5187402d31690756b36d8f091297decb2d220ac3 100644 (file)
@@ -11,18 +11,17 @@ import time
 import yaml
 import subprocess
 
-from teuthology import lockstatus
 from teuthology import lock
+from teuthology import lockstatus
 from teuthology import misc
-from teuthology import provision
 from teuthology.packaging import get_builder_project
+from teuthology import report
+from teuthology.config import config as teuth_config
 from teuthology.exceptions import VersionNotFoundError
 from teuthology.job_status import get_status, set_status
-from teuthology.config import config as teuth_config
+from teuthology.orchestra import cluster, remote, run
 from teuthology.parallel import parallel
-from ..orchestra import cluster, remote, run
-from .. import report
-from . import ansible
+from teuthology.task import ansible
 
 log = logging.getLogger(__name__)
 
@@ -56,149 +55,6 @@ def base(ctx, config):
         )
 
 
-@contextlib.contextmanager
-def lock_machines(ctx, config):
-    """
-    Lock machines.  Called when the teuthology run finds and locks
-    new machines.  This is not called if the one has teuthology-locked
-    machines and placed those keys in the Targets section of a yaml file.
-    """
-    # It's OK for os_type and os_version to be None here.  If we're trying
-    # to lock a bare metal machine, we'll take whatever is available.  If
-    # we want a vps, defaults will be provided by misc.get_distro and
-    # misc.get_distro_version in provision.create_if_vm
-    os_type = ctx.config.get("os_type")
-    os_version = ctx.config.get("os_version")
-    arch = ctx.config.get('arch')
-    log.info('Locking machines...')
-    assert isinstance(config[0], int), 'config[0] must be an integer'
-    machine_type = config[1]
-    total_requested = config[0]
-    # We want to make sure there are always this many machines available
-    reserved = teuth_config.reserve_machines
-    assert isinstance(reserved, int), 'reserve_machines must be integer'
-    assert (reserved >= 0), 'reserve_machines should >= 0'
-
-    # change the status during the locking process
-    report.try_push_job_info(ctx.config, dict(status='waiting'))
-
-    all_locked = dict()
-    requested = total_requested
-    while True:
-        # get a candidate list of machines
-        machines = lock.list_locks(machine_type=machine_type, up=True,
-                                   locked=False, count=requested + reserved)
-        if machines is None:
-            if ctx.block:
-                log.error('Error listing machines, trying again')
-                time.sleep(20)
-                continue
-            else:
-                raise RuntimeError('Error listing machines')
-
-        # make sure there are machines for non-automated jobs to run
-        if len(machines) < reserved + requested and ctx.owner.startswith('scheduled'):
-            if ctx.block:
-                log.info(
-                    'waiting for more %s machines to be free (need %s + %s, have %s)...',
-                    machine_type,
-                    reserved,
-                    requested,
-                    len(machines),
-                )
-                time.sleep(10)
-                continue
-            else:
-                assert 0, ('not enough machines free; need %s + %s, have %s' %
-                           (reserved, requested, len(machines)))
-
-        try:
-            newly_locked = lock.lock_many(ctx, requested, machine_type,
-                                          ctx.owner, ctx.archive, os_type,
-                                          os_version, arch)
-        except Exception:
-            # Lock failures should map to the 'dead' status instead of 'fail'
-            set_status(ctx.summary, 'dead')
-            raise
-        all_locked.update(newly_locked)
-        log.info(
-            '{newly_locked} {mtype} machines locked this try, '
-            '{total_locked}/{total_requested} locked so far'.format(
-                newly_locked=len(newly_locked),
-                mtype=machine_type,
-                total_locked=len(all_locked),
-                total_requested=total_requested,
-            )
-        )
-        if len(all_locked) == total_requested:
-            vmlist = []
-            for lmach in all_locked:
-                if misc.is_vm(lmach):
-                    vmlist.append(lmach)
-            if vmlist:
-                log.info('Waiting for virtual machines to come up')
-                keys_dict = dict()
-                loopcount = 0
-                while len(keys_dict) != len(vmlist):
-                    loopcount += 1
-                    time.sleep(10)
-                    keys_dict = misc.ssh_keyscan(vmlist)
-                    log.info('virtual machine is still unavailable')
-                    if loopcount == 40:
-                        loopcount = 0
-                        log.info('virtual machine(s) still not up, ' +
-                                 'recreating unresponsive ones.')
-                        for guest in vmlist:
-                            if guest not in keys_dict.keys():
-                                log.info('recreating: ' + guest)
-                                full_name = misc.canonicalize_hostname(guest)
-                                provision.destroy_if_vm(ctx, full_name)
-                                provision.create_if_vm(ctx, full_name)
-                if lock.do_update_keys(keys_dict):
-                    log.info("Error in virtual machine keys")
-                newscandict = {}
-                for dkey in all_locked.iterkeys():
-                    stats = lockstatus.get_status(dkey)
-                    newscandict[dkey] = stats['ssh_pub_key']
-                ctx.config['targets'] = newscandict
-            else:
-                ctx.config['targets'] = all_locked
-            locked_targets = yaml.safe_dump(
-                ctx.config['targets'],
-                default_flow_style=False
-            ).splitlines()
-            log.info('\n  '.join(['Locked targets:', ] + locked_targets))
-            # successfully locked machines, change status back to running
-            report.try_push_job_info(ctx.config, dict(status='running'))
-            break
-        elif not ctx.block:
-            assert 0, 'not enough machines are available'
-        else:
-            requested = requested - len(newly_locked)
-            assert requested > 0, "lock_machines: requested counter went " \
-                                  "negative, this shouldn't happen"
-
-        log.info(
-            "{total} machines locked ({new} new); need {more} more".format(
-                total=len(all_locked), new=len(newly_locked), more=requested)
-        )
-        log.warn('Could not lock enough machines, waiting...')
-        time.sleep(10)
-    try:
-        yield
-    finally:
-        # If both unlock_on_failure and nuke-on-error are set, don't unlock now
-        # because we're just going to nuke (and unlock) later.
-        unlock_on_failure = (
-            ctx.config.get('unlock_on_failure', False)
-            and not ctx.config.get('nuke-on-error', False)
-        )
-        if get_status(ctx.summary) == 'pass' or unlock_on_failure:
-            log.info('Unlocking machines...')
-            for machine in ctx.config['targets'].iterkeys():
-                lock.unlock_one(ctx, machine, ctx.owner, ctx.archive)
-
-
 def save_config(ctx, config):
     """
     Store the config in a yaml file
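
The moved function keeps its @contextlib.contextmanager decorator, so the task
still does its locking before the yield and its unlocking in the finally block
afterwards. A minimal, self-contained sketch of that setup/teardown pattern,
using hypothetical names rather than teuthology's run machinery:

    import contextlib

    @contextlib.contextmanager
    def demo_lock_machines(count):
        # setup: stand-in for lock.lock_many() succeeding
        locked = ['node%03d' % i for i in range(count)]
        print('locked:', locked)
        try:
            yield locked              # the rest of the job runs here
        finally:
            # teardown: stand-in for unlocking each target
            print('unlocked:', locked)

    with demo_lock_machines(2) as targets:
        print('running job on', targets)
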
diff --git a/teuthology/task/internal/lock_machines.py b/teuthology/task/internal/lock_machines.py
new file mode 100644 (file)
index 0000000..fcf8d21
--- /dev/null
@@ -0,0 +1,158 @@
+import contextlib
+import logging
+import time
+import yaml
+
+from teuthology import lock
+from teuthology import lockstatus
+from teuthology import misc
+from teuthology import provision
+from teuthology import report
+
+from teuthology.config import config as teuth_config
+from teuthology.job_status import get_status, set_status
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def lock_machines(ctx, config):
+    """
+    Lock machines.  Called when the teuthology run finds and locks
+    new machines.  This is not called if the one has teuthology-locked
+    machines and placed those keys in the Targets section of a yaml file.
+    """
+    # It's OK for os_type and os_version to be None here.  If we're trying
+    # to lock a bare metal machine, we'll take whatever is available.  If
+    # we want a vps, defaults will be provided by misc.get_distro and
+    # misc.get_distro_version in provision.create_if_vm
+    os_type = ctx.config.get("os_type")
+    os_version = ctx.config.get("os_version")
+    arch = ctx.config.get('arch')
+    log.info('Locking machines...')
+    assert isinstance(config[0], int), 'config[0] must be an integer'
+    machine_type = config[1]
+    total_requested = config[0]
+    # We want to make sure there are always this many machines available
+    reserved = teuth_config.reserve_machines
+    assert isinstance(reserved, int), 'reserve_machines must be integer'
+    assert (reserved >= 0), 'reserve_machines should >= 0'
+
+    # change the status during the locking process
+    report.try_push_job_info(ctx.config, dict(status='waiting'))
+
+    all_locked = dict()
+    requested = total_requested
+    while True:
+        # get a candidate list of machines
+        machines = lock.list_locks(machine_type=machine_type, up=True,
+                                   locked=False, count=requested + reserved)
+        if machines is None:
+            if ctx.block:
+                log.error('Error listing machines, trying again')
+                time.sleep(20)
+                continue
+            else:
+                raise RuntimeError('Error listing machines')
+
+        # make sure there are machines for non-automated jobs to run
+        if len(machines) < reserved + requested and ctx.owner.startswith('scheduled'):
+            if ctx.block:
+                log.info(
+                    'waiting for more %s machines to be free (need %s + %s, have %s)...',
+                    machine_type,
+                    reserved,
+                    requested,
+                    len(machines),
+                )
+                time.sleep(10)
+                continue
+            else:
+                assert 0, ('not enough machines free; need %s + %s, have %s' %
+                           (reserved, requested, len(machines)))
+
+        try:
+            newly_locked = lock.lock_many(ctx, requested, machine_type,
+                                          ctx.owner, ctx.archive, os_type,
+                                          os_version, arch)
+        except Exception:
+            # Lock failures should map to the 'dead' status instead of 'fail'
+            set_status(ctx.summary, 'dead')
+            raise
+        all_locked.update(newly_locked)
+        log.info(
+            '{newly_locked} {mtype} machines locked this try, '
+            '{total_locked}/{total_requested} locked so far'.format(
+                newly_locked=len(newly_locked),
+                mtype=machine_type,
+                total_locked=len(all_locked),
+                total_requested=total_requested,
+            )
+        )
+        if len(all_locked) == total_requested:
+            vmlist = []
+            for lmach in all_locked:
+                if misc.is_vm(lmach):
+                    vmlist.append(lmach)
+            if vmlist:
+                log.info('Waiting for virtual machines to come up')
+                keys_dict = dict()
+                loopcount = 0
+                while len(keys_dict) != len(vmlist):
+                    loopcount += 1
+                    time.sleep(10)
+                    keys_dict = misc.ssh_keyscan(vmlist)
+                    log.info('virtual machine is still unavailable')
+                    if loopcount == 40:
+                        loopcount = 0
+                        log.info('virtual machine(s) still not up, ' +
+                                 'recreating unresponsive ones.')
+                        for guest in vmlist:
+                            if guest not in keys_dict.keys():
+                                log.info('recreating: ' + guest)
+                                full_name = misc.canonicalize_hostname(guest)
+                                provision.destroy_if_vm(ctx, full_name)
+                                provision.create_if_vm(ctx, full_name)
+                if lock.do_update_keys(keys_dict):
+                    log.info("Error in virtual machine keys")
+                newscandict = {}
+                for dkey in all_locked.iterkeys():
+                    stats = lockstatus.get_status(dkey)
+                    newscandict[dkey] = stats['ssh_pub_key']
+                ctx.config['targets'] = newscandict
+            else:
+                ctx.config['targets'] = all_locked
+            locked_targets = yaml.safe_dump(
+                ctx.config['targets'],
+                default_flow_style=False
+            ).splitlines()
+            log.info('\n  '.join(['Locked targets:', ] + locked_targets))
+            # successfully locked machines, change status back to running
+            report.try_push_job_info(ctx.config, dict(status='running'))
+            break
+        elif not ctx.block:
+            assert 0, 'not enough machines are available'
+        else:
+            requested = requested - len(newly_locked)
+            assert requested > 0, "lock_machines: requested counter went " \
+                                  "negative, this shouldn't happen"
+
+        log.info(
+            "{total} machines locked ({new} new); need {more} more".format(
+                total=len(all_locked), new=len(newly_locked), more=requested)
+        )
+        log.warn('Could not lock enough machines, waiting...')
+        time.sleep(10)
+    try:
+        yield
+    finally:
+        # If both unlock_on_failure and nuke-on-error are set, don't unlock now
+        # because we're just going to nuke (and unlock) later.
+        unlock_on_failure = (
+            ctx.config.get('unlock_on_failure', False)
+            and not ctx.config.get('nuke-on-error', False)
+        )
+        if get_status(ctx.summary) == 'pass' or unlock_on_failure:
+            log.info('Unlocking machines...')
+            for machine in ctx.config['targets'].iterkeys():
+                lock.unlock_one(ctx, machine, ctx.owner, ctx.archive)
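
For reference, the loop's gating check for scheduled jobs
(len(machines) < reserved + requested) keeps reserve_machines nodes free for
interactive users. A tiny standalone sketch of that arithmetic, with names of
my own choosing rather than teuthology's:

    def enough_free(free, requested, reserved):
        # Scheduled jobs proceed only when taking `requested` machines
        # still leaves at least `reserved` machines free for other users.
        return free >= requested + reserved

    assert enough_free(free=10, requested=6, reserved=3)
    assert not enough_free(free=8, requested=6, reserved=3)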