]> git-server-git.apps.pok.os.sepia.ceph.com Git - teuthology.git/commitdiff
Allow locking nodes with mixed OSes
author  Zack Cerza <zack@redhat.com>
Tue, 14 Mar 2017 19:13:13 +0000 (13:13 -0600)
committer  Zack Cerza <zack@redhat.com>
Tue, 21 Mar 2017 19:38:57 +0000 (13:38 -0600)
Instead of either not specifying an OS type/version and just getting
what's available - or requesting the same OS type/version for all nodes
in the job, allow requesting arbitrary mixes of OSes.

The path forward is going to be replacing things like:
roles:
- ['osd.0', 'mon.a']
- ['osd.1', 'osd.2']
- ['client.0']

With things like:
nodes:
- os_type: ubuntu
  os_version: '16.04'
  roles: ['osd.0', 'mon.a']
- os_type: ubuntu
  os_version: '14.04'
  roles: ['osd.1', 'osd.2']
- os_type: centos
  os_version: '7.3'
  roles: ['client.0']

Signed-off-by: Zack Cerza <zack@redhat.com>
teuthology/run.py
teuthology/task/internal/lock_machines.py
teuthology/test/test_run.py

index eda7e43c3ebf9c7166251f117d351b45f3c47aed..3107c83b6189a9636774c0918c4b6341ffce81bd 100644 (file)
@@ -4,6 +4,7 @@ import StringIO
 import contextlib
 import sys
 import logging
+from collections import OrderedDict
 from traceback import format_tb
 
 import teuthology
@@ -122,13 +123,13 @@ def setup_config(config_paths):
         job_id = str(job_id)
         config['job_id'] = job_id
 
-    # targets must be >= than roles
-    if 'targets' in config and 'roles' in config:
+    # targets must be >= than nodes
+    if 'targets' in config and 'nodes' in config:
         targets = len(config['targets'])
-        roles = len(config['roles'])
-        assert targets >= roles, \
-            '%d targets are needed for all roles but found %d listed.' % (
-                roles, targets)
+        nodes = len(config['nodes'])
+        assert targets >= nodes, \
+            '%d targets are needed for all nodes but found %d listed.' % (
+                nodes, targets)
 
     return config
 
@@ -179,26 +180,89 @@ def validate_tasks(config):
     return config["tasks"]
 
 
+def get_nodes_request(config, machine_type):
+    """
+    Examine each item in a job's 'nodes' stanza. Consolidate those with the
+    same requirements into 'node requests' so that we may later call
+    lock_many() as few times as possible.
+
+    Each resulting request contains role lists, each of which will be mapped to
+    a target when the machines are locked.
+    """
+    request = list()
+    os_specs = OrderedDict()
+    # Group node confs by 'spec'. We use an OrderedDict to contain them
+    # briefly, because it combines the functionality of an ordered set with
+    # key-value storage.
+    for item in config:
+        item['arch'] = item.get('arch')
+        item['machine_type'] = item.get('machine_type', machine_type)
+        spec_key = (
+            item.get('os_type'),
+            item.get('os_version'),
+            item.get('arch'),
+            item.get('machine_type'),
+        )
+        spec_roles = os_specs.get(spec_key, list())
+        assert isinstance(spec_roles, list)
+        spec_roles.append(item['roles'])
+        os_specs[spec_key] = spec_roles
+
+    # Build a 'request' for each 'spec'
+    for spec, roles in os_specs.items():
+        os_type, os_version, arch, machine_type = spec
+        request.append(dict(
+            os_type=os_type or None,
+            os_version=os_version or None,
+            arch=arch or None,
+            machine_type=machine_type,
+            roles=roles,
+        ))
+    return request
+
+
 def get_initial_tasks(lock, config, machine_type):
     init_tasks = [
         {'internal.check_packages': None},
         {'internal.buildpackages_prep': None},
     ]
-    if 'roles' in config and lock:
-        msg = ('You cannot specify targets in a config file when using the ' +
-               '--lock option')
-        assert 'targets' not in config, msg
-        init_tasks.append({'internal.lock_machines': (
-            len(config['roles']), machine_type)})
+
+    target_conflict_msg = 'You cannot specify targets in a config file when ' \
+        'using the --lock option'
+    if lock and ('roles' in config or 'nodes' in config):
+        assert 'targets' not in config, target_conflict_msg
+    if lock and 'roles' in config:
+        if 'nodes' in config:
+            log.warn(
+                "Config specifies both 'roles' and 'nodes'; "
+                "using 'nodes' and ignoring 'roles'"
+            )
+        else:
+            # Convert old 'roles' stanza into new 'nodes' stanza, so that
+            # elsewhere in teuthology we can consolidate to one set of
+            # codepaths for node specification
+            nodes_config = list()
+            for node_roles in config['roles']:
+                nodes_config.append(dict(
+                    roles=node_roles,
+                    os_type=config.get("os_type"),
+                    os_version=config.get("os_version"),
+                    arch=config.get('arch'),
+                ))
+            config['nodes'] = nodes_config
+            del config['roles']
+    if lock and 'nodes' in config:
+        nodes_request = get_nodes_request(config['nodes'], machine_type)
+        init_tasks.append({'internal.lock_machines': nodes_request})
 
     init_tasks.append({'internal.save_config': None})
 
-    if 'roles' in config:
+    if 'nodes' in config:
         init_tasks.append({'internal.check_lock': None})
 
     init_tasks.append({'internal.add_remotes': None})
 
-    if 'roles' in config:
+    if 'nodes' in config:
         init_tasks.extend([
             {'console_log': None},
             {'internal.connect': None},
@@ -207,8 +271,8 @@ def get_initial_tasks(lock, config, machine_type):
             {'internal.check_conflict': None},
         ])
 
-    if ('roles' in config and
-        not config.get('use_existing_cluster', False)):
+    if ('nodes' in config and
+            not config.get('use_existing_cluster', False)):
         init_tasks.extend([
             {'internal.check_ceph_data': None},
             {'internal.vm_setup': None},
@@ -217,10 +281,10 @@ def get_initial_tasks(lock, config, machine_type):
     if 'kernel' in config:
         init_tasks.append({'kernel': config['kernel']})
 
-    if 'roles' in config:
+    if 'nodes' in config:
         init_tasks.append({'internal.base': None})
     init_tasks.append({'internal.archive_upload': None})
-    if 'roles' in config:
+    if 'nodes' in config:
         init_tasks.extend([
             {'internal.archive': None},
             {'internal.coredump': None},
@@ -237,7 +301,7 @@ def get_initial_tasks(lock, config, machine_type):
             {'kernel.install_latest_rh_kernel': None}
         ])
 
-    if 'roles' in config:
+    if 'nodes' in config:
         init_tasks.extend([
             {'pcp': None},
             {'selinux': None},
@@ -357,6 +421,14 @@ def main(args):
     if suite_repo:
         teuth_config.ceph_qa_suite_git_url = suite_repo
 
+    # overwrite the config value of os_type if --os-type is provided
+    if os_type:
+        config["os_type"] = os_type
+
+    # overwrite the config value of os_version if --os-version is provided
+    if os_version:
+        config["os_version"] = os_version
+
     config["tasks"] = validate_tasks(config)
 
     init_tasks = get_initial_tasks(lock, config, machine_type)
@@ -370,14 +442,6 @@ def main(args):
     # fetches the tasks and returns a new suite_path if needed
     config["suite_path"] = fetch_tasks_if_needed(config)
 
-    # overwrite the config value of os_type if --os-type is provided
-    if os_type:
-        config["os_type"] = os_type
-
-    # overwrite the config value of os_version if --os-version is provided
-    if os_version:
-        config["os_version"] = os_version
-
     # If the job has a 'use_shaman' key, use that value to override the global
     # config's value.
     if config.get('use_shaman') is not None:
index 217f8aaa76ff215166fde269e2ef694be481382d..c1e0830d417bed874be41999d0f446ff94c9b34d 100644 (file)
@@ -25,10 +25,29 @@ def lock_machines(ctx, config):
     machines and placed those keys in the Targets section of a yaml file.
     """
     ctx.config['targets'] = dict()
-    # change the status during the locking process
+    # Change the status during the locking process
     report.try_push_job_info(ctx.config, dict(status='waiting'))
+    total_required = sum(map(lambda c: len(c['roles']), config))
+    assert len(ctx.config['nodes']) == total_required
+    # Now lock all the machines we need
     for request in config:
-        result = do_lock_machines(ctx, request)
+        request['targets'] = do_lock_machines(ctx, request)
+        assert len(request['targets']) == len(request['roles'])
+
+    # Then, map the resulting locked targets to each appropriate 'node conf' in
+    # ctx.config['nodes']
+    for node_conf in ctx.config['nodes']:
+
+        def request_matches(request):
+            def get_spec(obj):
+                keys = ('os_type', 'os_version', 'arch', 'machine_type')
+                return [obj.get(key) for key in keys]
+            return get_spec(node_conf) == get_spec(request)
+        request = filter(request_matches, config)[0]
+        node_conf['targets'] = dict()
+        key = request['targets'].keys()[0]
+        node_conf['targets'][key] = request['targets'].pop(key)
+
     # successfully locked machines, change status back to running
     report.try_push_job_info(ctx.config, dict(status='running'))
     try:
@@ -56,7 +75,7 @@ def do_lock_machines(ctx, request):
     os_version = request['os_version']
     arch = request['arch'] or ctx.config.get('arch')
     machine_type = request['machine_type']
-    total_requested = request['count']
+    total_requested = len(request['roles'])
     log.info('Locking machines...')
 
     all_locked = dict()
@@ -94,11 +113,11 @@ def do_lock_machines(ctx, request):
             else:
                 ctx.config['targets'].update(all_locked)
             locked_targets = yaml.safe_dump(
-                ctx.config['targets'],
+                all_locked,
                 default_flow_style=False
             ).splitlines()
             log.info('\n  '.join(['Locked targets:', ] + locked_targets))
-            break
+            return all_locked
         elif not ctx.block:
             assert 0, 'not enough machines are available'
         else:
index 7fda102809624d232730907651ace27e5fe89a81..84799f7aee48c2ddae23bca6cea15646a6c2af4e 100644 (file)
@@ -32,15 +32,15 @@ class TestRun(object):
 
     @patch("teuthology.run.merge_configs")
     def test_setup_config_targets_ok(self, m_merge_configs):
-        config = {"targets": range(4), "roles": range(2)}
+        config = {"targets": range(4), "nodes": range(2)}
         m_merge_configs.return_value = config
         result = run.setup_config(["some/config.yaml"])
         assert result["targets"] == [0, 1, 2, 3]
-        assert result["roles"] == [0, 1]
+        assert result["nodes"] == [0, 1]
 
     @patch("teuthology.run.merge_configs")
     def test_setup_config_targets_invalid(self, m_merge_configs):
-        config = {"targets": range(2), "roles": range(4)}
+        config = {"targets": range(2), "nodes": range(4)}
         m_merge_configs.return_value = config
         with pytest.raises(AssertionError):
             run.setup_config(["some/config.yaml"])
@@ -100,14 +100,102 @@ class TestRun(object):
         assert excinfo.value.message.startswith("You cannot")
 
     def test_get_inital_tasks(self):
-        config = {"roles": range(2), "kernel": "the_kernel", "use_existing_cluster": False}
+        config = dict(
+            roles=['r1', 'r2'],
+            kernel='the_kernel',
+            use_existing_cluster=False,
+        )
         result = run.get_initial_tasks(True, config, "machine_type")
-        assert {"internal.lock_machines": (2, "machine_type")} in result
+        assert {
+            'internal.lock_machines': [dict(
+                os_version=None, os_type=None, arch=None,
+                machine_type='machine_type', roles=['r1', 'r2'],
+            )]
+        } in result
         assert {"kernel": "the_kernel"} in result
         # added because use_existing_cluster == False
         assert {'internal.vm_setup': None} in result
         assert {'internal.buildpackages_prep': None} in result
 
+    @pytest.mark.parametrize(
+        'input_conf, machine_type, expected',
+        [
+            [
+                dict(nodes=[
+                    dict(
+                        roles=['u16'],
+                        os_type='ubuntu',
+                        os_version='16.04',
+                        arch='aarch64',
+                    ),
+                ]),
+                'mtype',
+                [
+                    dict(
+                        os_type='ubuntu',
+                        os_version='16.04',
+                        arch='aarch64',
+                        machine_type='mtype',
+                        roles=[['u16']],
+                    ),
+                ],
+            ],
+            [
+                dict(nodes=[
+                    dict(
+                        roles=['u16_1'],
+                        os_type='ubuntu',
+                        os_version='16.04',
+                        arch='x86_64',
+                    ),
+                    dict(
+                        roles=['u16_2'],
+                        os_type='ubuntu',
+                        os_version='16.04',
+                        arch='x86_64',
+                    ),
+                    dict(
+                        roles=['c7_1'],
+                        os_type='centos',
+                        os_version='7.3',
+                    ),
+                    dict(
+                        roles=[],
+                    ),
+                ]),
+                'mtype',
+                [
+                    dict(
+                        os_type='ubuntu',
+                        os_version='16.04',
+                        arch='x86_64',
+                        machine_type='mtype',
+                        roles=[['u16_1'], ['u16_2']],
+                    ),
+                    dict(
+                        os_type='centos',
+                        os_version='7.3',
+                        arch=None,
+                        machine_type='mtype',
+                        roles=[['c7_1']],
+                    ),
+                    dict(
+                        os_type=None,
+                        os_version=None,
+                        arch=None,
+                        machine_type='mtype',
+                        roles=[[]],
+                    ),
+                ],
+            ],
+        ]
+    )
+    def test_lock_request(self, input_conf, machine_type, expected):
+        print expected
+        result = run.get_initial_tasks(True, input_conf, machine_type)
+        print result
+        assert {'internal.lock_machines': expected} in result
+
     @patch("teuthology.run.fetch_qa_suite")
     def test_fetch_tasks_if_needed(self, m_fetch_qa_suite):
         config = {"suite_path": "/some/suite/path", "suite_branch": "feature_branch"}