From dbe2ad24212b9ff9a183d931402a49b544307e00 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 29 Apr 2015 19:53:59 +0100
Subject: [PATCH] tasks/ceph_deploy: fix for multiple mons

Now that service IDs are modified during the run, we have to avoid
repeatedly evaluating first_mon to decide where to run ceph_deploy,
as the answer will change.

Fixes: #11495

Signed-off-by: John Spray
---
 tasks/ceph_deploy.py | 67 ++++++++++++++++++++------------------------
 1 file changed, 31 insertions(+), 36 deletions(-)

diff --git a/tasks/ceph_deploy.py b/tasks/ceph_deploy.py
index cb299f522085d..7919a158a2906 100644
--- a/tasks/ceph_deploy.py
+++ b/tasks/ceph_deploy.py
@@ -160,29 +160,27 @@ def get_all_nodes(ctx, config):
     nodelist = " ".join(nodelist)
     return nodelist
 
 
-def execute_ceph_deploy(ctx, config, cmd):
-    """Remotely execute a ceph_deploy command"""
-    testdir = teuthology.get_testdir(ctx)
-    ceph_admin = teuthology.get_first_mon(ctx, config)
-    exec_cmd = cmd
-    (remote,) = ctx.cluster.only(ceph_admin).remotes.iterkeys()
-    proc = remote.run(
-        args = [
-            'cd',
-            '{tdir}/ceph-deploy'.format(tdir=testdir),
-            run.Raw('&&'),
-            run.Raw(exec_cmd),
-            ],
-        check_status=False,
-        )
-    exitstatus = proc.exitstatus
-    return exitstatus
-
-
 @contextlib.contextmanager
 def build_ceph_cluster(ctx, config):
     """Build a ceph cluster"""
+    # Expect to find ceph_admin on the first mon by ID, same place that the download task
+    # puts it.  Remember this here, because subsequently IDs will change from those in
+    # the test config to those that ceph-deploy invents.
+    (ceph_admin,) = ctx.cluster.only(teuthology.get_first_mon(ctx, config)).remotes.iterkeys()
+
+    def execute_ceph_deploy(cmd):
+        """Remotely execute a ceph_deploy command"""
+        return ceph_admin.run(
+            args=[
+                'cd',
+                '{tdir}/ceph-deploy'.format(tdir=testdir),
+                run.Raw('&&'),
+                run.Raw(cmd),
+            ],
+            check_status=False,
+        ).exitstatus
+
     try:
         log.info('Building ceph cluster using ceph-deploy...')
         testdir = teuthology.get_testdir(ctx)
@@ -212,30 +210,28 @@ def build_ceph_cluster(ctx, config):
         if mon_nodes is None:
             raise RuntimeError("no monitor nodes in the config file")
 
-        estatus_new = execute_ceph_deploy(ctx, config, new_mon)
+        estatus_new = execute_ceph_deploy(new_mon)
         if estatus_new != 0:
             raise RuntimeError("ceph-deploy: new command failed")
 
         log.info('adding config inputs...')
         testdir = teuthology.get_testdir(ctx)
         conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
-        first_mon = teuthology.get_first_mon(ctx, config)
-        (remote,) = ctx.cluster.only(first_mon).remotes.keys()
 
         lines = None
         if config.get('conf') is not None:
             confp = config.get('conf')
             for section, keys in confp.iteritems():
                 lines = '[{section}]\n'.format(section=section)
-                teuthology.append_lines_to_file(remote, conf_path, lines,
+                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                 sudo=True)
                 for key, value in keys.iteritems():
                     log.info("[%s] %s = %s" % (section, key, value))
                     lines = '{key} = {value}\n'.format(key=key, value=value)
-                    teuthology.append_lines_to_file(remote, conf_path, lines,
+                    teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                     sudo=True)
 
-        estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
+        estatus_install = execute_ceph_deploy(install_nodes)
         if estatus_install != 0:
             raise RuntimeError("ceph-deploy: Failed to install ceph")
 
@@ -243,9 +239,9 @@ def build_ceph_cluster(ctx, config):
         # If the following fails, it is OK, it might just be that the monitors
         # are taking way more than a minute/monitor to form quorum, so lets
         # try the next block which will wait up to 15 minutes to gatherkeys.
-        execute_ceph_deploy(ctx, config, mon_create_nodes)
+        execute_ceph_deploy(mon_create_nodes)
 
-        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
+        estatus_gather = execute_ceph_deploy(gather_keys)
         max_gather_tries = 90
         gather_tries = 0
         while (estatus_gather != 0):
@@ -253,19 +249,18 @@ def build_ceph_cluster(ctx, config):
             if gather_tries >= max_gather_tries:
                 msg = 'ceph-deploy was not able to gatherkeys after 15 minutes'
                 raise RuntimeError(msg)
-            estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
+            estatus_gather = execute_ceph_deploy(gather_keys)
             time.sleep(10)
 
         if mds_nodes:
-            estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
+            estatus_mds = execute_ceph_deploy(deploy_mds)
             if estatus_mds != 0:
                 raise RuntimeError("ceph-deploy: Failed to deploy mds")
 
         if config.get('test_mon_destroy') is not None:
             for d in range(1, len(mon_node)):
                 mon_destroy_nodes = './ceph-deploy mon destroy'+" "+mon_node[d]
-                estatus_mon_d = execute_ceph_deploy(ctx, config,
-                                                    mon_destroy_nodes)
+                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                 if estatus_mon_d != 0:
                     raise RuntimeError("ceph-deploy: Failed to delete monitor")
 
@@ -276,7 +271,7 @@ def build_ceph_cluster(ctx, config):
                    osd_create_cmd_d = osd_create_cmd+'--dmcrypt'+" "+d
                 else:
                     osd_create_cmd_d = osd_create_cmd+d
-                estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
+                estatus_osd = execute_ceph_deploy(osd_create_cmd_d)
                 if estatus_osd == 0:
                     log.info('successfully created osd')
                     no_of_osds += 1
@@ -286,8 +281,8 @@ def build_ceph_cluster(ctx, config):
                     dev_disk = disks[0]+":"+disks[1]
                     j_disk = disks[0]+":"+disks[2]
                     zap_disk = './ceph-deploy disk zap '+dev_disk+" "+j_disk
-                    execute_ceph_deploy(ctx, config, zap_disk)
-                    estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
+                    execute_ceph_deploy(zap_disk)
+                    estatus_osd = execute_ceph_deploy(osd_create_cmd_d)
                     if estatus_osd == 0:
                         log.info('successfully created osd')
                         no_of_osds += 1
@@ -443,9 +438,9 @@ def build_ceph_cluster(ctx, config):
         purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes
 
         log.info('Purging package...')
-        execute_ceph_deploy(ctx, config, purge_nodes)
+        execute_ceph_deploy(purge_nodes)
         log.info('Purging data...')
-        execute_ceph_deploy(ctx, config, purgedata_nodes)
+        execute_ceph_deploy(purgedata_nodes)
 
 
 @contextlib.contextmanager
-- 
2.39.5
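
For context, a minimal standalone sketch of the pattern this patch applies:
resolve the admin remote once and close over it in the helper, rather than
re-evaluating get_first_mon() on every call. This is illustration only, not
teuthology code; the Cluster, make_executor, first_mon, host_for and execute
names are invented for the example.

# Illustration only: toy stand-ins for the teuthology cluster objects.
class Cluster(object):
    def __init__(self, roles):
        self.roles = roles  # role id -> hostname

    def first_mon(self):
        # analogous to "first mon by ID" (teuthology.get_first_mon)
        return sorted(r for r in self.roles if r.startswith('mon.'))[0]

    def host_for(self, role):
        return self.roles[role]


def make_executor(cluster):
    # The patch's approach: resolve the admin host once, up front, and
    # close over it so later role-ID rewrites cannot change the target.
    admin_host = cluster.host_for(cluster.first_mon())

    def execute(cmd):
        # stands in for remote.run(args=['cd', ..., run.Raw(cmd)], ...)
        return "would run %r on %s" % (cmd, admin_host)

    return execute


cluster = Cluster({'mon.a': 'host1', 'mon.b': 'host2'})
execute = make_executor(cluster)
print(execute('./ceph-deploy new host1 host2'))   # targets host1

# Simulate ceph-deploy renaming the mon role IDs mid-run.
cluster.roles = {'mon.host1': 'host1', 'mon.host2': 'host2'}

# The old code re-resolved first_mon at this point and looked up the remotes
# map with an ID that may no longer exist; the closure still targets host1.
print(execute('./ceph-deploy gatherkeys host1'))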