qa/tasks/cephadm: use 'orch apply mon' to deploy mons (40314/head)
author Sage Weil <sage@newdream.net>
Mon, 22 Mar 2021 13:58:39 +0000 (08:58 -0500)
committer Sage Weil <sage@newdream.net>
Mon, 22 Mar 2021 21:28:40 +0000 (16:28 -0500)
The 'orch daemon add ...' command is not idempotent and can cause
duplicate (and failing) attempts to add the same mon.

Maintain the ability to add mons the old way for the benefit of testing
upgrades from early octopus versions of cephadm.

Signed-off-by: Sage Weil <sage@newdream.net>
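
For context, the shape of the two invocations that qa/tasks/cephadm.py now builds can be sketched as follows. This is an illustrative snippet only; the host names, addresses, and mon IDs are made up, and the real task derives them from the teuthology context (see the diff of ceph_mons() below).

    # Old (early octopus) form: one 'orch daemon add mon' call per extra mon.
    # Repeating it for the same mon is not idempotent, hence the new default.
    def daemon_add_args(host, addr, mon_id):
        return ['ceph', 'orch', 'daemon', 'add', 'mon',
                '%s:%s=%s' % (host, addr, mon_id)]

    # New form: a single 'orch apply mon' with a "<count>;<host:addr=id>;..."
    # placement spec that covers every mon at once and can be re-applied
    # without producing duplicate add attempts.
    def apply_mon_args(placements):
        nodes = ['%s:%s=%s' % (host, addr, mon_id)
                 for host, addr, mon_id in placements]
        return ['ceph', 'orch', 'apply', 'mon',
                str(len(nodes)) + ';' + ';'.join(nodes)]

    # daemon_add_args('smithi001', '172.21.15.101', 'b')
    #   -> [..., 'mon', 'smithi001:172.21.15.101=b']
    # apply_mon_args([('smithi001', '172.21.15.101', 'a'),
    #                 ('smithi002', '172.21.15.102', 'b')])
    #   -> [..., 'mon', '2;smithi001:172.21.15.101=a;smithi002:172.21.15.102=b']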
qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-centos_8.3-octopus.yaml
qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04-15.2.9.yaml
qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml
qa/suites/upgrade/octopus-x/parallel/1-tasks.yaml
qa/suites/upgrade/octopus-x/stress-split/1-start.yaml
qa/tasks/cephadm.py

qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-centos_8.3-octopus.yaml
index c6760f5bf82780a72c4cf2d1796f38a9de2e11e5..f5441eb93e0a63e57a7c93ab17323d1e64e2338c 100644
@@ -12,6 +12,8 @@ tasks:
     cephadm_git_url: https://github.com/ceph/ceph
     # avoid --cap-add=PTRACE + --privileged for older cephadm versions
     allow_ptrace: false
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 
 
 roles:
qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04-15.2.9.yaml
index 3aa60374f87e376f34560aef4662c273551ac798..e60b8872d9fbbedba0e97a6beb12a0bf08d37cf1 100644
@@ -8,6 +8,8 @@ tasks:
     cephadm_git_url: https://github.com/ceph/ceph
     # avoid --cap-add=PTRACE + --privileged for older cephadm versions
     allow_ptrace: false
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 
 roles:
 - - mon.a
qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml
index b26227dc88b1d274c276e0e9a943d0210302c2c9..2af315930f29e65160018d997777b6a146a528be 100644
@@ -8,6 +8,8 @@ tasks:
     cephadm_git_url: https://github.com/ceph/ceph
     # avoid --cap-add=PTRACE + --privileged for older cephadm versions
     allow_ptrace: false
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 
 roles:
 - - mon.a
qa/suites/upgrade/octopus-x/parallel/1-tasks.yaml
index 72ca04adf7c4f3b5af2833f32e55c3c9905d8c51..a392953d286e4dfbd372ce9873b60e10944a5d89 100644
@@ -12,6 +12,8 @@ tasks:
         #set config option for which cls modules are allowed to be loaded / used
         osd_class_load_list: "*"
         osd_class_default_list: "*"
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 - print: "**** done end installing octopus cephadm ..."
 
 - cephadm.shell:
qa/suites/upgrade/octopus-x/stress-split/1-start.yaml
index 8458365e871cc6366e40a3d2f64d9c26b0e082f1..2cfeb54ec18a42deb52e49702f8a9701171633eb 100644
@@ -11,6 +11,8 @@ tasks:
         #set config option for which cls modules are allowed to be loaded / used
         osd_class_load_list: "*"
         osd_class_default_list: "*"
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 
 - cephadm.shell:
     mon.a:
qa/tasks/cephadm.py
index 19634c73ccd052b6cc0b6415749b242c717ab2d7..33bc9e4f922ac9d8d4909b61a24405c8a89e4c1f 100644
@@ -491,21 +491,70 @@ def ceph_mons(ctx, config):
     """
     cluster_name = config['cluster']
     fsid = ctx.ceph[cluster_name].fsid
-    num_mons = 1
 
     try:
-        for remote, roles in ctx.cluster.remotes.items():
-            for mon in [r for r in roles
-                        if teuthology.is_type('mon', cluster_name)(r)]:
-                c_, _, id_ = teuthology.split_role(mon)
-                if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
-                    continue
-                log.info('Adding %s on %s' % (mon, remote.shortname))
-                num_mons += 1
-                _shell(ctx, cluster_name, remote, [
-                    'ceph', 'orch', 'daemon', 'add', 'mon',
-                    remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
-                ])
+        daemons = {}
+        if config.get('add_mons_via_daemon_add'):
+            # This is the old way of adding mons that works with the (early) octopus
+            # cephadm scheduler.
+            num_mons = 1
+            for remote, roles in ctx.cluster.remotes.items():
+                for mon in [r for r in roles
+                            if teuthology.is_type('mon', cluster_name)(r)]:
+                    c_, _, id_ = teuthology.split_role(mon)
+                    if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
+                        continue
+                    log.info('Adding %s on %s' % (mon, remote.shortname))
+                    num_mons += 1
+                    _shell(ctx, cluster_name, remote, [
+                        'ceph', 'orch', 'daemon', 'add', 'mon',
+                        remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
+                    ])
+                    ctx.daemons.register_daemon(
+                        remote, 'mon', id_,
+                        cluster=cluster_name,
+                        fsid=fsid,
+                        logger=log.getChild(mon),
+                        wait=False,
+                        started=True,
+                    )
+                    daemons[mon] = (remote, id_)
+
+                    with contextutil.safe_while(sleep=1, tries=180) as proceed:
+                        while proceed():
+                            log.info('Waiting for %d mons in monmap...' % (num_mons))
+                            r = _shell(
+                                ctx=ctx,
+                                cluster_name=cluster_name,
+                                remote=remote,
+                                args=[
+                                    'ceph', 'mon', 'dump', '-f', 'json',
+                                ],
+                                stdout=StringIO(),
+                            )
+                            j = json.loads(r.stdout.getvalue())
+                            if len(j['mons']) == num_mons:
+                                break
+        else:
+            nodes = []
+            for remote, roles in ctx.cluster.remotes.items():
+                for mon in [r for r in roles
+                            if teuthology.is_type('mon', cluster_name)(r)]:
+                    c_, _, id_ = teuthology.split_role(mon)
+                    log.info('Adding %s on %s' % (mon, remote.shortname))
+                    nodes.append(remote.shortname
+                                 + ':' + ctx.ceph[cluster_name].mons[mon]
+                                 + '=' + id_)
+                    if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
+                        continue
+                    daemons[mon] = (remote, id_)
+
+            _shell(ctx, cluster_name, remote, [
+                'ceph', 'orch', 'apply', 'mon',
+                str(len(nodes)) + ';' + ';'.join(nodes),
+            ])
+            for mon, i in daemons.items():
+                remote, id_ = i
                 ctx.daemons.register_daemon(
                     remote, 'mon', id_,
                     cluster=cluster_name,
@@ -515,21 +564,21 @@ def ceph_mons(ctx, config):
                     started=True,
                 )
 
-                with contextutil.safe_while(sleep=1, tries=180) as proceed:
-                    while proceed():
-                        log.info('Waiting for %d mons in monmap...' % (num_mons))
-                        r = _shell(
-                            ctx=ctx,
-                            cluster_name=cluster_name,
-                            remote=remote,
-                            args=[
-                                'ceph', 'mon', 'dump', '-f', 'json',
-                            ],
-                            stdout=StringIO(),
-                        )
-                        j = json.loads(r.stdout.getvalue())
-                        if len(j['mons']) == num_mons:
-                            break
+            with contextutil.safe_while(sleep=1, tries=180) as proceed:
+                while proceed():
+                    log.info('Waiting for %d mons in monmap...' % (len(nodes)))
+                    r = _shell(
+                        ctx=ctx,
+                        cluster_name=cluster_name,
+                        remote=remote,
+                        args=[
+                            'ceph', 'mon', 'dump', '-f', 'json',
+                        ],
+                        stdout=StringIO(),
+                    )
+                    j = json.loads(r.stdout.getvalue())
+                    if len(j['mons']) == len(nodes):
+                        break
 
         # refresh our (final) ceph.conf file
         bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote