From: Sage Weil
Date: Mon, 22 Mar 2021 13:58:39 +0000 (-0500)
Subject: qa/tasks/cephadm: use 'orch apply mon' to deploy mons
X-Git-Tag: v16.2.0~24^2~4
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=68028ad443f27a16218e31722d3d2aef7087e54a;p=ceph.git

qa/tasks/cephadm: use 'orch apply mon' to deploy mons

The 'orch daemon add ...' command is not idempotent and can cause
duplicate (and failing) attempts to add the same mon.

Maintain the ability to add mons the old way for the benefit of testing
upgrades from early octopus versions of cephadm.

Signed-off-by: Sage Weil
(cherry picked from commit a17593a177798e5b5371c224ede40dc411626d76)
---

diff --git a/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-centos_8.3-octopus.yaml b/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-centos_8.3-octopus.yaml
index c6760f5bf8278..f5441eb93e0a6 100644
--- a/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-centos_8.3-octopus.yaml
+++ b/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-centos_8.3-octopus.yaml
@@ -12,6 +12,8 @@ tasks:
     cephadm_git_url: https://github.com/ceph/ceph
     # avoid --cap-add=PTRACE + --privileged for older cephadm versions
     allow_ptrace: false
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 roles:
diff --git a/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04-15.2.9.yaml b/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04-15.2.9.yaml
index 3aa60374f87e3..e60b8872d9fbb 100644
--- a/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04-15.2.9.yaml
+++ b/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04-15.2.9.yaml
@@ -8,6 +8,8 @@ tasks:
     cephadm_git_url: https://github.com/ceph/ceph
     # avoid --cap-add=PTRACE + --privileged for older cephadm versions
     allow_ptrace: false
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 roles:
 - - mon.a
diff --git a/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml b/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml
index b26227dc88b1d..2af315930f29e 100644
--- a/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml
+++ b/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml
@@ -8,6 +8,8 @@ tasks:
     cephadm_git_url: https://github.com/ceph/ceph
     # avoid --cap-add=PTRACE + --privileged for older cephadm versions
     allow_ptrace: false
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 roles:
 - - mon.a
diff --git a/qa/suites/upgrade/octopus-x/parallel/1-tasks.yaml b/qa/suites/upgrade/octopus-x/parallel/1-tasks.yaml
index 22cf4e14a2843..a765d7a27d6ac 100644
--- a/qa/suites/upgrade/octopus-x/parallel/1-tasks.yaml
+++ b/qa/suites/upgrade/octopus-x/parallel/1-tasks.yaml
@@ -12,6 +12,8 @@ tasks:
         #set config option for which cls modules are allowed to be loaded / used
         osd_class_load_list: "*"
         osd_class_default_list: "*"
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 - print: "**** done end installing octopus cephadm ..."
 - cephadm.shell:
diff --git a/qa/suites/upgrade/octopus-x/stress-split/1-start.yaml b/qa/suites/upgrade/octopus-x/stress-split/1-start.yaml
index 1a1ae08e82e1b..71dfacba2cfe2 100644
--- a/qa/suites/upgrade/octopus-x/stress-split/1-start.yaml
+++ b/qa/suites/upgrade/octopus-x/stress-split/1-start.yaml
@@ -11,6 +11,8 @@ tasks:
         #set config option for which cls modules are allowed to be loaded / used
         osd_class_load_list: "*"
         osd_class_default_list: "*"
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 - cephadm.shell:
     mon.a:
diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py
index 19634c73ccd05..33bc9e4f922ac 100644
--- a/qa/tasks/cephadm.py
+++ b/qa/tasks/cephadm.py
@@ -491,21 +491,70 @@ def ceph_mons(ctx, config):
     """
     cluster_name = config['cluster']
     fsid = ctx.ceph[cluster_name].fsid
-    num_mons = 1
 
     try:
-        for remote, roles in ctx.cluster.remotes.items():
-            for mon in [r for r in roles
-                        if teuthology.is_type('mon', cluster_name)(r)]:
-                c_, _, id_ = teuthology.split_role(mon)
-                if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
-                    continue
-                log.info('Adding %s on %s' % (mon, remote.shortname))
-                num_mons += 1
-                _shell(ctx, cluster_name, remote, [
-                    'ceph', 'orch', 'daemon', 'add', 'mon',
-                    remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
-                ])
+        daemons = {}
+        if config.get('add_mons_via_daemon_add'):
+            # This is the old way of adding mons that works with the (early) octopus
+            # cephadm scheduler.
+            num_mons = 1
+            for remote, roles in ctx.cluster.remotes.items():
+                for mon in [r for r in roles
+                            if teuthology.is_type('mon', cluster_name)(r)]:
+                    c_, _, id_ = teuthology.split_role(mon)
+                    if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
+                        continue
+                    log.info('Adding %s on %s' % (mon, remote.shortname))
+                    num_mons += 1
+                    _shell(ctx, cluster_name, remote, [
+                        'ceph', 'orch', 'daemon', 'add', 'mon',
+                        remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
+                    ])
+                    ctx.daemons.register_daemon(
+                        remote, 'mon', id_,
+                        cluster=cluster_name,
+                        fsid=fsid,
+                        logger=log.getChild(mon),
+                        wait=False,
+                        started=True,
+                    )
+                    daemons[mon] = (remote, id_)
+
+                    with contextutil.safe_while(sleep=1, tries=180) as proceed:
+                        while proceed():
+                            log.info('Waiting for %d mons in monmap...' % (num_mons))
+                            r = _shell(
+                                ctx=ctx,
+                                cluster_name=cluster_name,
+                                remote=remote,
+                                args=[
+                                    'ceph', 'mon', 'dump', '-f', 'json',
+                                ],
+                                stdout=StringIO(),
+                            )
+                            j = json.loads(r.stdout.getvalue())
+                            if len(j['mons']) == num_mons:
+                                break
+        else:
+            nodes = []
+            for remote, roles in ctx.cluster.remotes.items():
+                for mon in [r for r in roles
+                            if teuthology.is_type('mon', cluster_name)(r)]:
+                    c_, _, id_ = teuthology.split_role(mon)
+                    log.info('Adding %s on %s' % (mon, remote.shortname))
+                    nodes.append(remote.shortname
+                                 + ':' + ctx.ceph[cluster_name].mons[mon]
+                                 + '=' + id_)
+                    if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
+                        continue
+                    daemons[mon] = (remote, id_)
+
+            _shell(ctx, cluster_name, remote, [
+                'ceph', 'orch', 'apply', 'mon',
+                str(len(nodes)) + ';' + ';'.join(nodes)]
+            )
+            for mgr, i in daemons.items():
+                remote, id_ = i
                 ctx.daemons.register_daemon(
                     remote, 'mon', id_,
                     cluster=cluster_name,
@@ -515,21 +564,21 @@ def ceph_mons(ctx, config):
                     started=True,
                 )
 
-        with contextutil.safe_while(sleep=1, tries=180) as proceed:
-            while proceed():
-                log.info('Waiting for %d mons in monmap...' % (num_mons))
-                r = _shell(
-                    ctx=ctx,
-                    cluster_name=cluster_name,
-                    remote=remote,
-                    args=[
-                        'ceph', 'mon', 'dump', '-f', 'json',
-                    ],
-                    stdout=StringIO(),
-                )
-                j = json.loads(r.stdout.getvalue())
-                if len(j['mons']) == num_mons:
-                    break
+            with contextutil.safe_while(sleep=1, tries=180) as proceed:
+                while proceed():
+                    log.info('Waiting for %d mons in monmap...' % (len(nodes)))
+                    r = _shell(
+                        ctx=ctx,
+                        cluster_name=cluster_name,
+                        remote=remote,
+                        args=[
+                            'ceph', 'mon', 'dump', '-f', 'json',
+                        ],
+                        stdout=StringIO(),
+                    )
+                    j = json.loads(r.stdout.getvalue())
+                    if len(j['mons']) == len(nodes):
+                        break
 
     # refresh our (final) ceph.conf file
     bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
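
For readers comparing the two code paths, here is a minimal standalone sketch (not part of the patch) contrasting the commands each path ends up issuing. The old path runs one non-idempotent 'ceph orch daemon add mon' per additional mon; the new path runs a single declarative 'ceph orch apply mon' whose placement argument is built exactly as in the patch, str(len(nodes)) + ';' + ';'.join(nodes). The host names, addresses, and mon ids below are invented for illustration.

# Hypothetical sketch of the two mon-deployment styles in qa/tasks/cephadm.py;
# host/addr/id values are made up and not taken from any real test run.

def daemon_add_commands(mons):
    # Old (early octopus) style: one 'orch daemon add mon' per mon. Re-running
    # the same command for a mon that already exists can fail.
    return [
        ['ceph', 'orch', 'daemon', 'add', 'mon',
         '%s:%s=%s' % (host, addr, mon_id)]
        for host, addr, mon_id in mons
    ]

def apply_mon_command(mons):
    # New style: a single idempotent 'orch apply mon' with a placement spec of
    # the form '<count>;host:addr=id;host:addr=id;...'.
    nodes = ['%s:%s=%s' % (host, addr, mon_id) for host, addr, mon_id in mons]
    return ['ceph', 'orch', 'apply', 'mon',
            str(len(nodes)) + ';' + ';'.join(nodes)]

if __name__ == '__main__':
    mons = [
        ('smithi001', '172.21.15.1', 'a'),  # invented host/addr/id values
        ('smithi002', '172.21.15.2', 'b'),
        ('smithi003', '172.21.15.3', 'c'),
    ]
    for cmd in daemon_add_commands(mons):
        print(' '.join(cmd))
    print(' '.join(apply_mon_command(mons)))

In both paths the teuthology task then registers the new mon daemons and polls 'ceph mon dump -f json' until the expected number of mons appears in the monmap, as shown in the patch above.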