From: Sage Weil
Date: Mon, 22 Mar 2021 13:58:39 +0000 (-0500)
Subject: qa/tasks/cephadm: use 'orch apply mon' to deploy mons
X-Git-Tag: v17.1.0~2498^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F40314%2Fhead;p=ceph.git

qa/tasks/cephadm: use 'orch apply mon' to deploy mons

The 'orch daemon add ...' command is not idempotent and can cause
duplicate (and failing) attempts to add the same mon.

Maintain the ability to add mons the old way for the benefit of testing
upgrades from early octopus versions of cephadm.

Signed-off-by: Sage Weil
---

diff --git a/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-centos_8.3-octopus.yaml b/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-centos_8.3-octopus.yaml
index c6760f5bf827..f5441eb93e0a 100644
--- a/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-centos_8.3-octopus.yaml
+++ b/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-centos_8.3-octopus.yaml
@@ -12,6 +12,8 @@ tasks:
     cephadm_git_url: https://github.com/ceph/ceph
     # avoid --cap-add=PTRACE + --privileged for older cephadm versions
     allow_ptrace: false
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 roles:
diff --git a/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04-15.2.9.yaml b/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04-15.2.9.yaml
index 3aa60374f87e..e60b8872d9fb 100644
--- a/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04-15.2.9.yaml
+++ b/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04-15.2.9.yaml
@@ -8,6 +8,8 @@ tasks:
     cephadm_git_url: https://github.com/ceph/ceph
     # avoid --cap-add=PTRACE + --privileged for older cephadm versions
     allow_ptrace: false
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 roles:
 - - mon.a
diff --git a/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml b/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml
index b26227dc88b1..2af315930f29 100644
--- a/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml
+++ b/qa/suites/rados/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml
@@ -8,6 +8,8 @@ tasks:
     cephadm_git_url: https://github.com/ceph/ceph
     # avoid --cap-add=PTRACE + --privileged for older cephadm versions
     allow_ptrace: false
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 roles:
 - - mon.a
diff --git a/qa/suites/upgrade/octopus-x/parallel/1-tasks.yaml b/qa/suites/upgrade/octopus-x/parallel/1-tasks.yaml
index 72ca04adf7c4..a392953d286e 100644
--- a/qa/suites/upgrade/octopus-x/parallel/1-tasks.yaml
+++ b/qa/suites/upgrade/octopus-x/parallel/1-tasks.yaml
@@ -12,6 +12,8 @@ tasks:
         #set config option for which cls modules are allowed to be loaded / used
         osd_class_load_list: "*"
         osd_class_default_list: "*"
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 - print: "**** done end installing octopus cephadm ..."
 - cephadm.shell:
diff --git a/qa/suites/upgrade/octopus-x/stress-split/1-start.yaml b/qa/suites/upgrade/octopus-x/stress-split/1-start.yaml
index 8458365e871c..2cfeb54ec18a 100644
--- a/qa/suites/upgrade/octopus-x/stress-split/1-start.yaml
+++ b/qa/suites/upgrade/octopus-x/stress-split/1-start.yaml
@@ -11,6 +11,8 @@ tasks:
         #set config option for which cls modules are allowed to be loaded / used
         osd_class_load_list: "*"
         osd_class_default_list: "*"
+    # deploy additional mons the "old" (octopus) way
+    add_mons_via_daemon_add: true
 - cephadm.shell:
     mon.a:
diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py
index 19634c73ccd0..33bc9e4f922a 100644
--- a/qa/tasks/cephadm.py
+++ b/qa/tasks/cephadm.py
@@ -491,21 +491,70 @@ def ceph_mons(ctx, config):
     """
     cluster_name = config['cluster']
     fsid = ctx.ceph[cluster_name].fsid
-    num_mons = 1
     try:
-        for remote, roles in ctx.cluster.remotes.items():
-            for mon in [r for r in roles
-                        if teuthology.is_type('mon', cluster_name)(r)]:
-                c_, _, id_ = teuthology.split_role(mon)
-                if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
-                    continue
-                log.info('Adding %s on %s' % (mon, remote.shortname))
-                num_mons += 1
-                _shell(ctx, cluster_name, remote, [
-                    'ceph', 'orch', 'daemon', 'add', 'mon',
-                    remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
-                ])
+        daemons = {}
+        if config.get('add_mons_via_daemon_add'):
+            # This is the old way of adding mons that works with the (early) octopus
+            # cephadm scheduler.
+            num_mons = 1
+            for remote, roles in ctx.cluster.remotes.items():
+                for mon in [r for r in roles
+                            if teuthology.is_type('mon', cluster_name)(r)]:
+                    c_, _, id_ = teuthology.split_role(mon)
+                    if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
+                        continue
+                    log.info('Adding %s on %s' % (mon, remote.shortname))
+                    num_mons += 1
+                    _shell(ctx, cluster_name, remote, [
+                        'ceph', 'orch', 'daemon', 'add', 'mon',
+                        remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
+                    ])
+                    ctx.daemons.register_daemon(
+                        remote, 'mon', id_,
+                        cluster=cluster_name,
+                        fsid=fsid,
+                        logger=log.getChild(mon),
+                        wait=False,
+                        started=True,
+                    )
+                    daemons[mon] = (remote, id_)
+
+            with contextutil.safe_while(sleep=1, tries=180) as proceed:
+                while proceed():
+                    log.info('Waiting for %d mons in monmap...' % (num_mons))
+                    r = _shell(
+                        ctx=ctx,
+                        cluster_name=cluster_name,
+                        remote=remote,
+                        args=[
+                            'ceph', 'mon', 'dump', '-f', 'json',
+                        ],
+                        stdout=StringIO(),
+                    )
+                    j = json.loads(r.stdout.getvalue())
+                    if len(j['mons']) == num_mons:
+                        break
+        else:
+            nodes = []
+            for remote, roles in ctx.cluster.remotes.items():
+                for mon in [r for r in roles
+                            if teuthology.is_type('mon', cluster_name)(r)]:
+                    c_, _, id_ = teuthology.split_role(mon)
+                    log.info('Adding %s on %s' % (mon, remote.shortname))
+                    nodes.append(remote.shortname
+                                 + ':' + ctx.ceph[cluster_name].mons[mon]
+                                 + '=' + id_)
+                    if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
+                        continue
+                    daemons[mon] = (remote, id_)
+
+            _shell(ctx, cluster_name, remote, [
+                'ceph', 'orch', 'apply', 'mon',
+                str(len(nodes)) + ';' + ';'.join(nodes)]
+            )
+            for mgr, i in daemons.items():
+                remote, id_ = i
                 ctx.daemons.register_daemon(
                     remote, 'mon', id_,
                     cluster=cluster_name,
@@ -515,21 +564,21 @@ def ceph_mons(ctx, config):
                     started=True,
                 )
-        with contextutil.safe_while(sleep=1, tries=180) as proceed:
-            while proceed():
-                log.info('Waiting for %d mons in monmap...'
-                         % (num_mons))
-                r = _shell(
-                    ctx=ctx,
-                    cluster_name=cluster_name,
-                    remote=remote,
-                    args=[
-                        'ceph', 'mon', 'dump', '-f', 'json',
-                    ],
-                    stdout=StringIO(),
-                )
-                j = json.loads(r.stdout.getvalue())
-                if len(j['mons']) == num_mons:
-                    break
+            with contextutil.safe_while(sleep=1, tries=180) as proceed:
+                while proceed():
+                    log.info('Waiting for %d mons in monmap...' % (len(nodes)))
+                    r = _shell(
+                        ctx=ctx,
+                        cluster_name=cluster_name,
+                        remote=remote,
+                        args=[
+                            'ceph', 'mon', 'dump', '-f', 'json',
+                        ],
+                        stdout=StringIO(),
+                    )
+                    j = json.loads(r.stdout.getvalue())
+                    if len(j['mons']) == len(nodes):
+                        break
         # refresh our (final) ceph.conf file
         bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
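
For reference, a minimal standalone sketch of the placement spec string that the new code path hands to 'ceph orch apply mon'. The hostnames, addresses, and mon names below are hypothetical stand-ins for what the task actually pulls from ctx.cluster.remotes and ctx.ceph[cluster_name].mons:

# Standalone sketch of the placement spec the new code path passes to
# 'ceph orch apply mon'.  Hostnames, addresses, and mon names here are
# hypothetical; in the task they come from ctx.cluster.remotes and
# ctx.ceph[cluster_name].mons.
mons = {
    'smithi001': ('172.21.15.1', 'a'),
    'smithi002': ('172.21.15.2', 'b'),
    'smithi003': ('172.21.15.3', 'c'),
}

nodes = []
for shortname, (addr, mon_id) in mons.items():
    # same '<host>:<addr>=<name>' element the task builds per mon
    nodes.append(shortname + ':' + addr + '=' + mon_id)

# '<count>;<host>:<addr>=<name>;...' -- one declarative spec covering all mons
placement = str(len(nodes)) + ';' + ';'.join(nodes)
print(placement)
# 3;smithi001:172.21.15.1=a;smithi002:172.21.15.2=b;smithi003:172.21.15.3=c

# The task then runs, roughly:
#   ceph orch apply mon '3;smithi001:172.21.15.1=a;...'
# which can be repeated safely, whereas the old per-daemon form
#   ceph orch daemon add mon smithi001:172.21.15.1=a
# is not idempotent and can fail if the same mon is added twice.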
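
Along the same lines, a small sketch of the monmap check that both code paths poll for. The JSON below is a trimmed, hypothetical 'ceph mon dump -f json' payload rather than real output captured through _shell():

import json

# Trimmed, hypothetical 'ceph mon dump -f json' output; the real task reads
# this from the stdout of _shell(..., stdout=StringIO()).
mon_dump = '''
{
  "epoch": 3,
  "fsid": "00000000-0000-0000-0000-000000000000",
  "mons": [
    {"rank": 0, "name": "a", "addr": "172.21.15.1:6789/0"},
    {"rank": 1, "name": "b", "addr": "172.21.15.2:6789/0"}
  ]
}
'''

expected = 3  # len(nodes) in the new path, num_mons in the old one
j = json.loads(mon_dump)
if len(j['mons']) == expected:
    print('all expected mons are in the monmap')
else:
    print('still waiting: %d/%d mons in monmap' % (len(j['mons']), expected))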