From 1ab352dd3105d320bd7ad2c0b37e5de750879dd5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 7 Dec 2018 13:16:31 -0600 Subject: [PATCH] qa/tasks/ceph.py: move methods from teuthology.git into ceph.py directly; support mon bind * options Having these live in teuthology.git is silly, since they are only consumed by the ceph task, and it is hard to revise the behavior. Revise the behavior by adding mon_bind_* options. Signed-off-by: Sage Weil --- qa/tasks/ceph.conf.template | 91 +++++++++++++++++++++++ qa/tasks/ceph.py | 141 +++++++++++++++++++++++++++++++++++- 2 files changed, 229 insertions(+), 3 deletions(-) create mode 100644 qa/tasks/ceph.conf.template diff --git a/qa/tasks/ceph.conf.template b/qa/tasks/ceph.conf.template new file mode 100644 index 0000000000000..c14370ad4c283 --- /dev/null +++ b/qa/tasks/ceph.conf.template @@ -0,0 +1,91 @@ +[global] + chdir = "" + pid file = /var/run/ceph/$cluster-$name.pid + auth supported = cephx + + filestore xattr use omap = true + + mon clock drift allowed = 1.000 + + osd crush chooseleaf type = 0 + auth debug = true + + ms die on old message = true + + mon pg warn min per osd = 1 + mon pg warn max per osd = 10000 # <= luminous + mon max pg per osd = 10000 # >= luminous + mon pg warn max object skew = 0 + + osd pool default size = 2 + + mon osd allow primary affinity = true + mon osd allow pg remap = true + mon warn on legacy crush tunables = false + mon warn on crush straw calc version zero = false + mon warn on no sortbitwise = false + mon warn on osd down out interval zero = false + + osd pool default erasure code profile = "plugin=jerasure technique=reed_sol_van k=2 m=1 ruleset-failure-domain=osd crush-failure-domain=osd" + + osd default data pool replay window = 5 + + mon allow pool delete = true + + mon cluster log file level = debug + debug asserts on shutdown = true + +[osd] + osd journal size = 100 + + osd scrub load threshold = 5.0 + osd scrub max interval = 600 + + osd recover clone overlap = true + osd recovery max chunk = 1048576 + + osd debug shutdown = true + osd debug op order = true + osd debug verify stray on activate = true + + osd open classes on start = true + osd debug pg log writeout = true + + osd deep scrub update digest min age = 30 + + osd map max advance = 10 + + journal zero on create = true + + filestore ondisk finisher threads = 3 + filestore apply finisher threads = 3 + + bdev debug aio = true + osd debug misdirected ops = true + +[mgr] + debug ms = 1 + debug mgr = 20 + debug mon = 20 + debug auth = 20 + mon reweight min pgs per osd = 4 + mon reweight min bytes per osd = 10 + +[mon] + debug ms = 1 + debug mon = 20 + debug paxos = 20 + debug auth = 20 + mon data avail warn = 5 + mon mgr mkfs grace = 120 + mon reweight min pgs per osd = 4 + mon osd reporter subtree level = osd + mon osd prime pg temp = true + mon reweight min bytes per osd = 10 + +[client] + rgw cache enabled = true + rgw enable ops log = true + rgw enable usage log = true + log file = /var/log/ceph/$cluster-$name.$pid.log + admin socket = /var/run/ceph/$cluster-$name.$pid.asok diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py index f925339ac02f5..b556f7eddeade 100644 --- a/qa/tasks/ceph.py +++ b/qa/tasks/ceph.py @@ -6,6 +6,7 @@ Handle the setup, starting, and clean-up of a Ceph cluster. 
from cStringIO import StringIO import argparse +import configobj import contextlib import errno import logging @@ -13,6 +14,7 @@ import os import json import time import gevent +import re import socket from paramiko import SSHException @@ -417,6 +419,135 @@ def cephfs_setup(ctx, config): yield +def get_mons(roles, ips, + mon_bind_msgr2=False, + mon_bind_addrvec=False): + """ + Get monitors and their associated addresses + """ + mons = {} + v1_ports = {} + v2_ports = {} + mon_id = 0 + is_mon = teuthology.is_type('mon') + for idx, roles in enumerate(roles): + for role in roles: + if not is_mon(role): + continue + if ips[idx] not in v1_ports: + v1_ports[ips[idx]] = 6789 + else: + v1_ports[ips[idx]] += 1 + if mon_bind_msgr2: + if ips[idx] not in v2_ports: + v2_ports[ips[idx]] = 3300 + addr = '{ip}'.format(ip=ips[idx]) + else: + assert mon_bind_addrvec + v2_ports[ips[idx]] += 1 + addr = 'v2:{ip}:{port2},v1:{ip}:{port1}'.format( + ip=ips[idx], + port2=v2_ports[ips[idx]], + port1=v1_ports[ips[idx]], + ) + elif mon_bind_addrvec: + addr = 'v1:{ip}:{port}'.format( + ip=ips[idx], + port=v1_ports[ips[idx]], + ) + else: + addr = '{ip}:{port}'.format( + ip=ips[idx], + port=v1_ports[ips[idx]], + ) + mon_id += 1 + mons[role] = addr + assert mons + return mons + +def skeleton_config(ctx, roles, ips, mons, cluster='ceph'): + """ + Returns a ConfigObj that is prefilled with a skeleton config. + + Use conf[section][key]=value or conf.merge to change it. + + Use conf.write to write it out, override .filename first if you want. + """ + path = os.path.join(os.path.dirname(__file__), 'ceph.conf.template') + t = open(path, 'r') + skconf = t.read().format(testdir=teuthology.get_testdir(ctx)) + conf = configobj.ConfigObj(StringIO(skconf), file_error=True) + mon_hosts = [] + for role, addr in mons.iteritems(): + mon_cluster, _, _ = teuthology.split_role(role) + if mon_cluster != cluster: + continue + name = teuthology.ceph_role(role) + conf.setdefault(name, {}) + mon_hosts.append(addr) + conf.setdefault('global', {}) + conf['global']['mon host'] = ','.join(mon_hosts) + # set up standby mds's + is_mds = teuthology.is_type('mds', cluster) + for roles_subset in roles: + for role in roles_subset: + if is_mds(role): + name = teuthology.ceph_role(role) + conf.setdefault(name, {}) + if '-s-' in name: + standby_mds = name[name.find('-s-') + 3:] + conf[name]['mds standby for name'] = standby_mds + return conf + +def create_simple_monmap(ctx, remote, conf, mons, + path=None, + mon_bind_addrvec=False): + """ + Writes a simple monmap based on current ceph.conf into path, or + /monmap by default. + + Assumes ceph_conf is up to date. + + Assumes mon sections are named "mon.*", with the dot. + + :return the FSID (as a string) of the newly created monmap + """ + + addresses = list(mons.iteritems()) + assert addresses, "There are no monitors in config!" 
+ log.debug('Ceph mon addresses: %s', addresses) + + testdir = teuthology.get_testdir(ctx) + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'monmaptool', + '--create', + '--clobber', + ] + for (name, addr) in addresses: + n = name[4:] + if mon_bind_addrvec: + args.extend(('--addv', n, addr)) + else: + args.extend(('--add', n, addr)) + if not path: + path = '{tdir}/monmap'.format(tdir=testdir) + args.extend([ + '--print', + path + ]) + + r = remote.run( + args=args, + stdout=StringIO() + ) + monmap_output = r.stdout.getvalue() + fsid = re.search("generated fsid (.+)$", + monmap_output, re.MULTILINE).group(1) + return fsid + @contextlib.contextmanager def cluster(ctx, config): """ @@ -518,10 +649,13 @@ def cluster(ctx, config): roles = [role_list for (remote, role_list) in remotes_and_roles] ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] - conf = teuthology.skeleton_config( - ctx, roles=roles, ips=ips, cluster=cluster_name, + mons = get_mons( + roles, ips, mon_bind_msgr2=config.get('mon_bind_msgr2'), mon_bind_addrvec=config.get('mon_bind_addrvec'), + ) + conf = skeleton_config( + ctx, roles=roles, ips=ips, mons=mons, cluster=cluster_name, ) for remote, roles_to_journals in remote_to_roles_to_journals.iteritems(): for role, journal in roles_to_journals.iteritems(): @@ -586,10 +720,11 @@ def cluster(ctx, config): (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() monmap_path = '{tdir}/{cluster}.monmap'.format(tdir=testdir, cluster=cluster_name) - fsid = teuthology.create_simple_monmap( + fsid = create_simple_monmap( ctx, remote=mon0_remote, conf=conf, + mons=mons, path=monmap_path, mon_bind_addrvec=config.get('mon_bind_addrvec'), ) -- 2.39.5
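The net effect of the new options is easiest to see in the address strings that get_mons() hands to skeleton_config() (for the "mon host" line) and to create_simple_monmap() (for monmaptool --add/--addv). The following standalone sketch mirrors that logic under stated assumptions: assign_mon_addrs(), the sample monitor names, and the host IPs are hypothetical stand-ins, and the teuthology role/cluster handling from the patch is deliberately dropped so the snippet runs on its own.

# Hypothetical sketch of the port/address assignment performed by get_mons()
# in this patch; teuthology role parsing is replaced by a plain list of
# (mon_name, ip) pairs.

def assign_mon_addrs(mon_ips, mon_bind_msgr2=False, mon_bind_addrvec=False):
    mons = {}
    v1_ports = {}   # next msgr1 port per host; first mon on a host gets 6789
    v2_ports = {}   # next msgr2 port per host; first mon on a host gets 3300
    for name, ip in mon_ips:
        v1_ports[ip] = v1_ports.get(ip, 6788) + 1
        if mon_bind_msgr2:
            if ip not in v2_ports:
                # First mon on this host: default ports, so the bare IP is
                # enough and the monitor binds the v1/v2 defaults itself.
                v2_ports[ip] = 3300
                addr = ip
            else:
                # Additional mons on the same host need explicit v2+v1 ports,
                # which only an address vector can express.
                assert mon_bind_addrvec
                v2_ports[ip] += 1
                addr = 'v2:{ip}:{p2},v1:{ip}:{p1}'.format(
                    ip=ip, p2=v2_ports[ip], p1=v1_ports[ip])
        elif mon_bind_addrvec:
            addr = 'v1:{ip}:{port}'.format(ip=ip, port=v1_ports[ip])
        else:
            addr = '{ip}:{port}'.format(ip=ip, port=v1_ports[ip])
        mons[name] = addr
    return mons

if __name__ == '__main__':
    hosts = [('mon.a', '10.0.0.1'), ('mon.b', '10.0.0.1'), ('mon.c', '10.0.0.2')]
    for name, addr in sorted(assign_mon_addrs(hosts).items()):
        print('%s -> %s' % (name, addr))
    # mon.a -> 10.0.0.1:6789
    # mon.b -> 10.0.0.1:6790
    # mon.c -> 10.0.0.2:6789
    for name, addr in sorted(assign_mon_addrs(hosts, mon_bind_msgr2=True,
                                              mon_bind_addrvec=True).items()):
        print('%s -> %s' % (name, addr))
    # mon.a -> 10.0.0.1
    # mon.b -> v2:10.0.0.1:3301,v1:10.0.0.1:6790
    # mon.c -> 10.0.0.2

Only the vector forms (the v2/v1 pairs above, and the v1:... form used when mon_bind_addrvec is set without mon_bind_msgr2) require monmaptool --addv, which is why create_simple_monmap() now takes mon_bind_addrvec and passes --addv instead of --add when it is set.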