From: Shilpa Jagannath Date: Fri, 1 Mar 2019 09:02:10 +0000 (+0530) Subject: Multisite configuration modifications X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=06d96e1a4efb1403742da0ab7abd1a6aa63c8e4a;p=ceph.git Multisite configuration modifications --- diff --git a/qa/tasks/multisite_test.py b/qa/tasks/multisite_test.py index 94b09228103..ccbd955d842 100644 --- a/qa/tasks/multisite_test.py +++ b/qa/tasks/multisite_test.py @@ -9,18 +9,56 @@ import pwd import time import argparse - +""" # Test yaml to test script mapper for boto3 -tests_mapper_v2 = {'Mbuckets': 'test_Mbuckets', - 'Mbuckets_sharding': 'test_Mbuckets', - 'Mbuckets_with_Nobjects_create': 'test_Mbuckets_with_Nobjects', - 'Mbuckets_with_Nobjects_delete': 'test_Mbuckets_with_Nobjects', - 'Mbuckets_with_Nobjects_download': 'test_Mbuckets_with_Nobjects', - 'Mbuckets_with_Nobjects_sharding': 'test_Mbuckets_with_Nobjects' +tests_mapper_v2 = {'test_Mbuckets_basic': 'test_Mbuckets_basic', + 'test_Mbuckets_with_Nobjects_basic': 'test_Mbuckets_with_Nobjects_basic', + 'test_Mbuckets_with_Nobjects_delete': 'test_Mbuckets_with_Nobjects', + 'test_Mbuckets_with_Nobjects_download': 'test_Mbuckets_with_Nobjects', + 'test_Mbuckets_with_Nobjects_sharding': 'test_Mbuckets_with_Nobjects', + 'test_Mbuckets_with_Nobjects_encryption': 'test_Mbuckets_with_Nobjects', + 'test_bucket_lifecycle_config_disable': 'test_bucket_lifecycle_config_ops', + 'test_bucket_lifecycle_config_modify': 'test_bucket_lifecycle_config_ops', + 'test_bucket_lifecycle_config_read': 'test_bucket_lifecycle_config_ops', + 'test_bucket_lifecycle_config_versioning': 'test_bucket_lifecycle_config_ops', + 'test_acls': 'test_acls', + 'test_bucket_policy_delete': 'test_bucket_policy_ops', + 'test_bucket_policy_modify': 'test_bucket_policy_ops', + 'test_bucket_policy_replace': 'test_bucket_policy_ops', + 'test_bucket_request_payer': 'test_bucket_request_payer', + 'test_bucket_request_payer_download': 'test_bucket_request_payer', + 'test_dynamic_sharding_offline': 'test_dynamic_bucket_resharding', + 'test_dynamic_sharding_online': 'test_dynamic_bucket_resharding', + 'test_multitenant_access': 'test_multitenant_user_access', + 'test_storage_policy_s3': 'test_storage_policy', + 'test_storage_policy_swift': 'test_storage_policy', + 'test_swift_basic_ops': 'test_swift_basic_ops', + 'test_versioning_enable': 'test_versioning_with_objects', + 'test_versioning_objects_copy': 'test_versioning_copy_objects', + 'test_versioning_objects_delete': 'test_versioning_with_objects', + 'test_versioning_objects_enable': 'test_versioning_with_objects', + 'test_versioning_objects_suspend': 'test_versioning_with_objects', + 'test_versioning_objects_suspend_reupload': 'test_versioning_with_objects', } -def user_creation(user_config, mclient, tclient, version): +""" + + +def get_remotes(ctx): + + rgws = ctx.cluster.only(teuthology.is_type('rgw')) + haproxys = ctx.cluster.only(teuthology.is_type('haproxy')) + remotes = [] + for remote, roles_for_host in rgws.remotes.iteritems(): + remotes.append(remote) + for remote, roles_for_host in haproxys.remotes.iteritems(): + remotes.append(remote) + + return remotes + + +def user_creation(ctx, user_config, mclient, version): log.info('Create user on master client') @@ -62,8 +100,15 @@ def user_creation(user_config, mclient, tclient, version): log.info('copy user_file to target client') - if mclient != tclient: - tclient.put_file(user_file, 'user_details') +# if mclient != tclient: +# tclient.put_file(user_file, 'user_details') + + remotes = get_remotes(ctx) + + for remote in remotes: + if remote != mclient: + log.info('copy user_details to {}'.format(remote)) + remote.put_file(user_file, 'user_details') def test_data(tclient, test_name, script_name, version): @@ -123,57 +168,34 @@ def pull_io_info(ctx, config): if config is None: config = {} - mclient = ctx.multisite_test.master tclient = ctx.multisite_test.target - if mclient != tclient: - mclient.run(args=[run.Raw('sudo mv io_info.yaml io_info_2.yaml')]) + remotes = get_remotes(ctx) - clients = ctx.cluster.only(teuthology.is_type('rgw')) - for remote, roles_for_host in clients.remotes.iteritems(): + for remote in remotes: if remote != tclient: copy_file_from(tclient, remote) yield -@contextlib.contextmanager -def userexec(ctx, config): - - # Create user and copy the user_details to target client - - """ - -multisite-test.userexec: - test_dir_version: v1 - master_client: source.rgw.0 - master_config: - cluster_name: source - user_count: 3 - target_client: target.rgw.1 - """ - - log.info('starting the task') - - log.info('config %s' % config) - - if config is None: - config = {} - - assert isinstance(config, dict), \ - "task userexec only supports a dictionary for configuration" - - log.info('cloning the repo to client machines') +def cleanup(ctx): - remotes = ctx.cluster.only(teuthology.is_type('rgw')) - for remote, roles_for_host in remotes.remotes.iteritems(): + remotes = get_remotes(ctx) + for remote in remotes: cleanup = lambda x: remote.run(args=[run.Raw('sudo rm -rf %s' % x)]) soot = ['venv', 'rgw-tests', '*.json', 'Download.*', 'Download', '*.mpFile', 'x*', 'key.*', 'Mp.*', - '*.key.*', 'user_details', 'io_info.yaml', 'io_info_2.yaml'] + '*.key.*', 'user_details', 'io_info.yaml'] map(cleanup, soot) + +def clone_repo(ctx): + remotes = get_remotes(ctx) + + for remote in remotes: remote.run(args=['mkdir', 'rgw-tests']) remote.run( args=[ @@ -183,7 +205,7 @@ def userexec(ctx, config): 'git', 'clone', '-b', - 'multisite-boto3', + 'add-encryption', 'http://gitlab.cee.redhat.com/ceph/ceph-qe-scripts.git', ]) @@ -193,31 +215,59 @@ def userexec(ctx, config): 'source', 'venv/bin/activate', run.Raw(';'), - run.Raw('pip install boto boto3 names PyYaml psutil ConfigParser simplejson'), + run.Raw('pip install boto boto3 names python-swiftclient PyYaml psutil ConfigParser simplejson'), run.Raw(';'), 'deactivate']) + +@contextlib.contextmanager +def userexec(ctx, config): + + # Create user and copy the user_details to target client + + """ + -multisite-test.userexec: + test_dir_version: v1 + master_client: source.rgw.0 + master_config: + user_count: 3 + """ + + log.info('starting the task') + + log.info('config %s' % config) + + if config is None: + config = {} + + if not hasattr(ctx, 'userexec'): + ctx.userexec = argparse.Namespace() + + assert isinstance(config, dict), \ + "task userexec only supports a dictionary for configuration" + + log.info('cloning the repo to client machines') + + cleanup(ctx) + clone_repo(ctx) + master_client = config['master_client'] (mclient,) = ctx.cluster.only(master_client).remotes.iterkeys() - target_client = config['target_client'] - (tclient,) = ctx.cluster.only(target_client).remotes.iterkeys() - user_config = config['master_config'] user_data = None user_data = dict( config=dict( - cluster_name=user_config['cluster_name'], user_count=user_config['user_count'], ) ) if config['test_dir_version'] == 'v1': - user_creation(user_data, mclient, tclient, version='v1') + user_creation(ctx, user_data, mclient, version='v1') elif config['test_dir_version'] == 'v2': - user_creation(user_data, mclient, tclient, version='v2') + user_creation(ctx, user_data, mclient, version='v2') yield @@ -240,10 +290,10 @@ def execute_v2(tclient, config): # Tests using boto3 here - test_name = config['test-name'] + ".yaml" - script_name = tests_mapper_v2.get(config['test-name'], None) + ".py" + test_name = config['test_name'] + ".yaml" + script_name = config['script_name'] + ".py" - log.info('test name :%s' % config['test-name']) + log.info('test name :%s' % config['test_name']) # Execute test @@ -253,31 +303,45 @@ def execute_v2(tclient, config): @contextlib.contextmanager def task(ctx, config): + """ + - multisite-test: + test-name: test_multipart_upload_download + test_dir_version: v1 + test_client: c2.rgw.1 + target_config: + bucket_count: 5 + min_file_size: 100 + max_file_size: 200 + + - multisite-test: + test_name: test_bucket_policy_replace + script_name: test_bucket_policy_ops + test_dir_version: v2 + test_client: c1.rgw.0 + """ + log.info('starting the task') log.info('config %s' % config) - if config is None: - config = {} - assert isinstance(config, dict), \ "task multisite_test only supports a dictionary for configuration" - # Master node for metadata - - master_client = config['master_client'] - (mclient,) = ctx.cluster.only(master_client).remotes.iterkeys() - # Target node where the tests will be run. Can be primary or secondary multisite zones. - target_client = config['target_client'] + target_client = config['test_client'] (tclient,) = ctx.cluster.only(target_client).remotes.iterkeys() - ctx.multisite_test = argparse.Namespace() - ctx.multisite_test.master = mclient + if not hasattr(ctx, 'multisite_test'): + ctx.multisite_test = argparse.Namespace() + ctx.multisite_test.target = tclient ctx.multisite_test.version = config['test_dir_version'] + if not hasattr(ctx, 'userexec'): + cleanup(ctx) + clone_repo(ctx) + log.info('test_dir_version: %s' % config['test_dir_version']) if config['test_dir_version'] == 'v1': @@ -286,29 +350,4 @@ def task(ctx, config): if config['test_dir_version'] == 'v2': execute_v2(tclient, config) - try: - yield - finally: - - remotes = ctx.cluster.only(teuthology.is_type('rgw')) - for remote, roles_for_host in remotes.remotes.iteritems(): - - remote.run( - args=[ - 'source', - 'venv/bin/activate', - run.Raw(';'), - run.Raw('pip uninstall boto boto3 names PyYaml -y'), - run.Raw(';'), - 'deactivate']) - - log.info('test completed') - - log.info("Deleting repos") - - cleanup = lambda x: remote.run(args=[run.Raw('sudo rm -rf %s' % x)]) - - soot = ['venv', 'rgw-tests', '*.json', 'Download.*', 'Download', '*.mpFile', 'x*', 'key.*', 'Mp.*', - '*.key.*', 'user_details', 'io_info.yaml', 'io_info_2.yaml'] - - map(cleanup, soot) + yield diff --git a/qa/tasks/netem.py b/qa/tasks/netem.py index 7cea4546396..12e54437c73 100644 --- a/qa/tasks/netem.py +++ b/qa/tasks/netem.py @@ -1,5 +1,7 @@ """ -Task to run tests with network delay between two remotes using netem +Task to run tests with network delay between two remotes using tc and netem. +Reference:https://wiki.linuxfoundation.org/networking/netem. + """ import logging @@ -13,52 +15,60 @@ from paramiko import SSHException import socket import time import gevent +import argparse log = logging.getLogger(__name__) -def set_priority(): + +def set_priority(interface): # create a priority queueing discipline - return ['sudo', 'tc', 'qdisc', 'add', 'dev', 'eno1', 'root', 'handle', '1:', 'prio'] + return ['sudo', 'tc', 'qdisc', 'add', 'dev', interface, 'root', 'handle', '1:', 'prio'] + -def show_tc(): +def show_tc(interface): # shows tc device present - return ['sudo', 'tc', 'qdisc', 'show', 'dev', 'eno1'] + return ['sudo', 'tc', 'qdisc', 'show', 'dev', interface] -def del_tc(): - return ['sudo', 'tc', 'qdisc', 'del', 'dev', 'eno1', 'root'] +def del_tc(interface): -def cmd_prefix(): + return ['sudo', 'tc', 'qdisc', 'del', 'dev', interface, 'root'] + + +def cmd_prefix(interface): # prepare command to set delay - cmd1 = ['sudo', 'tc', 'qdisc', 'add', 'dev', 'eno1', 'parent', + cmd1 = ['sudo', 'tc', 'qdisc', 'add', 'dev', interface, 'parent', '1:1', 'handle', '2:', 'netem', 'delay'] # prepare command to change delay - cmd2 = ['sudo', 'tc', 'qdisc', 'replace', 'dev', 'eno1', 'root', 'netem', 'delay'] + cmd2 = ['sudo', 'tc', 'qdisc', 'replace', 'dev', interface, 'root', 'netem', 'delay'] # prepare command to apply filter to the matched ip/host - cmd3 = ['sudo', 'tc', 'filter', 'add', 'dev', 'eno1', + cmd3 = ['sudo', 'tc', 'filter', 'add', 'dev', interface, 'parent', '1:0', 'protocol', 'ip', 'pref', '55', 'handle', '::55', 'u32', 'match', 'ip', 'dst'] return cmd1, cmd2, cmd3 -def static_delay(remote, host, delay): - set_delay, change_delay, set_ip = cmd_prefix() +def static_delay(remote, host, interface, delay): + + """ Sets a constant delay between two hosts to emulate network delays using tc qdisc and netem""" + + set_delay, change_delay, set_ip = cmd_prefix(interface) ip = socket.gethostbyname(host.hostname) - r = remote.run(args=show_tc(), stdout=StringIO()) + r = remote.run(args=show_tc(interface), stdout=StringIO()) if r.stdout.getvalue().strip().find('refcnt') == -1: # call set_priority() func to create priority queue # if not already created(indicated by -1) log.info('Create priority queue') - remote.run(args=set_priority()) + remote.run(args=set_priority(interface)) # set static delay, with +/- 5ms jitter with normal distribution as default log.info('Setting delay to %s' % delay) @@ -69,19 +79,20 @@ def static_delay(remote, host, delay): log.info('Delay set on %s' % remote) set_ip.extend(['%s' % ip, 'flowid', '2:1']) remote.run(args=set_ip) - remote.run(args=show_tc(), stdout=StringIO()) + remote.run(args=show_tc(interface)) else: # if the device is already created, only change the delay log.info('Setting delay to %s' % delay) change_delay.extend(['%s' % delay, '5ms', 'distribution', 'normal']) remote.run(args=change_delay) - remote.run(args=show_tc(), stdout=StringIO()) + remote.run(args=show_tc(interface)) + -def variable_delay(remote, host, delay_range=[]): +def variable_delay(remote, host, interface, delay_range=[]): """ Vary delay between two values""" - set_delay, change_delay, set_ip = cmd_prefix() + set_delay, change_delay, set_ip = cmd_prefix(interface) ip = socket.gethostbyname(host.hostname) @@ -89,11 +100,11 @@ def variable_delay(remote, host, delay_range=[]): delay1 = delay_range[0] delay2 = delay_range[1] - r = remote.run(args=show_tc(), stdout=StringIO()) + r = remote.run(args=show_tc(interface), stdout=StringIO()) if r.stdout.getvalue().strip().find('refcnt') == -1: # call set_priority() func to create priority queue # if not already created(indicated by -1) - remote.run(args=set_priority()) + remote.run(args=set_priority(interface)) # set variable delay log.info('Setting varying delay') @@ -104,48 +115,50 @@ def variable_delay(remote, host, delay_range=[]): log.info('Delay set on %s' % remote) set_ip.extend(['%s' % ip, 'flowid', '2:1']) remote.run(args=set_ip) - remote.run(args=show_tc(), stdout=StringIO()) else: # if the device is already created, only change the delay log.info('Setting varying delay') change_delay.extend(['%s' % delay1, '%s' % delay2]) remote.run(args=change_delay) - remote.run(args=show_tc(), stdout=StringIO()) + + +def delete_dev(remote, interface): + + """ Delete the qdisc if present""" + + log.info('Delete tc') + r = remote.run(args=show_tc(interface), stdout=StringIO()) + if r.stdout.getvalue().strip().find('refcnt') != -1: + remote.run(args=del_tc(interface)) class Toggle: stop_event = gevent.event.Event() - def __init__(self, remote, host, interval): + def __init__(self, remote, host, interface, interval): self.remote = remote self.host = host self.interval = interval + self.interface = interface self.ip = socket.gethostbyname(self.host.hostname) - def delete_dev(self): - - """ Delete the qdisc """ - - log.info('Delete tc') - self.remote.run(args=del_tc()) - def packet_drop(self): """ Drop packets to the remote ip specified""" - _, _, set_ip = cmd_prefix() + _, _, set_ip = cmd_prefix(self.interface) - r = self.remote.run(args=show_tc(), stdout=StringIO()) + r = self.remote.run(args=show_tc(self.interface), stdout=StringIO()) if r.stdout.getvalue().strip().find('refcnt') == -1: - self.remote.run(args=set_priority()) + self.remote.run(args=set_priority(self.interface)) # packet drop to specific ip log.info('Drop all packets to %s' % self.host) set_ip.extend(['%s' % self.ip, 'action', 'drop']) self.remote.run(args=set_ip) - def link_toggle(self): + # For toggling packet drop and recovery in regular interval. # If interval is 5s, link is up for 5s and link is down for 5s @@ -161,7 +174,7 @@ class Toggle: self.stop_event.wait(timeout=self.interval) # if qdisc exist,delete it. try: - self.delete_dev() + delete_dev(self.remote, self.interface) log.info('link up') except SSHException as e: log.debug('Failed to run command') @@ -180,19 +193,27 @@ def task(ctx, config): """ - netem: clients: [c1.rgw.0] + iface: eno1 dst_client: [c2.rgw.1] delay: 10ms - netem: clients: [c1.rgw.0] + iface: eno1 dst_client: [c2.rgw.1] delay_range: [10ms, 20ms] # (min, max) - netem: clients: [rgw.1, mon.0] + iface: eno1 dst_client: [c2.rgw.1] link_toggle_interval: 10 # no unit mentioned. By default takes seconds. + - netem: + clients: [rgw.1, mon.0] + iface: eno1 + link_recover: [t1, t2] + """ @@ -200,31 +221,41 @@ def task(ctx, config): assert isinstance(config, dict), \ "please list clients to run on" + if not hasattr(ctx, 'netem'): + ctx.netem = argparse.Namespace() - dst = config.get('dst_client') - (host,) = ctx.cluster.only(dst).remotes.iterkeys() + if config.get('dst_client') is not None: + dst = config.get('dst_client') + (host,) = ctx.cluster.only(dst).remotes.iterkeys() - for role in config.get('clients'): - (remote,) = ctx.cluster.only(role).remotes.iterkeys() + for role in config.get('clients', None): + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + ctx.netem.remote = remote if config.get('delay', False): - static_delay(remote, host, config.get('delay')) + static_delay(remote, host, config.get('iface'), config.get('delay')) if config.get('delay_range', False): - variable_delay(remote, host, config.get('delay_range')) + variable_delay(remote, host, config.get('iface'), config.get('delay_range')) if config.get('link_toggle_interval', False): log.info('Toggling link for %s' % config.get('link_toggle_interval')) - toggle = Toggle(remote, host, config.get('link_toggle_interval')) + global toggle + toggle = Toggle(remote, host, config.get('iface'), config.get('link_toggle_interval')) + ctx.netem.toggle = toggle toggle.begin() - - # t = threading.Thread(target=link_toggle(remote, config.get('dst_client'), config.get('interval'))) - # t.daemon = True - # t.start() + if config.get('link_recover', False): + log.info('Recovering link') + toggle.end() + log.info('sleeping') + time.sleep(config.get('link_toggle_interval')) + delete_dev(ctx.netem.remote, config.get('iface')) try: yield finally: - if config.get('link_toggle_interval'): - toggle.end() + if ctx.config.get('link_toggle_interval') and not ctx.config.get('link_recover'): + # Ends toggle only if 'link_recover' has not been run before. + log.info('Ending toggle') + ctx.netem.toggle.end() for role in config.get('clients'): (remote,) = ctx.cluster.only(role).remotes.iterkeys() - remote.run(args=['sudo', 'tc', 'qdisc', 'del', 'eno1', 'root']) + delete_dev(remote, config.get('iface')) diff --git a/qa/tasks/new_rgw_multisite.py b/qa/tasks/new_rgw_multisite.py index 7a5a1b586a1..830cebca8bf 100644 --- a/qa/tasks/new_rgw_multisite.py +++ b/qa/tasks/new_rgw_multisite.py @@ -9,22 +9,23 @@ from teuthology import misc as teuthology from teuthology import contextutil from requests.packages.urllib3 import PoolManager from requests.packages.urllib3.util import Retry +import ConfigParser log = logging.getLogger(__name__) access_key = None secret = None + def rgwadmin(ctx, client, cmd, stdin=StringIO(), check_status=False, format='json', decode=True, log_level=logging.DEBUG): - log.info('rgwadmin: {client} : {cmd}'.format(client=client,cmd=cmd)) + log.info('rgwadmin: {client} : {cmd}'.format(client=client, cmd=cmd)) testdir = teuthology.get_testdir(ctx) cluster_name, daemon_type, client_id = teuthology.split_role(client) pre = ['sudo', - 'radosgw-admin'.format(tdir=testdir), - '--log-to-stderr', - '--cluster', cluster_name, - ] + 'radosgw-admin'.format(tdir=testdir), + '--log-to-stderr', + ] pre.extend(cmd) log.log(log_level, 'rgwadmin: cmd=%s' % pre) (remote,) = ctx.cluster.only(client).remotes.iterkeys() @@ -38,7 +39,7 @@ def rgwadmin(ctx, client, cmd, stdin=StringIO(), check_status=False, r = proc.exitstatus out = proc.stdout.getvalue() if not decode: - return (r, out) + return r, out j = None if not r and out != '': try: @@ -47,27 +48,38 @@ def rgwadmin(ctx, client, cmd, stdin=StringIO(), check_status=False, except ValueError: j = out log.log(log_level, ' raw result: %s' % j) - return (r, j) + return r, j -def extract_endpoints(ctx, role): +def extract_endpoints(ctx, roles): port = 8080 - role_endpoints = {} - remote, = ctx.cluster.only(role).remotes.iterkeys() - role_endpoints[role] = (remote.name.split('@')[1], port) - log.info('Endpoints are {role_endpoints}'.format(role_endpoints=role_endpoints)) + url_endpoint = {} + + if isinstance(roles, basestring): + roles = [roles] + + for role in roles: + remote, = ctx.cluster.only(role).remotes.iterkeys() + url_endpoint[role] = (remote.name.split('@')[1], port) + + log.info('Endpoints are {}'.format(url_endpoint)) + + url = '' + for machine, (host, port) in url_endpoint.iteritems(): + url = url + 'http://{host}:{port}'.format(host=host, port=port) + ',' + url = url[:-1] + + log.debug("endpoints: %s", url) - return role_endpoints + return url def get_config_clients(ctx, config): master_zonegroup = None master_zone = None - master_client = None target_zone = None - target_client = None zonegroups_config = config['zonegroups'] for zonegroup_config in zonegroups_config: @@ -77,13 +89,41 @@ def get_config_clients(ctx, config): if zone.get('is_master', False): mz_config = zone master_zone = mz_config.get('name') - master_client = mz_config.get('endpoints')[0] + master_clients = mz_config.get('endpoints') else: tz_config = zone target_zone = tz_config.get('name') - target_client = tz_config.get('endpoints')[0] + target_clients = tz_config.get('endpoints') + + return master_zonegroup, master_zone, master_clients, target_zone, target_clients + + +def zone_to_conf(ctx, hosts, zone_name): + + """ + Add zone entry in ceph conf file + """ + parser = ConfigParser.ConfigParser() - return master_zonegroup, master_zone, master_client, target_zone, target_client + for host in hosts: + cluster_name, _, _ = teuthology.split_role(host) + (remote,) = ctx.cluster.only(host).remotes.iterkeys() + conf_path = '/etc/ceph/ceph.conf' + conf_file = remote.get_file(conf_path, '/tmp') + config_section = 'client.rgw.{}'.format(remote.shortname) + parser.read(conf_file) + if not parser.has_section(config_section): + log.info('RGW might not be installed') + raise ConfigParser.NoSectionError + else: + parser.set(config_section, 'rgw_zone', zone_name) + + with open(conf_file, 'w') as fp: + parser.write(fp) + fp.close() + + remote.put_file(conf_file, '/tmp/ceph.conf') + remote.run(args=['sudo', 'cp', '/tmp/ceph.conf', conf_path]) def gen_access_key(): @@ -96,47 +136,80 @@ def gen_secret(): def wait_for_radosgw(ctx, client): - role_endpoints = extract_endpoints(ctx, client) - host, port = role_endpoints[client] - url = "http://%s:%d" % (host, port) + url_endpoint = extract_endpoints(ctx, client) http = PoolManager(retries=Retry(connect=8, backoff_factor=1)) - http.request('GET', url) + http.request('GET', url_endpoint) @contextlib.contextmanager -def configure_master_zonegroup_and_zones(ctx, config, master_zonegroup, master_zone, realm, master_client): +def create_zone(ctx, config, target_zone, master_zonegroup, target_clients): + + # used by addzone() and task() to configure secondary zone + + url_endpoint = extract_endpoints(ctx, target_clients) + log.info('creating zone on {}'.format(target_clients)) + + if config.get('is_read_only', False): + rgwadmin(ctx, target_clients[0], + cmd=['zone', 'create', '--rgw-zonegroup', master_zonegroup, + '--rgw-zone', target_zone, '--endpoints', url_endpoint, '--access-key', + access_key, '--secret', + secret, '--default', '--read-only'], + check_status=True) + else: + rgwadmin(ctx, target_clients[0], + cmd=['zone', 'create', '--rgw-zonegroup', master_zonegroup, + '--rgw-zone', target_zone, '--endpoints', url_endpoint, '--access-key', + access_key, '--secret', + secret, '--default'], + check_status=True) + + rgwadmin(ctx, target_clients[0], + cmd=['period', 'update', '--commit', + '--access_key', + access_key, '--secret', + secret], + check_status=True) + + zone_to_conf(ctx, target_clients, target_zone) + + yield + + +@contextlib.contextmanager +def configure_master_zonegroup_and_zones(ctx, config, master_zonegroup, master_zone, realm, master_clients): """ Create zonegroup and zone on master""" global access_key, secret access_key = gen_access_key() secret = gen_secret() - role_endpoints = extract_endpoints(ctx, master_client) - host, port = role_endpoints[master_client] - - endpoint = 'http://{host}:{port}'.format(host=host, port=port) - log.debug("endpoint: %s", endpoint) + zone_endpoint = extract_endpoints(ctx, master_clients) + log.info('client {}'.format(master_clients[0])) + zg_endpoint = extract_endpoints(ctx, master_clients[0]) - log.info('creating master zonegroup and zone on {}'.format(master_client)) - rgwadmin(ctx, master_client, + log.info('creating master zonegroup and zone on {}'.format(master_clients)) + rgwadmin(ctx, master_clients[0], cmd=['realm', 'create', '--rgw-realm', realm, '--default'], check_status=True) - rgwadmin(ctx, master_client, - cmd=['zonegroup', 'create', '--rgw-zonegroup', master_zonegroup, '--master', '--endpoints', endpoint, + rgwadmin(ctx, master_clients[0], + cmd=['zonegroup', 'create', '--rgw-zonegroup', master_zonegroup, '--master', '--endpoints', zg_endpoint, '--default'], check_status=True) - rgwadmin(ctx, master_client, + rgwadmin(ctx, master_clients[0], cmd=['zone', 'create', '--rgw-zonegroup', master_zonegroup, - '--rgw-zone', master_zone, '--endpoints', endpoint, '--access-key', + '--rgw-zone', master_zone, '--endpoints', zone_endpoint, '--access-key', access_key, '--secret', secret, '--master', '--default'], check_status=True) - rgwadmin(ctx, master_client, + rgwadmin(ctx, master_clients[0], cmd=['period', 'update', '--commit'], check_status=True) + zone_to_conf(ctx, master_clients, master_zone) + yield @@ -168,16 +241,12 @@ def pull_configuration(ctx, realm, master_client, target_client): """ Pull realm and period from master zone""" - role_endpoints = extract_endpoints(ctx, master_client) - host, port = role_endpoints[master_client] - - endpoint = 'http://{host}:{port}'.format(host=host, port=port) - log.debug("endpoint: %s", endpoint) + url_endpoint = extract_endpoints(ctx, master_client) log.info('Pulling master config information from {}'.format(master_client)) rgwadmin(ctx, target_client, cmd=['realm', 'pull', '--url', - endpoint, '--access_key', + url_endpoint, '--access_key', access_key, '--secret', secret], check_status=True) @@ -186,7 +255,7 @@ def pull_configuration(ctx, realm, master_client, target_client): cmd=['realm', 'default', '--rgw-realm', realm]) rgwadmin(ctx, target_client, - cmd=['period', 'pull', '--url', endpoint, '--access_key', + cmd=['period', 'pull', '--url', url_endpoint, '--access_key', access_key, '--secret', secret], check_status=True) @@ -194,64 +263,40 @@ def pull_configuration(ctx, realm, master_client, target_client): yield -@contextlib.contextmanager -def configure_target_zone(ctx, config, target_zone, master_zonegroup, target_client): - - role_endpoints = extract_endpoints(ctx, target_client) - host, port = role_endpoints[target_client] +def restart_rgw(ctx, role): - endpoint = 'http://{host}:{port}'.format(host=host, port=port) - log.debug("endpoint: %s", endpoint) + log.info('Restarting rgw...') + log.debug('client %r', role) + (remote,) = ctx.cluster.only(role).remotes.iterkeys() + hostname = remote.name.split('@')[1].split('.')[0] + rgw_cmd = [ + 'sudo', 'systemctl', 'restart', 'ceph-radosgw@rgw.{hostname}'.format(hostname=hostname)] - log.info('creating zone on {}'.format(target_client)) + run_cmd = list(rgw_cmd) + remote.run(args=run_cmd) - zone_config = {} + wait_for_radosgw(ctx, role) - zgs = ctx.new_rgw_multisite.config['zonegroups'] - for zg in zgs: - for zone in zg.get('zones'): - zone_config = zone - if zone_config.get('is_read_only', False): - rgwadmin(ctx, target_client, - cmd=['zone', 'create', '--rgw-zonegroup', master_zonegroup, - '--rgw-zone', target_zone, '--endpoints', endpoint, '--access-key', - access_key, '--secret', - secret, '--default', '--read-only'], - check_status=True) - else: - rgwadmin(ctx, target_client, - cmd=['zone', 'create', '--rgw-zonegroup', master_zonegroup, - '--rgw-zone', target_zone, '--endpoints', endpoint, '--access-key', - access_key, '--secret', - secret, '--default'], - check_status=True) +@contextlib.contextmanager +def check_sync_status(ctx, clients): - rgwadmin(ctx, target_client, - cmd=['period', 'update', '--commit', - '--access_key', - access_key, '--secret', - secret], - check_status=True) + """Check multisite sync status""" + log.info("Clients are {}".format(clients)) + for each_client in clients: + rgwadmin(ctx, each_client, + cmd=['sync', 'status'], + check_status=True) yield @contextlib.contextmanager -def restart_rgw(ctx, on_client): - - log.info('Restarting rgw...') - log.debug('client %r', on_client) - (remote,) = ctx.cluster.only(on_client).remotes.iterkeys() - hostname = remote.name.split('@')[1].split('.')[0] - rgw_cmd = [ - 'sudo', 'systemctl', 'restart', 'ceph-radosgw@rgw.{hostname}'.format(hostname=hostname)] - - run_cmd = list(rgw_cmd) - remote.run(args=run_cmd) +def start_rgw(ctx, on_client): - wait_for_radosgw(ctx, on_client) + for client in on_client: + restart_rgw(ctx, client) yield @@ -260,6 +305,8 @@ def restart_rgw(ctx, on_client): def failover(ctx, config): """ - new-rgw-multisite.failover: + new_master_zone: test-zone2 + new_master: c2.client.1 """ # When master is down, bring up secondary as the master zone @@ -272,18 +319,17 @@ def failover(ctx, config): assert isinstance(config, dict), \ "task only supports a dictionary for configuration" - master_zonegroup, master_zone, master_client, target_zone, target_client = \ - get_config_clients(ctx, ctx.new_rgw_multisite.config) - + new_master = config['new_master'] + zone = config['new_master_zone'] # Make secondary zone master - rgwadmin(ctx, target_client, - cmd=['zone', 'modify', '--rgw-zone', target_zone, '--master', '--default', '--access-key', + rgwadmin(ctx, new_master, + cmd=['zone', 'modify', '--rgw-zone', zone, '--master', '--default', '--access-key', access_key, '--secret', - secret], + secret, '--read-only=false'], check_status=True) # Do period commit - rgwadmin(ctx, target_client, + rgwadmin(ctx, new_master, cmd=['period', 'update', '--commit', '--access_key', access_key, '--secret', @@ -292,7 +338,7 @@ def failover(ctx, config): # Restart gateway - restart_rgw(ctx, target_client) + restart_rgw(ctx, new_master) yield @@ -313,25 +359,22 @@ def failback(ctx, config): assert isinstance(config, dict), \ "task only supports a dictionary for configuration" - master_zonegroup, master_zone, master_client, target_zone, target_client = \ + master_zonegroup, master_zone, master_clients, target_zone, target_clients = \ get_config_clients(ctx, ctx.new_rgw_multisite.config) - role_endpoints = extract_endpoints(ctx, target_client) - host, port = role_endpoints[target_client] - - endpoint = 'http://{host}:{port}'.format(host=host, port=port) + url_endpoint = extract_endpoints(ctx, target_clients[0]) # Period pull in former master zone from current master zone - rgwadmin(ctx, master_client, - cmd=['period', 'pull', '--url', endpoint, '--access_key', + rgwadmin(ctx, master_clients[0], + cmd=['period', 'pull', '--url', url_endpoint, '--access_key', access_key, '--secret', secret], check_status=True) # Make the original master zone as master - rgwadmin(ctx, master_client, + rgwadmin(ctx, master_clients[0], cmd=['zone', 'modify', '--rgw-zone', master_zone, '--master', '--default', '--access-key', access_key, '--secret', secret], @@ -339,7 +382,7 @@ def failback(ctx, config): # Do period commit - rgwadmin(ctx, master_client, + rgwadmin(ctx, master_clients[0], cmd=['period', 'update', '--commit', '--access_key', access_key, '--secret', @@ -347,8 +390,8 @@ def failback(ctx, config): check_status=True) # Restart gateway - - restart_rgw(ctx, master_client) + for client in master_clients: + restart_rgw(ctx, client) # If secondary zone was read-only before failover, explicitly set it to --read-only again. @@ -360,14 +403,14 @@ def failback(ctx, config): zone_config = zone if zone_config.get('is_read_only', False): - rgwadmin(ctx, target_client, + rgwadmin(ctx, target_clients[0], cmd=['zone', 'modify', '--rgw-zone', target_zone, '--read-only', '--access-key', access_key, '--secret', secret], check_status=True) # Do period commit - rgwadmin(ctx, target_client, + rgwadmin(ctx, target_clients[0], cmd=['period', 'update', '--commit', '--access_key', access_key, '--secret', @@ -375,17 +418,147 @@ def failback(ctx, config): check_status=True) # Restart gateway + for client in target_clients: + restart_rgw(ctx, client) + + yield - restart_rgw(ctx, target_client) + +@contextlib.contextmanager +def addzone(ctx, config): + + + # to add a new zone + + """ + new-rgw_multisite.addzone: + name: test-zone2 + is_read_only: true + endpoints: c1.client.0 + + """ + + log.info('config %s' % config) + + if config is None: + config = {} + + log.info('config is {}'.format(config)) + master_zonegroup = None + + roles = config.get('endpoints') + if isinstance(roles, basestring): + roles = [roles] + zone_name = config.get('name') + + log.info('creating zone on {}'.format(roles)) + + zgs = ctx.new_rgw_multisite.config['zonegroups'] + for zg in zgs: + if zg.get('is_master', False): + master_zonegroup = zg.get('name') + + log.info('Pull configuration from master node') + + subtasks = [ + lambda: pull_configuration(ctx=ctx, + realm=ctx.new_rgw_multisite.realm_name, + master_client=ctx.new_rgw_multisite.master_clients[0], + target_client=roles[0], + ), + ] + + subtasks.extend([ + lambda: create_zone(ctx=ctx, + config=config, + target_clients=roles, + master_zonegroup=master_zonegroup, + target_zone=zone_name) + ]) + + subtasks.extend([ + lambda: start_rgw(ctx=ctx, + on_client=roles), + ]) + + # Also restart former master client and the target client. + + subtasks.extend([ + lambda: start_rgw(ctx=ctx, + on_client=ctx.new_rgw_multisite.master_clients), + ]) + + subtasks.extend([ + lambda: start_rgw(ctx=ctx, + on_client=ctx.new_rgw_multisite.target_clients), + ]) + + subtasks.extend([ + lambda: check_sync_status(ctx=ctx, + clients=ctx.new_rgw_multisite.clients), + ]) + + with contextutil.nested(*subtasks): + yield + + +@contextlib.contextmanager +def modify_master(ctx, config, master_zonegroup, master_zone, realm, master_clients): + + """ Create zonegroup and zone on master.""" + + global access_key, secret + access_key = gen_access_key() + secret = gen_secret() + + url_endpoint = extract_endpoints(ctx, master_clients) + + log.info('creating realm {}'.format(realm)) + rgwadmin(ctx, master_clients[0], + cmd=['realm', 'create', '--rgw-realm', realm, '--default'], + check_status=True) + + rgwadmin(ctx, master_clients[0], + cmd=['zonegroup', 'rename', '--rgw-zonegroup', 'default', '--zonegroup-new-name', + master_zonegroup], check_status=True) + + rgwadmin(ctx, master_clients[0], + cmd=['zone', 'rename', '--rgw-zone', 'default', '--zone-new-name', master_zone, + '--rgw-zonegroup', master_zonegroup], + check_status=True) + + rgwadmin(ctx, master_clients[0], + cmd=['zonegroup', 'modify', '--rgw-realm', realm, '--rgw-zonegroup', master_zonegroup, '--master', + '--endpoints', url_endpoint, + '--default'], check_status=True) + + rgwadmin(ctx, master_clients[0], + cmd=['zone', 'modify', '--rgw-realm', realm, '--rgw-zonegroup', master_zonegroup, + '--rgw-zone', master_zone, '--endpoints', url_endpoint, '--access-key', + access_key, '--secret', + secret, '--master', '--default'], + check_status=True) + + rgwadmin(ctx, master_clients[0], + cmd=['period', 'update', '--commit'], + check_status=True) yield +def remove_cluster_names(clients): + + for idx, client in enumerate(clients): + clients[idx] = teuthology.ceph_role(client) + return clients + + @contextlib.contextmanager def task(ctx, config): """ - new-multisite: + migrate: true realm: name: test-realm is_default: true @@ -424,60 +597,133 @@ def task(ctx, config): ctx.new_rgw_multisite = argparse.Namespace() ctx.new_rgw_multisite.realm = realm + ctx.new_rgw_multisite.realm_name = realm_name ctx.new_rgw_multisite.zonegroups = zonegroups ctx.new_rgw_multisite.config = config - master_zonegroup, master_zone, master_client, target_zone, target_client = get_config_clients(ctx, config) + master_zonegroup, master_zone, master_clients, target_zone, target_clients = get_config_clients(ctx, config) - ctx.new_rgw_multisite.master_client = master_client - ctx.new_rgw_multisite.target_client = target_client + ctx.new_rgw_multisite.master_clients = master_clients + ctx.new_rgw_multisite.target_clients = target_clients - subtasks = [ - lambda: configure_master_zonegroup_and_zones( - ctx=ctx, - config=config, - master_zonegroup=master_zonegroup, - master_zone = master_zone, - realm=realm_name, - master_client=master_client - ), - ] + ctx.new_rgw_multisite.clients = [master_clients[0], target_clients[0]] - subtasks.extend([ - lambda: configure_user_for_client( - ctx=ctx, - master_client=master_client - ), - ]) + zone_config = {} - subtasks.extend([ - lambda: restart_rgw(ctx=ctx, on_client=master_client), - ]) + zgs = ctx.new_rgw_multisite.config['zonegroups'] + for zg in zgs: + for zone in zg.get('zones'): + zone_config = zone - subtasks.extend([ - lambda: pull_configuration(ctx=ctx, - realm=realm_name, - master_client=master_client, - target_client=target_client, - ), - ]) + # procedure for migrating from single-site to multisite - subtasks.extend([ - lambda: configure_target_zone(ctx=ctx, - config=config, - target_zone=target_zone, - master_zonegroup=master_zonegroup, - target_client=target_client, - ), - ]), + if config.get('migrate', False): - subtasks.extend([ - lambda: restart_rgw(ctx=ctx, - on_client=target_client), - ]) + subtasks = [ + lambda: modify_master( + ctx=ctx, + config=config, + master_zonegroup=master_zonegroup, + master_zone=master_zone, + realm=realm_name, + master_clients=master_clients + ), + ] - with contextutil.nested(*subtasks): - yield + subtasks.extend([ + lambda: configure_user_for_client( + ctx=ctx, + master_client=master_clients[0] + ), + ]) + + subtasks.extend([ + lambda: start_rgw(ctx=ctx, on_client=master_clients), + ]) + + subtasks.extend([ + lambda: pull_configuration(ctx=ctx, + realm=realm_name, + master_client=master_clients[0], + target_client=target_clients[0], + ), + ]) + + subtasks.extend([ + lambda: create_zone(ctx=ctx, + config=zone_config, + target_zone=target_zone, + master_zonegroup=master_zonegroup, + target_clients=target_clients, + ), + ]), + + subtasks.extend([ + lambda: start_rgw(ctx=ctx, + on_client=target_clients), + ]) + + subtasks.extend([ + lambda: check_sync_status(ctx=ctx, + clients=ctx.new_rgw_multisite.clients), + ]) + + with contextutil.nested(*subtasks): + yield + + else: + # procedure for creating a new multisite cluster + subtasks = [ + lambda: configure_master_zonegroup_and_zones( + ctx=ctx, + config=config, + master_zonegroup=master_zonegroup, + master_zone = master_zone, + realm=realm_name, + master_clients=master_clients + ), + ] + + subtasks.extend([ + lambda: configure_user_for_client( + ctx=ctx, + master_client=master_clients[0] + ), + ]) + + subtasks.extend([ + lambda: start_rgw(ctx=ctx, on_client=master_clients), + ]) + + subtasks.extend([ + lambda: pull_configuration(ctx=ctx, + realm=realm_name, + master_client=master_clients[0], + target_client=target_clients[0], + ), + ]) + + subtasks.extend([ + lambda: create_zone(ctx=ctx, + config=zone_config, + target_zone=target_zone, + master_zonegroup=master_zonegroup, + target_clients=target_clients, + ), + ]), + + subtasks.extend([ + lambda: start_rgw(ctx=ctx, + on_client=target_clients), + ]) + + subtasks.extend([ + lambda: check_sync_status(ctx=ctx, + clients=ctx.new_rgw_multisite.clients), + ]) + + with contextutil.nested(*subtasks): + yield