From: sunilkumarn417 Date: Wed, 19 Feb 2020 15:54:41 +0000 (+0530) Subject: Added system test cases - Initial tests X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5908fcd2ac0bfa89b3468840ef21e9c3fffea723;p=ceph.git Added system test cases - Initial tests Signed-off-by: sunilkumarn417 --- diff --git a/qa/suites/mixed-system-tests/single/clusters/10-node-cluster.yaml b/qa/suites/mixed-system-tests/single/clusters/10-node-cluster.yaml new file mode 100644 index 00000000000..7d247c71e83 --- /dev/null +++ b/qa/suites/mixed-system-tests/single/clusters/10-node-cluster.yaml @@ -0,0 +1,11 @@ +roles: +- [mon.a, mds.a, mgr.z] +- [mon.b, mds.b, mgr.y] +- [mon.c, mds.c, mgr.x] +- [osd.0, osd.1, osd.2] +- [osd.3, osd.4, osd.5] +- [osd.6, osd.7, osd.8] +- [osd.9, osd.10, osd.11] +- [osd.12, osd.13, osd.14] +- [rgw.0, client.0] +- [rgw.1, client.1] diff --git a/qa/suites/mixed-system-tests/single/install.yaml b/qa/suites/mixed-system-tests/single/install.yaml new file mode 100644 index 00000000000..6705c0c08f7 --- /dev/null +++ b/qa/suites/mixed-system-tests/single/install.yaml @@ -0,0 +1,3 @@ +tasks: +- ssh-keys: null +- ceph-ansible: null diff --git a/qa/suites/mixed-system-tests/single/tasks/basic.yaml b/qa/suites/mixed-system-tests/single/tasks/basic.yaml new file mode 100644 index 00000000000..06b6d288a26 --- /dev/null +++ b/qa/suites/mixed-system-tests/single/tasks/basic.yaml @@ -0,0 +1,94 @@ +tasks: +- parallel: + - mixed_system_test.rgw_ios: + test: Mbuckets_with_Nobjects + script: test_Mbuckets_with_Nobjects.py + clients: ['client.0'] + config: + user_count: 5 + bucket_count: 5 + objects_count: 20 + objects_size_range: + min: 100 + max: 200 + test_ops: + create_bucket: true + create_object: true + download_object: true + delete_bucket_object: true + sharding: + enable: false + max_shards: 0 + compression: + enable: false + type: zlib + - mixed_system_test.rgw_ios: + test: Mbuckets_with_Nobjects_multipart + script: test_Mbuckets_with_Nobjects.py + clients: ['client.0'] + config: + user_count: 2 + bucket_count: 5 + objects_count: 10 + objects_size_range: + min: 1000 + max: 1500 + test_ops: + create_bucket: true + create_object: true + download_object: true + delete_bucket_object: true + upload_type: multipart + sharding: + enable: false + max_shards: 0 + compression: + enable: false + type: zlib + - mixed_system_test.rgw_ios: + test: versioning_ops + script: test_versioning_with_objects.py + clients: ['client.0'] + config: + user_count: 2 + bucket_count: 2 + objects_count: 10 + version_count: 5 + objects_size_range: + min: 50 + max: 80 + test_ops: + enable_version: true + suspend_version: true + copy_to_version: false + delete_object_versions: false + upload_after_suspend: true + - mixed_system_test.rgw_ios: + test: versioning_ops_delete + script: test_versioning_with_objects.py + clients: ['client.0'] + config: + user_count: 2 + bucket_count: 2 + objects_count: 10 + version_count: 5 + objects_size_range: + min: 50 + max: 80 + test_ops: + enable_version: true + suspend_version: false + copy_to_version: false + delete_object_versions: true + upload_after_suspend: false + - rbd_fio: + client.0: + fio-io-size: 100% + formats: [2] + features: [[layering],[layering,exclusive-lock,object-map]] + io-engine: rbd + test-clone-io: 1 + rw: randrw + runtime: 600 + - mixed_system_test.restart_tests: + daemons: ["mgr", "mon", "osd"] diff --git a/qa/tasks/mixed_system_test.py b/qa/tasks/mixed_system_test.py new file mode 100644 index 00000000000..83ce2750c0d --- /dev/null +++ b/qa/tasks/mixed_system_test.py @@ -0,0 +1,69 @@ +import contextlib +import logging + +from tasks.mixed_system_tests import system +from tasks.mixed_system_tests import ios + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def rgw_ios(ctx, config): + """ + Task to run RGW IO's using ceph-QE-scripts repo. + Args: + ctx: cluster obj + config: test data + example: + tasks: + - rgw-system-test: + test: + script: | default value is .py + test_version: | ex: v1 or v2, default value is v2 + clients: | ex: [client.0, client.1] + default value is ['client.0] + config: | + default values is the yaml file config from ceph-qe-scripts + """ + rgw_ios_internal = ios.rgw_ios(ctx, config) + try: + rgw_ios_internal.__enter__() + yield + except Exception as err: + log.info(err) + assert False, err + finally: + rgw_ios_internal.__exit__() + + +@contextlib.contextmanager +def restart_tests(ctx, config): + """ + Perform restart test scenarios based on the daemon sequentially + 1) stop daemon. + a. verify IO & cluster health. + b. start daemon and wait for cluster status to be healthy. + 2) restart daemon. + a. verify IO & cluster health. + b. wait for cluster status to be healthy. + 3) reboot daemon node. + a. verify IO & cluster health. + b. wait for node up & running, and cluster status to be healthy. + Args: + ctx: context obj + config: test configuration + example: + mixed_system_test.restart_tests: + config: + daemon: ["mon", "mgr", "mds"] + """ + daemons = config.get('daemons') + try: + for daemon in daemons: + assert system.ceph_daemon_system_test(ctx, daemon) + log.info("{} completed".format(daemon)) + yield + except Exception as err: + assert False, err + finally: + log.info("Daemon(s) Service system tests completed") diff --git a/qa/tasks/mixed_system_tests/__init__.py b/qa/tasks/mixed_system_tests/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/tasks/mixed_system_tests/constants.py b/qa/tasks/mixed_system_tests/constants.py new file mode 100644 index 00000000000..1533d685611 --- /dev/null +++ b/qa/tasks/mixed_system_tests/constants.py @@ -0,0 +1,19 @@ +""" +Add your constants here + +""" + +cephqe_scripts = { + "DIR": {"v1": {"script": "rgw/v1/tests/s3/", "config": "rgw/v1/tests/s3/yamls"}, + "v2": {"script": "rgw/v2/tests/s3_swift/", "config": "rgw/v2/tests/s3_swift/configs"} + }, + "MASTER_BRANCH": "master", + "REPO_NAME": "ceph-qe-scripts", + "WIP_BRANCH": None +} + +HEALTH = { + "error": "HEALTH_ERR", + "warn": "HEALTH_WARN", + "good": "HEALTH_OK" +} diff --git a/qa/tasks/mixed_system_tests/ios.py b/qa/tasks/mixed_system_tests/ios.py new file mode 100644 index 00000000000..d55b70c438c --- /dev/null +++ b/qa/tasks/mixed_system_tests/ios.py @@ -0,0 +1,131 @@ +""" +Component IOs +""" + +import yaml +import logging +import os +import pwd +import time + +from teuthology.orchestra import run +from constants import cephqe_scripts + +log = logging.getLogger(__name__) + + +class rgw_ios: + """ + RGW IOS using ceph-qe-scripts + """ + + def __init__(self, ctx, config): + self.ctx = ctx + self.config = config + + def __enter__(self): + log.info('starting rgw-tests') + log.info('config %s' % self.config) + if self.config is None: + self.config = {} + assert isinstance(self.config, dict), \ + "task set-repo only supports a dictionary for configuration" + config_file_name = self.config['test'] + ".yaml" + log.info('test_version: %s' % self.config.get('test_version', 'v2')) + log.info('test: %s' % self.config['test']) + log.info('script: %s' % self.config.get('script', self.config['test'] + ".py")) + test_root_dir = self.config['test'] + "_%d" % int(time.time()) + test_venv = os.path.join(test_root_dir, "venv") + script = os.path.join(cephqe_scripts['REPO_NAME'], + cephqe_scripts['DIR'][self.config.get('test_version', 'v2')]['script'], + self.config.get('script', self.config['test'] + ".py")) + config_file = os.path.join(cephqe_scripts['REPO_NAME'], + cephqe_scripts['DIR'][self.config.get('test_version', 'v2')]['config'], + config_file_name) + log.info('script: %s' % script) + log.info('config_file: %s' % config_file) + self.soot = [test_venv, test_root_dir, 'io_info.yaml', '*.json', 'Download.*', + 'Download', '*.mpFile', 'x*', 'key.*', 'Mp.*', '*.key.*'] + self.cleanup = lambda x: remote.run(args=[run.Raw('sudo rm -rf %s' % x)]) + log.info('listing all clients: %s' % self.config.get('clients')) + for role in self.config.get('clients', ['client.0']): + wip_branch = cephqe_scripts["WIP_BRANCH"] + master_branch = cephqe_scripts["MASTER_BRANCH"] + assert isinstance(role, basestring) + prefix = 'client.' + assert role.startswith(prefix) + id_ = role[len(prefix):] + (remote,) = self.ctx.cluster.only(role).remotes.iterkeys() + map(self.cleanup, self.soot) + remote.run(args=['mkdir', test_root_dir]) + log.info('cloning the repo to %s' % remote.hostname) + remote.run( + args=[ + 'cd', + '%s' % test_root_dir, + run.Raw(';'), + 'git', + 'clone', + 'https://github.com/red-hat-storage/ceph-qe-scripts.git', + '-b', + '%s' % master_branch if wip_branch is None else wip_branch + ]) + if self.config.get('config', None) is not None: + test_config = {'config': self.config.get('config')} + log.info('config: %s' % test_config) + log.info('creating configuration from data: %s' % test_config) + local_file = os.path.join('/tmp/', + config_file_name + + "_" + str(os.getpid()) + + pwd.getpwuid(os.getuid()).pw_name) + with open(local_file, 'w') as outfile: + outfile.write(yaml.dump(test_config, default_flow_style=False)) + out = remote.run(args=[run.Raw('sudo echo $HOME')], + wait=False, + stdout=run.PIPE) + out = out.stdout.read().strip() + conf_file = os.path.join(out, test_root_dir, config_file) + log.info('local_file: %s' % local_file) + log.info('config_file: %s' % conf_file) + log.info('copying temp yaml to the client node') + remote.put_file(local_file, conf_file) + remote.run(args=['ls', '-lt', os.path.dirname(conf_file)]) + remote.run(args=['cat', conf_file]) + os.remove(local_file) + remote.run(args=['python3', '-m', 'venv', test_venv]) + remote.run( + args=[ + 'source', + '{}/bin/activate'.format(test_venv), + run.Raw(';'), + run.Raw('pip3 install boto boto3 names PyYaml ConfigParser'), + run.Raw(';'), + 'deactivate']) + + time.sleep(60) + log.info('trying to restart rgw service after sleep 60 secs') + out = remote.run(args=[run.Raw('sudo systemctl is-active ceph-radosgw.target')], + wait=False, + stdout=run.PIPE) + try: + out = out.stdout.read().strip() + except AttributeError: + out = "inactive" + if "inactive" in out: + log.info('Restarting RGW service') + remote.run(args=[run.Raw('sudo systemctl restart ceph-radosgw.target')]) + log.info('starting the tests after sleep of 60 secs') + time.sleep(60) + remote.run( + args=[run.Raw('sudo cd %s ' % test_root_dir)]) + remote.run(args=[ + run.Raw('cd %s; sudo venv/bin/python3 %s -c %s ' % (test_root_dir, + script, + config_file))]) + + def __exit__(self): + for role in self.config.get('clients', ['client.0']): + (remote,) = self.ctx.cluster.only(role).remotes.iterkeys() + log.info('Test completed') + log.info("Deleting leftovers") + map(self.cleanup, self.soot) diff --git a/qa/tasks/mixed_system_tests/system.py b/qa/tasks/mixed_system_tests/system.py new file mode 100644 index 00000000000..a1a7b6543f7 --- /dev/null +++ b/qa/tasks/mixed_system_tests/system.py @@ -0,0 +1,168 @@ +""" +System tests +""" +import logging +from time import sleep +from constants import HEALTH + +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +def check_service_status(ctx, dstate, **args): + """ + check service status and cluster health_status + Args: + ctx: ceph context obj + dstate: daemon state obj + args: arguments + (ex., timeout: 120(default) + state: Health states list (ex., [HEALTH_ERR, HEALTH_WARN]) + exit_status: exit status + check: true) + """ + try: + # Check daemon restart/start status + timeout = 120 + interval = 5 + + if args.get('timeout'): + timeout = args['timeout'] + + iterations = timeout / interval + exit_status = args.get('exit_status') + + while iterations: + log.info("Check {} {} daemon status".format(dstate.role, + dstate.id_)) + if dstate.check_status() is not exit_status: + log.warn("{} {} is still not {}".format(dstate.role, + dstate.id_, exit_status)) + sleep(interval) + iterations -= 1 + continue + break + else: + assert False + + # check cluster health + cluster = ctx.managers.keys()[0] + check_status = args.get('check_status', False) + check_key = args.get('check_keys') + health_state = args.get('state') + + while timeout: + sleep(interval) + timeout -= interval + cluster_status = ctx.managers[cluster].raw_cluster_status() + health = cluster_status.get('health') + status = health['status'] + checks = health['checks'] + + try: + if check_status: + assert status in health_state, \ + "[ {} ] not found in health status {}".format(health_state, status) + log.info(" Cluster health status : {} as expected".format(status)) + if check_key: + check_key = [check_key] if not isinstance(check_key, list) else check_key + + for chk in check_key: + assert chk.upper() in checks, \ + "[ {} ] not found in health checks {}".format(chk, checks) + log.info("[ {} ] found in cluster health checks as expected".format(chk)) + log.info(" Cluster health status : {}".format(checks)) + return health + except AssertionError as err: + log.warn(err) + log.warn("Retrying with {} seconds left".format(timeout)) + continue + else: + assert False, "[ {} ] not found in health checks".format(health_state) + except AssertionError: + assert False + + +def reboot_node(dstate, **args): + """ + Reboot daemon node + Args: + dstate: daemon dstate + args: reboot arguments(ex., timeout=300, interval=30) + """ + timeout = 600 + interval = 30 + + if args.get('timeout'): + timeout = args['timeout'] + if args.get('interval'): + interval = args['interval'] + + try: + # reboot node + dstate.remote.run(args=["sudo", "shutdown", "-r", "now", run.Raw("&")]) + + # wait for ssh reconnection + assert dstate.remote.reconnect(timeout=timeout, sleep_time=interval),\ + " [ {} ] Reboot failed".format(dstate.id_) + log.info(" [ {} ] Reboot successful".format(dstate.id_)) + return True + except AssertionError as err: + assert False, err + + +def ceph_daemon_system_test(ctx, daemon): + """ + Perform sequential actions on daemon. + 1) stop daemon, check IO and cluster status + 2) re/start daemon, check IO and cluster status + 3) reboot node, check IO and cluster + Args: + ctx: ceph context obj + daemon: ceph daemon + """ + daemon = "ceph.%s" % daemon.lower() \ + if not daemon.lower().startswith("ceph.") else daemon + + kwargs = { + "timeout": 120, + "exit_status": None, + "state": None, + "check_status": True, + "verify_status": None, + "check_keys": None + } + + try: + # Get daemon nodes with SystemDState obj from ctx + daemons = ctx.daemons.daemons.get(daemon) + for name, dstate in daemons.items(): + # stop and verify the cluster status + dstate.stop() + kwargs['exit_status'] = 0 + kwargs['state'] = [HEALTH['warn']] + kwargs['check_keys'] = "{}_down".format(dstate.daemon_type) + + check_service_status(ctx, dstate, **kwargs) + + # start and verify the cluster status + dstate.restart() + kwargs['exit_status'] = None + kwargs['state'] = [HEALTH['warn'], HEALTH['good']] + kwargs['check_keys'] = None + check_service_status(ctx, dstate, **kwargs) + + # restart daemon and verify cluster status + dstate.restart() + check_service_status(ctx, dstate, **kwargs) + + # reboot daemon node and verify cluster status + reboot_node(dstate, timeout=600, interval=30) + log.info("[ ({}, {}) ] daemon system tests Completed".format(daemon, dstate.id_)) + return True + except KeyError as err: + log.error("No {}(s) found".format(daemon)) + assert False, err + finally: + log.info("Daemon service system tests Completed")