git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
Added system test cases - Initial tests
author sunilkumarn417 <sunnagar@redhat.com>
Wed, 19 Feb 2020 15:54:41 +0000 (21:24 +0530)
committer sunilkumarn417 <sunnagar@redhat.com>
Wed, 26 Feb 2020 13:15:49 +0000 (18:45 +0530)
Signed-off-by: sunilkumarn417 <sunnagar@redhat.com>
qa/suites/mixed-system-tests/single/clusters/10-node-cluster.yaml [new file with mode: 0644]
qa/suites/mixed-system-tests/single/install.yaml [new file with mode: 0644]
qa/suites/mixed-system-tests/single/tasks/basic.yaml [new file with mode: 0644]
qa/tasks/mixed_system_test.py [new file with mode: 0644]
qa/tasks/mixed_system_tests/__init__.py [new file with mode: 0644]
qa/tasks/mixed_system_tests/constants.py [new file with mode: 0644]
qa/tasks/mixed_system_tests/ios.py [new file with mode: 0644]
qa/tasks/mixed_system_tests/system.py [new file with mode: 0644]

diff --git a/qa/suites/mixed-system-tests/single/clusters/10-node-cluster.yaml b/qa/suites/mixed-system-tests/single/clusters/10-node-cluster.yaml
new file mode 100644 (file)
index 0000000..7d247c7
--- /dev/null
@@ -0,0 +1,11 @@
+roles:
+- [mon.a, mds.a, mgr.z]
+- [mon.b, mds.b, mgr.y]
+- [mon.c, mds.c, mgr.x]
+- [osd.0, osd.1, osd.2]
+- [osd.3, osd.4, osd.5]
+- [osd.6, osd.7, osd.8]
+- [osd.9, osd.10, osd.11]
+- [osd.12, osd.13, osd.14]
+- [rgw.0, client.0]
+- [rgw.1, client.1]
diff --git a/qa/suites/mixed-system-tests/single/install.yaml b/qa/suites/mixed-system-tests/single/install.yaml
new file mode 100644 (file)
index 0000000..6705c0c
--- /dev/null
@@ -0,0 +1,3 @@
+tasks:
+- ssh-keys: null
+- ceph-ansible: null
diff --git a/qa/suites/mixed-system-tests/single/tasks/basic.yaml b/qa/suites/mixed-system-tests/single/tasks/basic.yaml
new file mode 100644 (file)
index 0000000..06b6d28
--- /dev/null
@@ -0,0 +1,94 @@
+tasks:
+- parallel:
+    - mixed_system_test.rgw_ios:
+        test: Mbuckets_with_Nobjects
+        script: test_Mbuckets_with_Nobjects.py
+        clients: ['client.0']
+        config:
+            user_count: 5
+            bucket_count: 5
+            objects_count: 20
+            objects_size_range:
+                min: 100
+                max: 200
+            test_ops:
+                create_bucket: true
+                create_object: true
+                download_object: true
+                delete_bucket_object: true
+                sharding:
+                    enable: false
+                    max_shards: 0
+                compression:
+                    enable: false
+                    type: zlib
+    - mixed_system_test.rgw_ios:
+        test: Mbuckets_with_Nobjects_multipart
+        script: test_Mbuckets_with_Nobjects.py
+        clients: ['client.0']
+        config:
+            user_count: 2
+            bucket_count: 5
+            objects_count: 10
+            objects_size_range:
+                min: 1000
+                max: 1500
+            test_ops:
+                create_bucket: true
+                create_object: true
+                download_object: true
+                delete_bucket_object: true
+                upload_type: multipart
+                sharding:
+                    enable: false
+                    max_shards: 0
+                compression:
+                    enable: false
+                    type: zlib
+    - mixed_system_test.rgw_ios:
+        test: versioning_ops
+        script: test_versioning_with_objects.py
+        clients: ['client.0']
+        config:
+            user_count: 2
+            bucket_count: 2
+            objects_count: 10
+            version_count: 5
+            objects_size_range:
+                min: 50
+                max: 80
+            test_ops:
+                enable_version: true
+                suspend_version: true
+                copy_to_version: false
+                delete_object_versions: false
+                upload_after_suspend: true
+    - mixed_system_test.rgw_ios:
+        test: versioning_ops_delete
+        script: test_versioning_with_objects.py
+        clients: ['client.0']
+        config:
+            user_count: 2
+            bucket_count: 2
+            objects_count: 10
+            version_count: 5
+            objects_size_range:
+                min: 50
+                max: 80
+            test_ops:
+                enable_version: true
+                suspend_version: false
+                copy_to_version: false
+                delete_object_versions: true
+                upload_after_suspend: false
+    - rbd_fio:
+        client.0:
+            fio-io-size: 100%
+            formats: [2]
+            features: [[layering],[layering,exclusive-lock,object-map]]
+            io-engine: rbd
+            test-clone-io: 1
+            rw: randrw
+            runtime: 600
+    - mixed_system_test.restart_tests:
+        daemons: ["mgr", "mon", "osd"]
diff --git a/qa/tasks/mixed_system_test.py b/qa/tasks/mixed_system_test.py
new file mode 100644 (file)
index 0000000..83ce275
--- /dev/null
@@ -0,0 +1,69 @@
+import contextlib
+import logging
+
+from tasks.mixed_system_tests import system
+from tasks.mixed_system_tests import ios
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def rgw_ios(ctx, config):
+    """
+    Run RGW IO through ios.rgw_ios (scripts come from the ceph-qe-scripts repo).
+    Failures are logged and re-raised as AssertionError; cleanup always runs.
+    Args:
+        ctx: cluster obj
+        config: test data (keys as in the example below)
+    example:
+    tasks:
+    - mixed_system_test.rgw_ios:
+        test: <test-name>
+        script: <script-name>        | default value is <test-name>.py
+        test_version: <test-version> | ex: v1 or v2, default value is v2
+        clients: <clients list>      | ex: [client.0, client.1]
+                                        default value is ['client.0']
+        config: <test configuration> | default is the yaml file from ceph-qe-scripts
+    """
+    rgw_ios_internal = ios.rgw_ios(ctx, config)
+    try:
+        rgw_ios_internal.__enter__()  # sets up each client and runs the script
+        yield
+    except Exception as err:
+        log.info(err)
+        assert False, err
+    finally:
+        rgw_ios_internal.__exit__()  # removes the leftovers on the clients
+
+
+@contextlib.contextmanager
+def restart_tests(ctx, config):
+    """
+    Run the daemon service system test for each configured daemon type,
+    one type after another, before yielding to the rest of the run.
+    Each type is handed to system.ceph_daemon_system_test, which:
+        1) stops the daemon and checks daemon and cluster health status.
+        2) starts the daemon again and re-checks the status.
+        3) restarts the daemon and re-checks the status.
+        4) reboots the daemon node and waits for ssh reconnection.
+    No IO is checked here; IO comes from tasks run in parallel with this one.
+    A false result or any exception fails the task with AssertionError.
+    Args:
+        ctx: context obj
+        config: test configuration; the 'daemons' key lists the daemon
+            types to test.
+    example:
+        tasks:
+        - mixed_system_test.restart_tests:
+            daemons: ["mon", "mgr", "mds"]
+    """
+    daemons = config.get('daemons')
+    try:
+        for daemon in daemons:
+            assert system.ceph_daemon_system_test(ctx, daemon)
+            log.info("{} completed".format(daemon))
+        yield
+    except Exception as err:
+        assert False, err
+    finally:
+        log.info("Daemon(s) Service system tests completed")
diff --git a/qa/tasks/mixed_system_tests/__init__.py b/qa/tasks/mixed_system_tests/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/tasks/mixed_system_tests/constants.py b/qa/tasks/mixed_system_tests/constants.py
new file mode 100644 (file)
index 0000000..1533d68
--- /dev/null
@@ -0,0 +1,19 @@
+"""
+Add your constants here
+
+"""
+
+cephqe_scripts = {
+    "DIR": {"v1": {"script": "rgw/v1/tests/s3/", "config": "rgw/v1/tests/s3/yamls"},
+            "v2": {"script": "rgw/v2/tests/s3_swift/", "config": "rgw/v2/tests/s3_swift/configs"}
+    },  # "DIR": script and config directories per test_version (v1/v2)
+    "MASTER_BRANCH": "master",  # branch cloned when WIP_BRANCH is None
+    "REPO_NAME": "ceph-qe-scripts",  # first component of script/config paths
+    "WIP_BRANCH": None  # when set, cloned instead of MASTER_BRANCH
+}
+
+HEALTH = {  # short names for the cluster health status strings
+    "error": "HEALTH_ERR",
+    "warn": "HEALTH_WARN",
+    "good": "HEALTH_OK"
+}
diff --git a/qa/tasks/mixed_system_tests/ios.py b/qa/tasks/mixed_system_tests/ios.py
new file mode 100644 (file)
index 0000000..d55b70c
--- /dev/null
@@ -0,0 +1,131 @@
+"""
+Component IOs
+"""
+
+import yaml
+import logging
+import os
+import pwd
+import time
+
+from teuthology.orchestra import run
+from constants import cephqe_scripts
+
+log = logging.getLogger(__name__)
+
+
+class rgw_ios:
+    """
+    RGW IOS using ceph-qe-scripts: __enter__ prepares every client and runs
+    the test script there, __exit__ deletes the files the run leaves behind.
+    """
+
+    def __init__(self, ctx, config):
+        self.ctx = ctx
+        self.config = config
+        self.soot = []  # leftovers to delete; empty until __enter__ fills it
+
+    def __enter__(self):
+        """Clone the repo, build a venv and run the test script on each client."""
+        log.info('starting rgw-tests')
+        log.info('config %s' % self.config)
+        if self.config is None:
+            self.config = {}
+        assert isinstance(self.config, dict), \
+            "task rgw_ios only supports a dictionary for configuration"
+        config_file_name = self.config['test'] + ".yaml"
+        log.info('test_version: %s' % self.config.get('test_version', 'v2'))
+        log.info('test: %s' % self.config['test'])
+        log.info('script: %s' % self.config.get('script', self.config['test'] + ".py"))
+        test_root_dir = self.config['test'] + "_%d" % int(time.time())
+        test_venv = os.path.join(test_root_dir, "venv")
+        script = os.path.join(cephqe_scripts['REPO_NAME'],
+                              cephqe_scripts['DIR'][self.config.get('test_version', 'v2')]['script'],
+                              self.config.get('script', self.config['test'] + ".py"))
+        config_file = os.path.join(cephqe_scripts['REPO_NAME'],
+                                   cephqe_scripts['DIR'][self.config.get('test_version', 'v2')]['config'],
+                                   config_file_name)
+        log.info('script: %s' % script)
+        log.info('config_file: %s' % config_file)
+        self.soot = [test_venv, test_root_dir, 'io_info.yaml', '*.json', 'Download.*',
+                     'Download', '*.mpFile', 'x*', 'key.*', 'Mp.*', '*.key.*']
+        self.cleanup = lambda x: remote.run(args=[run.Raw('sudo rm -rf %s' % x)])
+        log.info('listing all clients: %s' % self.config.get('clients'))
+        for role in self.config.get('clients', ['client.0']):
+            wip_branch = cephqe_scripts["WIP_BRANCH"]
+            master_branch = cephqe_scripts["MASTER_BRANCH"]
+            assert isinstance(role, basestring)
+            prefix = 'client.'
+            assert role.startswith(prefix)
+            (remote,) = self.ctx.cluster.only(role).remotes.iterkeys()
+            for leftover in self.soot:  # start from a clean client
+                self.cleanup(leftover)
+            remote.run(args=['mkdir', test_root_dir])
+            log.info('cloning the repo to %s' % remote.hostname)
+            remote.run(
+                args=[
+                    'cd',
+                    '%s' % test_root_dir,
+                    run.Raw(';'),
+                    'git',
+                    'clone',
+                    'https://github.com/red-hat-storage/ceph-qe-scripts.git',
+                    '-b',
+                    '%s' % master_branch if wip_branch is None else wip_branch
+                ])
+            if self.config.get('config', None) is not None:
+                test_config = {'config': self.config.get('config')}
+                log.info('config: %s' % test_config)
+                log.info('creating configuration from data: %s' % test_config)
+                local_file = os.path.join('/tmp/',
+                                          config_file_name +
+                                          "_" + str(os.getpid()) +
+                                          pwd.getpwuid(os.getuid()).pw_name)
+                with open(local_file, 'w') as outfile:
+                    outfile.write(yaml.dump(test_config, default_flow_style=False))
+                out = remote.run(args=[run.Raw('sudo echo $HOME')],
+                                 wait=False, stdout=run.PIPE)
+                out = out.stdout.read().strip()
+                conf_file = os.path.join(out, test_root_dir, config_file)
+                log.info('local_file: %s' % local_file)
+                log.info('config_file: %s' % conf_file)
+                log.info('copying temp yaml to the client node')
+                remote.put_file(local_file, conf_file)
+                remote.run(args=['ls', '-lt', os.path.dirname(conf_file)])
+                remote.run(args=['cat', conf_file])
+                os.remove(local_file)
+            remote.run(args=['python3', '-m', 'venv', test_venv])
+            remote.run(args=[
+                'source',
+                '{}/bin/activate'.format(test_venv),
+                run.Raw(';'),
+                run.Raw('pip3 install boto boto3 names PyYaml ConfigParser'),
+                run.Raw(';'),
+                'deactivate'])
+
+            time.sleep(60)
+            log.info('trying to restart rgw service after sleep 60 secs')
+            out = remote.run(args=[run.Raw('sudo systemctl is-active ceph-radosgw.target')],
+                             wait=False, stdout=run.PIPE)
+            try:
+                out = out.stdout.read().strip()
+            except AttributeError:
+                out = "inactive"
+            if "inactive" in out:
+                log.info('Restarting RGW service')
+                remote.run(args=[run.Raw('sudo systemctl restart ceph-radosgw.target')])
+            log.info('starting the tests after sleep of 60 secs')
+            time.sleep(60)
+            remote.run(args=[run.Raw('sudo cd %s ' % test_root_dir)])
+            remote.run(args=[
+                run.Raw('cd %s; sudo venv/bin/python3 %s -c %s ' %
+                        (test_root_dir, script, config_file))])
+
+    def __exit__(self, *exc_info):
+        """Delete the leftovers on every client; exceptions are not suppressed."""
+        for role in (self.config or {}).get('clients', ['client.0']):
+            (remote,) = self.ctx.cluster.only(role).remotes.iterkeys()
+            log.info('Test completed')
+            log.info("Deleting leftovers")
+            for leftover in self.soot:  # not self.cleanup: it targets __enter__'s last remote
+                remote.run(args=[run.Raw('sudo rm -rf %s' % leftover)])
diff --git a/qa/tasks/mixed_system_tests/system.py b/qa/tasks/mixed_system_tests/system.py
new file mode 100644 (file)
index 0000000..a1a7b65
--- /dev/null
@@ -0,0 +1,168 @@
+"""
+System  tests
+"""
+import logging
+from time import sleep
+from constants import HEALTH
+
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+
+def check_service_status(ctx, dstate, **args):
+    """
+    Wait for a daemon to reach the expected state, then poll cluster health.
+    Args:
+        ctx: ceph context obj
+        dstate: daemon state obj
+        args: keyword arguments
+            timeout: approximate budget of each wait loop (default 120)
+            exit_status: value dstate.check_status() must return
+            check_status: verify the cluster health status (default False)
+            state: accepted health states (ex., [HEALTH_ERR, HEALTH_WARN])
+            check_keys: health check name(s) that must be reported
+    Returns:
+        the 'health' section of the cluster status
+    Raises:
+        AssertionError: daemon or cluster did not reach the expected state
+    """
+    timeout = args.get('timeout') or 120
+    interval = 5
+    exit_status = args.get('exit_status')
+
+    # Check daemon restart/start status; poll at least once and compare with
+    # "> 0" so a timeout that is no multiple of the interval still ends.
+    iterations = max(1, timeout // interval)
+    while iterations > 0:
+        log.info("Check {} {} daemon status".format(dstate.role,
+                                                    dstate.id_))
+        if dstate.check_status() == exit_status:
+            break
+        log.warn("{} {} is still not {}".format(dstate.role,
+                                                dstate.id_, exit_status))
+        sleep(interval)
+        iterations -= 1
+    else:
+        assert False, "{} {} did not reach status {}".format(
+            dstate.role, dstate.id_, exit_status)
+
+    # check cluster health
+    cluster = next(iter(ctx.managers))
+    check_status = args.get('check_status', False)
+    check_key = args.get('check_keys')
+    health_state = args.get('state')
+    if check_key and not isinstance(check_key, list):
+        check_key = [check_key]
+
+    # "> 0" rather than truthiness: the countdown may step past zero, and a
+    # negative value would otherwise keep the loop running forever.
+    while timeout > 0:
+        sleep(interval)
+        timeout -= interval
+        cluster_status = ctx.managers[cluster].raw_cluster_status()
+        health = cluster_status.get('health')
+        status = health['status']
+        checks = health['checks']
+
+        try:
+            if check_status:
+                assert status in health_state, \
+                    "[ {} ] not found in health status {}".format(health_state, status)
+                log.info(" Cluster health status : {} as expected".format(status))
+                for chk in check_key or []:
+                    assert chk.upper() in checks, \
+                        "[ {} ] not found in health checks {}".format(chk, checks)
+                    log.info("[ {} ] found in cluster health checks as expected".format(chk))
+                if check_key:
+                    log.info(" Cluster health status : {}".format(checks))
+            return health
+        except AssertionError as err:
+            log.warn(err)
+            log.warn("Retrying with {} seconds left".format(timeout))
+    else:
+        assert False, "[ {} ] not found in health checks".format(health_state)
+
+
+def reboot_node(dstate, **args):
+    """
+    Reboot the daemon's node; return True once ssh reconnects, else assert.
+    Args:
+        dstate: daemon dstate
+        args: reboot arguments; timeout (default 600), interval (default 30)
+    """
+    timeout = 600
+    interval = 30
+
+    if args.get('timeout'):
+        timeout = args['timeout']
+    if args.get('interval'):
+        interval = args['interval']
+
+    try:
+        # reboot node; NOTE(review): "&" presumably lets the ssh command return
+        dstate.remote.run(args=["sudo", "shutdown", "-r", "now", run.Raw("&")])
+
+        # wait for ssh reconnection; a falsy result fails the assert below
+        assert dstate.remote.reconnect(timeout=timeout, sleep_time=interval),\
+            " [ {} ] Reboot failed".format(dstate.id_)
+        log.info(" [ {} ] Reboot successful".format(dstate.id_))
+        return True
+    except AssertionError as err:
+        assert False, err
+
+
+def ceph_daemon_system_test(ctx, daemon):
+    """
+    Run the stop/start/restart/reboot sequence on every daemon of a type.
+        1) stop daemon, check daemon and cluster status
+        2) start, then restart daemon, checking status after each
+        3) reboot node and wait for ssh reconnection
+    Args:
+        ctx: ceph context obj
+        daemon: ceph daemon type (ex., "mon"); "ceph." prefix optional
+    Returns: True when at least one daemon was exercised, else False
+    """
+    daemon = daemon.lower()
+    daemon = daemon if daemon.startswith("ceph.") else "ceph.%s" % daemon
+
+    kwargs = {
+        "timeout": 120,
+        "exit_status": None,
+        "state": None,
+        "check_status": True,
+        "check_keys": None
+    }
+
+    try:
+        # Index (not .get) so an unknown role raises the KeyError handled
+        # below instead of failing later on None.items().
+        daemons = ctx.daemons.daemons[daemon]
+        for dstate in daemons.values():
+            # stop and verify the cluster status
+            dstate.stop()
+            kwargs['exit_status'] = 0
+            kwargs['state'] = [HEALTH['warn']]
+            kwargs['check_keys'] = "{}_down".format(dstate.daemon_type)
+            check_service_status(ctx, dstate, **kwargs)
+
+            # start and verify the cluster status
+            dstate.restart()
+            kwargs['exit_status'] = None
+            kwargs['state'] = [HEALTH['warn'], HEALTH['good']]
+            kwargs['check_keys'] = None
+            check_service_status(ctx, dstate, **kwargs)
+
+            # restart daemon and verify cluster status
+            dstate.restart()
+            check_service_status(ctx, dstate, **kwargs)
+
+            # reboot daemon node and wait for it to come back
+            reboot_node(dstate, timeout=600, interval=30)
+            log.info("[ ({}, {}) ] daemon system tests Completed".format(daemon, dstate.id_))
+        return bool(daemons)  # after the loop, so every daemon gets tested
+    except KeyError as err:
+        log.error("No {}(s) found".format(daemon))
+        assert False, err
+    finally:
+        log.info("Daemon service system tests Completed")