qa: add qa/tasks/nvmeof.py and rbd/nvmeof_basic_tests and fio workunits 54959/head
author Vallari Agrawal <val.agl002@gmail.com>
Thu, 1 Feb 2024 13:07:27 +0000 (18:37 +0530)
committer Vallari Agrawal <val.agl002@gmail.com>
Mon, 12 Feb 2024 07:30:09 +0000 (13:00 +0530)
This is v2 of the rbd/nvmeof test: it deploys 1 gateway and 1 initiator,
then does basic verification of nvme commands and runs fio.

This commit creates:
1. qa/tasks/nvmeof.py: adds a new 'Nvmeof' task which deploys
    the gateway and shares its config with the initiator hosts
    (see the env-file sketch after this list). Sharing config was
    previously done by the 'nvmeof_gateway_cfg' task in
    qa/tasks/cephadm.py (that task is removed in this commit).
2. qa/workunits/rbd/nvmeof_basic_tests.sh:
    Runs nvme commands (discovery, connect, connect-all, disconnect-all,
    and list-subsys) and does basic verification of the output.
3. qa/workunits/rbd/nvmeof_fio_test.sh:
    Runs the fio command. Also runs iostat in parallel if the
    IOSTAT_INTERVAL variable is set; this variable configures the
    delay between iostat reports.
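
A sketch of the shared config from item 1: the task writes
/etc/ceph/nvmeof.env on the target host, roughly as below. The values
shown are the task defaults, not output from a real run.

    # /etc/ceph/nvmeof.env -- illustrative; see set_gateway_cfg() below
    NVMEOF_GATEWAY_IP_ADDRESS=<IP of the gateway host>
    NVMEOF_GATEWAY_NAME=<name of the deployed nvmeof daemon>
    NVMEOF_CLI_IMAGE="quay.io/ceph/nvmeof-cli:latest"
    NVMEOF_BDEV=mybdev
    NVMEOF_SERIAL=SPDK00000000000001
    NVMEOF_NQN=nqn.2016-06.io.spdk:cnode1
    NVMEOF_PORT=4420
    NVMEOF_SRPORT=5500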

The nvmeof-cli upgrade from v0.0.6 to v0.0.7 introduced major changes
to all nvmeof commands. This commit updates the v0.0.6 commands to the
v0.0.7 syntax in qa/workunits/rbd/nvmeof_initiator.sh
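
A minimal before/after sketch of that CLI change, abridged from the
initiator.sh diff below ($IMAGE and $GW_IP stand in for the full CLI
image name and gateway address):

    # v0.0.6: separate bdev creation, then namespace attach
    podman run -it $IMAGE --server-address $GW_IP --server-port 5500 create_bdev --pool mypool --image myimage --bdev mybdev
    podman run -it $IMAGE --server-address $GW_IP --server-port 5500 add_namespace --subnqn $NQN --bdev mybdev
    # v0.0.7: grouped "<object> <verb>" syntax; bdev handling is folded into "namespace add"
    podman run -it $IMAGE --server-address $GW_IP --server-port 5500 namespace add --subsystem $NQN --rbd-pool mypool --rbd-image myimage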

Signed-off-by: Vallari Agrawal <val.agl002@gmail.com>
qa/suites/rbd/nvmeof/base/install.yaml
qa/suites/rbd/nvmeof/cluster/fixed-3.yaml
qa/suites/rbd/nvmeof/workloads/nvmeof_initiator.yaml
qa/tasks/cephadm.py
qa/tasks/nvmeof.py [new file with mode: 0644]
qa/workunits/rbd/nvmeof_basic_tests.sh [new file with mode: 0755]
qa/workunits/rbd/nvmeof_fio_test.sh [new file with mode: 0755]
qa/workunits/rbd/nvmeof_initiator.sh

diff --git a/qa/suites/rbd/nvmeof/base/install.yaml b/qa/suites/rbd/nvmeof/base/install.yaml
index 5a852f14dbe1b59afb7185fb2dbb36025b0e66d0..6fc91d8f359f795aa65947c3d355a88d2bc15d6a 100644 (file)
@@ -10,23 +10,4 @@ tasks:
     - ceph orch host ls
     - ceph orch device ls
     - ceph osd lspools
-    # create pool
-    - ceph osd pool create mypool
-    - rbd pool init mypool
-    # deploy nvmeof
-    ## Uncomment to test specific nvmeof images
-    ## - ceph config set mgr mgr/cephadm/container_image_nvmeof quay.io/ceph/nvmeof:latest
-    - ceph orch apply nvmeof mypool --placement="1 $(hostname)"
-    - ceph orch ps --refresh
 
-- cephadm.wait_for_service:
-    service: nvmeof.mypool
-
-- cephadm.nvmeof_gateway_cfg:
-    source: host.a 
-    target: client.1
-    service: nvmeof.mypool
-
-- exec:
-    client.0:
-      - journalctl -u $(systemctl list-units | grep nvmeof.mypool | awk '{print $1}')
diff --git a/qa/suites/rbd/nvmeof/cluster/fixed-3.yaml b/qa/suites/rbd/nvmeof/cluster/fixed-3.yaml
index 42e696cd2f14749ff308b9866c4d4d976f5859f7..f417079e31a1b367f284c09e6d53f1e3e179920b 100644 (file)
@@ -5,9 +5,11 @@ roles:
   - osd.0
   - osd.1
   - client.0
+  - ceph.nvmeof.nvmeof.a
 - - host.b
   - mon.b
   - osd.2
   - osd.3
   - osd.4
   - client.1
+- - client.2
diff --git a/qa/suites/rbd/nvmeof/workloads/nvmeof_initiator.yaml b/qa/suites/rbd/nvmeof/workloads/nvmeof_initiator.yaml
index 4c947c1f787fa31d7e6173ab4c4de672e31d3fef..bbb9b0ab5f2a0855a28b491403bd91c36872a290 100644 (file)
@@ -1,6 +1,27 @@
 tasks:
+- nvmeof:
+    client: client.0
+    version: latest  # "default" uses packaged version; change to test specific nvmeof images, example "latest"
+    rbd:
+        pool_name: mypool
+        image_name: myimage
+    gateway_config:
+        source: host.a 
+        target: client.2
+        vars:
+            cli_version: latest
+
+- cephadm.wait_for_service:
+    service: nvmeof.mypool
+
 - workunit:
     no_coverage_and_limits: true
     clients:
-      client.1:
+      client.2:
         - rbd/nvmeof_initiator.sh
+        - rbd/nvmeof_basic_tests.sh
+        - rbd/nvmeof_fio_test.sh
+    env:
+      RBD_POOL: mypool
+      RBD_IMAGE: myimage
+      IOSTAT_INTERVAL: '10'
diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py
index 9e386bbd5c29b258aa9a44be801e8b1a2ee87d66..f60aabc84359d3bd24071c775af9fb1820de3dd4 100644 (file)
@@ -21,7 +21,6 @@ from teuthology import packaging
 from teuthology.orchestra import run
 from teuthology.orchestra.daemon import DaemonGroup
 from teuthology.config import config as teuth_config
-from teuthology.exceptions import ConfigError
 from textwrap import dedent
 from tasks.cephfs.filesystem import MDSCluster, Filesystem
 from tasks.util import chacra
@@ -101,43 +100,6 @@ def update_archive_setting(ctx, key, value):
         yaml.safe_dump(info_yaml, info_file, default_flow_style=False)
 
 
-@contextlib.contextmanager
-def nvmeof_gateway_cfg(ctx, config):
-    source_host = config.get('source')
-    target_host = config.get('target')
-    nvmeof_service = config.get('service')
-    if not (source_host and target_host and nvmeof_service):
-        raise ConfigError('nvmeof_gateway_cfg requires "source", "target", and "service"')
-    remote = list(ctx.cluster.only(source_host).remotes.keys())[0]
-    ip_address = remote.ip_address
-    gateway_name = ""
-    r = remote.run(args=[
-        'systemctl', 'list-units',
-        run.Raw('|'), 'grep', nvmeof_service
-    ], stdout=StringIO())
-    output = r.stdout.getvalue()
-    pattern_str = f"{re.escape(nvmeof_service)}(.*?)(?=\.service)"
-    pattern = re.compile(pattern_str)
-    match = pattern.search(output)
-    if match:
-        gateway_name = match.group()
-    conf_data = dedent(f"""
-        NVMEOF_GATEWAY_IP_ADDRESS={ip_address}
-        NVMEOF_GATEWAY_NAME={gateway_name}
-        """)
-    target_remote = list(ctx.cluster.only(target_host).remotes.keys())[0]
-    target_remote.write_file(
-        path='/etc/ceph/nvmeof.env',
-        data=conf_data,
-        sudo=True
-    )
-
-    try:
-        yield
-    finally:
-        pass
-
-
 @contextlib.contextmanager
 def normalize_hostnames(ctx):
     """
diff --git a/qa/tasks/nvmeof.py b/qa/tasks/nvmeof.py
new file mode 100644 (file)
index 0000000..b75d00d
--- /dev/null
@@ -0,0 +1,168 @@
+import logging
+from textwrap import dedent
+from teuthology.task import Task
+from teuthology import misc
+from teuthology.exceptions import ConfigError
+from tasks.util import get_remote_for_role
+from tasks.cephadm import _shell
+
+log = logging.getLogger(__name__)
+
+conf_file = '/etc/ceph/nvmeof.env'
+
+
+class Nvmeof(Task):
+    """
+    Setup nvmeof gateway on client and then share gateway config to target host.
+
+        - nvmeof:
+            client: client.0
+            version: default
+            rbd:
+                pool_name: mypool
+                image_name: myimage
+                rbd_size: 1024
+            gateway_config:
+                source: host.a 
+                target: client.2
+                vars:
+                    cli_version: latest
+                    
+    """
+
+    def setup(self):
+        super(Nvmeof, self).setup()
+        try:
+            self.client = self.config['client']
+        except KeyError:
+            raise ConfigError('nvmeof requires a client to connect with')
+
+        self.cluster_name, type_, self.client_id = misc.split_role(self.client)
+        if type_ != 'client':
+            msg = 'client role ({0}) must be a client'.format(self.client)
+            raise ConfigError(msg)
+        self.remote = get_remote_for_role(self.ctx, self.client)
+
+    def begin(self):
+        super(Nvmeof, self).begin()
+        self._set_defaults()
+        self.deploy_nvmeof()
+        self.set_gateway_cfg()
+
+    def _set_defaults(self):
+        self.gateway_image = self.config.get('version', 'default')
+
+        rbd_config = self.config.get('rbd', {})
+        self.poolname = rbd_config.get('pool_name', 'mypool')
+        self.rbd_image_name = rbd_config.get('image_name', 'myimage')
+        self.rbd_size = rbd_config.get('rbd_size', 1024*8)
+
+        gateway_config = self.config.get('gateway_config', {})
+        conf_vars = gateway_config.get('vars', {})
+        self.cli_image = conf_vars.get('cli_version', 'latest')
+        self.bdev = conf_vars.get('bdev', 'mybdev')
+        self.serial = conf_vars.get('serial', 'SPDK00000000000001')
+        self.nqn = conf_vars.get('nqn', 'nqn.2016-06.io.spdk:cnode1')
+        self.port = conf_vars.get('port', '4420')
+        self.srport = conf_vars.get('srport', '5500')
+
+    def deploy_nvmeof(self):
+        """
+        Deploy nvmeof gateway.
+        """
+        log.info('[nvmeof]: deploying nvmeof gateway...')
+        if not hasattr(self.ctx, 'ceph'):
+            self.ctx.ceph = {}
+        fsid = self.ctx.ceph[self.cluster_name].fsid
+
+        nodes = []
+        daemons = {}
+
+        for remote, roles in self.ctx.cluster.remotes.items():
+            for role in [r for r in roles
+                         if misc.is_type('nvmeof', self.cluster_name)(r)]:
+                c_, _, id_ = misc.split_role(role)
+                log.info('Adding %s on %s' % (role, remote.shortname))
+                nodes.append(remote.shortname + '=' + id_)
+                daemons[role] = (remote, id_)
+
+        if nodes:
+            image = self.gateway_image
+            if (image != "default"):
+                log.info(f'[nvmeof]: ceph config set mgr mgr/cephadm/container_image_nvmeof quay.io/ceph/nvmeof:{image}')
+                _shell(self.ctx, self.cluster_name, self.remote, [
+                    'ceph', 'config', 'set', 'mgr', 
+                    'mgr/cephadm/container_image_nvmeof',
+                    f'quay.io/ceph/nvmeof:{image}'
+                ])
+
+            poolname = self.poolname
+            imagename = self.rbd_image_name
+
+            log.info(f'[nvmeof]: ceph osd pool create {poolname}')
+            _shell(self.ctx, self.cluster_name, self.remote, [
+                'ceph', 'osd', 'pool', 'create', poolname
+            ])
+
+            log.info(f'[nvmeof]: rbd pool init {poolname}')
+            _shell(self.ctx, self.cluster_name, self.remote, [
+                'rbd', 'pool', 'init', poolname
+            ])
+
+            log.info(f'[nvmeof]: ceph orch apply nvmeof {poolname}')
+            _shell(self.ctx, self.cluster_name, self.remote, [
+                'ceph', 'orch', 'apply', 'nvmeof', poolname, 
+                '--placement', str(len(nodes)) + ';' + ';'.join(nodes)
+            ])
+
+            log.info(f'[nvmeof]: rbd create {poolname}/{imagename} --size {self.rbd_size}')
+            _shell(self.ctx, self.cluster_name, self.remote, [
+                'rbd', 'create', f'{poolname}/{imagename}', '--size', f'{self.rbd_size}'
+            ])
+
+        for role, i in daemons.items():
+            remote, id_ = i
+            self.ctx.daemons.register_daemon(
+                remote, 'nvmeof', id_,
+                cluster=self.cluster_name,
+                fsid=fsid,
+                logger=log.getChild(role),
+                wait=False,
+                started=True,
+            )
+        log.info("[nvmeof]: executed deploy_nvmeof successfully!")
+        
+    def set_gateway_cfg(self):
+        log.info('[nvmeof]: running set_gateway_cfg...')
+        gateway_config = self.config.get('gateway_config', {})
+        source_host = gateway_config.get('source')
+        target_host = gateway_config.get('target')
+        if not (source_host and target_host):
+            raise ConfigError('gateway_config requires "source" and "target"')
+        remote = list(self.ctx.cluster.only(source_host).remotes.keys())[0]
+        ip_address = remote.ip_address
+        gateway_name = ""
+        nvmeof_daemons = self.ctx.daemons.iter_daemons_of_role('nvmeof', cluster=self.cluster_name)
+        for daemon in nvmeof_daemons:
+            if ip_address == daemon.remote.ip_address:
+                gateway_name = daemon.name()
+        conf_data = dedent(f"""
+            NVMEOF_GATEWAY_IP_ADDRESS={ip_address}
+            NVMEOF_GATEWAY_NAME={gateway_name}
+            NVMEOF_CLI_IMAGE="quay.io/ceph/nvmeof-cli:{self.cli_image}"
+            NVMEOF_BDEV={self.bdev}
+            NVMEOF_SERIAL={self.serial}
+            NVMEOF_NQN={self.nqn}
+            NVMEOF_PORT={self.port}
+            NVMEOF_SRPORT={self.srport}
+            """)
+        target_remote = list(self.ctx.cluster.only(target_host).remotes.keys())[0]
+        target_remote.write_file(
+            path=conf_file,
+            data=conf_data,
+            sudo=True
+        )
+        log.info("[nvmeof]: executed set_gateway_cfg successfully!")
+
+
+task = Nvmeof
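
To sanity-check what deploy_nvmeof() set up on a live cluster, the
standard orchestrator commands below should suffice (a hedged sketch,
not part of this commit; pool/image names assume the defaults above):

    ceph orch ls                  # expect an nvmeof.mypool service
    ceph orch ps | grep nvmeof    # expect a running nvmeof daemon
    rbd ls mypool                 # expect the 'myimage' image created by the task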
diff --git a/qa/workunits/rbd/nvmeof_basic_tests.sh b/qa/workunits/rbd/nvmeof_basic_tests.sh
new file mode 100755 (executable)
index 0000000..878e043
--- /dev/null
@@ -0,0 +1,72 @@
+#!/bin/bash -x
+
+source /etc/ceph/nvmeof.env
+SPDK_CONTROLLER="SPDK bdev Controller"
+DISCOVERY_PORT="8009"
+
+discovery() {
+    output=$(sudo nvme discover -t tcp -a $NVMEOF_GATEWAY_IP_ADDRESS -s $DISCOVERY_PORT)
+    expected_discovery_stdout="subtype: nvme subsystem"
+    if ! echo "$output" | grep -q "$expected_discovery_stdout"; then
+        return 1
+    fi
+}
+
+connect() {
+    sudo nvme connect -t tcp --traddr $NVMEOF_GATEWAY_IP_ADDRESS -s $NVMEOF_PORT -n $NVMEOF_NQN
+    output=$(sudo nvme list)
+    if ! echo "$output" | grep -q "$SPDK_CONTROLLER"; then
+        return 1
+    fi
+}
+
+disconnect_all() {
+    sudo nvme disconnect-all
+    output=$(sudo nvme list)
+    if echo "$output" | grep -q "$SPDK_CONTROLLER"; then
+        return 1
+    fi
+}
+
+connect_all() {
+    sudo nvme connect-all --traddr=$NVMEOF_GATEWAY_IP_ADDRESS --transport=tcp
+    output=$(sudo nvme list)
+    if ! echo "$output" | grep -q "$SPDK_CONTROLLER"; then
+        return 1
+    fi
+}
+
+list_subsys() {
+    expected_count=$1
+    output=$(sudo nvme list-subsys --output-format=json)
+    multipath=$(echo $output | grep -c '"tcp"')
+    if [ "$multipath" -ne "$expected_count" ]; then
+        return 1
+    fi
+}
+
+
+test_run() {
+    echo "[nvmeof] Running test: $1"
+    $1 "${@:2}" # execute func
+    if [ $? -eq 0 ]; then
+        echo "[nvmeof] $1 test passed!"
+    else
+        echo "[nvmeof] $1 test failed!"
+        exit 1
+    fi
+}
+
+
+test_run disconnect_all
+test_run discovery 
+test_run connect
+test_run list_subsys 1
+test_run disconnect_all
+test_run list_subsys 0
+test_run connect_all
+test_run list_subsys 1
+
+
+echo "-------------Test Summary-------------"
+echo "[nvmeof] All nvmeof basic tests passed!"
diff --git a/qa/workunits/rbd/nvmeof_fio_test.sh b/qa/workunits/rbd/nvmeof_fio_test.sh
new file mode 100755 (executable)
index 0000000..bacc15e
--- /dev/null
@@ -0,0 +1,36 @@
+#!/bin/bash -ex
+
+sudo yum -y install fio
+sudo yum -y install sysstat
+
+fio_file=$(mktemp -t nvmeof-fio-XXXX)
+drives_list=$(sudo nvme list --output-format=json | jq -r '.Devices | .[] | select(.ModelNumber == "SPDK bdev Controller") | .DevicePath')
+
+RUNTIME=${RUNTIME:-600}
+# IOSTAT_INTERVAL=10
+
+
+cat >> $fio_file <<EOF
+[nvmeof-fio-test]
+ioengine=${IO_ENGINE:-sync}
+bsrange=${BS_RANGE:-4k-64k}
+numjobs=${NUM_OF_JOBS:-1}
+size=${SIZE:-1G}
+time_based=1
+runtime=$RUNTIME
+rw=${RW:-randrw}
+filename=$(echo "$drives_list" | tr '\n' ':' | sed 's/:$//')
+verify=md5
+verify_fatal=1
+EOF
+
+fio --showcmd $fio_file
+sudo fio $fio_file &
+
+if [ -n "$IOSTAT_INTERVAL" ]; then
+    iostat_count=$(( RUNTIME / IOSTAT_INTERVAL ))
+    iostat -d $IOSTAT_INTERVAL $iostat_count -h 
+fi
+wait
+
+echo "[nvmeof] fio test successful!"
diff --git a/qa/workunits/rbd/nvmeof_initiator.sh b/qa/workunits/rbd/nvmeof_initiator.sh
index c3f3c1082bfc374b46a4940d5f8b315bdd098bd7..8f79238083b2a55955c000cf9706859d7ee3a08d 100755 (executable)
@@ -6,74 +6,22 @@ sudo modprobe nvme-fabrics
 sudo modprobe nvme-tcp
 sudo dnf install nvme-cli -y
 
-# import NVMEOF_GATEWAY_IP_ADDRESS and NVMEOF_GATEWAY_NAME=nvmeof.poolname.smithiXXX.abcde
 source /etc/ceph/nvmeof.env
 
-HOSTNAME=$(hostname)
-IMAGE="quay.io/ceph/nvmeof-cli:latest"
-RBD_POOL=$(awk -F'.' '{print $2}' <<< "$NVMEOF_GATEWAY_NAME")
-RBD_IMAGE="myimage"
-RBD_SIZE=$((1024*8)) #8GiB
-BDEV="mybdev"
-SERIAL="SPDK00000000000001"
-NQN="nqn.2016-06.io.spdk:cnode1"
-PORT="4420"
-SRPORT="5500"
-DISCOVERY_PORT="8009"
+# RBD_POOL and RBD_IMAGE are intended to be set from yaml; 'mypool' and 'myimage' are the defaults
+RBD_POOL="${RBD_POOL:-mypool}"
+RBD_IMAGE="${RBD_IMAGE:-myimage}"
 
-rbd create $RBD_POOL/$RBD_IMAGE --size $RBD_SIZE
-sudo podman run -it $IMAGE --server-address $NVMEOF_GATEWAY_IP_ADDRESS --server-port $SRPORT create_bdev --pool $RBD_POOL --image $RBD_IMAGE --bdev $BDEV
+HOSTNAME=$(hostname)
 sudo podman images
 sudo podman ps
-sudo podman run -it $IMAGE --server-address $NVMEOF_GATEWAY_IP_ADDRESS --server-port $SRPORT create_subsystem --subnqn $NQN --serial $SERIAL
-sudo podman run -it $IMAGE --server-address $NVMEOF_GATEWAY_IP_ADDRESS --server-port $SRPORT add_namespace --subnqn $NQN --bdev $BDEV
-sudo podman run -it $IMAGE --server-address $NVMEOF_GATEWAY_IP_ADDRESS --server-port $SRPORT create_listener -n $NQN -g client.$NVMEOF_GATEWAY_NAME -a $NVMEOF_GATEWAY_IP_ADDRESS -s $PORT
-sudo podman run -it $IMAGE --server-address $NVMEOF_GATEWAY_IP_ADDRESS --server-port $SRPORT add_host --subnqn $NQN --host "*"
-sudo podman run -it $IMAGE --server-address $NVMEOF_GATEWAY_IP_ADDRESS --server-port $SRPORT get_subsystems
+sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT subsystem list
+sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT subsystem add --subsystem $NVMEOF_NQN
+sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT namespace add --subsystem $NVMEOF_NQN --rbd-pool $RBD_POOL --rbd-image $RBD_IMAGE
+sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT listener add --subsystem $NVMEOF_NQN --gateway-name client.$NVMEOF_GATEWAY_NAME --traddr $NVMEOF_GATEWAY_IP_ADDRESS --trsvcid $NVMEOF_PORT
+sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT host add --subsystem $NVMEOF_NQN --host "*"
+sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT subsystem list
 sudo lsmod | grep nvme
 sudo nvme list
-sudo nvme discover -t tcp -a $NVMEOF_GATEWAY_IP_ADDRESS -s $DISCOVERY_PORT
-sudo nvme connect -t tcp --traddr $NVMEOF_GATEWAY_IP_ADDRESS -s $PORT -n $NQN
-sudo nvme list
-
-echo "testing nvmeof initiator..."
-
-nvme_model="SPDK bdev Controller"
-
-echo "Test 1: create initiator - starting"
-if ! sudo nvme list | grep -q "$nvme_model"; then
-  echo "nvmeof initiator not created!"
-  exit 1
-fi
-echo "Test 1: create initiator - passed!"
-
-
-echo "Test 2: device size - starting"
-image_size_in_bytes=$(($RBD_SIZE * 1024 * 1024))
-nvme_size=$(sudo nvme list --output-format=json | \
-        jq -r ".Devices | .[] | select(.ModelNumber == \"$nvme_model\") | .PhysicalSize")
-if [ "$image_size_in_bytes" != "$nvme_size" ]; then
-  echo "block device size do not match!"
-  exit 1
-fi
-echo "Test 2: device size - passed!"
-
-
-echo "Test 3: basic IO - starting"
-nvme_drive=$(sudo nvme list --output-format=json | \
-        jq -r ".Devices | .[] | select(.ModelNumber == \"$nvme_model\") | .DevicePath")
-io_input_file="/tmp/nvmeof_test_input"
-echo "Hello world" > $io_input_file
-truncate -s 2k $io_input_file
-sudo dd if=$io_input_file of=$nvme_drive oflag=direct count=1 bs=2k #write
-io_output_file="/tmp/nvmeof_test_output"
-sudo dd if=$nvme_drive of=$io_output_file iflag=direct count=1 bs=2k #read
-if ! cmp $io_input_file $io_output_file; then
-  echo "nvmeof initiator - io test failed!"
-  exit 1
-fi
-sudo rm -f $io_input_file $io_output_file
-echo "Test 3: basic IO - passed!"
-
 
-echo "nvmeof initiator tests passed!"
+echo "[nvmeof] Initiator setup done"
\ No newline at end of file