qa: Expand nvmeof thrasher and add nvmeof_namespaces.yaml job
author     Vallari Agrawal <val.agl002@gmail.com>
           Mon, 29 Jul 2024 11:01:12 +0000 (16:31 +0530)
committer  Vallari Agrawal <val.agl002@gmail.com>
           Wed, 28 Aug 2024 12:42:28 +0000 (18:12 +0530)
1. qa/tasks/nvmeof.py: add more ways for the thrasher to stop nvmeof
   daemons.
2. Add qa/workunits/rbd/nvmeof_namespace_test.sh, which adds new
   namespaces and then deletes them. It is run by the new
   nvmeof_namespaces.yaml job, where fio runs against the other
   namespaces in the background.

Signed-off-by: Vallari Agrawal <val.agl002@gmail.com>
qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml [new file with mode: 0644]
qa/tasks/nvmeof.py
qa/workunits/rbd/nvmeof_namespace_test.sh [new file with mode: 0755]
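For reference, a sketch of scheduling just this new job with teuthology (the branch and machine type below are placeholders; the filter string simply matches the new yaml fragment):

    teuthology-suite --suite nvmeof:basic --ceph main \
        --machine-type smithi --filter nvmeof_namespaces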

diff --git a/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml b/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml
new file mode 100644 (file)
index 0000000..f43549d
--- /dev/null
+++ b/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml
@@ -0,0 +1,40 @@
+tasks:
+- nvmeof:
+    client: client.0
+    gw_image: quay.io/ceph/nvmeof:1.2 # "default" uses the image cephadm defaults to; set a specific tag (e.g. "latest") to test other nvmeof images
+    rbd:
+      pool_name: mypool
+      image_name_prefix: myimage
+    gateway_config:
+      subsystems_count: 3
+      namespaces_count: 20
+      cli_image: quay.io/ceph/nvmeof-cli:1.2
+
+- cephadm.wait_for_service:
+    service: nvmeof.mypool
+
+- workunit:
+    no_coverage_and_limits: true
+    clients:
+      client.2:
+        - rbd/nvmeof_setup_subsystem.sh
+    env:
+      RBD_POOL: mypool
+      RBD_IMAGE_PREFIX: myimage
+
+- workunit:
+    no_coverage_and_limits: true
+    timeout: 30m
+    clients:
+      client.2:
+        - rbd/nvmeof_basic_tests.sh
+        - rbd/nvmeof_fio_test.sh --rbd_iostat
+      client.3:
+        - rbd/nvmeof_basic_tests.sh
+        - rbd/nvmeof_namespace_test.sh
+    env:
+      RBD_POOL: mypool
+      IOSTAT_INTERVAL: '10'
+      RUNTIME: '600'
+      NEW_NAMESPACES_COUNT: '5'
+
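The workunit scripts below read the gateway topology from /etc/ceph/nvmeof.env, which the nvmeof task is expected to generate. A hypothetical sketch of what this job's settings imply (illustrative values only; the authoritative list of variables is written by qa/tasks/nvmeof.py):

    NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS=10.0.0.1         # first gateway
    NVMEOF_GATEWAY_IP_ADDRESSES=10.0.0.1,10.0.0.2      # one entry per gateway
    NVMEOF_SRPORT=5500                                 # gateway control port
    NVMEOF_CLI_IMAGE=quay.io/ceph/nvmeof-cli:1.2       # gateway_config.cli_image
    NVMEOF_SUBSYSTEMS_PREFIX=nqn.2016-06.io.spdk:cnode # subsystem NQN prefix
    NVMEOF_SUBSYSTEMS_COUNT=3                          # gateway_config.subsystems_count
    NVMEOF_NAMESPACES_COUNT=20                         # gateway_config.namespaces_count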
diff --git a/qa/tasks/nvmeof.py b/qa/tasks/nvmeof.py
index 092585955a3b69c6a76eee0e6a60b503d85fdafd..97b3648a093ee434e9a83bcbc6cab78f13ab0992 100644 (file)
--- a/qa/tasks/nvmeof.py
+++ b/qa/tasks/nvmeof.py
@@ -343,6 +343,37 @@ class NvmeofThrasher(Thrasher, Greenlet):
                 self.log('switch_task: done waiting for the other thrasher')
                 other_thrasher.switch_thrasher.clear()
 
+    def kill_daemon(self, daemon):
+        kill_methods = [
+            "ceph_daemon_stop", "systemctl_stop",
+            "daemon_remove",
+        ]
+        chosen_method = self.rng.choice(kill_methods)
+        d_name = '%s.%s' % (daemon.type_, daemon.id_)
+        if chosen_method == "ceph_daemon_stop":
+            daemon.remote.run(args=[
+                "ceph", "orch", "daemon", "stop",
+                d_name
+            ], check_status=False)
+        elif chosen_method == "systemctl_stop":
+            daemon.stop()
+        elif chosen_method == "daemon_remove":
+            daemon.remote.run(args=[
+                "ceph", "orch", "daemon", "rm",
+                d_name
+            ], check_status=False)
+        return chosen_method
+
+    def revive_daemon(self, daemon, killed_method):
+        # no "daemon_remove" branch: cephadm redeploys removed daemons itself
+        if killed_method == "ceph_daemon_stop":
+            name = '%s.%s' % (daemon.type_, daemon.id_)
+            daemon.remote.run(args=[
+                "ceph", "orch", "daemon", "restart", name
+            ])
+        elif killed_method == "systemctl_stop":
+            daemon.restart()
+
     def do_thrash(self):
         self.log('start thrashing')
         self.log(f'seed: {self.random_seed}, '\
@@ -354,7 +385,7 @@ class NvmeofThrasher(Thrasher, Greenlet):
         summary = []
 
         while not self.stopping.is_set():
-            killed_daemons = []
+            killed_daemons = defaultdict(list)
 
             weight = 1.0 / len(self.daemons)
             count = 0
@@ -380,9 +411,10 @@ class NvmeofThrasher(Thrasher, Greenlet):
                         continue
 
                 self.log('kill {label}'.format(label=daemon.id_))
-                daemon.stop()
+                # choose among several kill methods instead of a plain stop()
+                kill_method = self.kill_daemon(daemon)
 
-                killed_daemons.append(daemon)
+                killed_daemons[kill_method].append(daemon)
                 daemons_thrash_history[daemon.id_] += [datetime.now()]
 
                 # only thrash max_thrash_daemons amount of daemons
@@ -391,7 +423,10 @@ class NvmeofThrasher(Thrasher, Greenlet):
                     break
 
             if killed_daemons:
-                summary += ["killed: " + ", ".join([d.id_ for d in killed_daemons])]
+                iteration_summary = "thrashed- "
+                for kill_method in killed_daemons:
+                    iteration_summary += ", ".join([d.id_ for d in killed_daemons[kill_method]]) + f" (by {kill_method}); "
+                summary += [iteration_summary]
                 # delay before reviving
                 revive_delay = self.min_revive_delay
                 if self.randomize:
@@ -405,9 +440,11 @@ class NvmeofThrasher(Thrasher, Greenlet):
                 self.switch_task()
 
                 # revive after thrashing
-                for daemon in killed_daemons:
-                    self.log('reviving {label}'.format(label=daemon.id_))
-                    daemon.restart()
+                for kill_method in killed_daemons:
+                    for daemon in killed_daemons[kill_method]:
+                        self.log('reviving {label}'.format(label=daemon.id_))
+                        # revive using the counterpart of the kill method
+                        self.revive_daemon(daemon, kill_method)
                 
                 # delay before thrashing
                 thrash_delay = self.min_thrash_delay
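Roughly, the three kill methods map onto these shell operations against a gateway daemon, with each revive using the counterpart of its kill (a sketch; the daemon name nvmeof.mypool.smithi001.abcdef and the fsid are illustrative):

    # "ceph_daemon_stop": stop via the orchestrator, revive with "daemon restart"
    ceph orch daemon stop nvmeof.mypool.smithi001.abcdef
    ceph orch daemon restart nvmeof.mypool.smithi001.abcdef

    # "systemctl_stop": daemon.stop()/daemon.restart() drive the systemd unit
    systemctl stop ceph-<fsid>@nvmeof.mypool.smithi001.abcdef.service
    systemctl start ceph-<fsid>@nvmeof.mypool.smithi001.abcdef.service

    # "daemon_remove": remove the daemon and let cephadm redeploy it to
    # satisfy the nvmeof.mypool service spec; no explicit revive is issued
    ceph orch daemon rm nvmeof.mypool.smithi001.abcdef

With killed_daemons now keyed by kill method, a per-iteration summary entry reads roughly like (daemon IDs illustrative):

    thrashed- mypool.smithi001.wxyz (by systemctl_stop); mypool.smithi002.abcd, mypool.smithi003.efgh (by daemon_remove);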
diff --git a/qa/workunits/rbd/nvmeof_namespace_test.sh b/qa/workunits/rbd/nvmeof_namespace_test.sh
new file mode 100755 (executable)
index 0000000..ef331fd
--- /dev/null
+++ b/qa/workunits/rbd/nvmeof_namespace_test.sh
@@ -0,0 +1,73 @@
+#!/bin/bash -xe
+
+# This test assumes that each subsystem starts with an equal number
+# of namespaces (i.e. NVMEOF_NAMESPACES_COUNT namespaces per subsystem).
+# The script then adds NEW_NAMESPACES_COUNT namespaces to each subsystem
+# and finally deletes those new namespaces again.
+
+source /etc/ceph/nvmeof.env
+
+RBD_POOL="${RBD_POOL:-mypool}"
+NEW_IMAGE_SIZE="${RBD_IMAGE_SIZE:-8192}" # megabytes (8 GiB)
+NEW_NAMESPACES_COUNT="${NEW_NAMESPACES_COUNT:-3}"
+
+gateways_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc -c) + 1 ))
+new_images_count=$(( $NVMEOF_SUBSYSTEMS_COUNT * $NEW_NAMESPACES_COUNT)) 
+
+
+assert_namespaces_count() {
+    expected_count_per_subsys=$1
+    actual_count=$(sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format json subsystem list | 
+        grep namespace_count | grep $expected_count_per_subsys | wc -l)
+    if [ "$actual_count" -ne "$NVMEOF_SUBSYSTEMS_COUNT" ]; then
+        sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format json subsystem list
+        echo "Expected count of namepaces not found, expected (per subsystem): $expected_count_per_subsys"
+        return 1
+    fi
+}
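+# e.g. with NVMEOF_NAMESPACES_COUNT=20 and NEW_NAMESPACES_COUNT=5, the grep
+# above should match a line like '"namespace_count": 25' once per subsystem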
+
+
+# add rbd images
+for i in $(seq 1 $new_images_count); do
+    image_name="test${i}"
+    rbd create $RBD_POOL/$image_name --size $NEW_IMAGE_SIZE
+done
+
+# add new namespaces
+image_index=1
+for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
+    subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}"
+    for ns in $(seq 1 $NEW_NAMESPACES_COUNT); do
+        image="test${image_index}"
+        sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT namespace add --subsystem $subsystem_nqn --rbd-pool $RBD_POOL --rbd-image $image --load-balancing-group $(($image_index % $gateways_count + 1))
+        ((image_index++))
+    done
+done
+
+# list namespaces
+for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
+    subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}"
+    sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format plain namespace list --subsystem $subsystem_nqn        
+done
+
+# verify namespaces added
+expected_count_per_subsys=$(( $NEW_NAMESPACES_COUNT + $NVMEOF_NAMESPACES_COUNT ))
+assert_namespaces_count $expected_count_per_subsys
+
+# delete namespaces
+for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
+    subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}"
+    NSIDs=$(sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format json namespace list --subsystem $subsystem_nqn | 
+            jq -r '.namespaces[] | select(.rbd_image_name | startswith("test")) | .nsid')
+
+    for nsid in $NSIDs; do
+        sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT namespace del --subsystem $subsystem_nqn --nsid $nsid
+    done
+done
+
+# verify namespaces deleted
+expected_count_per_subsys=$NVMEOF_NAMESPACES_COUNT
+assert_namespaces_count $expected_count_per_subsys
+
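Outside teuthology, the script can be exercised directly once /etc/ceph/nvmeof.env is in place; a sketch matching the env block of the yaml job above:

    RBD_POOL=mypool NEW_NAMESPACES_COUNT=5 \
        qa/workunits/rbd/nvmeof_namespace_test.sh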