From 58d8be9fd83366b42f586f93e0568640d10e3d3c Mon Sep 17 00:00:00 2001
From: Vallari Agrawal
Date: Mon, 29 Jul 2024 16:31:12 +0530
Subject: [PATCH] qa: Expand nvmeof thrasher and add nvmeof_namespaces.yaml job

1. qa/tasks/nvmeof.py: add more methods for the thrasher to stop nvmeof
   daemons
2. Add qa/workunits/rbd/nvmeof_namespace_test.sh, which adds new namespaces
   to each subsystem and then deletes them. It is run by the
   nvmeof_namespaces.yaml job, where fio runs against the other namespaces
   in the background.

Signed-off-by: Vallari Agrawal
---
 .../basic/workloads/nvmeof_namespaces.yaml |  40 +++++++++++
 qa/tasks/nvmeof.py                         |  51 +++++++++++--
 qa/workunits/rbd/nvmeof_namespace_test.sh  |  71 +++++++++++++++++++
 3 files changed, 155 insertions(+), 7 deletions(-)
 create mode 100644 qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml
 create mode 100755 qa/workunits/rbd/nvmeof_namespace_test.sh

diff --git a/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml b/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml
new file mode 100644
index 0000000000000..f43549d2d8399
--- /dev/null
+++ b/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml
@@ -0,0 +1,40 @@
+tasks:
+- nvmeof:
+    client: client.0
+    gw_image: quay.io/ceph/nvmeof:1.2 # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest"
+    rbd:
+      pool_name: mypool
+      image_name_prefix: myimage
+    gateway_config:
+      subsystems_count: 3
+      namespaces_count: 20
+      cli_image: quay.io/ceph/nvmeof-cli:1.2
+
+- cephadm.wait_for_service:
+    service: nvmeof.mypool
+
+- workunit:
+    no_coverage_and_limits: true
+    clients:
+      client.2:
+        - rbd/nvmeof_setup_subsystem.sh
+    env:
+      RBD_POOL: mypool
+      RBD_IMAGE_PREFIX: myimage
+
+- workunit:
+    no_coverage_and_limits: true
+    timeout: 30m
+    clients:
+      client.2:
+        - rbd/nvmeof_basic_tests.sh
+        - rbd/nvmeof_fio_test.sh --rbd_iostat
+      client.3:
+        - rbd/nvmeof_basic_tests.sh
+        - rbd/nvmeof_namespace_test.sh
+    env:
+      RBD_POOL: mypool
+      IOSTAT_INTERVAL: '10'
+      RUNTIME: '600'
+      NEW_NAMESPACES_COUNT: '5'
+
diff --git a/qa/tasks/nvmeof.py b/qa/tasks/nvmeof.py
index 092585955a3b6..97b3648a093ee 100644
--- a/qa/tasks/nvmeof.py
+++ b/qa/tasks/nvmeof.py
@@ -343,6 +343,37 @@ class NvmeofThrasher(Thrasher, Greenlet):
             self.log('switch_task: done waiting for the other thrasher')
             other_thrasher.switch_thrasher.clear()
 
+    def kill_daemon(self, daemon):
+        kill_methods = [
+            "ceph_daemon_stop", "systemctl_stop",
+            "daemon_remove",
+        ]
+        chosen_method = self.rng.choice(kill_methods)
+        d_name = '%s.%s' % (daemon.type_, daemon.id_)
+        if chosen_method == "ceph_daemon_stop":
+            daemon.remote.run(args=[
+                "ceph", "orch", "daemon", "stop",
+                d_name
+            ], check_status=False)
+        elif chosen_method == "systemctl_stop":
+            daemon.stop()
+        elif chosen_method == "daemon_remove":
+            daemon.remote.run(args=[
+                "ceph", "orch", "daemon", "rm",
+                d_name
+            ], check_status=False)
+        return chosen_method
+
+    def revive_daemon(self, daemon, killed_method):
+        if killed_method == "ceph_daemon_stop":
+            name = '%s.%s' % (daemon.type_, daemon.id_)
+            daemon.remote.run(args=[
+                "ceph", "orch", "daemon", "restart",
+                name
+            ])
+        elif killed_method == "systemctl_stop":
+            daemon.restart()
+
     def do_thrash(self):
         self.log('start thrashing')
         self.log(f'seed: {self.random_seed}, , '\
@@ -354,7 +385,7 @@ class NvmeofThrasher(Thrasher, Greenlet):
         summary = []
 
         while not self.stopping.is_set():
-            killed_daemons = []
+            killed_daemons = defaultdict(list)
 
             weight = 1.0 / len(self.daemons)
             count = 0
@@ -380,9 +411,10 @@
                     continue
 
                 self.log('kill {label}'.format(label=daemon.id_))
-                daemon.stop()
+                # daemon.stop()
+                kill_method = self.kill_daemon(daemon)
 
-                killed_daemons.append(daemon)
+                killed_daemons[kill_method].append(daemon)
                 daemons_thrash_history[daemon.id_] += [datetime.now()]
 
                 # only thrash max_thrash_daemons amount of daemons
@@ -391,7 +423,10 @@
                     break
 
             if killed_daemons:
-                summary += ["killed: " + ", ".join([d.id_ for d in killed_daemons])]
+                iteration_summary = "thrashed- "
+                for kill_method in killed_daemons:
+                    iteration_summary += (", ".join([d.id_ for d in killed_daemons[kill_method]]) + f" (by {kill_method}); ")
+                summary += [iteration_summary]
                 # delay before reviving
                 revive_delay = self.min_revive_delay
                 if self.randomize:
@@ -405,9 +440,11 @@
             self.switch_task()
 
             # revive after thrashing
-            for daemon in killed_daemons:
-                self.log('reviving {label}'.format(label=daemon.id_))
-                daemon.restart()
+            for kill_method in killed_daemons:
+                for daemon in killed_daemons[kill_method]:
+                    self.log('reviving {label}'.format(label=daemon.id_))
+                    # daemon.restart()
+                    self.revive_daemon(daemon, kill_method)
 
             # delay before thrashing
             thrash_delay = self.min_thrash_delay
diff --git a/qa/workunits/rbd/nvmeof_namespace_test.sh b/qa/workunits/rbd/nvmeof_namespace_test.sh
new file mode 100755
index 0000000000000..ef331fd085b6a
--- /dev/null
+++ b/qa/workunits/rbd/nvmeof_namespace_test.sh
@@ -0,0 +1,71 @@
+#!/bin/bash -xe
+
+# It's assumed in this test that each subsystem has an equal number
+# of namespaces (i.e. NVMEOF_NAMESPACES_COUNT ns per subsystem).
+# This script then adds NEW_NAMESPACES_COUNT namespaces
+# to each subsystem and then deletes those new namespaces.
+
+source /etc/ceph/nvmeof.env
+
+RBD_POOL="${RBD_POOL:-mypool}"
+NEW_IMAGE_SIZE="${RBD_IMAGE_SIZE:-8192}" # 1024*8
+NEW_NAMESPACES_COUNT="${NEW_NAMESPACES_COUNT:-3}"
+
+gateways_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc -c) + 1 ))
+new_images_count=$(( $NVMEOF_SUBSYSTEMS_COUNT * $NEW_NAMESPACES_COUNT))
+
+
+assert_namespaces_count() {
+    expected_count_per_subsys=$1
+    actual_count=$(sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format json subsystem list |
+        grep namespace_count | grep $expected_count_per_subsys | wc -l)
+    if [ "$actual_count" -ne "$NVMEOF_SUBSYSTEMS_COUNT" ]; then
+        sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format json subsystem list
+        echo "Expected count of namespaces not found, expected (per subsystem): $expected_count_per_subsys"
+        return 1
+    fi
+}
+
+
+# add rbd images
+for i in $(seq 1 $new_images_count); do
+    image_name="test${i}"
+    rbd create $RBD_POOL/$image_name --size $NEW_IMAGE_SIZE
+done
+
+# add new namespaces
+image_index=1
+for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
+    subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}"
+    for ns in $(seq 1 $NEW_NAMESPACES_COUNT); do
+        image="test${image_index}"
+        sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT namespace add --subsystem $subsystem_nqn --rbd-pool $RBD_POOL --rbd-image $image --load-balancing-group $(($image_index % $gateways_count + 1))
+        ((image_index++))
+    done
+done
+
+# list namespaces
+for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
+    subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}"
+    sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format plain namespace list --subsystem $subsystem_nqn
+done
+
+# verify namespaces added
+expected_count_per_subsys=$(( $NEW_NAMESPACES_COUNT + $NVMEOF_NAMESPACES_COUNT ))
+assert_namespaces_count $expected_count_per_subsys
+
+# delete namespaces
+for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
+    subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}"
+    NSIDs=$(sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format json namespace list --subsystem $subsystem_nqn |
+        jq -r '.namespaces[] | select(.rbd_image_name | startswith("test")) | .nsid')
+
+    for nsid in $NSIDs; do
+        sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT namespace del --subsystem $subsystem_nqn --nsid $nsid
+    done
+done
+
+# verify namespaces deleted
+expected_count_per_subsys=$NVMEOF_NAMESPACES_COUNT
+assert_namespaces_count $expected_count_per_subsys
+
-- 
2.39.5
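
Note: the sketch below is a rough manual equivalent of what the new kill_daemon/revive_daemon
paths do on a cephadm cluster, for reproducing a single thrash/revive cycle by hand. The daemon
name used here is a placeholder (take the real one from "ceph orch ps"), and the automatic
redeploy after "daemon rm" is an assumption about cephadm behaviour when the nvmeof service
spec still places a gateway on that host, not something this patch changes.

    ceph orch ps | grep nvmeof        # find a gateway daemon name, e.g. nvmeof.mypool.<host>.<suffix>
    d=nvmeof.mypool.<host>.<suffix>   # placeholder, substitute the real daemon name

    # "ceph_daemon_stop" path and its revive
    ceph orch daemon stop $d
    ceph orch daemon restart $d

    # "daemon_remove" path; no explicit revive in the thrasher, cephadm is
    # expected to redeploy the daemon since the service spec still wants it
    ceph orch daemon rm $d --force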