git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
qa: Improve scalability test
author: Vallari Agrawal <vallari.agrawal@ibm.com>
Fri, 27 Jun 2025 09:48:02 +0000 (15:18 +0530)
committer: Venky Shankar <vshankar@redhat.com>
Fri, 7 Nov 2025 05:33:39 +0000 (05:33 +0000)
Improve the logs of the scalability script.
Add a DEBUG-mode setup in comments; we can
enable it when needed.

Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml
qa/suites/nvmeof/thrash/gateway-initiator-setup/10-subsys-90-namespace-no_huge_pages.yaml
qa/workunits/nvmeof/basic_tests.sh
qa/workunits/nvmeof/fio_test.sh
qa/workunits/nvmeof/scalability_test.sh

index 236d62dc940a1a8e2634939452b0337142513500..efbd66d81252f6ac0f4081a78ffcc7adb7f9c8df 100644 (file)
@@ -13,6 +13,18 @@ tasks:
 - cephadm.wait_for_service:
     service: nvmeof.mypool.mygroup0
 
+## Enable gateway in DEBUG mode
+# - cephadm.exec:
+#     host.a:
+#       - ceph orch ls nvmeof --export > /tmp/nvmeof-orig.yaml
+#       - "sed 's/log_level:\ INFO/log_level:\ DEBUG/g' /tmp/nvmeof-orig.yaml > /tmp/nvmeof-debug.yaml"
+#       - cat /tmp/nvmeof-debug.yaml
+#       - ceph orch apply -i /tmp/nvmeof-debug.yaml
+#       - ceph orch redeploy nvmeof.mypool.mygroup0
+
+# - cephadm.wait_for_service:
+#     service: nvmeof.mypool.mygroup0
+
 - workunit:
     no_coverage_and_limits: true
     timeout: 30m
index d2da1d0877aff248f011641d5581a735456be0f6..30f7e58340383c7619d3179e1bf35a92c7799dad 100644 (file)
@@ -19,7 +19,7 @@ tasks:
       - cp /tmp/nvmeof-orig.yaml /tmp/nvmeof-no-huge-page.yaml 
       - "sed -i '/  pool: mypool/a\\  spdk_mem_size: 4096' /tmp/nvmeof-no-huge-page.yaml"
       - cat /tmp/nvmeof-no-huge-page.yaml
-      - ceph orch ls --refresh
+      - ceph orch ls
       - ceph orch apply -i /tmp/nvmeof-no-huge-page.yaml
       - ceph orch redeploy nvmeof.mypool.mygroup0
       
index b7483787f9574e3b444105c33a64671e3a6dd8e2..e4670b8baead2832ae66127e56c454c76aeb4861 100755 (executable)
@@ -65,7 +65,7 @@ test_run() {
         echo "[nvmeof] $1 test failed!"
         sudo nvme list-subsys
         sudo nvme list
-        sudo dmesg -T > $TESTDIR/archive/dmesg.log
+        sudo dmesg -T > $TESTDIR/archive/dmesg-basic_tests.log
         exit 1
     fi
 }
index 7aa26d973b65254167a45c27c8f8a1073c7ef84c..3066f5c863d1d67e858c5ef25d1508f29f00aaea 100755 (executable)
@@ -94,7 +94,7 @@ status_log() {
         sudo nvme list-subsys /dev/$device
         sudo nvme id-ns /dev/$device
     done
-    
+    sudo dmesg -T > $TESTDIR/archive/dmesg-fio_tests.log 
 }
 
 
index d83cbcdd0e4828fb86f859e70384a9f23af8d9f5..85e88032244099ed6ee4a3774f98921c38de6976 100755 (executable)
@@ -8,12 +8,24 @@ GROUP="${NVMEOF_GROUP:-mygroup0}"
 source /etc/ceph/nvmeof.env
 
 if [ -z "$GATEWAYS" ]; then
-    echo "At least one gateway needs to be defined for scalability test"
+    echo "[nvmeof.scale] At least one gateway needs to be defined for scalability test"
     exit 1
 fi
 
 status_checks() {
+    status_checks_ $1
+    if [ $? -eq 0 ]; then
+        echo "[nvmeof.scale] Verified successfully that everything is working with $1 gateways"
+    else
+        echo "[nvmeof.scale] Verification failed!"
+        sudo dmesg -T > $TESTDIR/archive/dmesg-scalability_test.log
+        exit 1
+    fi
+}
+
+status_checks_() {
     expected_count=$1
+    echo "[nvmeof.scale] Verifying that everything is working with $expected_count gateways"
 
     output=$(ceph nvme-gw show $POOL $GROUP) 
     # nvme_show=$(echo $output | grep -o '"AVAILABLE"' | wc -l)
@@ -40,25 +52,27 @@ status_checks() {
         num_namespaces=$(echo "$gw" | jq '.["num-namespaces"]')
 
         if [[ "$availability" != "AVAILABLE" ]]; then
-            echo "Gateway $gw_id is not AVAILABLE."
-            exit 1
+            echo "[nvmeof.scale] Gateway $gw_id is not AVAILABLE."
+            return 1
         fi
 
         diff=$((num_namespaces - expected_avg_ns))
         if [[ $diff -lt -1 || $diff -gt 1 ]]; then
-            echo "Gateway $gw_id has num-namespaces ($num_namespaces), expected around $expected_ns_count. Indicates a problem in ns load-balancing."
-            exit 1
+            echo "[nvmeof.scale] Gateway $gw_id has num-namespaces ($num_namespaces), expected around $expected_avg_ns. Indicates a problem in ns load-balancing."
+            return 1
         fi
     done
 
     orch_ls=$(ceph orch ls)
     if ! echo "$orch_ls" | grep -q "$expected_count/$expected_count"; then
+        echo "[nvmeof.scale] Expected $expected_count running gateways in 'ceph orch ls'"
         return 1
     fi
 
     output=$(ceph orch ps --service-name nvmeof.$POOL.$GROUP)     
     orch_ps=$(echo $output | grep -o 'running' | wc -l)
     if [ "$orch_ps" -ne "$expected_count" ]; then
+        echo "[nvmeof.scale] Expected $expected_count running gateways in 'ceph orch ps', but found $orch_ps"
         return 1
     fi
 
@@ -69,7 +83,7 @@ total_gateways_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc
 scaled_down_gateways_count=$(( total_gateways_count - $(echo "$GATEWAYS" | tr -cd ',' | wc -c) - 1 ))
 
 
-echo "[nvmeof.scale] Setting up config to remove gateways ${GATEWAYS}"
+echo "[nvmeof.scale] SCALE DOWN: Setting up config to remove gateways ${GATEWAYS}"
 ceph orch ls --service-name nvmeof.$POOL.$GROUP --export > /tmp/nvmeof-gw.yaml
 ceph orch ls nvmeof --export > /tmp/nvmeof-gw.yaml
 cat /tmp/nvmeof-gw.yaml
@@ -78,16 +92,17 @@ pattern=$(echo $GATEWAYS | sed 's/,/\\|/g')
 sed "/$pattern/d" /tmp/nvmeof-gw.yaml > /tmp/nvmeof-gw-new.yaml  
 cat /tmp/nvmeof-gw-new.yaml
 
-echo "[nvmeof.scale] Starting scale testing by removing ${GATEWAYS}"
+echo "[nvmeof.scale] SCALE DOWN: Starting scale testing by removing ${GATEWAYS}"
 status_checks $total_gateways_count 
 ceph orch apply -i /tmp/nvmeof-gw-new.yaml # downscale
-ceph orch redeploy nvmeof.$POOL.$GROUP 
+ceph orch redeploy nvmeof.$POOL.$GROUP 
 sleep $DELAY
 status_checks $scaled_down_gateways_count
-echo "[nvmeof.scale] Downscale complete - removed gateways (${GATEWAYS}); now scaling back up"
+echo "[nvmeof.scale] SCALE DOWN successful! Removed gateways (${GATEWAYS}) and verified;" 
+echo "[nvmeof.scale] SCALE UP: scaling up to $total_gateways_count gateways (from $scaled_down_gateways_count gateways)"
 ceph orch apply -i /tmp/nvmeof-gw.yaml #upscale
-ceph orch redeploy nvmeof.$POOL.$GROUP 
+ceph orch redeploy nvmeof.$POOL.$GROUP 
 sleep $DELAY
 status_checks $total_gateways_count
-
+echo "[nvmeof.scale] SCALE UP successful! All gateways running and verified." 
 echo "[nvmeof.scale] Scale testing passed for ${GATEWAYS}"