qa/workunits/rados/test.sh: make --serial work with --timeout

author Kamoltat Sirivadhna <ksirivad@redhat.com>

Wed, 2 Jul 2025 20:20:39 +0000 (20:20 +0000)

committer Kamoltat Sirivadhna <ksirivad@redhat.com>

Wed, 2 Jul 2025 20:59:28 +0000 (20:59 +0000)
author Kamoltat Sirivadhna <ksirivad@redhat.com>
Wed, 2 Jul 2025 20:20:39 +0000 (20:20 +0000)
committer Kamoltat Sirivadhna <ksirivad@redhat.com>
Wed, 2 Jul 2025 20:59:28 +0000 (20:59 +0000)
diff --git a/qa/workunits/rados/test.sh b/qa/workunits/rados/test.sh

index 6593adb0e4dd5f0884d75dcc76207068c8f735c1..d954a24b066c86fbe5dd12f1ddb3affd99c3b9da 100755 (executable)
--- a/qa/workunits/rados/test.sh
+++ b/qa/workunits/rados/test.sh
@@ -1,12 +1,47 @@
  #!/usr/bin/env bash
  set -ex
-
-# ./test.sh                               # Default: parallel mode, 30-min timeout
-# ./test.sh --serial                      # Serial mode, 30-min timeout
-# ./test.sh --crimson                     # Crimson mode, 30-min timeout
-# ./test.sh --timeout 3600                # Parallel mode, 60-min timeout
-# ./test.sh --serial --timeout 60         # Serial mode, 1-min timeout
-# ./test.sh --crimson --timeout 0         # Crimson mode, no timeout
+# This script runs the RADOS API tests in parallel or serial mode, with optional --timeout for each test.
+# It can also be run in a vstart environment for local testing.
+
+# Define test arrays for better organization
+RADOS_TESTS=(
+    api_aio api_aio_pp
+    api_io api_io_pp
+    api_asio api_list
+    api_lock api_lock_pp
+    api_misc api_misc_pp
+    api_tier_pp
+    api_pool
+    api_snapshots api_snapshots_pp
+    api_stat api_stat_pp
+    api_watch_notify api_watch_notify_pp
+    api_cmd api_cmd_pp
+    api_service api_service_pp
+    api_c_write_operations
+    api_c_read_operations
+    list_parallel
+    open_pools_parallel
+    delete_pools_parallel
+)
+
+NEORADOS_TESTS=(
+    cls cmd handler_error io ec_io list ec_list misc pool
+    read_operations snapshots watch_notify write_operations
+)
+
+# Note on argument ordering: This script processes arguments sequentially,
+# so the order matters. Arguments must be provided in this specific sequence:
+# 1. --serial OR --crimson (optional)
+# 2. --timeout VALUE (optional) (for each test)
+# 3. --vstart (optional)
+#
+# For example:
+# ./test.sh                               # Default: parallel mode, 90-min timeout for each test
+# ./test.sh --serial                      # Serial mode, 90-min timeout for each test
+# ./test.sh --crimson                     # Crimson mode, 90-min timeout for each test
+# ./test.sh --timeout 3600                # Parallel mode, 60-min timeout for each test
+# ./test.sh --serial --timeout 60         # Serial mode, 1-min timeout for each test
+# ./test.sh --crimson --timeout 0         # Crimson mode, no timeout for each test
  # ../qa/workunits/rados/test.sh --vstart  # Run tests locally from `ceph/build` dir
  
  # First argument must be either --serial or --crimson or nothing
@@ -22,10 +57,11 @@ elif [ "$1" = "--crimson" ]; then
  fi
  
  # After processing the first arg, check for --timeout
-timeout=1800  # 30 minutes default value
+timeout=5400  # 90 minutes default value
  if [ "$1" = "--timeout" ]; then
      shift
      if [ -n "$1" ] && [[ "$1" =~ ^[0-9]+$ ]]; then
+        echo "Setting timeout to $1 seconds for each test"
          timeout=$1
          shift # Remove the timeout value from the list so color can be processed next
      else
@@ -38,7 +74,10 @@ color="" # Default color setting for gtest in terminal (-t)
  [ -t 1 ] && color="--gtest_color=yes"
  
  vstart=0
-[ "$1" = "--vstart" ] && vstart=1
+if [ "$1" = "--vstart" ]; then
+    vstart=1
+    shift
+fi
  
  function cleanup() {
      pkill -P $$ || true
@@ -49,35 +88,17 @@ GTEST_OUTPUT_DIR=${TESTDIR:-$(mktemp -d)}/archive/unit_test_xml_report
  mkdir -p $GTEST_OUTPUT_DIR
  
  declare -A pids
+declare -A test_type
+ret=0
  
-
-# If in vstart mode, compile all test targets
+# If in vstart mode, compile all test targets and start a vstart cluster.
  if [ $vstart -eq 1 ]; then
-    for f in \
-        api_aio api_aio_pp \
-       api_io api_io_pp \
-       api_asio api_list \
-       api_lock api_lock_pp \
-       api_misc api_misc_pp \
-       api_tier_pp \
-       api_pool \
-       api_snapshots api_snapshots_pp \
-       api_stat api_stat_pp \
-       api_watch_notify api_watch_notify_pp \
-       api_cmd api_cmd_pp \
-       api_service api_service_pp \
-       api_c_write_operations \
-       api_c_read_operations \
-       list_parallel \
-       open_pools_parallel \
-       delete_pools_parallel
+    for f in "${RADOS_TESTS[@]}";
      do
          ninja -j$(nproc) ceph_test_rados_$f
      done
  
-    for f in \
-        cls cmd handler_error io ec_io list ec_list misc pool read_operations snapshots \
-        watch_notify write_operations
+    for f in "${NEORADOS_TESTS[@]}";
      do
          ninja -j$(nproc) ceph_test_neorados_$f
      done
@@ -87,42 +108,32 @@ if [ $vstart -eq 1 ]; then
      ../src/vstart.sh --debug --new -x --localhost --bluestore
  fi
  
-# Running all tests
-for f in \
-    api_aio api_aio_pp \
-    api_io api_io_pp \
-    api_asio api_list \
-    api_lock api_lock_pp \
-    api_misc api_misc_pp \
-    api_tier_pp \
-    api_pool \
-    api_snapshots api_snapshots_pp \
-    api_stat api_stat_pp \
-    api_watch_notify api_watch_notify_pp \
-    api_cmd api_cmd_pp \
-    api_service api_service_pp \
-    api_c_write_operations \
-    api_c_read_operations \
-    list_parallel \
-    open_pools_parallel \
-    delete_pools_parallel
+# Running all tests in ceph_test_rados
+for f in "${RADOS_TESTS[@]}"
  do
      executable="ceph_test_rados_$f"
      if [ $vstart -eq 1 ]; then
          executable="./bin/$executable"
      fi
      if [ $parallel -eq 1 ]; then
-       r=`printf '%25s' $f`
-       ff=`echo $f | awk '{print $1}'`
-       bash -o pipefail -exc "$executable --gtest_output=xml:$GTEST_OUTPUT_DIR/$f.xml $color 2>&1 | tee ceph_test_rados_$ff.log | sed \"s/^/$r: /\"" &
-       pid=$!
-       echo "test $f on pid $pid"
-       pids[$f]=$pid
+        r=`printf '%25s' $f`
+        ff=`echo $f | awk '{print $1}'`
+        bash -o pipefail -exc "$executable --gtest_output=xml:$GTEST_OUTPUT_DIR/$f.xml $color 2>&1 | tee ceph_test_rados_$ff.log | sed \"s/^/$r: /\"" &
+        pid=$!
+        echo "test $f on pid $pid"
+           pids[$f]=$pid
+        test_type["$f"]="rados" # Store test type for later use in parallel mode
      else
-       $executable
+        # If running in serial mode, run the test directly
+        if ! timeout $timeout $executable; then
+            echo "ERROR: Test $f timed out after $timeout seconds"
+            echo "Check the logs for failures in $f"
+            ret=1
+        fi
      fi
  done
  
+# Running all tests in ceph_test_neorados
  for f in \
      cls cmd handler_error io ec_io list ec_list misc pool read_operations snapshots \
      watch_notify write_operations
@@ -132,28 +143,33 @@ do
          executable="./bin/$executable"
      fi
      if [ $parallel -eq 1 ]; then
-       r=`printf '%25s' $f`
-       ff=`echo $f | awk '{print $1}'`
-       bash -o pipefail -exc "$executable --gtest_output=xml:$GTEST_OUTPUT_DIR/neorados_$f.xml $color 2>&1 | tee ceph_test_neorados_$ff.log | sed \"s/^/$r: /\"" &
-       pid=$!
-       echo "test $f on pid $pid"
-       pids[$f]=$pid
+        r=`printf '%25s' $f`
+        ff=`echo $f | awk '{print $1}'`
+        bash -o pipefail -exc "$executable --gtest_output=xml:$GTEST_OUTPUT_DIR/neorados_$f.xml $color 2>&1 | tee ceph_test_neorados_$ff.log | sed \"s/^/$r: /\"" &
+        pid=$!
+        echo "test $f on pid $pid"
+        pids[$f]=$pid
+        test_type["$f"]="neorados" # Store test type for later use in parallel mode
      else
-       if [ $crimson -eq 1 ]; then
-               if [ $f = "ec_io" ] || [ $f = "ec_list" ]; then
-                       echo "Skipping EC with Crimson"
-                       continue
-               fi
-       fi
-       $executable
+        if [ $crimson -eq 1 ]; then
+            if [ $f = "ec_io" ] || [ $f = "ec_list" ]; then
+                echo "Skipping EC with Crimson"
+                continue
+            fi
+        fi
+        # If running in serial mode, run the test directly
+        if ! timeout $timeout $executable; then
+            echo "ERROR: Test $f timed out after $timeout seconds"
+            echo "Check the logs for failures in $f"
+            ret=1
+        fi
      fi
  done
  
-ret=0
  if [ $parallel -eq 1 ]; then
  for t in "${!pids[@]}"
  do
-    # Set timeout values
+    # Set timeout value for each test
      max_wait=$timeout
      waited=0
      check_interval=10
@@ -167,17 +183,16 @@ do
          waited=$((waited + check_interval))
          echo "Waiting for test $t (PID $pid)... waited $waited seconds"
          if [ $waited -ge $max_wait ]; then
-        # Process timed out
-        echo "ERROR: Test $t ($pid) - TIMED OUT after $max_wait seconds"
-
-        # Create fallback XML file
-        xml_path="$GTEST_OUTPUT_DIR/$t.xml"
-        if [[ $t == neorados_* ]]; then
-            xml_path="$GTEST_OUTPUT_DIR/neorados_$t.xml"
-        fi
-
-        echo "Creating fallback XML report at $xml_path"
-        cat > "$xml_path" << EOF
+            # Process timed out
+            echo "ERROR: Test $t ($pid) - TIMED OUT after $max_wait seconds"
+            # Create fallback XML file
+            if [ "${test_type[$t]}" = "neorados" ]; then
+                xml_path="$GTEST_OUTPUT_DIR/neorados_$t.xml"
+            else
+                xml_path="$GTEST_OUTPUT_DIR/$t.xml"
+            fi
+            echo "Creating fallback XML report at $xml_path"
+            cat > "$xml_path" << EOF
  <?xml version="1.0" encoding="UTF-8"?>
  <testsuites tests="1" failures="1" disabled="0" errors="0" time="${max_wait}.000" timestamp="$(date -Iseconds)" name="AllTests">
    <testsuite name="Timeout" tests="1" failures="1" disabled="0" errors="0" time="${max_wait}.000">
@@ -191,9 +206,9 @@ do
    </testsuite>
  </testsuites>
  EOF
-        kill -9 $pid 2>/dev/null || true
-        ret=1
-        break
+            kill -9 $pid 2>/dev/null || true
+            ret=1
+            break
          fi
      done
      # Only wait after process has ended naturally or been killed
author	Kamoltat Sirivadhna <ksirivad@redhat.com>
	Wed, 2 Jul 2025 20:20:39 +0000 (20:20 +0000)
committer	Kamoltat Sirivadhna <ksirivad@redhat.com>
	Wed, 2 Jul 2025 20:59:28 +0000 (20:59 +0000)