From: Kamoltat Sirivadhna Date: Wed, 2 Jul 2025 20:20:39 +0000 (+0000) Subject: qa/workunits/rados/test.sh: make --serial work with --timeout X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=bfa6edb0ea2827919d77f94d59ccee0425eb510d;p=ceph.git qa/workunits/rados/test.sh: make --serial work with --timeout - For --serial we use timeout function and --timeout value to return exit 1 if test exceeds --timeout. - Update documentation for command-line arguments so that users understand that the order of arguments matters. - Use test arrays for better organization of test definitions - Add test type tracking to properly handle both rados and neorados tests when falling back on XML - Change timeout default value to 90 minutes Fixes: https://tracker.ceph.com/issues/70772 Signed-off-by: Kamoltat Sirivadhna --- diff --git a/qa/workunits/rados/test.sh b/qa/workunits/rados/test.sh index 6593adb0e4d..d954a24b066 100755 --- a/qa/workunits/rados/test.sh +++ b/qa/workunits/rados/test.sh @@ -1,12 +1,47 @@ #!/usr/bin/env bash set -ex - -# ./test.sh # Default: parallel mode, 30-min timeout -# ./test.sh --serial # Serial mode, 30-min timeout -# ./test.sh --crimson # Crimson mode, 30-min timeout -# ./test.sh --timeout 3600 # Parallel mode, 60-min timeout -# ./test.sh --serial --timeout 60 # Serial mode, 1-min timeout -# ./test.sh --crimson --timeout 0 # Crimson mode, no timeout +# This script runs the RADOS API tests in parallel or serial mode, with optional --timeout for each test. +# It can also be run in a vstart environment for local testing. 
+ +# Define test arrays for better organization +RADOS_TESTS=( + api_aio api_aio_pp + api_io api_io_pp + api_asio api_list + api_lock api_lock_pp + api_misc api_misc_pp + api_tier_pp + api_pool + api_snapshots api_snapshots_pp + api_stat api_stat_pp + api_watch_notify api_watch_notify_pp + api_cmd api_cmd_pp + api_service api_service_pp + api_c_write_operations + api_c_read_operations + list_parallel + open_pools_parallel + delete_pools_parallel +) + +NEORADOS_TESTS=( + cls cmd handler_error io ec_io list ec_list misc pool + read_operations snapshots watch_notify write_operations +) + +# Note on argument ordering: This script processes arguments sequentially, +# so the order matters. Arguments must be provided in this specific sequence: +# 1. --serial OR --crimson (optional) +# 2. --timeout VALUE (optional) (for each test) +# 3. --vstart (optional) +# +# For example: +# ./test.sh # Default: parallel mode, 90-min timeout for each test +# ./test.sh --serial # Serial mode, 90-min timeout for each test +# ./test.sh --crimson # Crimson mode, 90-min timeout for each test +# ./test.sh --timeout 3600 # Parallel mode, 60-min timeout for each test +# ./test.sh --serial --timeout 60 # Serial mode, 1-min timeout for each test +# ./test.sh --crimson --timeout 0 # Crimson mode, no timeout for each test # ../qa/workunits/rados/test.sh --vstart # Run tests locally from `ceph/build` dir # First argument must be either --serial or --crimson or nothing @@ -22,10 +57,11 @@ elif [ "$1" = "--crimson" ]; then fi # After processing the first arg, check for --timeout -timeout=1800 # 30 minutes default value +timeout=5400 # 90 minutes default value if [ "$1" = "--timeout" ]; then shift if [ -n "$1" ] && [[ "$1" =~ ^[0-9]+$ ]]; then + echo "Setting timeout to $1 seconds for each test" timeout=$1 shift # Remove the timeout value from the list so color can be processed next else @@ -38,7 +74,10 @@ color="" # Default color setting for gtest in terminal (-t) [ -t 1 ] && color="--gtest_color=yes" 
vstart=0 -[ "$1" = "--vstart" ] && vstart=1 +if [ "$1" = "--vstart" ]; then + vstart=1 + shift +fi function cleanup() { pkill -P $$ || true @@ -49,35 +88,17 @@ GTEST_OUTPUT_DIR=${TESTDIR:-$(mktemp -d)}/archive/unit_test_xml_report mkdir -p $GTEST_OUTPUT_DIR declare -A pids +declare -A test_type +ret=0 - -# If in vstart mode, compile all test targets +# If in vstart mode, compile all test targets and start a vstart cluster. if [ $vstart -eq 1 ]; then - for f in \ - api_aio api_aio_pp \ - api_io api_io_pp \ - api_asio api_list \ - api_lock api_lock_pp \ - api_misc api_misc_pp \ - api_tier_pp \ - api_pool \ - api_snapshots api_snapshots_pp \ - api_stat api_stat_pp \ - api_watch_notify api_watch_notify_pp \ - api_cmd api_cmd_pp \ - api_service api_service_pp \ - api_c_write_operations \ - api_c_read_operations \ - list_parallel \ - open_pools_parallel \ - delete_pools_parallel + for f in "${RADOS_TESTS[@]}"; do ninja -j$(nproc) ceph_test_rados_$f done - for f in \ - cls cmd handler_error io ec_io list ec_list misc pool read_operations snapshots \ - watch_notify write_operations + for f in "${NEORADOS_TESTS[@]}"; do ninja -j$(nproc) ceph_test_neorados_$f done @@ -87,42 +108,32 @@ if [ $vstart -eq 1 ]; then ../src/vstart.sh --debug --new -x --localhost --bluestore fi -# Running all tests -for f in \ - api_aio api_aio_pp \ - api_io api_io_pp \ - api_asio api_list \ - api_lock api_lock_pp \ - api_misc api_misc_pp \ - api_tier_pp \ - api_pool \ - api_snapshots api_snapshots_pp \ - api_stat api_stat_pp \ - api_watch_notify api_watch_notify_pp \ - api_cmd api_cmd_pp \ - api_service api_service_pp \ - api_c_write_operations \ - api_c_read_operations \ - list_parallel \ - open_pools_parallel \ - delete_pools_parallel +# Running all tests in ceph_test_rados +for f in "${RADOS_TESTS[@]}" do executable="ceph_test_rados_$f" if [ $vstart -eq 1 ]; then executable="./bin/$executable" fi if [ $parallel -eq 1 ]; then - r=`printf '%25s' $f` - ff=`echo $f | awk '{print $1}'` - bash -o 
pipefail -exc "$executable --gtest_output=xml:$GTEST_OUTPUT_DIR/$f.xml $color 2>&1 | tee ceph_test_rados_$ff.log | sed \"s/^/$r: /\"" & - pid=$! - echo "test $f on pid $pid" - pids[$f]=$pid + r=`printf '%25s' $f` + ff=`echo $f | awk '{print $1}'` + bash -o pipefail -exc "$executable --gtest_output=xml:$GTEST_OUTPUT_DIR/$f.xml $color 2>&1 | tee ceph_test_rados_$ff.log | sed \"s/^/$r: /\"" & + pid=$! + echo "test $f on pid $pid" + pids[$f]=$pid + test_type["$f"]="rados" # Store test type for later use in parallel mode else - $executable + # If running in serial mode, run the test directly + if ! timeout $timeout $executable; then + echo "ERROR: Test $f timed out after $timeout seconds" + echo "Check the logs for failures in $f" + ret=1 + fi fi done +# Running all tests in ceph_test_neorados for f in \ cls cmd handler_error io ec_io list ec_list misc pool read_operations snapshots \ watch_notify write_operations @@ -132,28 +143,33 @@ do executable="./bin/$executable" fi if [ $parallel -eq 1 ]; then - r=`printf '%25s' $f` - ff=`echo $f | awk '{print $1}'` - bash -o pipefail -exc "$executable --gtest_output=xml:$GTEST_OUTPUT_DIR/neorados_$f.xml $color 2>&1 | tee ceph_test_neorados_$ff.log | sed \"s/^/$r: /\"" & - pid=$! - echo "test $f on pid $pid" - pids[$f]=$pid + r=`printf '%25s' $f` + ff=`echo $f | awk '{print $1}'` + bash -o pipefail -exc "$executable --gtest_output=xml:$GTEST_OUTPUT_DIR/neorados_$f.xml $color 2>&1 | tee ceph_test_neorados_$ff.log | sed \"s/^/$r: /\"" & + pid=$! + echo "test $f on pid $pid" + pids[$f]=$pid + test_type["$f"]="neorados" # Store test type for later use in parallel mode else - if [ $crimson -eq 1 ]; then - if [ $f = "ec_io" ] || [ $f = "ec_list" ]; then - echo "Skipping EC with Crimson" - continue - fi - fi - $executable + if [ $crimson -eq 1 ]; then + if [ $f = "ec_io" ] || [ $f = "ec_list" ]; then + echo "Skipping EC with Crimson" + continue + fi + fi + # If running in serial mode, run the test directly + if ! 
timeout $timeout $executable; then + echo "ERROR: Test $f timed out after $timeout seconds" + echo "Check the logs for failures in $f" + ret=1 + fi fi done -ret=0 if [ $parallel -eq 1 ]; then for t in "${!pids[@]}" do - # Set timeout values + # Set timeout value for each test max_wait=$timeout waited=0 check_interval=10 @@ -167,17 +183,16 @@ do waited=$((waited + check_interval)) echo "Waiting for test $t (PID $pid)... waited $waited seconds" if [ $waited -ge $max_wait ]; then - # Process timed out - echo "ERROR: Test $t ($pid) - TIMED OUT after $max_wait seconds" - - # Create fallback XML file - xml_path="$GTEST_OUTPUT_DIR/$t.xml" - if [[ $t == neorados_* ]]; then - xml_path="$GTEST_OUTPUT_DIR/neorados_$t.xml" - fi - - echo "Creating fallback XML report at $xml_path" - cat > "$xml_path" << EOF + # Process timed out + echo "ERROR: Test $t ($pid) - TIMED OUT after $max_wait seconds" + # Create fallback XML file + if [ "${test_type[$t]}" = "neorados" ]; then + xml_path="$GTEST_OUTPUT_DIR/neorados_$t.xml" + else + xml_path="$GTEST_OUTPUT_DIR/$t.xml" + fi + echo "Creating fallback XML report at $xml_path" + cat > "$xml_path" << EOF @@ -191,9 +206,9 @@ do EOF - kill -9 $pid 2>/dev/null || true - ret=1 - break + kill -9 $pid 2>/dev/null || true + ret=1 + break fi done # Only wait after process has ended naturally or been killed