From: Kamoltat (Junior) Sirivadhna Date: Tue, 28 Apr 2026 20:13:10 +0000 (+0000) Subject: src/script: init test_stretch_crush_collisions.sh X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a22a63030fc99285fc2d3cbceeceea9349790fa6;p=ceph.git src/script: init test_stretch_crush_collisions.sh Add script to test for CRUSH retry exhaustion in stretch mode with 2 datacenters. Tests unbiased stretch rules by running multiple iterations of PG mappings and checking for collisions that exceed the 50-try limit. Also add --show-retry-exhaustion flag to crushtool to detect and report when CRUSH mapping hits the maximum retry limit. Signed-off-by: Kamoltat (Junior) Sirivadhna --- diff --git a/src/crush/CrushTester.cc b/src/crush/CrushTester.cc index 9fc95d8df9c..9f5e7d9061c 100644 --- a/src/crush/CrushTester.cc +++ b/src/crush/CrushTester.cc @@ -475,7 +475,7 @@ int CrushTester::test(CephContext* cct) // make adjustments adjust_weights(weight); - if (output_choose_tries) + if (output_choose_tries || show_retry_exhaustion) crush.start_choose_profile(); for (int r = min_rule; r < crush.get_max_rules() && r <= max_rule; r++) { @@ -673,15 +673,42 @@ int CrushTester::test(CephContext* cct) } } - if (output_choose_tries) { + if (output_choose_tries || show_retry_exhaustion) { __u32 *v = 0; int n = crush.get_choose_profile(&v); - for (int i=0; i< [... text lost in HTML extraction: the rest of the removed loop and the beginning of the added block ...] > 0 && v[n-1] > 0) { + cerr << std::endl; + cerr << "WARNING: Retry exhaustion detected!" << std::endl; + cerr << " " << v[n-1] << " PG(s) hit the maximum retry limit of " << (n) << std::endl; + cerr << " This indicates CRUSH failed to find optimal placement for some PGs." 
<< std::endl; + cerr << std::endl; + } else { + cout << std::endl; + cout << "No retry exhaustion detected (maximum tries needed: "; + // Find the actual maximum tries used + int max_tries_used = 1; + for (int i = n-1; i >= 0; i--) { + if (v[i] > 0) { + max_tries_used = i+1; + break; + } + } + cout << max_tries_used << " / " << (n) << ")" << std::endl; + cout << std::endl; + } } crush.stop_choose_profile(); diff --git a/src/crush/CrushTester.h b/src/crush/CrushTester.h index 49e41de3080..daacc26ad97 100644 --- a/src/crush/CrushTester.h +++ b/src/crush/CrushTester.h @@ -31,6 +31,7 @@ class CrushTester { bool output_mappings; bool output_bad_mappings; bool output_choose_tries; + bool show_retry_exhaustion; bool output_data_file; bool output_csv; @@ -182,6 +183,7 @@ public: output_mappings(false), output_bad_mappings(false), output_choose_tries(false), + show_retry_exhaustion(false), output_data_file(false), output_csv(false), output_data_file_name("") @@ -251,6 +253,13 @@ public: return output_choose_tries; } + void set_show_retry_exhaustion(bool b) { + show_retry_exhaustion = b; + } + bool get_show_retry_exhaustion() const { + return show_retry_exhaustion; + } + void set_batches(int b) { num_batches = b; } diff --git a/src/script/test_stretch_crush_collisions.sh b/src/script/test_stretch_crush_collisions.sh new file mode 100755 index 00000000000..dc924f1b577 --- /dev/null +++ b/src/script/test_stretch_crush_collisions.sh @@ -0,0 +1,171 @@ +#!/bin/bash +# Test script to detect CRUSH retry exhaustion in stretch mode configurations +# Tests whether unbiased stretch rules with exactly 2 datacenters experience +# collision retry exhaustion (hitting the 50-try limit) + +set -e + +# Find the script directory and repo root +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" + +# Default to build directory at repo root, but allow override +BUILD_DIR="${BUILD_DIR:-$REPO_ROOT/build}" + +# Find crushtool +if [ -n "$CRUSHTOOL" ]; then + # User specified CRUSHTOOL, use it + : +elif [ -x "$BUILD_DIR/bin/crushtool" ]; then + CRUSHTOOL="$BUILD_DIR/bin/crushtool" +else + echo "Error: Cannot find crushtool. Please set BUILD_DIR or CRUSHTOOL environment variable." + exit 1 +fi + +OUTPUT_DIR="${OUTPUT_DIR:-./crush_test_results}" +NUM_PGS="${NUM_PGS:-100000}" + +mkdir -p "$OUTPUT_DIR" + +# Function to create a CRUSH map with 2 datacenters +create_crush_map() { + local filename=$1 + + cat > "$filename" <<'EOF' +# CRUSH map for stretch mode with 2 datacenters +# Matches real cluster structure + +# devices +device 0 osd.0 +device 1 osd.1 +device 2 osd.2 +device 3 osd.3 +device 4 osd.4 +device 5 osd.5 +device 6 osd.6 +device 7 osd.7 + +# types +type 0 osd +type 1 host +type 2 datacenter +type 3 root + +# buckets +host host1 { + id -9 + alg straw2 + hash 0 + item osd.0 weight 1.0 + item osd.1 weight 1.0 +} + +host host2 { + id -10 + alg straw2 + hash 0 + item osd.2 weight 1.0 + item osd.3 weight 1.0 +} + +host host3 { + id -11 + alg straw2 + hash 0 + item osd.4 weight 1.0 + item osd.5 weight 1.0 +} + +host host4 { + id -12 + alg straw2 + hash 0 + item osd.6 weight 1.0 + item osd.7 weight 1.0 +} + +datacenter dc1 { + id -5 + alg straw2 + hash 0 + item host1 weight 2.0 + item host2 weight 2.0 +} + +datacenter dc2 { + id -7 + alg straw2 + hash 0 + item host3 weight 2.0 + item host4 weight 2.0 +} + +root default { + id -1 + alg straw2 + hash 0 + item dc1 weight 4.0 + item dc2 weight 4.0 +} + +# CRUSH rules +rule stretch_replicated_rule { + id 0 + type replicated + step take default + step choose firstn 0 type datacenter + step chooseleaf firstn 2 type host + step emit +} +EOF +} + +# Function to test a CRUSH map +test_crush_map() { + local map_txt=$1 + local map_bin=$2 + local rule_id=$3 + local rule_name=$4 + local iteration=$5 + + if $CRUSHTOOL 
-i "$map_bin" --test \ + --min-x 1 \ + --max-x "$NUM_PGS" \ + --rule "$rule_id" \ + --num-rep 4 \ + --show-statistics \ + --set-choose-total-tries 50 \ + --show-retry-exhaustion 2>&1 | grep -q "WARNING: Retry exhaustion detected!"; then + return 1 # Retry exhaustion detected - failure + else + return 0 # No retry exhaustion - success + fi +} + +echo "Testing Unbiased Rule with 2 Datacenters" +echo "Running 100 iterations with $NUM_PGS PGs each..." +echo "" + +create_crush_map "$OUTPUT_DIR/crush_2dc.txt" + +echo "Compiling CRUSH map..." +$CRUSHTOOL -c "$OUTPUT_DIR/crush_2dc.txt" -o "$OUTPUT_DIR/crush_2dc.bin" + +for i in $(seq 1 100); do + echo -n " Iteration $i/100: " + + if test_crush_map \ + "$OUTPUT_DIR/crush_2dc.txt" \ + "$OUTPUT_DIR/crush_2dc.bin" \ + 0 \ + "stretch_replicated_rule" \ + "$i"; then + echo "OK" + else + echo "RETRY EXHAUSTION DETECTED" + exit 1 + fi +done + + diff --git a/src/tools/crushtool.cc b/src/tools/crushtool.cc index 21b3efd3475..17e6e31f9dd 100644 --- a/src/tools/crushtool.cc +++ b/src/tools/crushtool.cc @@ -229,6 +229,8 @@ void usage() cout << " --show-mappings show mappings\n"; cout << " --show-bad-mappings show bad mappings\n"; cout << " --show-choose-tries show choose tries histogram\n"; + cout << " --show-retry-exhaustion\n"; + cout << " check for and report CRUSH retry exhaustion\n"; cout << " --output-name name\n"; cout << " prepend the data file(s) generated during the\n"; cout << " testing routine with name\n"; @@ -542,6 +544,9 @@ int main(int argc, const char **argv) } else if (ceph_argparse_flag(args, i, "--show_choose_tries", (char*)NULL)) { display = true; tester.set_output_choose_tries(true); + } else if (ceph_argparse_flag(args, i, "--show-retry-exhaustion", (char*)NULL)) { + display = true; + tester.set_show_retry_exhaustion(true); } else if (ceph_argparse_witharg(args, i, &val, "-c", "--compile", (char*)NULL)) { srcfn = val; compile = true;