From eb5be7cccaa44320c2ddb2fe259e8ef956fc7f16 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Wed, 6 Aug 2025 06:42:16 -0500 Subject: [PATCH] test/standalone/scrub: improve build_pg_dicts() performance build_pg_dicts() is used to construct a set of dictionaries (PG to Primary OSD, PG to Acting OSDs, etc.) from the output of 'ceph pg dump'. The original code wasn't very efficient. So much so, that when used in a new test that creates a large cluster, its run time was prohibitively long. Signed-off-by: Ronen Friedman --- qa/standalone/scrub/scrub-helpers.sh | 44 +++++++++++++++------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh index 5dedb88398a..1509f0837e4 100644 --- a/qa/standalone/scrub/scrub-helpers.sh +++ b/qa/standalone/scrub/scrub-helpers.sh @@ -327,33 +327,37 @@ function build_pg_dicts { local saved_echo_flag=${-//[^x]/} set +x + # This jq filter extracts all required fields, creating a tab-separated output. + local jq_filter='.pg_stats[] | [.pgid, (.acting | @sh), .acting_primary, (.pgid | split(".")[0])] | @tsv' + # if the infile name is '-', fetch the dump directly from the ceph cluster if [[ $infile == "-" ]]; then - local -r ceph_cmd="ceph pg dump pgs_brief -f=json-pretty" - local -r ceph_cmd_out=$(eval $ceph_cmd) + local json_data + json_data=$(ceph pg dump pgs_brief -f=json) local -r ceph_cmd_rc=$? if [[ $ceph_cmd_rc -ne 0 ]]; then - echo "Error: the command '$ceph_cmd' failed with return code $ceph_cmd_rc" + echo "Error: 'ceph pg dump' command failed with return code $ceph_cmd_rc" fi - (( extr_dbg >= 3 )) && echo "$ceph_cmd_out" > /tmp/e2 - l0=`echo "$ceph_cmd_out" | jq '[.pg_stats | group_by(.pg_stats)[0] | map({pgid: .pgid, pool: (.pgid | split(".")[0]), acting: .acting, acting_primary: .acting_primary})] | .[]' ` + (( extr_dbg >= 3 )) && echo "$json_data" > /tmp/e2 + + while IFS=$'\t' read -r pgid acting acting_primary pool; do + [[ -z "$pgid" ]] && continue + (( extr_dbg >= 1 )) && echo "PG: $pgid acting: $acting primary: $acting_primary pool: $pool" + pg_primary_dict["$pgid"]=$acting_primary + pg_acting_dict["$pgid"]=$acting + pg_pool_dict["$pgid"]=$pool + done < <(echo "$json_data" | jq -r "$jq_filter") + else - l0=`jq '[.pg_stats | group_by(.pg_stats)[0] | map({pgid: .pgid, pool: (.pgid | split(".")[0]), acting: .acting, acting_primary: .acting_primary})] | .[]' $infile ` + # Process directly from file + while IFS=$'\t' read -r pgid acting acting_primary pool; do + [[ -z "$pgid" ]] && continue + (( extr_dbg >= 1 )) && echo "PG: $pgid acting: $acting primary: $acting_primary pool: $pool" + pg_primary_dict["$pgid"]=$acting_primary + pg_acting_dict["$pgid"]=$acting + pg_pool_dict["$pgid"]=$pool + done < <(jq -r "$jq_filter" "$infile") fi - (( extr_dbg >= 2 )) && echo "L0: $l0" - - mapfile -t l1 < <(echo "$l0" | jq -c '.[]') - (( extr_dbg >= 2 )) && echo "L1: ${#l1[@]}" - - for item in "${l1[@]}"; do - pgid=$(echo "$item" | jq -r '.pgid') - acting=$(echo "$item" | jq -r '.acting | @sh') - pg_acting_dict["$pgid"]=$acting - acting_primary=$(echo "$item" | jq -r '.acting_primary') - pg_primary_dict["$pgid"]=$acting_primary - pool=$(echo "$item" | jq -r '.pool') - pg_pool_dict["$pgid"]=$pool - done if [[ -n "$saved_echo_flag" ]]; then set -x; fi } -- 2.39.5