From: Ronen Friedman Date: Wed, 6 Aug 2025 11:42:16 +0000 (-0500) Subject: test/standalone/scrub: improve build_pg_dicts() performance X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=eb5be7cccaa44320c2ddb2fe259e8ef956fc7f16;p=ceph.git test/standalone/scrub: improve build_pg_dicts() performance build_pg_dicts() is used to construct a set of dictionaries (PG to Primary OSD, PG to Acting OSDs, etc.) from the output of 'ceph pg dump'. The original code wasn't very efficient. So much so, that when used in a new test that creates a large cluster, its run time was prohibitively long. Signed-off-by: Ronen Friedman --- diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh index 5dedb88398a..1509f0837e4 100644 --- a/qa/standalone/scrub/scrub-helpers.sh +++ b/qa/standalone/scrub/scrub-helpers.sh @@ -327,33 +327,37 @@ function build_pg_dicts { local saved_echo_flag=${-//[^x]/} set +x + # This jq filter extracts all required fields, creating a tab-separated output. + local jq_filter='.pg_stats[] | [.pgid, (.acting | @sh), .acting_primary, (.pgid | split(".")[0])] | @tsv' + # if the infile name is '-', fetch the dump directly from the ceph cluster if [[ $infile == "-" ]]; then - local -r ceph_cmd="ceph pg dump pgs_brief -f=json-pretty" - local -r ceph_cmd_out=$(eval $ceph_cmd) + local json_data + json_data=$(ceph pg dump pgs_brief -f=json) local -r ceph_cmd_rc=$? if [[ $ceph_cmd_rc -ne 0 ]]; then - echo "Error: the command '$ceph_cmd' failed with return code $ceph_cmd_rc" + echo "Error: 'ceph pg dump' command failed with return code $ceph_cmd_rc" fi - (( extr_dbg >= 3 )) && echo "$ceph_cmd_out" > /tmp/e2 - l0=`echo "$ceph_cmd_out" | jq '[.pg_stats | group_by(.pg_stats)[0] | map({pgid: .pgid, pool: (.pgid | split(".")[0]), acting: .acting, acting_primary: .acting_primary})] | .[]' ` + (( extr_dbg >= 3 )) && echo "$json_data" > /tmp/e2 + + while IFS=$'\t' read -r pgid acting acting_primary pool; do + [[ -z "$pgid" ]] && continue + (( extr_dbg >= 1 )) && echo "PG: $pgid acting: $acting primary: $acting_primary pool: $pool" + pg_primary_dict["$pgid"]=$acting_primary + pg_acting_dict["$pgid"]=$acting + pg_pool_dict["$pgid"]=$pool + done < <(echo "$json_data" | jq -r "$jq_filter") + else - l0=`jq '[.pg_stats | group_by(.pg_stats)[0] | map({pgid: .pgid, pool: (.pgid | split(".")[0]), acting: .acting, acting_primary: .acting_primary})] | .[]' $infile ` + # Process directly from file + while IFS=$'\t' read -r pgid acting acting_primary pool; do + [[ -z "$pgid" ]] && continue + (( extr_dbg >= 1 )) && echo "PG: $pgid acting: $acting primary: $acting_primary pool: $pool" + pg_primary_dict["$pgid"]=$acting_primary + pg_acting_dict["$pgid"]=$acting + pg_pool_dict["$pgid"]=$pool + done < <(jq -r "$jq_filter" "$infile") fi - (( extr_dbg >= 2 )) && echo "L0: $l0" - - mapfile -t l1 < <(echo "$l0" | jq -c '.[]') - (( extr_dbg >= 2 )) && echo "L1: ${#l1[@]}" - - for item in "${l1[@]}"; do - pgid=$(echo "$item" | jq -r '.pgid') - acting=$(echo "$item" | jq -r '.acting | @sh') - pg_acting_dict["$pgid"]=$acting - acting_primary=$(echo "$item" | jq -r '.acting_primary') - pg_primary_dict["$pgid"]=$acting_primary - pool=$(echo "$item" | jq -r '.pool') - pg_pool_dict["$pgid"]=$pool - done if [[ -n "$saved_echo_flag" ]]; then set -x; fi }