From: Kamoltat (Junior) Sirivadhna Date: Wed, 11 Feb 2026 18:38:41 +0000 (+0000) Subject: qa/standalone: wait for ceph pg dump pgs X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=48e05a5518576141757d78b73a42ab0ae283098d;p=ceph.git qa/standalone: wait for ceph pg dump pgs Problem: In osd-backfill-recovery-log.sh we were trying to do `ceph osd out` on an empty value when `ceph pg dump pgs` returned nothing. Solution: In ceph-helpers.sh we created wait_for_pg_data() to wait up to 30 seconds (default) for `pg dump pgs` to return some values before proceeding. Fixes: https://tracker.ceph.com/issues/74524 Signed-off-by: Kamoltat (Junior) Sirivadhna --- diff --git a/qa/standalone/ceph-helpers.sh b/qa/standalone/ceph-helpers.sh index b8dbc39a98e..aa020891d09 100755 --- a/qa/standalone/ceph-helpers.sh +++ b/qa/standalone/ceph-helpers.sh @@ -1719,6 +1719,35 @@ function wait_for_pg_clean() { return 0 } +## +# Wait for PG data to be available from pg dump +# Usage: wait_for_pg_data jq_expr [timeout] +# Example: wait_for_pg_data '.pg_stats[0].up[]' 30 +# +# @param jq_expr jq expression to extract data from pg dump +# @param timeout timeout in seconds (default: 30) +# @return 0 on success, 1 on timeout +# +function wait_for_pg_data() { + local jq_expr="$1" + local timeout=${2:-30} + local count=0 + + while true; do + local result=$(ceph pg dump pgs --format=json 2>/dev/null | jq -r "$jq_expr" 2>/dev/null) + if [ -n "$result" ] && [ "$result" != "null" ]; then + echo "$result" + return 0 + fi + sleep 1 + count=$((count + 1)) + if [ $count -gt $timeout ]; then + echo "ERROR: Timeout waiting for PG data: $jq_expr" >&2 + return 1 + fi + done +} + ## # Wait until the cluster becomes peered or if it does not make progress # for $WAIT_FOR_CLEAN_TIMEOUT seconds. 
diff --git a/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh b/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh index 4712c3a5527..3f28ee2066b 100755 --- a/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh +++ b/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh @@ -64,8 +64,10 @@ function _common_test() { rados -p test put obj-${j} /etc/passwd done - # Mark out all OSDs for this pool - ceph osd out $(ceph pg dump pgs --format=json | jq '.pg_stats[0].up[]') + + # Wait for PG to be visible and mark out all OSDs for this pool + local pg_up_osds=$(wait_for_pg_data '.pg_stats[0].up[]') || return 1 + ceph osd out $pg_up_osds if [ "$moreobjects" != "0" ]; then for j in $(seq 1 $moreobjects) do @@ -97,7 +99,7 @@ function _common_test() { fi done - newprimary=$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary') + newprimary=$(wait_for_pg_data '.pg_stats[0].up_primary') || return 1 kill_daemons ERRORS=0