From: Bill Scales Date: Wed, 18 Jun 2025 14:06:50 +0000 (+0100) Subject: qa: get_rand_pg_acting_set needs to wait for pool to create PGs X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=cbaadf521919e28dc2d916088a106304a18830f9;p=ceph.git qa: get_rand_pg_acting_set needs to wait for pool to create PGs There is a race hazard in the OSD thrasher which tests if a pool exists and then queries the PGs in the pool. It is possible that a pool exists (has been added to OSDMap) but the PGs have not been created yet (by the OSDs). Add a sleep/retry to mitigate the race. Fixes: https://tracker.ceph.com/issues/70818 Signed-off-by: Bill Scales --- diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py index de86b188f556f..0f7e92c5c2fb9 100644 --- a/qa/tasks/ceph_manager.py +++ b/qa/tasks/ceph_manager.py @@ -950,7 +950,13 @@ class OSDThrasher(Thrasher): have the option to specify which pool you want the PG from. """ - pgs = self.ceph_manager.get_pg_stats() + with safe_while(sleep=5, tries=3, action="get_pg_stats") as proceed: + while proceed(): + pgs = self.ceph_manager.get_pg_stats() + if pgs: + break + # If pool has just been created it might not have PGs yet + self.log('No pgs; trying again') if not pgs: self.log('No pgs; doing nothing') return