From cbaadf521919e28dc2d916088a106304a18830f9 Mon Sep 17 00:00:00 2001
From: Bill Scales
Date: Wed, 18 Jun 2025 15:06:50 +0100
Subject: [PATCH] qa: get_rand_pg_acting_set needs to wait for pool to create PGs

There is a race hazard in the OSD thrasher which tests if a pool exists
and then queries the PGs in the pool. It is possible that a pool exists
(has been added to OSDMap) but the PGs have not been created yet (by
the OSDs).

Add a sleep/retry to mitigate the race.

Fixes: https://tracker.ceph.com/issues/70818
Signed-off-by: Bill Scales
---
 qa/tasks/ceph_manager.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py
index de86b188f556f..0f7e92c5c2fb9 100644
--- a/qa/tasks/ceph_manager.py
+++ b/qa/tasks/ceph_manager.py
@@ -950,7 +950,13 @@ class OSDThrasher(Thrasher):
         have the option to specify which pool you want the PG from.
         """
 
-        pgs = self.ceph_manager.get_pg_stats()
+        with safe_while(sleep=5, tries=3, action="get_pg_stats") as proceed:
+            while proceed():
+                pgs = self.ceph_manager.get_pg_stats()
+                if pgs:
+                    break
+                # If pool has just been created it might not have PGs yet
+                self.log('No pgs; trying again')
         if not pgs:
             self.log('No pgs; doing nothing')
             return
-- 
2.39.5