From 897e4366819ca6d2510d025656e5be1bef97ae48 Mon Sep 17 00:00:00 2001 From: Jos Collin Date: Thu, 14 Mar 2024 20:12:46 +0530 Subject: [PATCH] qa: Wait for mirror daemon restart before getting new rados_inst After the mirror instance is blocklisted/failed, wait for the mirror daemon to restart it, which happens after a 30-second timeout, and then check for the new rados_inst. Fixes: https://tracker.ceph.com/issues/64927 Signed-off-by: Jos Collin (cherry picked from commit a9a56919ff95d2face436da2ca08fd26580fadb9) --- qa/tasks/cephfs/test_mirroring.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/qa/tasks/cephfs/test_mirroring.py b/qa/tasks/cephfs/test_mirroring.py index 6e57df5d0a846..d1b8cb9a3e82a 100644 --- a/qa/tasks/cephfs/test_mirroring.py +++ b/qa/tasks/cephfs/test_mirroring.py @@ -275,7 +275,8 @@ class TestMirroring(CephFSTestCase): """return the rados addr used by cephfs-mirror instance""" res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') - return res['rados_inst'] + if 'rados_inst' in res: + return res['rados_inst'] def mirror_daemon_command(self, cmd_label, *args): asok_path = self.get_daemon_admin_socket() @@ -491,6 +492,7 @@ class TestMirroring(CephFSTestCase): # fetch rados address for blacklist check rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + self.assertTrue(rados_inst) # simulate non-responding mirror daemon by sending SIGSTOP pid = self.get_mirror_daemon_pid() @@ -509,9 +511,16 @@ class TestMirroring(CephFSTestCase): # check if the rados addr is blocklisted self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst)) - # wait enough so that the mirror daemon restarts blocklisted instances - time.sleep(40) - rados_inst_new = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + # wait for restart, which is after 30 seconds timeout (cephfs_mirror_restart_mirror_on_blocklist_interval) + time.sleep(60) + + # get the new 
rados_inst + rados_inst_new = "" + with safe_while(sleep=2, tries=20, action='wait for mirror status rados_inst') as proceed: + while proceed(): + rados_inst_new = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + if rados_inst_new: + break # and we should get a new rados instance self.assertTrue(rados_inst != rados_inst_new) -- 2.39.5