]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa: Wait for mirror daemon restart before getting new rados_inst 58632/head
author: Jos Collin <jcollin@redhat.com>
Thu, 14 Mar 2024 14:42:46 +0000 (20:12 +0530)
committer: Jos Collin <jcollin@redhat.com>
Wed, 17 Jul 2024 01:00:06 +0000 (06:30 +0530)
After the mirror instance is blocklisted/failed, wait for the mirror daemon
restart, which happens after a 30-second timeout, and then check for the new rados_inst.

Fixes: https://tracker.ceph.com/issues/64927
Signed-off-by: Jos Collin <jcollin@redhat.com>
(cherry picked from commit a9a56919ff95d2face436da2ca08fd26580fadb9)

qa/tasks/cephfs/test_mirroring.py

index 6e57df5d0a846bd92611d39c46303a2de2f6805f..d1b8cb9a3e82a4596a657f98cac36bb0ebd094cf 100644 (file)
@@ -275,7 +275,8 @@ class TestMirroring(CephFSTestCase):
         """return the rados addr used by cephfs-mirror instance"""
         res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}',
                                          'fs', 'mirror', 'status', f'{fs_name}@{fs_id}')
-        return res['rados_inst']
+        if 'rados_inst' in res:
+            return res['rados_inst']
 
     def mirror_daemon_command(self, cmd_label, *args):
         asok_path = self.get_daemon_admin_socket()
@@ -491,6 +492,7 @@ class TestMirroring(CephFSTestCase):
 
         # fetch rados address for blacklist check
         rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id)
+        self.assertTrue(rados_inst)
 
         # simulate non-responding mirror daemon by sending SIGSTOP
         pid = self.get_mirror_daemon_pid()
@@ -509,9 +511,16 @@ class TestMirroring(CephFSTestCase):
         # check if the rados addr is blocklisted
         self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst))
 
-        # wait enough so that the mirror daemon restarts blocklisted instances
-        time.sleep(40)
-        rados_inst_new = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id)
+        # wait for restart, which is after 30 seconds timeout (cephfs_mirror_restart_mirror_on_blocklist_interval)
+        time.sleep(60)
+
+        # get the new rados_inst
+        rados_inst_new = ""
+        with safe_while(sleep=2, tries=20, action='wait for mirror status rados_inst') as proceed:
+            while proceed():
+                rados_inst_new = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id)
+                if rados_inst_new:
+                    break
 
         # and we should get a new rados instance
         self.assertTrue(rados_inst != rados_inst_new)