git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
qa: add helper for waiting for a rank to fail
author Patrick Donnelly <pdonnell@redhat.com>
Thu, 15 Sep 2022 13:55:47 +0000 (09:55 -0400)
committer Patrick Donnelly <pdonnell@redhat.com>
Tue, 1 Aug 2023 15:16:01 +0000 (11:16 -0400)
For killpoint testing.

Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
qa/tasks/cephfs/filesystem.py

index 21804f877700ea1af4fdc2c7ee83a263a03e0e37..d18f699b113ade8b3767fa18ba8dd836b6b6887a 100644 (file)
@@ -68,6 +68,14 @@ class FSMissing(Exception):
     def __str__(self):
         return f"File system {self.ident} does not exist in the map"
 
+class NoSuchRank(Exception):
+    """
+    Raised when a rank lookup finds no entry for the requested rank.
+
+    Carries the ``fscid`` and ``rank`` that were looked up so the message
+    can name them.
+    """
+    def __init__(self, fscid, rank):
+        # Keep both identifiers; __str__ formats them as "fscid:rank".
+        self.fscid = fscid
+        self.rank = rank
+
+    def __str__(self):
+        return f"No such rank {self.fscid}:{self.rank}"
+
 class FSStatus(RunCephCmd):
     """
     Operations on a snapshot of the FSMap.
@@ -163,7 +171,7 @@ class FSStatus(RunCephCmd):
         for info in self.get_ranks(fscid):
             if info['rank'] == rank:
                 return info
-        raise RuntimeError("FSCID {0} has no rank {1}".format(fscid, rank))
+        raise NoSuchRank(fscid, rank)
 
     def get_mds(self, name):
         """
@@ -1146,6 +1154,38 @@ class Filesystem(MDSCluster):
 
         return result
 
+    def wait_for_death(self, rank=0, timeout=None, status=None):
+        """
+        Wait until the daemon currently holding ``rank`` fails.
+
+        The rank counts as failed once looking it up raises NoSuchRank or
+        returns an entry whose gid differs from the one seen at the start.
+
+        :param rank: rank to watch
+        :param timeout: number of one-second polls to allow; defaults to
+                        DAEMON_WAIT_TIMEOUT when None
+        :param status: status snapshot to start from; fetched with
+                       self.status() when None
+        :return: the status snapshot in which the failure was observed
+        :raises NoSuchRank: propagated from the initial lookup if the rank
+                            is already absent from the starting status
+        :raises RuntimeError: if the rank has not failed within the timeout
+        """
+
+        if timeout is None:
+            timeout = DAEMON_WAIT_TIMEOUT
+
+        if status is None:
+            status = self.status()
+
+        # Remember who holds the rank now; a different gid later on means
+        # the original daemon is gone.
+        rinfo = self.get_rank(rank=rank, status=status)
+        elapsed = 0
+        while True:
+            try:
+                info = self.get_rank(rank=rank, status=status)
+            except NoSuchRank:
+                failed = True
+            else:
+                failed = rinfo['gid'] != info['gid']
+
+            if failed:
+                log.info(f"mds.{rinfo['name']}:{rinfo['gid']} failed")
+                # Hand back the snapshot that showed the failure, as the
+                # docstring promises, instead of falling through to None.
+                return status
+
+            time.sleep(1)
+            elapsed += 1
+
+            if elapsed > timeout:
+                log.debug("status = {0}".format(status))
+                raise RuntimeError("Timed out waiting for rank to fail")
+
+            status = self.status()
+
     def wait_for_daemons(self, timeout=None, skip_max_mds_check=False, status=None):
         """
         Wait until all daemons are healthy