From a6b8bbd2cbb979dcfc1405392114a7a095616bee Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Thu, 15 Sep 2022 09:55:47 -0400
Subject: [PATCH] qa: add helper for waiting for a rank to fail

For killpoint testing.

Signed-off-by: Patrick Donnelly
---
 qa/tasks/cephfs/filesystem.py | 54 ++++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
index 21804f87770..d18f699b113 100644
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -68,6 +68,14 @@ class FSMissing(Exception):
     def __str__(self):
         return f"File system {self.ident} does not exist in the map"
 
+class NoSuchRank(Exception):
+    def __init__(self, fscid, rank):
+        self.fscid = fscid
+        self.rank = rank
+
+    def __str__(self):
+        return f"No such rank {self.fscid}:{self.rank}"
+
 class FSStatus(RunCephCmd):
     """
     Operations on a snapshot of the FSMap.
@@ -163,7 +171,7 @@ class FSStatus(RunCephCmd):
         for info in self.get_ranks(fscid):
             if info['rank'] == rank:
                 return info
-        raise RuntimeError("FSCID {0} has no rank {1}".format(fscid, rank))
+        raise NoSuchRank(fscid, rank)
 
     def get_mds(self, name):
         """
@@ -1146,6 +1154,50 @@ class Filesystem(MDSCluster):
 
         return result
 
+    def wait_for_death(self, rank=0, timeout=None, status=None):
+        """
+        Wait until the MDS holding a rank no longer holds it.
+
+        The rank counts as failed once it is held by a different GID or
+        is missing from the map.
+
+        :param rank: rank to watch
+        :param timeout: seconds to wait (DAEMON_WAIT_TIMEOUT if None)
+        :param status: status to start from (fetched if None)
+        :return: status in which the failure was observed
+        :raises NoSuchRank: if the initial lookup of the rank raises it
+        :raises RuntimeError: if the rank has not failed within timeout
+        """
+
+        if timeout is None:
+            timeout = DAEMON_WAIT_TIMEOUT
+
+        if status is None:
+            status = self.status()
+
+        rinfo = self.get_rank(rank=rank, status=status)
+        elapsed = 0
+        while True:
+            try:
+                info = self.get_rank(rank=rank, status=status)
+            except NoSuchRank:
+                # The rank is gone from the map altogether.
+                failed = True
+            else:
+                failed = rinfo['gid'] != info['gid']
+            if failed:
+                log.info(f"mds.{rinfo['name']}:{rinfo['gid']} failed")
+                return status
+
+            time.sleep(1)
+            elapsed += 1
+
+            if elapsed > timeout:
+                log.debug("status = {0}".format(status))
+                raise RuntimeError("Timed out waiting for rank to fail")
+
+            status = self.status()
+
     def wait_for_daemons(self, timeout=None, skip_max_mds_check=False, status=None):
         """
         Wait until all daemons are healthy
-- 
2.39.5