From f4a11a329020d31a3f99a5099201676d3e1fb2b9 Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Thu, 9 Sep 2021 19:47:04 -0400
Subject: [PATCH] qa: add test for standby-replay marking rank damaged

Signed-off-by: Patrick Donnelly
---
 qa/suites/fs/multifs/tasks/failover.yaml |  2 ++
 qa/tasks/cephfs/filesystem.py            | 12 ++++++++++++
 qa/tasks/cephfs/test_failover.py         | 16 ++++++++++++++++
 3 files changed, 30 insertions(+)

diff --git a/qa/suites/fs/multifs/tasks/failover.yaml b/qa/suites/fs/multifs/tasks/failover.yaml
index 4a95f01da9c73..9c403c76db6c8 100644
--- a/qa/suites/fs/multifs/tasks/failover.yaml
+++ b/qa/suites/fs/multifs/tasks/failover.yaml
@@ -5,6 +5,8 @@ overrides:
       - \(MDS_INSUFFICIENT_STANDBY\)
       - \(MDS_ALL_DOWN\)
       - \(MDS_UP_LESS_THAN_MAX\)
+      - \(MDS_DAMAGE\)
+      - \(FS_DEGRADED\)
   ceph-fuse:
     disabled: true
 tasks:
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
index c5076a28bc85d..0f846cf1e0e63 100644
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -147,6 +147,13 @@ class FSStatus(object):
             if info['rank'] >= 0 and info['state'] != 'up:standby-replay':
                 yield info
 
+    def get_damaged(self, fscid):
+        """
+        Return the 'damaged' rank list from the MDS map of the given FSCID.
+        """
+        fs = self.get_fsmap(fscid)
+        return fs['mdsmap']['damaged']
+
     def get_rank(self, fscid, rank):
         """
         Get the rank for the given FSCID.
@@ -1013,6 +1020,11 @@ class Filesystem(MDSCluster):
             status = self.getinfo()
         return status.get_ranks(self.id)
 
+    def get_damaged(self, status=None):
+        if status is None:
+            status = self.getinfo()
+        return status.get_damaged(self.id)
+
     def get_replays(self, status=None):
         if status is None:
             status = self.getinfo()
diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py
index 3df44d3da3353..5cc8ec0d58991 100644
--- a/qa/tasks/cephfs/test_failover.py
+++ b/qa/tasks/cephfs/test_failover.py
@@ -514,6 +514,22 @@ class TestStandbyReplay(CephFSTestCase):
         time.sleep(30)
         self._confirm_single_replay()
 
+    def test_standby_replay_damaged(self):
+        """
+        That a standby-replay daemon can cause rank 0 to be marked damaged.
+        """
+
+        self._confirm_no_replay()
+        self.config_set("mds", "mds_standby_replay_damaged", True)
+        self.fs.set_allow_standby_replay(True)
+        self.wait_until_true(
+            lambda: len(self.fs.get_damaged()) > 0,
+            timeout=30
+        )
+        status = self.fs.status()
+        self.assertListEqual([], list(self.fs.get_ranks(status=status)))
+        self.assertListEqual([0], self.fs.get_damaged(status=status))
+
     def test_standby_replay_disable(self):
         """
         That turning off allow_standby_replay fails all standby-replay daemons.
-- 
2.39.5