From 8931fa2f92f91fe4e0510c7bb2b0d528f829a901 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Fri, 29 Apr 2016 20:07:16 +0100
Subject: [PATCH] tasks/cephfs: add TestStandbyReplay.test_standby_replay_unused

For the case where we have a daemon come up that wants to be a standby
replay, but someone else is already following the target, so he has to
be just a regular standby instead.

Signed-off-by: John Spray
---
 tasks/cephfs/filesystem.py    |  4 +-
 tasks/cephfs/test_failover.py | 87 +++++++++++++++++++++++++++--------
 2 files changed, 71 insertions(+), 20 deletions(-)

diff --git a/tasks/cephfs/filesystem.py b/tasks/cephfs/filesystem.py
index f9ad1488aa9b3..c0d179047deb2 100644
--- a/tasks/cephfs/filesystem.py
+++ b/tasks/cephfs/filesystem.py
@@ -419,7 +419,7 @@ class Filesystem(MDSCluster):
         else:
             return False
 
-    def get_daemon_names(self, state):
+    def get_daemon_names(self, state=None):
         """
         Return MDS daemon names of those daemons in the given state
         :param state:
@@ -428,7 +428,7 @@
         status = self.get_mds_map()
         result = []
         for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
-            if mds_status['state'] == state:
+            if mds_status['state'] == state or state is None:
                 result.append(mds_status['name'])
 
         return result
diff --git a/tasks/cephfs/test_failover.py b/tasks/cephfs/test_failover.py
index 5183beb95f7de..f26fae8b2d2ba 100644
--- a/tasks/cephfs/test_failover.py
+++ b/tasks/cephfs/test_failover.py
@@ -90,9 +90,73 @@ class TestFailover(CephFSTestCase):
 
 
 class TestStandbyReplay(CephFSTestCase):
-    MDSS_REQUIRED = 2
+    MDSS_REQUIRED = 3
     REQUIRE_FILESYSTEM = False
 
+    def set_standby_for(self, leader, follower, replay):
+        self.set_conf("mds.{0}".format(follower), "mds_standby_for_name", leader)
+        if replay:
+            self.set_conf("mds.{0}".format(follower), "mds_standby_replay", "true")
+
+    def get_info_by_name(self, fs, mds_name):
+        if fs is None:
+            mds_info = self.mds_cluster.get_fs_map()['standbys']
+        else:
+            mds_info = fs.get_mds_map()['info'].values()
+        for info in mds_info:
+            if info['name'] == mds_name:
+                return info
+
+        log.warn(json.dumps(mds_info, indent=2))
+        raise RuntimeError("MDS '{0}' not found".format(mds_name))
+
+
+    def test_standby_replay_unused(self):
+        # Pick out exactly 3 daemons to be run during test
+        use_daemons = sorted(self.mds_cluster.mds_ids[0:3])
+        mds_a, mds_b, mds_c = use_daemons
+        log.info("Using MDS daemons: {0}".format(use_daemons))
+
+        # B and C should both follow A, but only one will
+        # really get into standby replay state.
+        self.set_standby_for(mds_a, mds_b, True)
+        self.set_standby_for(mds_a, mds_c, True)
+
+        # Create FS and start A
+        fs_a = self.mds_cluster.get_filesystem("alpha")
+        fs_a.create()
+        self.mds_cluster.mds_restart(mds_a)
+        fs_a.wait_for_daemons()
+        self.assertEqual(fs_a.get_active_names(), [mds_a])
+
+        # Start B, he should go into standby replay
+        self.mds_cluster.mds_restart(mds_b)
+        self.wait_for_daemon_start([mds_b])
+        info_b = self.get_info_by_name(fs_a, mds_b)
+        self.assertEqual(info_b['state'], "up:standby-replay")
+        self.assertEqual(info_b['standby_for_name'], mds_a)
+        self.assertEqual(info_b['rank'], 0)
+
+        # Start C, he should go into standby (*not* replay)
+        self.mds_cluster.mds_restart(mds_c)
+        self.wait_for_daemon_start([mds_c])
+        info_c = self.get_info_by_name(None, mds_c)
+        self.assertEqual(info_c['state'], "up:standby")
+        self.assertEqual(info_c['standby_for_name'], mds_a)
+        self.assertEqual(info_c['rank'], -1)
+
+        # Kill B, C should go into standby replay
+        self.mds_cluster.mds_stop(mds_b)
+        self.mds_cluster.mds_fail(mds_b)
+        self.wait_until_equal(
+            lambda: self.get_info_by_name(fs_a, mds_c)['state'],
+            "up:standby-replay",
+            60)
+        info_c = self.get_info_by_name(fs_a, mds_c)
+        self.assertEqual(info_c['state'], "up:standby-replay")
+        self.assertEqual(info_c['standby_for_name'], mds_a)
+        self.assertEqual(info_c['rank'], 0)
+
     def test_standby_failure(self):
         """
         That the failure of a standby-replay daemon happens cleanly
@@ -103,14 +167,11 @@ class TestStandbyReplay(CephFSTestCase):
         mds_a, mds_b = use_daemons
         log.info("Using MDS daemons: {0}".format(use_daemons))
 
-        def set_standby_for(leader, follower, replay):
-            self.set_conf("mds.{0}".format(follower), "mds_standby_for_name", leader)
-            if replay:
-                self.set_conf("mds.{0}".format(follower), "mds_standby_replay", "true")
+        # Configure two pairs of MDSs that are standby for each other
 
-        set_standby_for(mds_a, mds_b, True)
-        set_standby_for(mds_b, mds_a, False)
+        self.set_standby_for(mds_a, mds_b, True)
+        self.set_standby_for(mds_b, mds_a, False)
 
         # Create FS alpha and get mds_a to come up as active
         fs_a = self.mds_cluster.get_filesystem("alpha")
@@ -123,17 +184,8 @@ class TestStandbyReplay(CephFSTestCase):
         self.mds_cluster.mds_restart(mds_b)
         self.wait_for_daemon_start([mds_b])
 
-        def get_info_by_name(fs, mds_name):
-            mds_map = fs.get_mds_map()
-            for gid_str, info in mds_map['info'].items():
-                if info['name'] == mds_name:
-                    return info
-
-            log.warn(json.dumps(mds_map, indent=2))
-            raise RuntimeError("MDS '{0}' not found in filesystem MDSMap".format(mds_name))
-
         # See the standby come up as the correct rank
-        info_b = get_info_by_name(fs_a, mds_b)
+        info_b = self.get_info_by_name(fs_a, mds_b)
         self.assertEqual(info_b['state'], "up:standby-replay")
         self.assertEqual(info_b['standby_for_name'], mds_a)
         self.assertEqual(info_b['rank'], 0)
@@ -162,7 +214,6 @@ class TestMultiFilesystems(CephFSTestCase):
         self.fs.mon_manager.raw_cluster_cmd("fs", "flag", "set",
                                             "enable_multiple", "true",
                                             "--yes-i-really-mean-it")
-
     def _setup_two(self):
         fs_a = self.mds_cluster.get_filesystem("alpha")
         fs_b = self.mds_cluster.get_filesystem("bravo")
-- 
2.39.5
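
A minimal, self-contained Python sketch (an editor's illustration, not part of the patch) of the two map shapes the new get_info_by_name helper distinguishes: a standby-replay daemon appears inside a filesystem's mds_map 'info' with rank 0, while an unused standby sits in the fsmap's 'standbys' list with rank -1. Only the fields the test asserts on (name, state, standby_for_name, rank) are shown; the daemon names, gid key, and sample values are placeholders.

    # Hypothetical sample data mirroring the fields asserted in the test;
    # real maps come from fs.get_mds_map() / mds_cluster.get_fs_map().
    fs_mds_map = {
        "info": {
            "gid_4301": {"name": "b", "state": "up:standby-replay",
                         "standby_for_name": "a", "rank": 0},
        }
    }
    fsmap_standbys = [
        {"name": "c", "state": "up:standby", "standby_for_name": "a", "rank": -1},
    ]

    def find_info(mds_name, fs_mds_map=None, standbys=()):
        # Search a filesystem's mds_map when one is given, otherwise the
        # fsmap standbys list -- the same branching get_info_by_name does.
        entries = fs_mds_map["info"].values() if fs_mds_map else list(standbys)
        for info in entries:
            if info["name"] == mds_name:
                return info
        raise RuntimeError("MDS '{0}' not found".format(mds_name))

    assert find_info("b", fs_mds_map=fs_mds_map)["rank"] == 0
    assert find_info("c", standbys=fsmap_standbys)["rank"] == -1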