From 636fc40d902fd8427740e7a2cfdac6c9971d269a Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 29 Mar 2017 11:01:33 -0400 Subject: [PATCH] qa: additions to mgr.test_failover Reproducers for recent fixes: http://tracker.ceph.com/issues/19407 http://tracker.ceph.com/issues/19258 Signed-off-by: John Spray --- qa/tasks/mgr/test_failover.py | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/qa/tasks/mgr/test_failover.py b/qa/tasks/mgr/test_failover.py index 8994ad49bfe29..e695cf2ed907e 100644 --- a/qa/tasks/mgr/test_failover.py +++ b/qa/tasks/mgr/test_failover.py @@ -35,6 +35,38 @@ class TestFailover(MgrTestCase): timeout=10 ) + def test_timeout_nostandby(self): + """ + That when an active mgr stops responding, and no standby is + available, the active mgr is removed from the map anyway. + """ + # Query which mgr is active + original_active = self.mgr_cluster.get_active_id() + original_standbys = self.mgr_cluster.get_standby_ids() + + for s in original_standbys: + self.mgr_cluster.mgr_stop(s) + self.mgr_cluster.mgr_fail(s) + + self.assertListEqual(self.mgr_cluster.get_standby_ids(), []) + self.assertEqual(self.mgr_cluster.get_active_id(), original_active) + + grace = int(self.mgr_cluster.get_config("mon_mgr_beacon_grace")) + log.info("Should time out in about {0} seconds".format(grace)) + + self.mgr_cluster.mgr_stop(original_active) + + # Now wait for the mon to notice the mgr is gone and remove it + # from the map. 
+ self.wait_until_equal( + lambda: self.mgr_cluster.get_active_id(), + "", + timeout=grace * 2 + ) + + self.assertListEqual(self.mgr_cluster.get_standby_ids(), []) + self.assertEqual(self.mgr_cluster.get_active_id(), "") + def test_explicit_fail(self): """ That when a user explicitly fails a daemon, a standby immediately @@ -60,6 +92,27 @@ class TestFailover(MgrTestCase): timeout=10 ) + # We should be able to fail back over again: this exercises + # our re-initialization of the python runtime within + # a single process lifetime. + + # Get rid of any bystander standbys so that the original_active + # will be selected as next active. + new_active = self.mgr_cluster.get_active_id() + for daemon in original_standbys: + if daemon != new_active: + self.mgr_cluster.mgr_stop(daemon) + self.mgr_cluster.mgr_fail(daemon) + + self.assertListEqual(self.mgr_cluster.get_standby_ids(), + [original_active]) + + self.mgr_cluster.mgr_stop(new_active) + self.mgr_cluster.mgr_fail(new_active) + + self.assertEqual(self.mgr_cluster.get_active_id(), original_active) + self.assertEqual(self.mgr_cluster.get_standby_ids(), []) + def test_standby_timeout(self): """ That when a standby daemon stops sending beacons, it is -- 2.39.5