]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa: additions to mgr.test_failover
author: John Spray <john.spray@redhat.com>
Wed, 29 Mar 2017 15:01:33 +0000 (11:01 -0400)
committer: John Spray <john.spray@redhat.com>
Thu, 20 Apr 2017 14:00:31 +0000 (15:00 +0100)
Reproducers for recent fixes:
http://tracker.ceph.com/issues/19407
http://tracker.ceph.com/issues/19258

Signed-off-by: John Spray <john.spray@redhat.com>
qa/tasks/mgr/test_failover.py

index 8994ad49bfe2983ffeef4380039edddcc7c7bace..e695cf2ed907e7567d656a0840d81531961df47f 100644 (file)
@@ -35,6 +35,38 @@ class TestFailover(MgrTestCase):
             timeout=10
         )
 
+    def test_timeout_nostandby(self):
+        """
+        That when an active mgr stops responding, and no standby is
+        available, the active mgr is removed from the map anyway.
+
+        The test stops and fails every standby, stops the active mgr
+        without explicitly failing it, and then waits for the active
+        id to become the empty string.
+        """
+        # Record which mgr is active and which are standbys before
+        # changing anything.
+        original_active = self.mgr_cluster.get_active_id()
+        original_standbys = self.mgr_cluster.get_standby_ids()
+
+        # Stop and fail every standby so that nothing is available to
+        # take over from the active mgr.
+        for s in original_standbys:
+            self.mgr_cluster.mgr_stop(s)
+            self.mgr_cluster.mgr_fail(s)
+
+        # Precondition: no standbys remain and the active mgr is unchanged.
+        self.assertListEqual(self.mgr_cluster.get_standby_ids(), [])
+        self.assertEqual(self.mgr_cluster.get_active_id(), original_active)
+
+        # The wait below is bounded by twice this configured grace value.
+        grace = int(self.mgr_cluster.get_config("mon_mgr_beacon_grace"))
+        log.info("Should time out in about {0} seconds".format(grace))
+
+        # Only stop the active mgr; mgr_fail is deliberately not called,
+        # so its removal has to come from the mon noticing it is gone.
+        self.mgr_cluster.mgr_stop(original_active)
+
+        # Now wait for the mon to notice the mgr is gone and remove it
+        # from the map.
+        self.wait_until_equal(
+            lambda: self.mgr_cluster.get_active_id(),
+            "",
+            timeout=grace * 2
+        )
+
+        # End state: no standbys and an empty active id.
+        self.assertListEqual(self.mgr_cluster.get_standby_ids(), [])
+        self.assertEqual(self.mgr_cluster.get_active_id(), "")
+
     def test_explicit_fail(self):
         """
         That when a user explicitly fails a daemon, a standby immediately
@@ -60,6 +92,27 @@ class TestFailover(MgrTestCase):
             timeout=10
         )
 
+        # We should be able to fail back over again: this exercises
+        # our re-initialization of the python runtime within
+        # a single process lifetime.
+
+        # Get rid of any bystander standbys so that the original_active
+        # will be selected as next active.
+        new_active = self.mgr_cluster.get_active_id()
+        for daemon in original_standbys:
+            if daemon != new_active:
+                self.mgr_cluster.mgr_stop(daemon)
+                self.mgr_cluster.mgr_fail(daemon)
+
+        self.assertListEqual(self.mgr_cluster.get_standby_ids(),
+                             [original_active])
+
+        self.mgr_cluster.mgr_stop(new_active)
+        self.mgr_cluster.mgr_fail(new_active)
+
+        self.assertEqual(self.mgr_cluster.get_active_id(), original_active)
+        self.assertEqual(self.mgr_cluster.get_standby_ids(), [])
+
     def test_standby_timeout(self):
         """
         That when a standby daemon stops sending beacons, it is