git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
qa/tasks: Check MDS failover during mon_thrash
author: Jos Collin <jcollin@redhat.com>
Mon, 1 Apr 2019 11:36:41 +0000 (17:06 +0530)
committer: Jos Collin <jcollin@redhat.com>
Thu, 2 May 2019 02:56:24 +0000 (08:26 +0530)
Check MDS failover during mon_thrash.

Fixes: http://tracker.ceph.com/issues/17309
Signed-off-by: Jos Collin <jcollin@redhat.com>
qa/tasks/mon_thrash.py

index d38770df7195934a387ac5d00c1201a8ebc61572..d264e8bcf8aed803fa296304485a05ef5f4488ec 100644 (file)
@@ -10,6 +10,7 @@ import gevent
 import json
 import math
 from teuthology import misc as teuthology
+from tasks.cephfs.filesystem import MDSCluster
 
 log = logging.getLogger(__name__)
 
@@ -59,6 +60,7 @@ class MonitorThrasher:
                         in % (default: 0)
     freeze_mon_duration: how many seconds to freeze the mon (default: 15)
     scrub               Scrub after each iteration (default: True)
+    check_mds_failover  Check if mds failover happened (default: False)
 
     Note: if 'store-thrash' is set to True, then 'maintain-quorum' must also
           be set to True.
@@ -75,6 +77,7 @@ class MonitorThrasher:
         seed: 31337
         maintain_quorum: true
         thrash_many: true
+        check_mds_failover: True
     - ceph-fuse:
     - workunit:
         clients:
@@ -127,6 +130,12 @@ class MonitorThrasher:
             assert self.maintain_quorum, \
                 'store_thrash = true must imply maintain_quorum = true'
 
+        #MDS failover
+        self.mds_failover = self.config.get('check_mds_failover', False)
+
+        if self.mds_failover:
+            self.mds_cluster = MDSCluster(ctx)
+
         self.thread = gevent.spawn(self.do_thrash)
 
     def log(self, x):
@@ -214,6 +223,10 @@ class MonitorThrasher:
         """
         Continuously loop and thrash the monitors.
         """
+        #status before mon thrashing
+        if self.mds_failover:
+            oldstatus = self.mds_cluster.status()
+
         self.log('start thrashing')
         self.log('seed: {s}, revive delay: {r}, thrash delay: {t} '\
                    'thrash many: {tm}, maintain quorum: {mq} '\
@@ -307,6 +320,13 @@ class MonitorThrasher:
                     delay=self.thrash_delay))
                 time.sleep(self.thrash_delay)
 
+        #status after thrashing
+        if self.mds_failover:
+            status = self.mds_cluster.status()
+            assert not oldstatus.hadfailover(status), \
+                'MDS Failover'
+
+
 @contextlib.contextmanager
 def task(ctx, config):
     """