qa/tasks: Check MDS failover during mon_thrash

author Jos Collin <jcollin@redhat.com>

Mon, 1 Apr 2019 11:36:41 +0000 (17:06 +0530)

committer Jos Collin <jcollin@redhat.com>

Thu, 2 May 2019 02:56:24 +0000 (08:26 +0530)
author Jos Collin <jcollin@redhat.com>
Mon, 1 Apr 2019 11:36:41 +0000 (17:06 +0530)
committer Jos Collin <jcollin@redhat.com>
Thu, 2 May 2019 02:56:24 +0000 (08:26 +0530)
diff --git a/qa/tasks/mon_thrash.py b/qa/tasks/mon_thrash.py

index d38770df7195934a387ac5d00c1201a8ebc61572..d264e8bcf8aed803fa296304485a05ef5f4488ec 100644 (file)
--- a/qa/tasks/mon_thrash.py
+++ b/qa/tasks/mon_thrash.py
@@ -10,6 +10,7 @@ import gevent
  import json
  import math
  from teuthology import misc as teuthology
+from tasks.cephfs.filesystem import MDSCluster
  
  log = logging.getLogger(__name__)
  
@@ -59,6 +60,7 @@ class MonitorThrasher:
                          in % (default: 0)
      freeze_mon_duration: how many seconds to freeze the mon (default: 15)
      scrub               Scrub after each iteration (default: True)
+    check_mds_failover  Assert that no MDS failover happened during thrashing (default: False)
  
      Note: if 'store-thrash' is set to True, then 'maintain-quorum' must also
            be set to True.
@@ -75,6 +77,7 @@ class MonitorThrasher:
          seed: 31337
          maintain_quorum: true
          thrash_many: true
+        check_mds_failover: true
      - ceph-fuse:
      - workunit:
          clients:
@@ -127,6 +130,12 @@ class MonitorThrasher:
              assert self.maintain_quorum, \
                  'store_thrash = true must imply maintain_quorum = true'
  
+        # Optional MDS failover check, enabled by the 'check_mds_failover' config key
+        self.mds_failover = self.config.get('check_mds_failover', False)
+
+        if self.mds_failover:
+            self.mds_cluster = MDSCluster(ctx)
+
          self.thread = gevent.spawn(self.do_thrash)
  
      def log(self, x):
@@ -214,6 +223,10 @@ class MonitorThrasher:
          """
          Continuously loop and thrash the monitors.
          """
+        # Record the MDS cluster status before mon thrashing starts
+        if self.mds_failover:
+            oldstatus = self.mds_cluster.status()
+
          self.log('start thrashing')
          self.log('seed: {s}, revive delay: {r}, thrash delay: {t} '\
                     'thrash many: {tm}, maintain quorum: {mq} '\
@@ -307,6 +320,13 @@ class MonitorThrasher:
                      delay=self.thrash_delay))
                  time.sleep(self.thrash_delay)
  
+        # Compare the MDS cluster status after thrashing against the earlier snapshot
+        if self.mds_failover:
+            status = self.mds_cluster.status()
+            assert not oldstatus.hadfailover(status), \
+                'MDS Failover'
+
+
  @contextlib.contextmanager
  def task(ctx, config):
      """
author	Jos Collin <jcollin@redhat.com>
	Mon, 1 Apr 2019 11:36:41 +0000 (17:06 +0530)
committer	Jos Collin <jcollin@redhat.com>
	Thu, 2 May 2019 02:56:24 +0000 (08:26 +0530)