From fc7a50be7b2f017af090284060bee1850ef23f61 Mon Sep 17 00:00:00 2001 From: Jos Collin Date: Mon, 1 Apr 2019 17:06:41 +0530 Subject: [PATCH] qa/tasks: Check MDS failover during mon_thrash Check MDS failover during mon_thrash. Add a 'check_mds_failover' option (default: False) to the mon_thrash task: when it is enabled, the MDS cluster status is recorded before thrashing starts and compared with the status after thrashing, and the thrasher asserts that no MDS failover happened in between. Fixes: http://tracker.ceph.com/issues/17309 Signed-off-by: Jos Collin --- qa/tasks/mon_thrash.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/qa/tasks/mon_thrash.py b/qa/tasks/mon_thrash.py index d38770df719..d264e8bcf8a 100644 --- a/qa/tasks/mon_thrash.py +++ b/qa/tasks/mon_thrash.py @@ -10,6 +10,7 @@ import gevent import json import math from teuthology import misc as teuthology +from tasks.cephfs.filesystem import MDSCluster log = logging.getLogger(__name__) @@ -59,6 +60,7 @@ class MonitorThrasher: in % (default: 0) freeze_mon_duration: how many seconds to freeze the mon (default: 15) scrub Scrub after each iteration (default: True) + check_mds_failover Check if mds failover happened (default: False) Note: if 'store-thrash' is set to True, then 'maintain-quorum' must also be set to True. @@ -75,6 +77,7 @@ class MonitorThrasher: seed: 31337 maintain_quorum: true thrash_many: true + check_mds_failover: True - ceph-fuse: - workunit: clients: @@ -127,6 +130,12 @@ class MonitorThrasher: assert self.maintain_quorum, \ 'store_thrash = true must imply maintain_quorum = true' + #MDS failover + self.mds_failover = self.config.get('check_mds_failover', False) + + if self.mds_failover: + self.mds_cluster = MDSCluster(ctx) + self.thread = gevent.spawn(self.do_thrash) def log(self, x): @@ -214,6 +223,10 @@ class MonitorThrasher: """ Continuously loop and thrash the monitors. 
""" + #status before mon thrashing + if self.mds_failover: + oldstatus = self.mds_cluster.status() + self.log('start thrashing') self.log('seed: {s}, revive delay: {r}, thrash delay: {t} '\ 'thrash many: {tm}, maintain quorum: {mq} '\ @@ -307,6 +320,13 @@ class MonitorThrasher: delay=self.thrash_delay)) time.sleep(self.thrash_delay) + #status after thrashing + if self.mds_failover: + status = self.mds_cluster.status() + assert not oldstatus.hadfailover(status), \ + 'MDS Failover' + + @contextlib.contextmanager def task(ctx, config): """ -- 2.39.5