log = logging.getLogger(__name__)
-class MDSThrasher:
- """
- MDSThrasher::
-
- The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc).
-
- The config is optional. Many of the config parameters are a a maximum value
- to use when selecting a random value from a range. To always use the maximum
- value, set no_random to true. The config is a dict containing some or all of:
-
- seed: [no default] seed the random number generator
-
- randomize: [default: true] enables randomization and use the max/min values
-
- max_thrash: [default: 1] the maximum number of MDSs that will be thrashed at
- any given time.
-
- max_thrash_delay: [default: 30] maximum number of seconds to delay before
- thrashing again.
-
- max_revive_delay: [default: 10] maximum number of seconds to delay before
- bringing back a thrashed MDS
- thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed
- during replay. Value should be between 0.0 and 1.0
-
- max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in
- the replay state before thrashing
-
- thrash_weights: allows specific MDSs to be thrashed more/less frequently. This option
- overrides anything specified by max_thrash. This option is a dict containing
- mds.x: weight pairs. For example, [mds.a: 0.7, mds.b: 0.3, mds.c: 0.0]. Each weight
- is a value from 0.0 to 1.0. Any MDSs not specified will be automatically
- given a weight of 0.0. For a given MDS, by default the trasher delays for up
- to max_thrash_delay, trashes, waits for the MDS to recover, and iterates. If a non-zero
- weight is specified for an MDS, for each iteration the thrasher chooses whether to thrash
- during that iteration based on a random value [0-1] not exceeding the weight of that MDS.
+class MDSThrasher:
+ """
+ MDSThrasher::
- Examples::
+ The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc).
+ The config is optional. Many of the config parameters are a maximum value
+ to use when selecting a random value from a range. To always use the maximum
+ value, set randomize to false. The config is a dict containing some or all of:
- The following example sets the likelihood that mds.a will be thrashed
- to 80%, mds.b to 20%, and other MDSs will not be thrashed. It also sets the
- likelihood that an MDS will be thrashed in replay to 40%.
- Thrash weights do not have to sum to 1.
+ seed: [no default] seed the random number generator
- tasks:
- - ceph:
- - mds_thrash:
- thrash_weights:
- - mds.a: 0.8
- - mds.b: 0.2
- thrash_in_replay: 0.4
- - ceph-fuse:
- - workunit:
- clients:
- all: [suites/fsx.sh]
+ randomize: [default: true] randomize the delay values within their
+ maximums; when false, the maximum values are always used
- The following example disables randomization, and uses the max delay values:
+ max_thrash: [default: 1] the maximum number of MDSs that will be thrashed at
+ any given time.
- tasks:
- - ceph:
- - mds_thrash:
- max_thrash_delay: 10
- max_revive_delay: 1
- max_replay_thrash_delay: 4
+ max_thrash_delay: [default: 30] maximum number of seconds to delay before
+ thrashing again.
- """
- def __init__(self, ctx, manager, config, logger, failure_group, weight):
- self.ctx = ctx
- self.manager = manager
- self.manager.wait_for_clean()
+ max_revive_delay: [default: 10] maximum number of seconds to delay before
+ bringing back a thrashed MDS
+
+ thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed
+ during replay. Value should be between 0.0 and 1.0
- self.stopping = False
- self.logger = logger
- self.config = config
+ max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in
+ the replay state before thrashing
- self.randomize = bool(self.config.get('randomize', True))
- self.max_thrash_delay = float(self.config.get('thrash_delay', 30.0))
- self.thrash_in_replay = float(self.config.get('thrash_in_replay', False))
- assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, 'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format(v=self.thrash_in_replay)
+ thrash_weights: allows specific MDSs to be thrashed more/less frequently. This option
+ overrides anything specified by max_thrash. This option is a dict containing
+ mds.x: weight pairs. For example, [mds.a: 0.7, mds.b: 0.3, mds.c: 0.0]. Each weight
+ is a value from 0.0 to 1.0. Any MDSs not specified will be automatically
+ given a weight of 0.0. For a given MDS, by default the thrasher delays for up
+ to max_thrash_delay, thrashes, waits for the MDS to recover, and iterates. If a non-zero
+ weight is specified for an MDS, for each iteration the thrasher chooses whether to thrash
+ during that iteration based on a random value [0-1] not exceeding the weight of that MDS.
- self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0))
+ Examples::
- self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))
- self.thread = gevent.spawn(self.do_thrash)
+ The following example sets the likelihood that mds.a will be thrashed
+ to 80%, mds.b to 20%, and other MDSs will not be thrashed. It also sets the
+ likelihood that an MDS will be thrashed in replay to 40%.
+ Thrash weights do not have to sum to 1.
- self.failure_group = failure_group
- self.weight = weight
+ tasks:
+ - ceph:
+ - mds_thrash:
+ thrash_weights:
+ - mds.a: 0.8
+ - mds.b: 0.2
+ thrash_in_replay: 0.4
+ - ceph-fuse:
+ - workunit:
+ clients:
+ all: [suites/fsx.sh]
- def log(self, x):
- """Write data to logger assigned to this MDThrasher"""
- self.logger.info(x)
+ The following example disables randomization, and uses the max delay values:
- def do_join(self):
- """Thread finished"""
- self.stopping = True
- self.thread.get()
+ tasks:
+ - ceph:
+ - mds_thrash:
+ max_thrash_delay: 10
+ max_revive_delay: 1
+ max_replay_thrash_delay: 4
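+
+ The following example (illustrative values, not defaults) seeds the random
+ number generator for a reproducible run and allows up to two MDSs to be
+ thrashed at any given time:
+
+ tasks:
+ - ceph:
+ - mds_thrash:
+ seed: 31337
+ max_thrash: 2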
- def do_thrash(self):
"""
- Perform the random thrashing action
- """
- self.log('starting mds_do_thrash for failure group: ' + ', '.join(['mds.{_id}'.format(_id=_f) for _f in self.failure_group]))
- while not self.stopping:
- delay = self.max_thrash_delay
- if self.randomize:
- delay = random.randrange(0.0, self.max_thrash_delay)
-
- if delay > 0.0:
- self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
- time.sleep(delay)
-
- skip = random.randrange(0.0, 1.0)
- if self.weight < 1.0 and skip > self.weight:
- self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip, weight=self.weight))
- continue
-
- # find the active mds in the failure group
- statuses = [self.manager.get_mds_status(m) for m in self.failure_group]
- actives = filter(lambda s: s['state'] == 'up:active', statuses)
- assert len(actives) == 1, 'Can only have one active in a failure group'
-
- active_mds = actives[0]['name']
- active_rank = actives[0]['rank']
-
- self.log('kill mds.{id} (rank={r})'.format(id=active_mds, r=active_rank))
- self.manager.kill_mds_by_rank(active_rank)
-
- # wait for mon to report killed mds as crashed
- status = {}
- last_laggy_since = None
- itercount = 0
- while True:
- failed = self.manager.get_mds_status_all()['failed']
- status = self.manager.get_mds_status(active_mds)
- if not status:
- break
- if 'laggy_since' in status:
- last_laggy_since = status['laggy_since']
- break
- if any([(f == active_mds) for f in failed]):
- break
- self.log('waiting till mds map indicates mds.{_id} is laggy/crashed, in failed state, or mds.{_id} is removed from mdsmap'.format(_id=active_mds))
- itercount = itercount + 1
- if itercount > 10:
- self.log('mds map: {status}'.format(status=self.manager.get_mds_status_all()))
- time.sleep(2)
- if last_laggy_since:
- self.log('mds.{_id} reported laggy/crashed since: {since}'.format(_id=active_mds, since=last_laggy_since))
- else:
- self.log('mds.{_id} down, removed from mdsmap'.format(_id=active_mds, since=last_laggy_since))
-
- # wait for a standby mds to takeover and become active
- takeover_mds = None
- takeover_rank = None
- itercount = 0
- while True:
- statuses = [self.manager.get_mds_status(m) for m in self.failure_group]
- actives = filter(lambda s: s and s['state'] == 'up:active', statuses)
- if len(actives) > 0:
- assert len(actives) == 1, 'Can only have one active in failure group'
- takeover_mds = actives[0]['name']
- takeover_rank = actives[0]['rank']
- break
- itercount = itercount + 1
- if itercount > 10:
- self.log('mds map: {status}'.format(status=self.manager.get_mds_status_all()))
-
- self.log('New active mds is mds.{_id}'.format(_id=takeover_mds))
-
- # wait for a while before restarting old active to become new
- # standby
- delay = self.max_revive_delay
- if self.randomize:
- delay = random.randrange(0.0, self.max_revive_delay)
-
- self.log('waiting for {delay} secs before reviving mds.{id}'.format(
- delay=delay,id=active_mds))
- time.sleep(delay)
-
- self.log('reviving mds.{id}'.format(id=active_mds))
- self.manager.revive_mds(active_mds, standby_for_rank=takeover_rank)
-
- status = {}
- while True:
- status = self.manager.get_mds_status(active_mds)
- if status and (status['state'] == 'up:standby' or status['state'] == 'up:standby-replay'):
- break
- self.log('waiting till mds map indicates mds.{_id} is in standby or standby-replay'.format(_id=active_mds))
- time.sleep(2)
- self.log('mds.{_id} reported in {state} state'.format(_id=active_mds, state=status['state']))
-
- # don't do replay thrashing right now
- continue
- # this might race with replay -> active transition...
- if status['state'] == 'up:replay' and random.randrange(0.0, 1.0) < self.thrash_in_replay:
-
- delay = self.max_replay_thrash_delay
- if self.randomize:
- delay = random.randrange(0.0, self.max_replay_thrash_delay)
- time.sleep(delay)
- self.log('kill replaying mds.{id}'.format(id=self.to_kill))
- self.manager.kill_mds(self.to_kill)
-
- delay = self.max_revive_delay
- if self.randomize:
- delay = random.randrange(0.0, self.max_revive_delay)
-
- self.log('waiting for {delay} secs before reviving mds.{id}'.format(
- delay=delay,id=self.to_kill))
- time.sleep(delay)
- self.log('revive mds.{id}'.format(id=self.to_kill))
- self.manager.revive_mds(self.to_kill)
+ def __init__(self, ctx, manager, config, logger, failure_group, weight):
+ self.ctx = ctx
+ self.manager = manager
+ self.manager.wait_for_clean()
+
+ self.stopping = False
+ self.logger = logger
+ self.config = config
+
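+ # pull tunables from the config, falling back to the defaults
+ # documented in the class docstring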
+ self.randomize = bool(self.config.get('randomize', True))
+ self.max_thrash_delay = float(self.config.get('max_thrash_delay', 30.0))
+ self.thrash_in_replay = float(self.config.get('thrash_in_replay', 0.0))
+ assert 0.0 <= self.thrash_in_replay <= 1.0, 'thrash_in_replay ({v}) must be in [0.0, 1.0]'.format(
+ v=self.thrash_in_replay)
+
+ self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0))
+
+ self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))
+
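+ # record the failure group and weight before spawning the worker
+ # greenlet, so do_thrash never sees a partially constructed thrasher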
+ self.failure_group = failure_group
+ self.weight = weight
+
+ self.thread = gevent.spawn(self.do_thrash)
+
+ def log(self, x):
+ """Write data to logger assigned to this MDThrasher"""
+ self.logger.info(x)
+
+ def do_join(self):
+ """Thread finished"""
+ self.stopping = True
+ self.thread.get()
+
+ def do_thrash(self):
+ """
+ Perform the random thrashing action
+ """
+ self.log('starting mds_do_thrash for failure group: ' + ', '.join(
+ ['mds.{_id}'.format(_id=_f) for _f in self.failure_group]))
+ while not self.stopping:
+ delay = self.max_thrash_delay
+ if self.randomize:
+ delay = random.uniform(0.0, self.max_thrash_delay)
+
+ if delay > 0.0:
+ self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
+ time.sleep(delay)
+
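+ # weighted skip: draw a uniform sample in [0, 1) and only thrash this
+ # iteration when the sample does not exceed this MDS's weight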
+ skip = random.uniform(0.0, 1.0)
+ if self.weight < 1.0 and skip > self.weight:
+ self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip,
+ weight=self.weight))
+ continue
+
+ # find the active mds in the failure group
+ statuses = [self.manager.get_mds_status(m) for m in self.failure_group]
+ actives = filter(lambda s: s['state'] == 'up:active', statuses)
+ assert len(actives) == 1, 'Can only have one active in a failure group'
+
+ active_mds = actives[0]['name']
+ active_rank = actives[0]['rank']
+
+ self.log('kill mds.{id} (rank={r})'.format(id=active_mds, r=active_rank))
+ self.manager.kill_mds_by_rank(active_rank)
+
+ # wait for mon to report killed mds as crashed
+ status = {}
+ last_laggy_since = None
+ itercount = 0
+ while True:
+ failed = self.manager.get_mds_status_all()['failed']
+ status = self.manager.get_mds_status(active_mds)
+ if not status:
+ break
+ if 'laggy_since' in status:
+ last_laggy_since = status['laggy_since']
+ break
+ if any([(f == active_mds) for f in failed]):
+ break
+ self.log(
+ 'waiting till mds map indicates mds.{_id} is laggy/crashed, in failed state, or mds.{_id} is removed from mdsmap'.format(
+ _id=active_mds))
+ itercount = itercount + 1
+ if itercount > 10:
+ self.log('mds map: {status}'.format(status=self.manager.get_mds_status_all()))
+ time.sleep(2)
+ if last_laggy_since:
+ self.log(
+ 'mds.{_id} reported laggy/crashed since: {since}'.format(_id=active_mds, since=last_laggy_since))
+ else:
+ self.log('mds.{_id} down, removed from mdsmap'.format(_id=active_mds))
+
+ # wait for a standby mds to takeover and become active
+ takeover_mds = None
+ takeover_rank = None
+ itercount = 0
+ while True:
+ statuses = [self.manager.get_mds_status(m) for m in self.failure_group]
+ actives = filter(lambda s: s and s['state'] == 'up:active', statuses)
+ if len(actives) > 0:
+ assert len(actives) == 1, 'Can only have one active in failure group'
+ takeover_mds = actives[0]['name']
+ takeover_rank = actives[0]['rank']
+ break
+ itercount = itercount + 1
+ if itercount > 10:
+ self.log('mds map: {status}'.format(status=self.manager.get_mds_status_all()))
+ time.sleep(2)
+
+ self.log('New active mds is mds.{_id}'.format(_id=takeover_mds))
+
+ # wait for a while before restarting old active to become new
+ # standby
+ delay = self.max_revive_delay
+ if self.randomize:
+ delay = random.uniform(0.0, self.max_revive_delay)
+
+ self.log('waiting for {delay} secs before reviving mds.{id}'.format(
+ delay=delay, id=active_mds))
+ time.sleep(delay)
+
+ self.log('reviving mds.{id}'.format(id=active_mds))
+ self.manager.revive_mds(active_mds, standby_for_rank=takeover_rank)
+
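+ # block until the mds map shows the revived daemon back in a standby
+ # state before starting the next iteration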
+ status = {}
+ while True:
+ status = self.manager.get_mds_status(active_mds)
+ if status and status['state'] in ('up:standby', 'up:standby-replay'):
+ break
+ self.log(
+ 'waiting till mds map indicates mds.{_id} is in standby or standby-replay'.format(_id=active_mds))
+ time.sleep(2)
+ self.log('mds.{_id} reported in {state} state'.format(_id=active_mds, state=status['state']))
+
+ # don't do replay thrashing right now
+ continue
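+ # NOTE: everything below is unreachable until replay thrashing is
+ # re-enabled by removing the continue above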
+ # this might race with replay -> active transition...
+ if status['state'] == 'up:replay' and random.uniform(0.0, 1.0) < self.thrash_in_replay:
+
+ delay = self.max_replay_thrash_delay
+ if self.randomize:
+ delay = random.uniform(0.0, self.max_replay_thrash_delay)
+ time.sleep(delay)
+ self.log('kill replaying mds.{id}'.format(id=active_mds))
+ self.manager.kill_mds(active_mds)
+
+ delay = self.max_revive_delay
+ if self.randomize:
+ delay = random.uniform(0.0, self.max_revive_delay)
+
+ self.log('waiting for {delay} secs before reviving mds.{id}'.format(
+ delay=delay, id=active_mds))
+ time.sleep(delay)
+
+ self.log('revive mds.{id}'.format(id=active_mds))
+ self.manager.revive_mds(active_mds)
@contextlib.contextmanager
statuses = None
statuses_by_rank = None
while True:
- statuses = {m : manager.get_mds_status(m) for m in mdslist}
+ statuses = {m: manager.get_mds_status(m) for m in mdslist}
statuses_by_rank = {}
for _, s in statuses.iteritems():
if isinstance(s, dict):
# setup failure groups
failure_groups = {}
- actives = {s['name'] : s for (_,s) in statuses.iteritems() if s['state'] == 'up:active'}
+ actives = {s['name']: s for (_, s) in statuses.iteritems() if s['state'] == 'up:active'}
log.info('Actives is: {d}'.format(d=actives))
log.info('Statuses is: {d}'.format(d=statuses_by_rank))
for active in actives:
- for (r,s) in statuses.iteritems():
+ for (r, s) in statuses.iteritems():
if s['standby_for_name'] == active:
if not active in failure_groups:
failure_groups[active] = []
log.info('Assigning mds rank {r} to failure group {g}'.format(r=r, g=active))
failure_groups[active].append(r)
- for (active,standbys) in failure_groups.iteritems():
+ for (active, standbys) in failure_groups.iteritems():
weight = 1.0
if 'thrash_weights' in config:
thrashers[active] = MDSThrasher(
ctx, manager, config,
logger=log.getChild('mds_thrasher.failure_group.[{a}, {sbs}]'.format(
- a=active,
- sbs=', '.join(standbys)
- )
- ),
+ a=active,
+ sbs=', '.join(standbys)
+ )
+ ),
failure_group=failure_group,
weight=weight)
# if thrash_weights isn't specified and we've reached max_thrash,