run.wait(writes)
-def make_admin_daemon_dir(ctx, remote):
- """
- Create /var/run/ceph directory on remote site.
-
- :param ctx: Context
- :param remote: Remote site
- """
- remote.run(args=['sudo',
- 'install', '-d', '-m0777', '--', '/var/run/ceph', ], )
-
-
def mount_osd_data(ctx, remote, osd):
"""
Mount a remote OSD
format(o=osd))
teuthology.reconnect(self.ctx, 60, [remote])
mount_osd_data(self.ctx, remote, str(osd))
- make_admin_daemon_dir(self.ctx, remote)
+ self.make_admin_daemon_dir(remote)
self.ctx.daemons.get_daemon('osd', osd).reset()
self.ctx.daemons.get_daemon('osd', osd).restart()
"Check ipmi config.")
remote.console.power_on()
- make_admin_daemon_dir(self.ctx, remote)
+ self.make_admin_daemon_dir(remote)
self.ctx.daemons.get_daemon('mon', mon).restart()
def get_mon_status(self, mon):
self.log('health:\n{h}'.format(h=out))
return json.loads(out)
- ## metadata servers
-
- def kill_mds(self, mds):
- """
- Powercyle if set in config, otherwise just stop.
- """
- if self.config.get('powercycle'):
- (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
- remotes.iterkeys())
- self.log('kill_mds on mds.{m} doing powercycle of {s}'.
- format(m=mds, s=remote.name))
- assert remote.console is not None, ("powercycling requested "
- "but RemoteConsole is not "
- "initialized. "
- "Check ipmi config.")
- remote.console.power_off()
- else:
- self.ctx.daemons.get_daemon('mds', mds).stop()
-
- def kill_mds_by_rank(self, rank):
- """
- kill_mds wrapper to kill based on rank passed.
- """
- status = self.get_mds_status_by_rank(rank)
- self.kill_mds(status['name'])
-
- def revive_mds(self, mds, standby_for_rank=None):
- """
- Revive mds -- do an ipmpi powercycle (if indicated by the config)
- and then restart (using --hot-standby if specified.
- """
- if self.config.get('powercycle'):
- (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
- remotes.iterkeys())
- self.log('revive_mds on mds.{m} doing powercycle of {s}'.
- format(m=mds, s=remote.name))
- assert remote.console is not None, ("powercycling requested "
- "but RemoteConsole is not "
- "initialized. "
- "Check ipmi config.")
- remote.console.power_on()
- make_admin_daemon_dir(self.ctx, remote)
- args = []
- if standby_for_rank:
- args.extend(['--hot-standby', standby_for_rank])
- self.ctx.daemons.get_daemon('mds', mds).restart(*args)
-
- def revive_mds_by_rank(self, rank, standby_for_rank=None):
- """
- revive_mds wrapper to revive based on rank passed.
- """
- status = self.get_mds_status_by_rank(rank)
- self.revive_mds(status['name'], standby_for_rank)
-
def get_mds_status(self, mds):
"""
Run cluster commands for the mds in order to get mds information
return info
return None
- def get_mds_status_by_rank(self, rank):
+ def get_filepath(self):
"""
- Run cluster commands for the mds in order to get mds information
- check rank.
+ Return path to osd data with {id} needing to be replaced
"""
- j = self.get_mds_status_all()
- # collate; for dup ids, larger gid wins.
- for info in j['info'].itervalues():
- if info['rank'] == rank:
- return info
- return None
+ return "/var/lib/ceph/osd/ceph-{id}"
- def get_mds_status_all(self):
- """
- Run cluster command to extract all the mds status.
+ def make_admin_daemon_dir(self, remote):
"""
- out = self.raw_cluster_cmd('mds', 'dump', '--format=json')
- j = json.loads(' '.join(out.splitlines()[1:]))
- return j
+ Create /var/run/ceph directory on remote site.
- def get_filepath(self):
- """
- Return path to osd data with {id} needing to be replaced
+        :param remote: Remote site
"""
- return "/var/lib/ceph/osd/ceph-{id}"
+ remote.run(args=['sudo',
+ 'install', '-d', '-m0777', '--', '/var/run/ceph', ], )
+
def utility_task(name):
"""
self.failure_group = failure_group
self.weight = weight
+ # TODO support multiple filesystems: will require behavioural change to select
+ # which filesystem to act on when doing rank-ish things
+ self.fs = Filesystem(self.ctx)
+
def _run(self):
try:
self.do_thrash()
def stop(self):
self.stopping.set()
+ def kill_mds(self, mds):
+ if self.config.get('powercycle'):
+ (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
+ remotes.iterkeys())
+ self.log('kill_mds on mds.{m} doing powercycle of {s}'.
+ format(m=mds, s=remote.name))
+ assert remote.console is not None, ("powercycling requested "
+ "but RemoteConsole is not "
+ "initialized. "
+ "Check ipmi config.")
+ remote.console.power_off()
+ else:
+ self.ctx.daemons.get_daemon('mds', mds).stop()
+
+ def kill_mds_by_rank(self, rank):
+ """
+ kill_mds wrapper to kill based on rank passed.
+ """
+ status = self.mds_cluster.get_mds_info_by_rank(rank)
+ self.kill_mds(status['name'])
+
+ def revive_mds(self, mds, standby_for_rank=None):
+ """
+        Revive mds -- do an ipmi powercycle (if indicated by the config)
+        and then restart (using --hot-standby if specified).
+ """
+ if self.config.get('powercycle'):
+ (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
+ remotes.iterkeys())
+ self.log('revive_mds on mds.{m} doing powercycle of {s}'.
+ format(m=mds, s=remote.name))
+ assert remote.console is not None, ("powercycling requested "
+ "but RemoteConsole is not "
+ "initialized. "
+ "Check ipmi config.")
+ remote.console.power_on()
+            self.manager.make_admin_daemon_dir(remote)
+ args = []
+ if standby_for_rank:
+ args.extend(['--hot-standby', standby_for_rank])
+ self.ctx.daemons.get_daemon('mds', mds).restart(*args)
+
+ def revive_mds_by_rank(self, rank, standby_for_rank=None):
+ """
+ revive_mds wrapper to revive based on rank passed.
+ """
+ status = self.mds_cluster.get_mds_info_by_rank(rank)
+ self.revive_mds(status['name'], standby_for_rank)
+
+ def get_mds_status_all(self):
+ return self.fs.get_mds_map()
+
def do_thrash(self):
"""
Perform the random thrashing action
"""
- # TODO support multiple filesystems: will require behavioural change to select
- # which filesystem to act on when doing rank-ish things
- fs = Filesystem(self.ctx)
-
self.log('starting mds_do_thrash for failure group: ' + ', '.join(
['mds.{_id}'.format(_id=_f) for _f in self.failure_group]))
while not self.stopping.is_set():
last_laggy_since = None
itercount = 0
while True:
- failed = fs.get_mds_map()['failed']
+ failed = self.fs.get_mds_map()['failed']
status = self.mds_cluster.get_mds_info(active_mds)
if not status:
break
'mds_thrash task requires at least 2 metadata servers'
# choose random seed
- seed = None
if 'seed' in config:
seed = int(config['seed'])
else:
# if thrash_weights isn't specified and we've reached max_thrash,
# we're done
- if not 'thrash_weights' in config and len(thrashers) == max_thrashers:
+ if 'thrash_weights' not in config and len(thrashers) == max_thrashers:
break
try: