+"""
+Admin Socket task -- used in rados, powercycle, and smoke testing
+"""
from cStringIO import StringIO
import json
log = logging.getLogger(__name__)
+
def task(ctx, config):
"""
Run an admin socket command, make sure the output is json, and run
Note that there must be a ceph client with an admin socket running
before this task is run. The tests are parallelized at the client
level. Tests for a single client are run serially.
+
+ :param ctx: Context
+ :param config: Configuration
"""
assert isinstance(config, dict), \
'admin_socket task requires a dict for configuration'
teuthology.replace_all_with_clients(ctx.cluster, config)
- with parallel() as p:
+ with parallel() as ptask:
for client, tests in config.iteritems():
- p.spawn(_run_tests, ctx, client, tests)
+ ptask.spawn(_run_tests, ctx, client, tests)
+
def _socket_command(ctx, remote, socket_path, command, args):
"""
Run an admin socket command and return the result as a string.
+
+ :param ctx: Context
+ :param remote: Remote site
+ :param socket_path: path to socket
+ :param command: command to be run remotely
+ :param args: command arguments
+
+ :returns: output of command in json format
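+
+ The command is retried until the daemon has registered it; a minimal
+ sketch of that bounded-retry pattern (the iterator here stands in for
+ the remote admin socket call succeeding)::
+
+     import time
+
+     max_tries, outcomes = 60, iter([False, False, True])
+     while True:
+         if next(outcomes):
+             break
+         assert max_tries > 0
+         max_tries -= 1
+         time.sleep(0.01)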
"""
json_fp = StringIO()
testdir = teuthology.get_testdir(ctx)
break
assert max_tries > 0
max_tries -= 1
- log.info('ceph cli returned an error, command not registered yet? sleeping and retrying ...')
+ log.info('ceph cli returned an error, command not registered yet?')
+ log.info('sleeping and retrying ...')
time.sleep(1)
out = json_fp.getvalue()
json_fp.close()
return json.loads(out)
def _run_tests(ctx, client, tests):
+ """
+ Create a temp directory and wait for a client socket to be created.
+ For each test, copy the executable locally and run the test.
+ Remove temp directory when finished.
+
+ :param ctx: Context
+ :param client: client machine to run the test
+ :param tests: list of tests to run
+ """
testdir = teuthology.get_testdir(ctx)
log.debug('Running admin socket tests on %s', client)
(remote,) = ctx.cluster.only(client).remotes.iterkeys()
'rm', '-rf', '--', tmp_dir,
],
)
-
+"""
+Run an autotest test on the ceph cluster.
+"""
import json
import logging
import os
p.spawn(_run_tests, testdir, remote, role, tests)
def _download(testdir, remote):
+ """
+ Download. Does not explicitly support multiple tasks in a single run.
+ """
remote.run(
args=[
# explicitly does not support multiple autotest tasks
)
def _run_tests(testdir, remote, role, tests):
+ """
+ Spawned to run test on remote site
+ """
assert isinstance(role, basestring)
PREFIX = 'client.'
assert role.startswith(PREFIX)
+"""
+Execute ceph-deploy as a task
+"""
from cStringIO import StringIO
import contextlib
time.sleep(1)
def get_nodes_using_roles(ctx, config, role):
+ """Extract the names of nodes that match a given role from a cluster"""
newl = []
for _remote, roles_for_host in ctx.cluster.remotes.iteritems():
for id_ in teuthology.roles_of_type(roles_for_host, role):
return newl
def get_dev_for_osd(ctx, config):
+ """Get a list of all osd device names."""
osd_devs = []
for remote, roles_for_host in ctx.cluster.remotes.iteritems():
host = remote.name.split('@')[-1]
return osd_devs
def get_all_nodes(ctx, config):
+ """Return a string of node names separated by blanks"""
nodelist = []
for t, k in ctx.config['targets'].iteritems():
host = t.split('@')[-1]
return nodelist
def execute_ceph_deploy(ctx, config, cmd):
+ """Remotely execute a ceph_deploy command"""
testdir = teuthology.get_testdir(ctx)
ceph_admin = teuthology.get_first_mon(ctx, config)
exec_cmd = cmd
@contextlib.contextmanager
def build_ceph_cluster(ctx, config):
+ """Build a ceph cluster"""
log.info('Building ceph cluster using ceph-deploy...')
testdir = teuthology.get_testdir(ctx)
ceph_branch = None
if config.get('conf') is not None:
confp = config.get('conf')
for section, keys in confp.iteritems():
- lines = '[{section}]\n'.format(section=section)
+ lines = '[{section}]\n'.format(section=section)
+ teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)
+ for key, value in keys.iteritems():
+ log.info("[%s] %s = %s" % (section, key, value))
+ lines = '{key} = {value}\n'.format(key=key, value=value)
teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)
- for key, value in keys.iteritems():
- log.info("[%s] %s = %s" % (section, key, value))
- lines = '{key} = {value}\n'.format(key=key, value=value)
- teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)
estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
if estatus_install != 0:
+"""
+Ceph FUSE client task
+"""
import contextlib
import logging
import os
valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes]
- interactive:
+ :param ctx: Context
+ :param config: Configuration
"""
log.info('Mounting ceph-fuse clients...')
fuse_daemons = {}
+"""
+Ceph cluster task.
+
+Handle the setup, starting, and clean-up of a Ceph cluster.
+"""
from cStringIO import StringIO
import argparse
log = logging.getLogger(__name__)
class DaemonState(object):
+ """
+ Daemon State. A daemon exists for each instance of each role.
+ """
def __init__(self, remote, role, id_, *command_args, **command_kwargs):
+ """
+ Pass remote command information as parameters to remote site
+
+ :param remote: Remote site
+ :param role: Role (osd, rgw, mon, mds)
+ :param id_: Id within role (osd.1, osd.2, for example)
+ :param command_args: positional arguments (used in restart commands)
+ :param command_kwargs: keyword arguments (used in restart commands)
+ """
self.remote = remote
self.command_args = command_args
self.command_kwargs = command_kwargs
def stop(self):
"""
+ Stop this daemon instance.
+
Note: this can raise a run.CommandFailedError,
run.CommandCrashedError, or run.ConnectionLostError.
"""
self.log.info('Stopped')
def restart(self, *args, **kwargs):
+ """
+ Restart with a new command passed in the arguments
+
+ :param args: positional arguments passed to remote.run
+ :param kwargs: keyword arguments passed to remote.run
+ """
self.log.info('Restarting')
if self.proc is not None:
self.log.debug('stopping old one...')
self.log.info('Started')
def restart_with_args(self, extra_args):
+ """
+ Restart, adding new parameters to the current command.
+
+ :param extra_args: Extra keyword arguments to be added.
+ """
self.log.info('Restarting')
if self.proc is not None:
self.log.debug('stopping old one...')
self.log.info('Started')
def signal(self, sig):
+ """
+ Send a signal to the associated remote command
+
+ :param sig: signal to send
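+
+ The signal number is written to the daemon wrapper's stdin as a single
+ signed byte; an illustrative sketch of that packing::
+
+     import signal
+     import struct
+
+     payload = struct.pack('!b', signal.SIGHUP)
+     assert len(payload) == 1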
+ """
self.proc.stdin.write(struct.pack('!b', sig))
self.log.info('Sent signal %d', sig)
def running(self):
+ """
+ Are we running?
+ :return: True if remote run command value is set, False otherwise.
+ """
return self.proc is not None
def reset(self):
+ """
+ clear remote run command value.
+ """
self.proc = None
def wait_for_exit(self):
+ """
+ clear remote run command value after waiting for exit.
+ """
if self.proc:
run.wait([self.proc])
self.proc = None
class CephState(object):
+ """
+ Collection of daemon state instances
+ """
def __init__(self):
+ """
+ self.daemons is a dictionary indexed by role. Each entry is a dictionary of
+ DaemonState values indexed by an id parameter.
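+
+ The resulting shape is roughly (placeholders stand in for DaemonState
+ instances)::
+
+     {'osd': {'0': <DaemonState osd.0>, '1': <DaemonState osd.1>},
+      'mon': {'a': <DaemonState mon.a>}}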
+ """
self.daemons = {}
def add_daemon(self, remote, role, id_, *args, **kwargs):
+ """
+ Add a daemon. If there already is a daemon for this id_ and role, stop
+ that daemon and restart it once the new value is set.
+
+ :param remote: Remote site
+ :param role: Role (osd, mds, mon, rgw, for example)
+ :param id_: Id (index into role dictionary)
+ :param args: Daemonstate positional parameters
+ :param kwargs: Daemonstate keyword parameters
+ """
if role not in self.daemons:
self.daemons[role] = {}
if id_ in self.daemons[role]:
self.daemons[role][id_].restart()
def get_daemon(self, role, id_):
+ """
+ get the daemon associated with this id_ for this role.
+
+ :param role: Role (osd, mds, mon, rgw, for example)
+ :param id_: Id (index into role dictionary)
+ """
if role not in self.daemons:
return None
return self.daemons[role].get(str(id_), None)
def iter_daemons_of_role(self, role):
+ """
+ Iterate through all daemon instances for this role. Return dictionary of
+ daemon values.
+
+ :param role: Role (osd, mds, mon, rgw, for example)
+ """
return self.daemons.get(role, {}).values()
@contextlib.contextmanager
def ceph_log(ctx, config):
+ """
+ Create /var/log/ceph log directory that is open to everyone.
+ Add valgrind and profiling-logger directories.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
log.info('Making ceph log dir writeable by non-root...')
run.wait(
ctx.cluster.run(
@contextlib.contextmanager
def ship_utilities(ctx, config):
+ """
+ Write a copy of valgrind.supp to each of the remote sites. Set executables used
+ by Ceph in /usr/local/bin. When finished (upon exit of the teuthology run), remove
+ these files.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
assert config is None
testdir = teuthology.get_testdir(ctx)
filenames = []
def assign_devs(roles, devs):
+ """
+ Create a dictionary of devs indexed by roles
+
+ :param roles: List of roles
+ :param devs: Corresponding list of devices.
+ :returns: Dictionary of devs indexed by roles.
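+
+ For example (illustrative values)::
+
+     assign_devs(['osd.0', 'osd.1'], ['/dev/sdb', '/dev/sdc'])
+     # -> {'osd.0': '/dev/sdb', 'osd.1': '/dev/sdc'}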
+ """
return dict(zip(roles, devs))
@contextlib.contextmanager
def valgrind_post(ctx, config):
+ """
+ After the tests run, look through all the valgrind logs. Exceptions are raised
+ if textual errors occurred in the logs, or if valgrind exceptions were detected in
+ the logs.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
try:
yield
finally:
def mount_osd_data(ctx, remote, osd):
+ """
+ Mount a remote OSD
+
+ :param ctx: Context
+ :param remote: Remote site
+ :param osd: Osd name
+ """
log.debug('Mounting data for osd.{o} on {r}'.format(o=osd, r=remote))
if remote in ctx.disk_config.remote_to_roles_to_dev and osd in ctx.disk_config.remote_to_roles_to_dev[remote]:
dev = ctx.disk_config.remote_to_roles_to_dev[remote][osd]
)
def make_admin_daemon_dir(ctx, remote):
+ """
+ Create /var/run/ceph directory on remote site.
+
+ :param ctx: Context
+ :param remote: Remote site
+ """
remote.run(
args=[
'sudo',
@contextlib.contextmanager
def cluster(ctx, config):
+ """
+ Handle the creation and removal of a ceph cluster.
+
+ On startup:
+ Create directories needed for the cluster.
+ Create remote journals for all osds.
+ Create and set keyring.
+ Copy the monmap to the test systems.
+ Setup mon nodes.
+ Setup mds nodes.
+ Mkfs osd nodes.
+ Add keyring information to monmaps
+ Mkfs mon nodes.
+
+ On exit:
+ If errors occurred, extract a failure message and store it in ctx.summary.
+ Unmount all test files and temporary journaling files.
+ Save the monitor information and archive all ceph logs.
+ Cleanup the keyring setup, and remove all monitor map and data files left over.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
testdir = teuthology.get_testdir(ctx)
log.info('Creating ceph cluster...')
run.wait(
log.info('Checking cluster log for badness...')
def first_in_ceph_log(pattern, excludes):
+ """
+ Find the first occurrence of the pattern specified in the Ceph log.
+ Returns None if none found.
+
+ :param pattern: Pattern scanned for.
+ :param excludes: Patterns to ignore.
+ :return: First line of text (or None if not found)
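+
+ A rough local equivalent of the remote egrep pipeline (log lines and
+ patterns here are made up for illustration)::
+
+     import re
+
+     lines = ['... cluster [WRN] 1 slow requests',
+              '... cluster [ERR] scrub mismatch']
+     pattern, excludes = r'\[ERR\]|\[WRN\]|\[SEC\]', [r'slow request']
+     hits = [l for l in lines
+             if re.search(pattern, l)
+             and not any(re.search(x, l) for x in excludes)]
+     first = hits[0] if hits else None   # the [ERR] line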
+ """
args = [
'sudo',
'egrep', pattern,
@contextlib.contextmanager
def run_daemon(ctx, config, type_):
+ """
+ Run daemons for a role type. Handle the startup and termination of a daemon.
+ On startup -- set coverages, cpu_profile, valgrind values for all remotes,
+ and a max_mds value for one mds.
+ On cleanup -- Stop all existing daemons of this type.
+
+ :param ctx: Context
+ :param config: Configuration
+ :param type_: Role type
+ """
log.info('Starting %s daemons...' % type_)
testdir = teuthology.get_testdir(ctx)
daemons = ctx.cluster.only(teuthology.is_type(type_))
teuthology.stop_daemons_of_type(ctx, type_)
def healthy(ctx, config):
+ """
+ Wait for all osds to be up, and for the ceph health check to return HEALTH_OK.
+
+ :param ctx: Context
+ :param config: Configuration
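+
+ The wait is a simple poll; a minimal local sketch, where is_clean is a
+ hypothetical stand-in for the remote health check::
+
+     import time
+
+     def wait_until_healthy(is_clean, interval=1):
+         while not is_clean():
+             time.sleep(interval)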
+ """
log.info('Waiting until ceph is healthy...')
firstmon = teuthology.get_first_mon(ctx, config)
(mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
)
def wait_for_osds_up(ctx, config):
+ """
+ Wait for all osds to come up.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
log.info('Waiting until ceph osds are all up...')
firstmon = teuthology.get_first_mon(ctx, config)
(mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
)
def wait_for_mon_quorum(ctx, config):
+ """
+ Check remote ceph status until all monitors are in quorum.
+
+ :param ctx: Context
+ :param config: Configuration
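+
+ A sketch of the comparison performed (the JSON here is a hypothetical
+ quorum status fragment)::
+
+     import json
+
+     status = json.loads('{"quorum_names": ["a", "b", "c"]}')
+     assert sorted(status['quorum_names']) == sorted(['a', 'b', 'c'])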
+ """
import json
import time
@contextlib.contextmanager
def restart(ctx, config):
- """
+ """
restart ceph daemons
For example::
wait-for-healthy: false
wait-for-osds-up: true
- """
- if config is None:
- config = {}
- if isinstance(config, list):
- config = { 'daemons': config }
- if 'daemons' not in config:
- config['daemons'] = []
- type_daemon = ['mon', 'osd', 'mds', 'rgw']
- for d in type_daemon:
- type_ = d
- for daemon in ctx.daemons.iter_daemons_of_role(type_):
- config['daemons'].append(type_ + '.' + daemon.id_)
-
- assert isinstance(config['daemons'], list)
- daemons = dict.fromkeys(config['daemons'])
- for i in daemons.keys():
- type_ = i.split('.', 1)[0]
- id_ = i.split('.', 1)[1]
- ctx.daemons.get_daemon(type_, id_).stop()
- ctx.daemons.get_daemon(type_, id_).restart()
-
- if config.get('wait-for-healthy', True):
- healthy(ctx=ctx, config=None)
- if config.get('wait-for-osds-up', False):
- wait_for_osds_up(ctx=ctx, config=None)
- yield
+ :param ctx: Context
+ :param config: Configuration
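+
+ A sketch of the config normalization performed below (hypothetical
+ input)::
+
+     config = ['osd.0', 'mon.a']          # list form from the yaml
+     if isinstance(config, list):
+         config = {'daemons': config}
+     # config == {'daemons': ['osd.0', 'mon.a']}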
+ """
+ if config is None:
+ config = {}
+ if isinstance(config, list):
+ config = { 'daemons': config }
+ if 'daemons' not in config:
+ config['daemons'] = []
+ type_daemon = ['mon', 'osd', 'mds', 'rgw']
+ for d in type_daemon:
+ type_ = d
+ for daemon in ctx.daemons.iter_daemons_of_role(type_):
+ config['daemons'].append(type_ + '.' + daemon.id_)
+
+ assert isinstance(config['daemons'], list)
+ daemons = dict.fromkeys(config['daemons'])
+ for i in daemons.keys():
+ type_ = i.split('.', 1)[0]
+ id_ = i.split('.', 1)[1]
+ ctx.daemons.get_daemon(type_, id_).stop()
+ ctx.daemons.get_daemon(type_, id_).restart()
+
+ if config.get('wait-for-healthy', True):
+ healthy(ctx=ctx, config=None)
+ if config.get('wait-for-osds-up', False):
+ wait_for_osds_up(ctx=ctx, config=None)
+ yield
@contextlib.contextmanager
def task(ctx, config):
- ceph:
log-whitelist: ['foo.*bar', 'bad message']
+ :param ctx: Context
+ :param config: Configuration
"""
if config is None:
config = {}
+"""
+Set up client keyring
+"""
import logging
from teuthology import misc as teuthology
log = logging.getLogger(__name__)
def create_keyring(ctx):
+ """
+ Set up keyring on remote sites
+ """
log.info('Setting up client nodes...')
clients = ctx.cluster.only(teuthology.is_type('client'))
testdir = teuthology.get_testdir(ctx)
+"""
+Chef-solo task
+"""
import logging
from ..orchestra import run
+"""
+Mount cifs clients. Unmount when finished.
+"""
import contextlib
import logging
import os
- cifs-mount:
client.0:
share: cephfuse
+
+ :param ctx: Context
+ :param config: Configuration
"""
log.info('Mounting cifs clients...')
from teuthology.task.samba import get_sambas
samba_roles = ['samba.{id_}'.format(id_=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba')]
sambas = list(get_sambas(ctx=ctx, roles=samba_roles))
- (ip, port) = sambas[0][1].ssh.get_transport().getpeername()
+ (ip, _) = sambas[0][1].ssh.get_transport().getpeername()
log.info('samba ip: {ip}'.format(ip=ip))
for id_, remote in clients:
+"""
+Clock synchronizer
+"""
import logging
import contextlib
to sync.
+ :param ctx: Context
+ :param config: Configuration
"""
log.info('Syncing clocks and checking initial clock skew...')
@contextlib.contextmanager
def check(ctx, config):
+ """
+ Run ntpdc at the start and the end of the task.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
log.info('Checking initial clock skew...')
for rem in ctx.cluster.remotes.iterkeys():
rem.run(
+"""
+Cram tests
+"""
import logging
import os
- cram:
clients:
all: [http://ceph.com/qa/test.t]
+
+ :param ctx: Context
+ :param config: Configuration
"""
assert isinstance(config, dict)
assert 'clients' in config and isinstance(config['clients'], dict), \
)
def _run_tests(ctx, role):
+ """
+ For each role, check to make sure it's a client, then run the cram test on that client
+
+ :param ctx: Context
+ :param role: Roles
+ """
assert isinstance(role, basestring)
PREFIX = 'client.'
assert role.startswith(PREFIX)
+"""
+Raise exceptions on osd coredumps or test err directories
+"""
import contextlib
import logging
import time
+"""
+Special case divergence test
+"""
import logging
import time
+"""
+Dump_stuck command
+"""
import logging
import re
import time
log = logging.getLogger(__name__)
def check_stuck(manager, num_inactive, num_unclean, num_stale, timeout=10):
+ """
+ Do checks. Make sure get_stuck_pgs returns the right amount of information, then
+ extract health information from the raw_cluster_cmd and compare the results with
+ values passed in. This passes if all asserts pass.
+
+ :param manager: Ceph manager
+ :param num_inactive: number of inactive pgs that are stuck
+ :param num_unclean: number of unclean pgs that are stuck
+ :param num_stale: number of stale pgs that are stuck
+ :param timeout: timeout value for get_stuck_pgs calls
+ """
inactive = manager.get_stuck_pgs('inactive', timeout)
assert len(inactive) == num_inactive
unclean = manager.get_stuck_pgs('unclean', timeout)
"""
Test the dump_stuck command.
+ :param ctx: Context
+ :param config: Configuration
"""
assert config is None, \
'dump_stuck requires no configuration'
+"""
+Execute custom commands
+"""
import logging
from teuthology import misc as teuthology
- echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control
- interactive:
+ :param ctx: Context
+ :param config: Configuration
"""
log.info('Executing custom commands...')
assert isinstance(config, dict), "task exec got invalid config"
+"""
+Filestore/filejournal handler
+"""
import logging
from ..orchestra import run
import random
Currently this is a kludge; we require the ceph task preceeds us just
so that we get the tarball installed to run the test binary.
+
+ :param ctx: Context
+ :param config: Configuration
"""
assert config is None or isinstance(config, list) \
or isinstance(config, dict), \
+"""
+Drop into a python shell
+"""
import code
import readline
import rlcompleter
+"""
+Mount/unmount a ``kernel`` client.
+"""
import contextlib
import logging
import os
- ceph-fuse: [client.0]
- kclient: [client.1]
- interactive:
+
+ :param ctx: Context
+ :param config: Configuration
"""
log.info('Mounting kernel clients...')
assert config is None or isinstance(config, list), \
+"""
+Enable most ceph console logging
+"""
import contextlib
import logging
+"""
+Kernel installation task
+"""
from cStringIO import StringIO
import logging
If config is None or just specifies a version to use,
it is applied to all nodes.
+
+ :param ctx: Context
+ :param config: Configuration
"""
if config is None or \
len(filter(lambda x: x in ['tag', 'branch', 'sha1', 'kdb',
def _find_arch_and_dist(ctx):
"""
Return the arch and distro value as a tuple.
+
+ Currently this only returns armv7l on the quantal distro or x86_64
+ on the precise distro
+
+ :param ctx: Context
+ :returns: arch,distro
"""
info = ctx.config.get('machine_type', 'plana')
if teuthology.is_arm(info):
return ('x86_64', 'precise')
def validate_config(ctx, config):
+ """
+ Make sure that all kernels in the list of remote kernels
+ refer to the same kernel.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
for _, roles_for_host in ctx.cluster.remotes.iteritems():
kernel = None
for role in roles_for_host:
del config[role]
def _vsplitter(version):
- """kernels from Calxeda are named ...ceph-<sha1>...highbank
- kernels that we generate named ...-g<sha1>
+ """Kernels from Calxeda are named ...ceph-<sha1>...highbank.
+ Kernels that we generate are named ...-g<sha1>.
This routine finds the text in front of the sha1 that is used by
need_to_install() to extract information from the kernel name.
- """
+ :param version: Name of the kernel
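+
+ For example (hypothetical kernel names)::
+
+     _vsplitter('3.8.0-ceph-12ab34cd-highbank')   # -> 'ceph-'
+     _vsplitter('3.10.0-123-g12ab34cd')           # -> '-g'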
+ """
if version.endswith('highbank'):
return 'ceph-'
return '-g'
def need_to_install(ctx, role, sha1):
+ """
+ Check to see if we need to install a kernel. Get the version
+ of the currently running kernel, extract the sha1 value from
+ its name, and compare it against the value passed in.
+
+ :param ctx: Context
+ :param role: machine associated with each role
+ :param sha1: sha1 to compare against (used in checking)
+ """
ret = True
log.info('Checking kernel version of {role}, want {sha1}...'.format(
role=role,
return ret
def install_firmware(ctx, config):
+ """
+ Fetch the latest firmware from the configured linux-firmware git repository.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
linux_firmware_git_upstream = 'git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git'
uri = teuth_config.linux_firmware_git_url or linux_firmware_git_upstream
fw_dir = '/lib/firmware/updates'
)
def download_deb(ctx, config):
+ """
+ Download a Debian kernel and copy the associated linux image.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
procs = {}
for role, src in config.iteritems():
(role_remote,) = ctx.cluster.only(role).remotes.keys()
def _no_grub_link(in_file, remote, kernel_ver):
+ """
+ Copy and link kernel-related files if grub cannot be used
+ (as is the case with Arm kernels)
+
+ :param in_file: kernel file or image file to be copied.
+ :param remote: remote machine
+ :param kernel_ver: kernel version
+ """
boot1 = '/boot/%s' % in_file
boot2 = '%s.old' % boot1
remote.run(
)
def install_and_reboot(ctx, config):
+ """
+ Install and reboot the kernel. This mostly performs remote
+ installation operations. The code does check for Arm images
+ and skips grub operations if the kernel is Arm. Otherwise, it
+ extracts kernel titles from submenu entries and makes the appropriate
+ grub calls. The assumptions here are somewhat simplified in that
+ it expects kernel entries to be present under submenu entries.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
procs = {}
kernel_title = ''
for role, src in config.iteritems():
proc.exitstatus.get()
def enable_disable_kdb(ctx, config):
+ """
+ Enable kdb on remote machines in use. Disable on those that are
+ not in use.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
for role, enable in config.iteritems():
(role_remote,) = ctx.cluster.only(role).remotes.keys()
if "mira" in role_remote.name:
"""
Loop reconnecting and checking kernel versions until
they're all correct or the timeout is exceeded.
+
+ :param ctx: Context
+ :param need_install: list of packages that we need to reinstall.
+ :param timeout: number of seconds before we time out.
"""
import time
starttime = time.time()
kernel:
kdb: true
+ :param ctx: Context
+ :param config: Configuration
"""
assert config is None or isinstance(config, dict), \
"task kernel only supports a dictionary for configuration"
+"""
+Export/Unexport a ``nfs server`` client.
+"""
import contextlib
import logging
import os
ro,sync,wdelay,hide,nocrossmnt,secure,root_squash,no_all_squash,
no_subtree_check,secure_locks,acl,anonuid=65534,anongid=65534
+ :param ctx: Context
+ :param config: Configuration
"""
log.info('Exporting nfs server...')
Prevent bogus clients from old runs from access our
export. Specify all specify node addresses for this run.
"""
- ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, roles) in ctx.cluster.remotes.items())]
+ ips = [host for (host, _) in (remote.ssh.get_transport().getpeername() for (remote, roles) in ctx.cluster.remotes.items())]
for ip in ips:
args += [ '{ip}:{MNT}'.format(ip=ip, MNT=mnt) ]
+"""
+Localdir
+"""
import contextlib
import logging
import os
- localdir: [client.2]
- interactive:
+ :param ctx: Context
+ :param config: Configuration
"""
log.info('Creating local mnt dirs...')
+"""
+Locking tests
+"""
import logging
import os
In the past this test would have failed; there was a bug where waitlocks weren't
cleaned up if the process failed. More involved scenarios are also possible.
+
+ :param ctx: Context
+ :param config: Configuration
"""
log.info('Starting lockfile')
try:
# task
def lock_one(op, ctx):
+ """
+ Perform the individual lock
+ """
log.debug('spinning up locker with op={op_}'.format(op_=op))
timeout = None
proc = None
+"""
+locktests
+"""
import logging
from ..orchestra import run
[client.0, client.1]
This task does not yield; there would be little point.
+
+ :param ctx: Context
+ :param config: Configuration
"""
assert isinstance(config, list)
+"""
+Lost_unfound
+"""
import logging
import ceph_manager
from teuthology import misc as teuthology
def task(ctx, config):
"""
Test handling of lost objects.
+
+ A pretty rigid cluster is brought up and tested by this task
"""
if config is None:
config = {}
+"""
+Force pg creation on all osds
+"""
from teuthology import misc as teuthology
from ..orchestra import run
import logging
+"""
+Thrash mds by simulating failures
+"""
import logging
import contextlib
import ceph_manager
self.weight = weight
def log(self, x):
+ """Write data to logger assigned to this MDThrasher"""
self.logger.info(x)
def do_join(self):
+ """Thread finished"""
self.stopping = True
self.thread.get()
def do_thrash(self):
+ """
+ Perform the random thrashing action
+ """
self.log('starting mds_do_thrash for failure group: ' + ', '.join(['mds.{_id}'.format(_id=_f) for _f in self.failure_group]))
while not self.stopping:
delay = self.max_thrash_delay
+"""
+Monitor recovery
+"""
import logging
import ceph_manager
from teuthology import misc as teuthology
+"""
+Start mpi processes (and allow commands to be run inside process)
+"""
import logging
from teuthology import misc as teuthology
clients:
- rm -f {testdir}/gmnt
+ :param ctx: Context
+ :param config: Configuration
"""
assert isinstance(config, dict), 'task mpi got invalid config'
assert 'exec' in config, 'task mpi got invalid config, missing exec'
+"""
+Multibench testing
+"""
import contextlib
import logging
import radosbench
"please list clients to run on"
def run_one(num):
+ """Run test spawn from gevent"""
start = time.time()
benchcontext = copy.copy(config.get('radosbench'))
iterations = 0
+"""
+Nfs client tester
+"""
import contextlib
import logging
import os
+"""
+Null task
+"""
def task(ctx, config):
"""
This task does nothing.
+"""
+Test Object locations going down
+"""
import logging
import ceph_manager
from teuthology import misc as teuthology
+"""
+Run omapbench executable within teuthology
+"""
import contextlib
import logging
+"""
+Osd backfill test
+"""
import logging
import ceph_manager
import time
def rados_start(ctx, remote, cmd):
+ """
+ Run a remote rados command (currently used to only write data)
+ """
log.info("rados %s" % ' '.join(cmd))
testdir = teuthology.get_testdir(ctx)
pre = [
+"""
+Handle osdfailsafe configuration settings (nearfull ratio and full ratio)
+"""
from cStringIO import StringIO
import logging
import time
+"""
+osd recovery
+"""
import logging
import ceph_manager
import time
def rados_start(testdir, remote, cmd):
+ """
+ Run a remote rados command (currently used to only write data)
+ """
log.info("rados %s" % ' '.join(cmd))
pre = [
'adjust-ulimits',
+"""
+Task to group parallel running tasks
+"""
import sys
import logging
That is, if the entry is not a dict, we will look it up in the top-level
config.
- Sequential task and Parallel tasks can be nested.
+ Sequential tasks and Parallel tasks can be nested.
"""
log.info('starting parallel...')
p.spawn(_run_spawned, ctx, confg, taskname)
def _run_spawned(ctx,config,taskname):
+ """Run one of the tasks (this runs in parallel with others)"""
mgr = {}
try:
log.info('In parallel, running task %s...' % taskname)
+"""
+Parallel contextmanager test
+"""
import contextlib
import logging
+"""
+Peer test (Single test, not much configurable here)
+"""
import logging
import json
+"""
+Handle parallel execution on remote hosts
+"""
import logging
from teuthology import misc as teuthology
from gevent import event as event
def _init_barrier(barrier_queue, remote):
+ """current just queues a remote host"""
barrier_queue.put(remote)
def _do_barrier(barrier, barrier_queue, remote):
- # special case for barrier
+ """special case for barrier"""
barrier_queue.get()
if barrier_queue.empty():
barrier.set()
barrier.wait()
def _exec_host(barrier, barrier_queue, remote, sudo, testdir, ls):
+ """Execute command remotely"""
log.info('Running commands on host %s', remote.name)
args = [
'TESTDIR={tdir}'.format(tdir=testdir),
tor.wait([r])
def _generate_remotes(ctx, config):
+ """Return remote roles and the type of role specified in config"""
if 'all' in config and len(config) == 1:
ls = config['all']
for remote in ctx.cluster.remotes.iterkeys():
+"""
+Qemu task
+"""
from cStringIO import StringIO
import contextlib
@contextlib.contextmanager
def create_dirs(ctx, config):
+ """
+ Handle directory creation and cleanup
+ """
testdir = teuthology.get_testdir(ctx)
for client, client_config in config.iteritems():
assert 'test' in client_config, 'You must specify a test to run'
@contextlib.contextmanager
def generate_iso(ctx, config):
+ """Execute system commands to generate iso"""
log.info('generating iso...')
testdir = teuthology.get_testdir(ctx)
for client, client_config in config.iteritems():
@contextlib.contextmanager
def download_image(ctx, config):
+ """Downland base image, remove image file when done"""
log.info('downloading base image')
testdir = teuthology.get_testdir(ctx)
for client, client_config in config.iteritems():
@contextlib.contextmanager
def run_qemu(ctx, config):
+ """Setup kvm environment and start qemu"""
procs = []
testdir = teuthology.get_testdir(ctx)
for client, client_config in config.iteritems():
+"""
+Rados model-based integration tests
+"""
import contextlib
import logging
import gevent
])
def thread():
+ """Thread spawned by gevent"""
if not hasattr(ctx, 'manager'):
first_mon = teuthology.get_first_mon(ctx, config)
(mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+"""
+Rados benchmarking
+"""
import contextlib
import logging
+"""
+Rgw admin testing against a running instance
+"""
# The test cases in this file have been annotated for inventory.
# To extract the inventory (in csv format) use the command:
#
def successful_ops(out):
+ """Extract total from the first summary entry (presumed to be only one)"""
summary = out['summary']
if len(summary) == 0:
return 0
+"""
+Run rados gateway agent in test mode
+"""
import contextlib
import logging
import argparse
+"""
+Rbd testing task
+"""
import contextlib
import logging
import os
log = logging.getLogger(__name__)
def default_image_name(role):
+ """
+ Currently just appends the role to the 'testimage.' string
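+
+ For example::
+
+     default_image_name('client.0')   # -> 'testimage.client.0'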
+ """
return 'testimage.{role}'.format(role=role)
@contextlib.contextmanager
role_images = [(role, None) for role in config]
def strip_client_prefix(role):
+ """Currently just removes 'client.' from start of role name"""
PREFIX = 'client.'
assert role.startswith(PREFIX)
id_ = role[len(PREFIX):]
]
)
-# Determine the canonical path for a given path on the host
-# representing the given role. A canonical path contains no
-# . or .. components, and includes no symbolic links.
def canonical_path(ctx, role, path):
+ """
+ Determine the canonical path for a given path on the host
+ representing the given role. A canonical path contains no
+ . or .. components, and includes no symbolic links.
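+
+ The remote ``readlink -f`` behaves like a local os.path.realpath call
+ (illustrative)::
+
+     import os
+
+     # collapses '.' and '..' components and resolves any symlinks
+     path = os.path.realpath('/tmp/../tmp/./x')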
+ """
version_fp = StringIO()
ctx.cluster.only(role).run(
args=[ 'readlink', '-f', path ],
yield
def run_xfstests_one_client(ctx, role, properties):
+ """
+ Spawned routine to handle xfs tests for a single client
+ """
testdir = teuthology.get_testdir(ctx)
try:
count = properties.get('count')
+"""
+Run fsx on an rbd image
+"""
import contextlib
import logging
yield
def _run_one_client(ctx, config, role):
+ """Spawned task that runs the client"""
testdir = teuthology.get_testdir(ctx)
(remote,) = ctx.cluster.only(role).remotes.iterkeys()
remote.run(
+"""
+Rest Api
+"""
import logging
import contextlib
import time
@contextlib.contextmanager
def run_rest_api_daemon(ctx, api_clients):
+ """
+ Wrapper that starts the rest api daemons
+ """
if not hasattr(ctx, 'daemons'):
ctx.daemons = CephState()
remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
+"""
+Daemon restart
+"""
import logging
import pipes
log = logging.getLogger(__name__)
def restart_daemon(ctx, config, role, id_, *args):
+ """
+ Handle restart (including the execution of the command parameters passed)
+ """
log.info('Restarting {r}.{i} daemon...'.format(r=role, i=id_))
daemon = ctx.daemons.get_daemon(role, id_)
log.debug('Waiting for exit of {r}.{i} daemon...'.format(r=role, i=id_))
daemon.restart()
def get_tests(ctx, config, role, remote, testdir):
+ """Download restart tests"""
srcdir = '{tdir}/restart.{role}'.format(tdir=testdir, role=role)
refspec = config.get('branch')
+"""
+Samba
+"""
import contextlib
import logging
import sys
log = logging.getLogger(__name__)
def get_sambas(ctx, roles):
+ """
+ Scan for roles that are samba. Yield the id of the samba role
+ (samba.0, samba.1...) and the associated remote site
+
+ :param ctx: Context
+ :param roles: roles for this test (extracted from yaml files)
+ """
for role in roles:
assert isinstance(role, basestring)
PREFIX = 'samba.'
role, the default behavior is to enable the ceph UNC //localhost/ceph
and use the ceph vfs module as the smbd backend.
+ :param ctx: Context
+ :param config: Configuration
"""
log.info("Setting up smbd with ceph vfs...")
assert config is None or isinstance(config, list) or isinstance(config, dict), \
+"""
+Scrub osds
+"""
import contextlib
import gevent
import logging
scrub_proc.do_join()
class Scrubber:
+ """
+ Scrubbing is actually performed during initialization
+ """
def __init__(self, manager, config):
+ """
+ Spawn the scrubbing thread at the end of initialization.
+ """
self.ceph_manager = manager
self.ceph_manager.wait_for_clean()
else:
def tmp(x):
+ """Local display"""
print x
self.log = tmp
self.thread = gevent.spawn(self.do_scrub)
def do_join(self):
+ """Scrubbing thread finished"""
self.stopping = True
self.thread.get()
def do_scrub(self):
+ """Perform the scrub operation"""
frequency = self.config.get("frequency", 30)
deep = self.config.get("deep", 0)
+"""Scrub testing"""
from cStringIO import StringIO
import logging
+"""
+Task sequencer
+"""
import sys
import logging
That is, if the entry is not a dict, we will look it up in the top-level
config.
- Sequential task and Parallel tasks can be nested.
+ Sequential tasks and Parallel tasks can be nested.
+
+ :param ctx: Context
+ :param config: Configuration
"""
stack = []
try:
+"""
+Sleep task
+"""
import logging
import time
duration: 10
- interactive:
+ :param ctx: Context
+ :param config: Configuration
"""
if not config:
config = {}
+"""
+Parallel and sequential task tester. Not used by any ceph tests, but used to
+unit test the parallel and sequential tasks
+"""
import logging
import contextlib
import time
+"""
+Thrash -- Simulate random osd failures.
+"""
import contextlib
import logging
import ceph_manager
+"""
+Timer task
+"""
import logging
import contextlib
import datetime
+"""
+test_stress_watch task
+"""
import contextlib
import logging
import proc_thrasher
+"""
+Workunit task -- Run ceph on sets of specific clients
+"""
import logging
import pipes
import os
def task(ctx, config):
"""
- Run ceph all workunits found under the specified path.
+ Run ceph on all workunits found under the specified path.
For example::
env:
FOO: bar
BAZ: quux
+
+ :param ctx: Context
+ :param config: Configuration
"""
assert isinstance(config, dict)
assert isinstance(config.get('clients'), dict), \
PREFIX = 'client.'
assert role.startswith(PREFIX)
if created_dir_dict[role]:
- _delete_dir(ctx, role, config.get('subdir'))
+ _delete_dir(ctx, role)
+
+def _delete_dir(ctx, role):
+ """
+ Delete file used by this role, and delete the directory that this
+ role appeared in.
-def _delete_dir(ctx, role, subdir):
+ :param ctx: Context
+ :param role: "role.#" where # is used for the role id.
+ """
PREFIX = 'client.'
testdir = teuthology.get_testdir(ctx)
id_ = role[len(PREFIX):]
(remote,) = ctx.cluster.only(role).remotes.iterkeys()
mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
+ # Is there any reason why this is not: join(mnt, role) ?
client = os.path.join(mnt, 'client.{id}'.format(id=id_))
try:
remote.run(
log.exception("Caught an execption deleting dir {dir}".format(dir=mnt))
def _make_scratch_dir(ctx, role, subdir):
+ """
+ Make scratch directories for this role. This also makes the mount
+ point if that directory does not exist.
+
+ :param ctx: Context
+ :param role: "role.#" where # is used for the role id.
+ :param subdir: use this subdir (False if not used)
+ """
retVal = False
PREFIX = 'client.'
id_ = role[len(PREFIX):]
return retVal
def _spawn_on_all_clients(ctx, refspec, tests, env, subdir):
+ """
+ Make a scratch directory for each client in the cluster, and then for each
+ test spawn _run_tests for each client role.
+
+ :param ctx: Context
+ :param refspec: branch, sha1, or version tag used to identify this
+ build
+ :param tests: specific tests specified.
+ :param env: environment set in yaml file. Could be None.
+ :param subdir: subdirectory set in yaml file. Could be None
+ """
client_generator = teuthology.all_roles_of_type(ctx.cluster, 'client')
client_remotes = list()
for client in client_generator:
# cleanup the generated client directories
client_generator = teuthology.all_roles_of_type(ctx.cluster, 'client')
for client in client_generator:
- _delete_dir(ctx, 'client.{id}'.format(id=client), subdir)
+ _delete_dir(ctx, 'client.{id}'.format(id=client))
def _run_tests(ctx, refspec, role, tests, env, subdir=None):
+ """
+ Run the individual test. Create a scratch directory and then extract the workunits
+ from the git repository. Make the executables, and then run the tests.
+ Clean up (remove files created) after the tests are finished.
+
+ :param ctx: Context
+ :param refspec: branch, sha1, or version tag used to identify this
+ build
+ :param tests: specific tests specified.
+ :param env: environment set in yaml file. Could be None.
+ :param subdir: subdirectory set in yaml file. Could be None
+ """
testdir = teuthology.get_testdir(ctx)
assert isinstance(role, basestring)
PREFIX = 'client.'