git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
Refactor for modularity.
author Tommi Virtanen <tommi.virtanen@dreamhost.com>
Fri, 3 Jun 2011 21:47:44 +0000 (14:47 -0700)
committer Tommi Virtanen <tommi.virtanen@dreamhost.com>
Fri, 3 Jun 2011 21:47:44 +0000 (14:47 -0700)
New style: run "./virtualenv/bin/teuthology -v interactive.yaml".

daemon-helper [deleted file]
dbench.py [deleted file]
dbench.yaml
interactive.yaml [new file with mode: 0644]
requirements.txt
setup.py
teuthology/misc.py
teuthology/run.py [new file with mode: 0644]
teuthology/task/ceph.py [new file with mode: 0644]
teuthology/task/daemon-helper [new file with mode: 0755]

diff --git a/daemon-helper b/daemon-helper
deleted file mode 100755 (executable)
index a44fb45..0000000
--- a/daemon-helper
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/python
-
-"""
-Helper script for running long-lived processes.
-
-(The name says daemon, but here it just means "long-lived"; we
-assume the child process does not double-fork.)
-
-We start the command passed as arguments, with /dev/null as stdin, and
-then wait for EOF on stdin.
-
-When EOF is seen on stdin, the child process is killed.
-
-When the child process exits, this helper exits too.
-"""
-
-import fcntl
-import os
-import select
-import signal
-import subprocess
-import sys
-
-with file('/dev/null', 'rb') as devnull:
-    proc = subprocess.Popen(
-        args=sys.argv[1:],
-        stdin=devnull,
-        )
-
-flags = fcntl.fcntl(0, fcntl.F_GETFL)
-fcntl.fcntl(0, fcntl.F_SETFL, flags | os.O_NDELAY)
-
-saw_eof = False
-while True:
-    r,w,x = select.select([0], [], [0], 0.2)
-    if r:
-        data = os.read(0, 1)
-        if not data:
-            saw_eof = True
-            proc.send_signal(signal.SIGKILL)
-            break
-
-    if proc.poll() is not None:
-        # child exited
-        break
-
-exitstatus = proc.wait()
-if exitstatus > 0:
-    print >>sys.stderr, '{me}: command failed with exit status {exitstatus:d}'.format(
-        me=os.path.basename(sys.argv[0]),
-        exitstatus=exitstatus,
-        )
-    sys.exit(exitstatus)
-elif exitstatus < 0:
-    if saw_eof and exitstatus == -signal.SIGKILL:
-        # suppress error from the exit we intentionally caused
-        pass
-    else:
-        print >>sys.stderr, '{me}: command crashed with signal {signal:d}'.format(
-            me=os.path.basename(sys.argv[0]),
-            signal=-exitstatus,
-            )
-        sys.exit(1)
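
The stdin-EOF protocol described in the docstring above is what lets the runner stop remote daemons by simply closing a pipe; the shutdown loops later in this commit do exactly that with proc.stdin.close(). A minimal local sketch of a controller for daemon-helper (the sleep command and the relative path are illustrative only):

    import subprocess

    # Start a long-lived child under daemon-helper, keeping our end of
    # its stdin open.
    proc = subprocess.Popen(
        args=['./daemon-helper', 'sleep', '1000'],
        stdin=subprocess.PIPE,
        )

    # ... exercise the daemon here ...

    # Closing stdin delivers EOF to daemon-helper, which SIGKILLs the
    # child and then exits itself.
    proc.stdin.close()
    proc.wait()
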
diff --git a/dbench.py b/dbench.py
deleted file mode 100644 (file)
index 8590330..0000000
--- a/dbench.py
+++ /dev/null
@@ -1,437 +0,0 @@
-from gevent import monkey; monkey.patch_all()
-from orchestra import monkey; monkey.patch_all()
-
-from cStringIO import StringIO
-
-import bunch
-import logging
-import os
-import sys
-import yaml
-
-from orchestra import connection, run, remote
-import orchestra.cluster
-# TODO cleanup
-import teuthology.misc as teuthology
-from teuthology.run_tasks import run_tasks
-
-log = logging.getLogger(__name__)
-
-if __name__ == '__main__':
-    logging.basicConfig(
-        # level=logging.INFO,
-        level=logging.DEBUG,
-        )
-
-    with file('dbench.yaml') as f:
-        config = yaml.safe_load(f)
-
-    ROLES = config['roles']
-
-    connections = [connection.connect(t) for t in config['targets']]
-    remotes = [remote.Remote(name=t, ssh=c) for c,t in zip(connections, config['targets'])]
-    cluster = orchestra.cluster.Cluster()
-    for rem, roles in zip(remotes, ROLES):
-        cluster.add(rem, roles)
-
-    ctx = bunch.Bunch(
-        cluster=cluster,
-        )
-
-    log.info('Checking for old test directory...')
-    processes = cluster.run(
-        args=[
-            'test', '!', '-e', '/tmp/cephtest',
-            ],
-        wait=False,
-        )
-    try:
-        run.wait(processes)
-    except run.CommandFailedError as e:
-        log.error('Host %s has stale cephtest directory, check your lock and reboot to clean up.', rem)
-        sys.exit(1)
-
-    log.info('Creating directories...')
-    run.wait(
-        cluster.run(
-            args=[
-                'install', '-d', '-m0755', '--',
-                '/tmp/cephtest/binary',
-                '/tmp/cephtest/log',
-                '/tmp/cephtest/profiling-logger',
-                '/tmp/cephtest/data',
-                '/tmp/cephtest/class_tmp',
-                ],
-            wait=False,
-            )
-        )
-
-    for filename in ['daemon-helper']:
-        log.info('Shipping %r...', filename)
-        src = os.path.join(os.path.dirname(__file__), filename)
-        dst = os.path.join('/tmp/cephtest', filename)
-        with file(src, 'rb') as f:
-            for rem in cluster.remotes.iterkeys():
-                teuthology.write_file(
-                    remote=rem,
-                    path=dst,
-                    data=f,
-                    )
-                f.seek(0)
-                rem.run(
-                    args=[
-                        'chmod',
-                        'a=rx',
-                        '--',
-                        dst,
-                        ],
-                    )
-
-    log.info('Untarring ceph binaries...')
-    ceph_bindir_url = teuthology.get_ceph_binary_url()
-    cluster.run(
-        args=[
-            'uname', '-m',
-            run.Raw('|'),
-            'sed', '-e', 's/^/ceph./; s/$/.tgz/',
-            run.Raw('|'),
-            'wget',
-            '-nv',
-            '-O-',
-            '--base={url}'.format(url=ceph_bindir_url),
-            # need to use --input-file to make wget respect --base
-            '--input-file=-',
-            run.Raw('|'),
-            'tar', '-xzf', '-', '-C', '/tmp/cephtest/binary',
-            ],
-        )
-
-    log.info('Writing configs...')
-    ips = [host for (host, port) in (conn.get_transport().getpeername() for conn in connections)]
-    conf = teuthology.skeleton_config(roles=ROLES, ips=ips)
-    conf_fp = StringIO()
-    conf.write(conf_fp)
-    conf_fp.seek(0)
-    writes = cluster.run(
-        args=[
-            'python',
-            '-c',
-            'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
-            '/tmp/cephtest/ceph.conf',
-            ],
-        stdin=run.PIPE,
-        wait=False,
-        )
-    teuthology.feed_many_stdins_and_close(conf_fp, writes)
-    run.wait(writes)
-
-    log.info('Setting up mon.0...')
-    cluster.only('mon.0').run(
-        args=[
-            '/tmp/cephtest/binary/usr/local/bin/cauthtool',
-            '--create-keyring',
-            '/tmp/cephtest/ceph.keyring',
-            ],
-        )
-    cluster.only('mon.0').run(
-        args=[
-            '/tmp/cephtest/binary/usr/local/bin/cauthtool',
-            '--gen-key',
-            '--name=mon.',
-            '/tmp/cephtest/ceph.keyring',
-            ],
-        )
-    (mon0_remote,) = cluster.only('mon.0').remotes.keys()
-    teuthology.create_simple_monmap(
-        remote=mon0_remote,
-        conf=conf,
-        )
-
-    log.info('Creating admin key on mon.0...')
-    cluster.only('mon.0').run(
-        args=[
-            '/tmp/cephtest/binary/usr/local/bin/cauthtool',
-            '--gen-key',
-            '--name=client.admin',
-            '--set-uid=0',
-            '--cap', 'mon', 'allow *',
-            '--cap', 'osd', 'allow *',
-            '--cap', 'mds', 'allow',
-            '/tmp/cephtest/ceph.keyring',
-            ],
-        )
-
-    log.info('Copying mon.0 info to all monitors...')
-    keyring = teuthology.get_file(
-        remote=mon0_remote,
-        path='/tmp/cephtest/ceph.keyring',
-        )
-    monmap = teuthology.get_file(
-        remote=mon0_remote,
-        path='/tmp/cephtest/monmap',
-        )
-    mons = cluster.only(teuthology.is_type('mon'))
-    mons_no_0 = mons.exclude('mon.0')
-
-    for rem in mons_no_0.remotes.iterkeys():
-        # copy mon key and initial monmap
-        log.info('Sending mon0 info to node {remote}'.format(remote=rem))
-        teuthology.write_file(
-            remote=rem,
-            path='/tmp/cephtest/ceph.keyring',
-            data=keyring,
-            )
-        teuthology.write_file(
-            remote=rem,
-            path='/tmp/cephtest/monmap',
-            data=monmap,
-            )
-
-    log.info('Setting up mon nodes...')
-    run.wait(
-        mons.run(
-            args=[
-                '/tmp/cephtest/binary/usr/local/bin/osdmaptool',
-                '--clobber',
-                '--createsimple', '{num:d}'.format(
-                    num=teuthology.num_instances_of_type(ROLES, 'osd'),
-                    ),
-                '/tmp/cephtest/osdmap',
-                '--pg_bits', '2',
-                '--pgp_bits', '4',
-                ],
-            wait=False,
-            ),
-        )
-
-    for id_ in teuthology.all_roles_of_type(ROLES, 'mon'):
-        (rem,) = cluster.only('mon.{id}'.format(id=id_)).remotes.keys()
-        rem.run(
-            args=[
-                '/tmp/cephtest/binary/usr/local/bin/cmon',
-                '--mkfs',
-                '-i', id_,
-                '-c', '/tmp/cephtest/ceph.conf',
-                '--monmap=/tmp/cephtest/monmap',
-                '--osdmap=/tmp/cephtest/osdmap',
-                '--keyring=/tmp/cephtest/ceph.keyring',
-                ],
-            )
-
-    run.wait(
-        mons.run(
-            args=[
-                'rm',
-                '--',
-                '/tmp/cephtest/monmap',
-                '/tmp/cephtest/osdmap',
-                ],
-            wait=False,
-            ),
-        )
-
-    mon_daemons = {}
-    log.info('Starting mon daemons...')
-    for idx, roles_for_host in enumerate(ROLES):
-        for id_ in teuthology.roles_of_type(roles_for_host, 'mon'):
-            proc = run.run(
-                client=connections[idx],
-                args=[
-                    '/tmp/cephtest/daemon-helper',
-                    '/tmp/cephtest/binary/usr/local/bin/cmon',
-                    '-f',
-                    '-i', id_,
-                    '-c', '/tmp/cephtest/ceph.conf',
-                    ],
-                logger=log.getChild('mon.{id}'.format(id=id_)),
-                stdin=run.PIPE,
-                wait=False,
-                )
-            mon_daemons[id_] = proc
-
-    log.info('Setting up osd nodes...')
-    for idx, roles_for_host in enumerate(ROLES):
-        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
-            run.run(
-                client=connections[idx],
-                args=[
-                    '/tmp/cephtest/binary/usr/local/bin/cauthtool',
-                    '--create-keyring',
-                    '--gen-key',
-                    '--name=osd.{id}'.format(id=id_),
-                    '/tmp/cephtest/data/osd.{id}.keyring'.format(id=id_),
-                    ],
-                )
-
-    log.info('Setting up mds nodes...')
-    for idx, roles_for_host in enumerate(ROLES):
-        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
-            run.run(
-                client=connections[idx],
-                args=[
-                    '/tmp/cephtest/binary/usr/local/bin/cauthtool',
-                    '--create-keyring',
-                    '--gen-key',
-                    '--name=mds.{id}'.format(id=id_),
-                    '/tmp/cephtest/data/mds.{id}.keyring'.format(id=id_),
-                    ],
-                )
-
-    log.info('Setting up client nodes...')
-    clients = cluster.only(teuthology.is_type('client'))
-    for id_ in teuthology.all_roles_of_type(ROLES, 'client'):
-        (rem,) = cluster.only('client.{id}'.format(id=id_)).remotes.keys()
-        rem.run(
-            args=[
-                '/tmp/cephtest/binary/usr/local/bin/cauthtool',
-                '--create-keyring',
-                '--gen-key',
-                # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
-                '--name=client.{id}'.format(id=id_),
-                '/tmp/cephtest/data/client.{id}.keyring'.format(id=id_),
-                ],
-            )
-
-    log.info('Reading keys from all nodes...')
-    keys = []
-    for idx, roles_for_host in enumerate(ROLES):
-        for type_ in ['osd','mds','client']:
-            for id_ in teuthology.roles_of_type(roles_for_host, type_):
-                data = teuthology.get_file(
-                    remote=remotes[idx],
-                    path='/tmp/cephtest/data/{type}.{id}.keyring'.format(
-                        type=type_,
-                        id=id_,
-                        ),
-                    )
-                keys.append((type_, id_, data))
-
-    log.info('Adding keys to mon.0...')
-    for type_, id_, data in keys:
-        teuthology.write_file(
-            remote=mon0_remote,
-            path='/tmp/cephtest/temp.keyring',
-            data=data,
-            )
-        mon0_remote.run(
-            args=[
-                '/tmp/cephtest/binary/usr/local/bin/cauthtool',
-                '/tmp/cephtest/temp.keyring',
-                '--name={type}.{id}'.format(
-                    type=type_,
-                    id=id_,
-                    ),
-                ] + list(teuthology.generate_caps(type_)),
-            )
-        mon0_remote.run(
-            args=[
-                '/tmp/cephtest/binary/usr/local/bin/ceph',
-                '-c', '/tmp/cephtest/ceph.conf',
-                '-k', '/tmp/cephtest/ceph.keyring',
-                '-i', '/tmp/cephtest/temp.keyring',
-                'auth',
-                'add',
-                '{type}.{id}'.format(
-                    type=type_,
-                    id=id_,
-                    ),
-                ],
-            )
-
-    log.info('Setting max_mds...')
-    # TODO where does this belong?
-    mon0_remote.run(
-        args=[
-            '/tmp/cephtest/binary/usr/local/bin/ceph',
-            '-c', '/tmp/cephtest/ceph.conf',
-            '-k', '/tmp/cephtest/ceph.keyring',
-            'mds',
-            'set_max_mds',
-            '{num_mds:d}'.format(
-                num_mds=teuthology.num_instances_of_type(ROLES, 'mds'),
-                ),
-            ],
-        )
-
-    log.info('Running mkfs on osd nodes...')
-    for id_ in teuthology.all_roles_of_type(ROLES, 'osd'):
-        (rem,) = cluster.only('osd.{id}'.format(id=id_)).remotes.keys()
-        rem.run(
-            args=[
-                'mkdir',
-                os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)),
-                ],
-            )
-        rem.run(
-            args=[
-                '/tmp/cephtest/binary/usr/local/bin/cosd',
-                '--mkfs',
-                '-i', id_,
-                '-c', '/tmp/cephtest/ceph.conf'
-                ],
-            )
-
-    osd_daemons = {}
-    log.info('Starting osd daemons...')
-    for idx, roles_for_host in enumerate(ROLES):
-        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
-            proc = run.run(
-                client=connections[idx],
-                args=[
-                    '/tmp/cephtest/daemon-helper',
-                    '/tmp/cephtest/binary/usr/local/bin/cosd',
-                    '-f',
-                    '-i', id_,
-                    '-c', '/tmp/cephtest/ceph.conf'
-                    ],
-                logger=log.getChild('osd.{id}'.format(id=id_)),
-                stdin=run.PIPE,
-                wait=False,
-                )
-            osd_daemons[id_] = proc
-
-    mds_daemons = {}
-    log.info('Starting mds daemons...')
-    for idx, roles_for_host in enumerate(ROLES):
-        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
-            proc = run.run(
-                client=connections[idx],
-                args=[
-                    '/tmp/cephtest/daemon-helper',
-                    '/tmp/cephtest/binary/usr/local/bin/cmds',
-                    '-f',
-                    '-i', id_,
-                    '-c', '/tmp/cephtest/ceph.conf'
-                    ],
-                logger=log.getChild('mds.{id}'.format(id=id_)),
-                stdin=run.PIPE,
-                wait=False,
-                )
-            mds_daemons[id_] = proc
-
-
-    log.info('Waiting until ceph is healthy...')
-    teuthology.wait_until_healthy(
-        remote=mon0_remote,
-        )
-
-    # TODO kclient mount/umount
-
-    # TODO rbd
-
-    run_tasks(tasks=config['tasks'], ctx=ctx)
-
-    log.info('Shutting down mds daemons...')
-    for id_, proc in mds_daemons.iteritems():
-        proc.stdin.close()
-    run.wait(mds_daemons.itervalues())
-
-    log.info('Shutting down osd daemons...')
-    for id_, proc in osd_daemons.iteritems():
-        proc.stdin.close()
-    run.wait(osd_daemons.itervalues())
-
-    log.info('Shutting down mon daemons...')
-    for id_, proc in mon_daemons.iteritems():
-        proc.stdin.close()
-    run.wait(mon_daemons.itervalues())
diff --git a/dbench.yaml b/dbench.yaml
index c69626e6ce516977770d37e95d0165227941600c..a8d5b0f964a22cdcaad927967c5e531d438778bd 100644 (file)
--- a/dbench.yaml
+++ b/dbench.yaml
@@ -7,6 +7,7 @@ targets:
 - ubuntu@sepia71.ceph.dreamhost.com
 - ubuntu@sepia72.ceph.dreamhost.com
 tasks:
+- ceph:
 - cfuse: [client.0]
 - autotest:
     client.0: [dbench]
diff --git a/interactive.yaml b/interactive.yaml
new file mode 100644 (file)
index 0000000..df5ad0d
--- /dev/null
+++ b/interactive.yaml
@@ -0,0 +1,12 @@
+roles:
+- [mon.0, mds.0, osd.0]
+- [mon.1, osd.1]
+- [mon.2, client.0]
+targets:
+- ubuntu@sepia70.ceph.dreamhost.com
+- ubuntu@sepia71.ceph.dreamhost.com
+- ubuntu@sepia72.ceph.dreamhost.com
+tasks:
+- ceph:
+- cfuse: [client.0]
+- interactive:
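
Each entry under tasks: is a single-key mapping from a task name to its config (None for `- interactive:`, a role list for `- cfuse: [client.0]`). run_tasks itself is not part of this diff, so the dispatch below is only a sketch of what the new layout implies: import teuthology.task.<name> and drive its task(ctx, config) context manager, unwinding in reverse order on the way out.

    # Sketch only; the real run_tasks is not shown in this commit.
    def run_tasks(tasks, ctx):
        stack = []
        try:
            for taskdict in tasks:
                ((name, config),) = taskdict.items()
                mod = __import__('teuthology.task.%s' % name, fromlist=[name])
                manager = mod.task(ctx=ctx, config=config)
                manager.__enter__()
                stack.append(manager)
        finally:
            # tear down in reverse, so the ceph task shuts down last
            while stack:
                manager = stack.pop()
                manager.__exit__(None, None, None)
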
diff --git a/requirements.txt b/requirements.txt
index c44846a98ea101e48a29849d895154e83634e69b..83e9769c61be46d91e8f5a7e62a83bbb2fd319fa 100644 (file)
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@
 configobj
 PyYAML
 bunch >=1.0.0
+argparse >=1.2.1
diff --git a/setup.py b/setup.py
index a2d408c8f0ce84e390a9285922b6978c1ee3bbbf..8421bdeac7727cf3280300c331a9f26528a0528b 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -17,6 +17,13 @@ setup(
         'configobj',
         'PyYAML',
         'bunch >=1.0.0',
+        'argparse >=1.2.1',
         ],
 
+    entry_points={
+        'console_scripts': [
+            'teuthology = teuthology.run:main',
+            ],
+        },
+
     )
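
The new entry_points stanza is what creates the ./virtualenv/bin/teuthology command from the commit message: on install, setuptools writes a wrapper script into the virtualenv's bin directory that imports teuthology.run and calls main(). Roughly (the exact wrapper setuptools generates differs in detail):

    #!/path/to/virtualenv/bin/python
    # approximate shape of the generated 'teuthology' console script
    import sys
    from teuthology.run import main

    if __name__ == '__main__':
        sys.exit(main())
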
diff --git a/teuthology/misc.py b/teuthology/misc.py
index ca02099d054b2b04e496f609e2804048c2ddd028..c230aef894fbdb8cb692ad4603d7384c26dd581d 100644 (file)
--- a/teuthology/misc.py
+++ b/teuthology/misc.py
@@ -93,11 +93,6 @@ def roles_of_type(roles_for_host, type_):
         id_ = name[len(prefix):]
         yield id_
 
-def all_roles_of_type(roles, type_):
-    for roles_for_host in roles:
-        for id_ in roles_of_type(roles_for_host, type_):
-            yield id_
-
 def is_type(type_):
     """
     Returns a matcher function for whether role is of type given.
@@ -107,16 +102,13 @@ def is_type(type_):
         return role.startswith(prefix)
     return _is_type
 
-def num_instances_of_type(roles, type_):
+def num_instances_of_type(cluster, type_):
+    remotes_and_roles = cluster.remotes.items()
+    roles = [roles for (remote, roles) in remotes_and_roles]
     prefix = '{type}.'.format(type=type_)
     num = sum(sum(1 for role in hostroles if role.startswith(prefix)) for hostroles in roles)
     return num
 
-def server_with_role(all_roles, role):
-    for idx, host_roles in enumerate(all_roles):
-        if role in host_roles:
-            return idx
-
 def create_simple_monmap(remote, conf):
     """
     Writes a simple monmap based on current ceph.conf into <tmpdir>/monmap.
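
num_instances_of_type() now takes the Cluster itself and pulls the per-host role lists out of cluster.remotes, so callers (see ceph.py below) no longer need to thread a separate ROLES list around; all_roles_of_type and server_with_role go away for the same reason. A small usage sketch, assuming cluster.remotes maps each remote to its role list as the new code expects (FakeCluster is a hypothetical stand-in, not part of orchestra):

    class FakeCluster(object):
        # stand-in for orchestra.cluster.Cluster
        remotes = {
            'host-a': ['mon.0', 'mds.0', 'osd.0'],
            'host-b': ['mon.1', 'osd.1'],
            'host-c': ['mon.2', 'client.0'],
            }

    print num_instances_of_type(FakeCluster(), 'osd')  # 2
    print num_instances_of_type(FakeCluster(), 'mon')  # 3
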
diff --git a/teuthology/run.py b/teuthology/run.py
new file mode 100644 (file)
index 0000000..3a0fe29
--- /dev/null
+++ b/teuthology/run.py
@@ -0,0 +1,71 @@
+import argparse
+import yaml
+
+def config_file(string):
+    config = {}
+    try:
+        with file(string) as f:
+            g = yaml.safe_load_all(f)
+            for new in g:
+                config.update(new)
+    except IOError, e:
+        raise argparse.ArgumentTypeError(str(e))
+    return config
+
+class MergeConfig(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        config = getattr(namespace, self.dest)
+        for new in values:
+            config.update(new)
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Run ceph integration tests')
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true', default=None,
+        help='be more verbose',
+        )
+    parser.add_argument(
+        'config',
+        metavar='CONFFILE',
+        nargs='+',
+        type=config_file,
+        action=MergeConfig,
+        default={},
+        help='config file to read',
+        )
+
+    args = parser.parse_args()
+    return args
+
+def main():
+    from gevent import monkey; monkey.patch_all()
+    from orchestra import monkey; monkey.patch_all()
+
+    import logging
+
+    log = logging.getLogger(__name__)
+    ctx = parse_args()
+
+    loglevel = logging.INFO
+    if ctx.verbose:
+        loglevel = logging.DEBUG
+
+    logging.basicConfig(
+        level=loglevel,
+        )
+
+    log.debug('\n  '.join(['Config:', ] + yaml.safe_dump(ctx.config).splitlines()))
+    log.info('Opening connections...')
+
+    from orchestra import connection, remote
+    import orchestra.cluster
+
+    remotes = [remote.Remote(name=t, ssh=connection.connect(t))
+               for t in ctx.config['targets']]
+    ctx.cluster = orchestra.cluster.Cluster()
+    for rem, roles in zip(remotes, ctx.config['roles']):
+        ctx.cluster.add(rem, roles)
+
+    from teuthology.run_tasks import run_tasks
+    run_tasks(tasks=ctx.config['tasks'], ctx=ctx)
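
config_file() loads every YAML document in a file into one dict, and the MergeConfig action dict.update()s each CONFFILE onto the accumulated value, so later files override earlier ones at top-level key granularity; lists such as tasks are replaced wholesale rather than concatenated. A quick illustration of those merge semantics:

    import yaml

    base = yaml.safe_load('roles: [[mon.0]]\ntasks: [{ceph: null}]')
    extra = yaml.safe_load('tasks: [{interactive: null}]')

    config = {}
    for new in (base, extra):
        config.update(new)

    print config['tasks']  # [{'interactive': None}] -- replaced, not merged
    print config['roles']  # [['mon.0']] -- untouched keys survive
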
diff --git a/teuthology/task/ceph.py b/teuthology/task/ceph.py
new file mode 100644 (file)
index 0000000..3585518
--- /dev/null
+++ b/teuthology/task/ceph.py
@@ -0,0 +1,414 @@
+from cStringIO import StringIO
+
+import contextlib
+import logging
+import os
+import gevent
+
+from teuthology import misc as teuthology
+from orchestra import run
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    assert config is None
+
+    log.info('Checking for old test directory...')
+    processes = ctx.cluster.run(
+        args=[
+            'test', '!', '-e', '/tmp/cephtest',
+            ],
+        wait=False,
+        )
+    failed = False
+    for proc in processes:
+        assert isinstance(proc.exitstatus, gevent.event.AsyncResult)
+        try:
+            proc.exitstatus.get()
+        except run.CommandFailedError:
+            log.error('Host %s has stale cephtest directory, check your lock and reboot to clean up.', proc.remote.shortname)
+            failed = True
+    if failed:
+        raise RuntimeError('Stale jobs detected, aborting.')
+
+    log.info('Creating directories...')
+    run.wait(
+        ctx.cluster.run(
+            args=[
+                'install', '-d', '-m0755', '--',
+                '/tmp/cephtest/binary',
+                '/tmp/cephtest/log',
+                '/tmp/cephtest/profiling-logger',
+                '/tmp/cephtest/data',
+                '/tmp/cephtest/class_tmp',
+                ],
+            wait=False,
+            )
+        )
+
+    for filename in ['daemon-helper']:
+        log.info('Shipping %r...', filename)
+        src = os.path.join(os.path.dirname(__file__), filename)
+        dst = os.path.join('/tmp/cephtest', filename)
+        with file(src, 'rb') as f:
+            for rem in ctx.cluster.remotes.iterkeys():
+                teuthology.write_file(
+                    remote=rem,
+                    path=dst,
+                    data=f,
+                    )
+                f.seek(0)
+                rem.run(
+                    args=[
+                        'chmod',
+                        'a=rx',
+                        '--',
+                        dst,
+                        ],
+                    )
+
+    log.info('Untarring ceph binaries...')
+    ceph_bindir_url = teuthology.get_ceph_binary_url()
+    ctx.cluster.run(
+        args=[
+            'uname', '-m',
+            run.Raw('|'),
+            'sed', '-e', 's/^/ceph./; s/$/.tgz/',
+            run.Raw('|'),
+            'wget',
+            '-nv',
+            '-O-',
+            '--base={url}'.format(url=ceph_bindir_url),
+            # need to use --input-file to make wget respect --base
+            '--input-file=-',
+            run.Raw('|'),
+            'tar', '-xzf', '-', '-C', '/tmp/cephtest/binary',
+            ],
+        )
+
+    log.info('Writing configs...')
+    remotes_and_roles = ctx.cluster.remotes.items()
+    roles = [roles for (remote, roles) in remotes_and_roles]
+    ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, roles) in remotes_and_roles)]
+    conf = teuthology.skeleton_config(roles=roles, ips=ips)
+    conf_fp = StringIO()
+    conf.write(conf_fp)
+    conf_fp.seek(0)
+    writes = ctx.cluster.run(
+        args=[
+            'python',
+            '-c',
+            'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
+            '/tmp/cephtest/ceph.conf',
+            ],
+        stdin=run.PIPE,
+        wait=False,
+        )
+    teuthology.feed_many_stdins_and_close(conf_fp, writes)
+    run.wait(writes)
+
+    log.info('Setting up mon.0...')
+    ctx.cluster.only('mon.0').run(
+        args=[
+            '/tmp/cephtest/binary/usr/local/bin/cauthtool',
+            '--create-keyring',
+            '/tmp/cephtest/ceph.keyring',
+            ],
+        )
+    ctx.cluster.only('mon.0').run(
+        args=[
+            '/tmp/cephtest/binary/usr/local/bin/cauthtool',
+            '--gen-key',
+            '--name=mon.',
+            '/tmp/cephtest/ceph.keyring',
+            ],
+        )
+    (mon0_remote,) = ctx.cluster.only('mon.0').remotes.keys()
+    teuthology.create_simple_monmap(
+        remote=mon0_remote,
+        conf=conf,
+        )
+
+    log.info('Creating admin key on mon.0...')
+    ctx.cluster.only('mon.0').run(
+        args=[
+            '/tmp/cephtest/binary/usr/local/bin/cauthtool',
+            '--gen-key',
+            '--name=client.admin',
+            '--set-uid=0',
+            '--cap', 'mon', 'allow *',
+            '--cap', 'osd', 'allow *',
+            '--cap', 'mds', 'allow',
+            '/tmp/cephtest/ceph.keyring',
+            ],
+        )
+
+    log.info('Copying mon.0 info to all monitors...')
+    keyring = teuthology.get_file(
+        remote=mon0_remote,
+        path='/tmp/cephtest/ceph.keyring',
+        )
+    monmap = teuthology.get_file(
+        remote=mon0_remote,
+        path='/tmp/cephtest/monmap',
+        )
+    mons = ctx.cluster.only(teuthology.is_type('mon'))
+    mons_no_0 = mons.exclude('mon.0')
+
+    for rem in mons_no_0.remotes.iterkeys():
+        # copy mon key and initial monmap
+        log.info('Sending mon0 info to node {remote}'.format(remote=rem))
+        teuthology.write_file(
+            remote=rem,
+            path='/tmp/cephtest/ceph.keyring',
+            data=keyring,
+            )
+        teuthology.write_file(
+            remote=rem,
+            path='/tmp/cephtest/monmap',
+            data=monmap,
+            )
+
+    log.info('Setting up mon nodes...')
+    run.wait(
+        mons.run(
+            args=[
+                '/tmp/cephtest/binary/usr/local/bin/osdmaptool',
+                '--clobber',
+                '--createsimple', '{num:d}'.format(
+                    num=teuthology.num_instances_of_type(ctx.cluster, 'osd'),
+                    ),
+                '/tmp/cephtest/osdmap',
+                '--pg_bits', '2',
+                '--pgp_bits', '4',
+                ],
+            wait=False,
+            ),
+        )
+
+    for remote, roles_for_host in mons.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, 'mon'):
+            remote.run(
+                args=[
+                    '/tmp/cephtest/binary/usr/local/bin/cmon',
+                    '--mkfs',
+                    '-i', id_,
+                    '-c', '/tmp/cephtest/ceph.conf',
+                    '--monmap=/tmp/cephtest/monmap',
+                    '--osdmap=/tmp/cephtest/osdmap',
+                    '--keyring=/tmp/cephtest/ceph.keyring',
+                    ],
+                )
+
+    run.wait(
+        mons.run(
+            args=[
+                'rm',
+                '--',
+                '/tmp/cephtest/monmap',
+                '/tmp/cephtest/osdmap',
+                ],
+            wait=False,
+            ),
+        )
+
+    mon_daemons = {}
+    log.info('Starting mon daemons...')
+    for remote, roles_for_host in mons.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, 'mon'):
+            proc = remote.run(
+                args=[
+                    '/tmp/cephtest/daemon-helper',
+                    '/tmp/cephtest/binary/usr/local/bin/cmon',
+                    '-f',
+                    '-i', id_,
+                    '-c', '/tmp/cephtest/ceph.conf',
+                    ],
+                logger=log.getChild('mon.{id}'.format(id=id_)),
+                stdin=run.PIPE,
+                wait=False,
+                )
+            mon_daemons[id_] = proc
+
+    log.info('Setting up osd nodes...')
+    osds = ctx.cluster.only(teuthology.is_type('osd'))
+    for remote, roles_for_host in osds.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
+            remote.run(
+                args=[
+                    '/tmp/cephtest/binary/usr/local/bin/cauthtool',
+                    '--create-keyring',
+                    '--gen-key',
+                    '--name=osd.{id}'.format(id=id_),
+                    '/tmp/cephtest/data/osd.{id}.keyring'.format(id=id_),
+                    ],
+                )
+
+    log.info('Setting up mds nodes...')
+    mdss = ctx.cluster.only(teuthology.is_type('mds'))
+    for remote, roles_for_host in mdss.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
+            remote.run(
+                args=[
+                    '/tmp/cephtest/binary/usr/local/bin/cauthtool',
+                    '--create-keyring',
+                    '--gen-key',
+                    '--name=mds.{id}'.format(id=id_),
+                    '/tmp/cephtest/data/mds.{id}.keyring'.format(id=id_),
+                    ],
+                )
+
+    log.info('Setting up client nodes...')
+    clients = ctx.cluster.only(teuthology.is_type('client'))
+    for remote, roles_for_host in clients.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
+            remote.run(
+                args=[
+                    '/tmp/cephtest/binary/usr/local/bin/cauthtool',
+                    '--create-keyring',
+                    '--gen-key',
+                    # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
+                    '--name=client.{id}'.format(id=id_),
+                    '/tmp/cephtest/data/client.{id}.keyring'.format(id=id_),
+                    ],
+                )
+
+    log.info('Reading keys from all nodes...')
+    keys = []
+    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
+        for type_ in ['osd','mds','client']:
+            for id_ in teuthology.roles_of_type(roles_for_host, type_):
+                data = teuthology.get_file(
+                    remote=remote,
+                    path='/tmp/cephtest/data/{type}.{id}.keyring'.format(
+                        type=type_,
+                        id=id_,
+                        ),
+                    )
+                keys.append((type_, id_, data))
+
+    log.info('Adding keys to mon.0...')
+    for type_, id_, data in keys:
+        teuthology.write_file(
+            remote=mon0_remote,
+            path='/tmp/cephtest/temp.keyring',
+            data=data,
+            )
+        mon0_remote.run(
+            args=[
+                '/tmp/cephtest/binary/usr/local/bin/cauthtool',
+                '/tmp/cephtest/temp.keyring',
+                '--name={type}.{id}'.format(
+                    type=type_,
+                    id=id_,
+                    ),
+                ] + list(teuthology.generate_caps(type_)),
+            )
+        mon0_remote.run(
+            args=[
+                '/tmp/cephtest/binary/usr/local/bin/ceph',
+                '-c', '/tmp/cephtest/ceph.conf',
+                '-k', '/tmp/cephtest/ceph.keyring',
+                '-i', '/tmp/cephtest/temp.keyring',
+                'auth',
+                'add',
+                '{type}.{id}'.format(
+                    type=type_,
+                    id=id_,
+                    ),
+                ],
+            )
+
+    log.info('Setting max_mds...')
+    # TODO where does this belong?
+    mon0_remote.run(
+        args=[
+            '/tmp/cephtest/binary/usr/local/bin/ceph',
+            '-c', '/tmp/cephtest/ceph.conf',
+            '-k', '/tmp/cephtest/ceph.keyring',
+            'mds',
+            'set_max_mds',
+            '{num_mds:d}'.format(
+                num_mds=teuthology.num_instances_of_type(ctx.cluster, 'mds'),
+                ),
+            ],
+        )
+
+    log.info('Running mkfs on osd nodes...')
+    for remote, roles_for_host in osds.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
+            remote.run(
+                args=[
+                    'mkdir',
+                    os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)),
+                    ],
+                )
+            remote.run(
+                args=[
+                    '/tmp/cephtest/binary/usr/local/bin/cosd',
+                    '--mkfs',
+                    '-i', id_,
+                    '-c', '/tmp/cephtest/ceph.conf'
+                    ],
+                )
+
+    osd_daemons = {}
+    log.info('Starting osd daemons...')
+    for remote, roles_for_host in osds.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
+            proc = remote.run(
+                args=[
+                    '/tmp/cephtest/daemon-helper',
+                    '/tmp/cephtest/binary/usr/local/bin/cosd',
+                    '-f',
+                    '-i', id_,
+                    '-c', '/tmp/cephtest/ceph.conf'
+                    ],
+                logger=log.getChild('osd.{id}'.format(id=id_)),
+                stdin=run.PIPE,
+                wait=False,
+                )
+            osd_daemons[id_] = proc
+
+    mds_daemons = {}
+    log.info('Starting mds daemons...')
+    for remote, roles_for_host in mdss.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
+            proc = remote.run(
+                args=[
+                    '/tmp/cephtest/daemon-helper',
+                    '/tmp/cephtest/binary/usr/local/bin/cmds',
+                    '-f',
+                    '-i', id_,
+                    '-c', '/tmp/cephtest/ceph.conf'
+                    ],
+                logger=log.getChild('mds.{id}'.format(id=id_)),
+                stdin=run.PIPE,
+                wait=False,
+                )
+            mds_daemons[id_] = proc
+
+
+    log.info('Waiting until ceph is healthy...')
+    teuthology.wait_until_healthy(
+        remote=mon0_remote,
+        )
+
+    try:
+        yield
+    finally:
+        log.info('Shutting down mds daemons...')
+        for id_, proc in mds_daemons.iteritems():
+            proc.stdin.close()
+        run.wait(mds_daemons.itervalues())
+
+        log.info('Shutting down osd daemons...')
+        for id_, proc in osd_daemons.iteritems():
+            proc.stdin.close()
+        run.wait(osd_daemons.itervalues())
+
+        log.info('Shutting down mon daemons...')
+        for id_, proc in mon_daemons.iteritems():
+            proc.stdin.close()
+        run.wait(mon_daemons.itervalues())
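
task() here is the template for the whole refactor: a generator-based context manager whose body before the yield brings the cluster up, whose yield hands control back to run_tasks while the later tasks (cfuse, autotest, interactive) run, and whose finally block guarantees the daemons come down even when a nested task fails. A minimal skeleton for any new task module following the same pattern:

    import contextlib
    import logging

    log = logging.getLogger(__name__)

    @contextlib.contextmanager
    def task(ctx, config):
        log.info('Setting up...')          # runs when the task is entered
        try:
            yield                          # nested tasks execute here
        finally:
            log.info('Tearing down...')    # always runs, even on failure
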
diff --git a/teuthology/task/daemon-helper b/teuthology/task/daemon-helper
new file mode 100755 (executable)
index 0000000..a44fb45
--- /dev/null
+++ b/teuthology/task/daemon-helper
@@ -0,0 +1,63 @@
+#!/usr/bin/python
+
+"""
+Helper script for running long-lived processes.
+
+(The name says daemon, but here it just means "long-lived"; we
+assume the child process does not double-fork.)
+
+We start the command passed as arguments, with /dev/null as stdin, and
+then wait for EOF on stdin.
+
+When EOF is seen on stdin, the child process is killed.
+
+When the child process exits, this helper exits too.
+"""
+
+import fcntl
+import os
+import select
+import signal
+import subprocess
+import sys
+
+with file('/dev/null', 'rb') as devnull:
+    proc = subprocess.Popen(
+        args=sys.argv[1:],
+        stdin=devnull,
+        )
+
+flags = fcntl.fcntl(0, fcntl.F_GETFL)
+fcntl.fcntl(0, fcntl.F_SETFL, flags | os.O_NDELAY)
+
+saw_eof = False
+while True:
+    r,w,x = select.select([0], [], [0], 0.2)
+    if r:
+        data = os.read(0, 1)
+        if not data:
+            saw_eof = True
+            proc.send_signal(signal.SIGKILL)
+            break
+
+    if proc.poll() is not None:
+        # child exited
+        break
+
+exitstatus = proc.wait()
+if exitstatus > 0:
+    print >>sys.stderr, '{me}: command failed with exit status {exitstatus:d}'.format(
+        me=os.path.basename(sys.argv[0]),
+        exitstatus=exitstatus,
+        )
+    sys.exit(exitstatus)
+elif exitstatus < 0:
+    if saw_eof and exitstatus == -signal.SIGKILL:
+        # suppress error from the exit we intentionally caused
+        pass
+    else:
+        print >>sys.stderr, '{me}: command crashed with signal {signal:d}'.format(
+            me=os.path.basename(sys.argv[0]),
+            signal=-exitstatus,
+            )
+        sys.exit(1)
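
The exit-status handling at the bottom of daemon-helper relies on the subprocess convention that wait() returns the negated signal number when the child dies from a signal; that is why -signal.SIGKILL after an intentional stdin EOF is silenced, while any other signal death is reported. The convention is easy to see in isolation:

    import signal
    import subprocess

    proc = subprocess.Popen(['sleep', '60'])
    proc.send_signal(signal.SIGKILL)
    print proc.wait()  # -9, i.e. -signal.SIGKILL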