ceph-deploy task
author tamil <tamil.muthamizhan@inktank.com>
Fri, 8 Mar 2013 01:56:29 +0000 (17:56 -0800)
committer tamil <tamil.muthamizhan@inktank.com>
Fri, 8 Mar 2013 01:56:29 +0000 (17:56 -0800)
Signed-off-by: tamil <tamil.muthamizhan@inktank.com>
teuthology/task/ceph-deploy.py [new file with mode: 0644]

diff --git a/teuthology/task/ceph-deploy.py b/teuthology/task/ceph-deploy.py
new file mode 100644 (file)
index 0000000..1db2df9
--- /dev/null
@@ -0,0 +1,344 @@
+from cStringIO import StringIO
+
+import contextlib
+import os
+import time
+import logging
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+import ceph as ceph_fn
+from ..orchestra import run
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def download_ceph_deploy(ctx, config):
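+    """
+    Check out ceph-deploy on the admin (first mon) node and bootstrap it;
+    the checkout is removed again on teardown.
+    """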
+    log.info('Downloading ceph-deploy...')
+    testdir = teuthology.get_testdir(ctx)
+    ceph_admin = teuthology.get_first_mon(ctx, config)
+
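+    # run ceph-qa-chef's solo-from-scratch on the admin node (presumably to
+    # install the packages the ceph-deploy bootstrap below relies on)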
+    ctx.cluster.only(ceph_admin).run(
+        args=[
+            'wget', '-q', '-O-', 'https://raw.github.com/ceph/ceph-qa-chef/master/solo/solo-from-scratch',
+            run.Raw('|'), 'sh',
+            ],
+        )
+    ctx.cluster.only(ceph_admin).run(
+        args=[
+            'git', 'clone',
+            'http://github.com/ceph/ceph-deploy.git',
+            '{tdir}/ceph-deploy'.format(tdir=testdir),
+            ],
+        )
+    ctx.cluster.only(ceph_admin).run(
+        args=[
+            'cd',
+            '{tdir}/ceph-deploy'.format(tdir=testdir),
+            run.Raw('&&'),
+            './bootstrap',
+            ],
+        )
+
+    try:
+        yield
+    finally:
+        log.info('Removing ceph-deploy ...')
+        ctx.cluster.only(ceph_admin).run(
+            args=[
+                'rm',
+                '-rf',
+                '{tdir}/ceph-deploy'.format(tdir=testdir),
+                ],
+            )
+
+def is_healthy(ctx, config):
+    """Wait until a Ceph cluster is healthy."""
+    testdir = teuthology.get_testdir(ctx)
+    ceph_admin = teuthology.get_first_mon(ctx, config)
+    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
+    while True:
+        r = remote.run(
+            args=[
+                'cd',
+                '{tdir}'.format(tdir=testdir),
+                run.Raw('&&'),
+                'sudo', 'ceph',
+                'health',
+                '--concise',
+                ],
+            stdout=StringIO(),
+            logger=log.getChild('health'),
+            )
+        out = r.stdout.getvalue()
+        log.debug('Ceph health: %s', out.rstrip('\n'))
+        if out.split(None, 1)[0] == 'HEALTH_OK':
+            break
+        time.sleep(1)
+
+def get_nodes_using_roles(ctx, config, role):
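+    """
+    Return a list with one entry per instance of the given role: the FQDN
+    of the hosting node for mon roles, the short hostname otherwise.
+    """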
+    nodes = []
+    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, role):
+            host = str(remote).split('@')[-1]
+            if role == 'mon':
+                nodes.append(host)
+            else:
+                nodes.append(host.split('.')[0])
+    return nodes
+
+def get_dev_for_osd(ctx, config):
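+    """
+    Pair each osd role with a scratch device on its host and return a list
+    of 'shorthost:device' strings for the ceph-deploy osd create calls.
+    """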
+    osd_devs = []
+    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
+        host = remote.name.split('@')[-1]
+        shortname = host.split('.')[0]
+        devs = teuthology.get_scratch_devices(remote)
+        osd_roles = list(teuthology.roles_of_type(roles_for_host, 'osd'))
+        num_osds = len(osd_roles)
+        assert num_osds <= len(devs), 'fewer disks than osds on ' + shortname
+        for dev in devs[:num_osds]:
+            dev_short = dev.split('/')[-1]
+            osd_devs.append('{host}:{dev}'.format(host=shortname, dev=dev_short))
+    return osd_devs
+
+def get_all_nodes(ctx, config):
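+    """
+    Return every target host as a single space-separated string of short
+    hostnames.
+    """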
+    nodelist = []
+    for target in ctx.config['targets'].iterkeys():
+        host = target.split('@')[-1]
+        simple_host = host.split('.')[0]
+        nodelist.append(simple_host)
+    nodelist = " ".join(nodelist)
+    return nodelist
+
+def execute_ceph_deploy(ctx, config, cmd):
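+    """
+    Run the given ceph-deploy command from its checkout on the admin node
+    and return the exit status instead of raising on failure.
+    """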
+    testdir = teuthology.get_testdir(ctx)
+    ceph_admin = teuthology.get_first_mon(ctx, config)
+    (remote,) = ctx.cluster.only(ceph_admin).remotes.iterkeys()
+    proc = remote.run(
+        args=[
+            'cd',
+            '{tdir}/ceph-deploy'.format(tdir=testdir),
+            run.Raw('&&'),
+            run.Raw(cmd),
+            ],
+        check_status=False,
+        )
+    return proc.exitstatus
+
+@contextlib.contextmanager
+def build_ceph_cluster(ctx, config):
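+    """
+    Build a cluster with ceph-deploy: create mons, gather keys, create mds
+    and osds, then push ceph.conf and client keyrings to the client nodes.
+    On teardown, archive mon data and logs (when an archive directory is
+    set) and purge/uninstall ceph from the test machines.
+    """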
+    log.info('Building ceph cluster using ceph-deploy...')
+    testdir = teuthology.get_testdir(ctx)
+    ceph_branch = ''
+    if config.get('branch') is not None:
+        cbranch = config.get('branch')
+        for var, val in cbranch.iteritems():
+            if var == 'testing':
+                ceph_branch = '--{var}'.format(var=var)
+            else:
+                ceph_branch = '--{var}={val}'.format(var=var, val=val)
+    node_dev_list = []
+    all_nodes = get_all_nodes(ctx, config)
+    mds_nodes = get_nodes_using_roles(ctx, config, 'mds')
+    mds_nodes = " ".join(mds_nodes)
+    mon_node = get_nodes_using_roles(ctx, config, 'mon')
+    mon_nodes = " ".join(mon_node)
+    new_mon = './ceph-deploy new'+" "+mon_nodes
+    install_nodes = './ceph-deploy install '+ceph_branch+" "+all_nodes
+    purge_nodes = './ceph-deploy purge'+" "+all_nodes
+    uninstall_nodes = './ceph-deploy uninstall'+" "+all_nodes
+    mon_create_nodes = './ceph-deploy mon create'+" "+mon_nodes
+    mon_hostname = mon_nodes.split(' ')[0]
+    mon_hostname = str(mon_hostname)
+    gather_keys = './ceph-deploy gatherkeys'+" "+mon_hostname
+    deploy_mds = './ceph-deploy mds create'+" "+mds_nodes
+
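+    # order of operations: new -> install -> mon create -> gatherkeys
+    # (retried until the keys are available) -> mds create -> osd create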
+    if mon_nodes:
+        estatus_new = execute_ceph_deploy(ctx, config, new_mon)
+        if estatus_new == 0:
+            estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
+            if estatus_install == 0:
+                estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
+                if estatus_mon == 0:
+                    estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
+                    while estatus_gather != 0:
+                        # retry mon creation until gatherkeys succeeds
+                        execute_ceph_deploy(ctx, config, mon_create_nodes)
+                        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
+                    estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
+                    if estatus_mds == 0:
+                        node_dev_list = get_dev_for_osd(ctx, config)
+                        for d in node_dev_list:
+                            osd_create_cmds = './ceph-deploy osd create --zap-disk'+" "+d
+                            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
+                            if estatus_osd == 0:
+                                log.info('successfully created osd on %s', d)
+                            else:
+                                zap_disk = './ceph-deploy zapdisk'+" "+d
+                                execute_ceph_deploy(ctx, config, zap_disk)
+                                estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
+                                if estatus_osd == 0:
+                                    log.info('successfully created osd on %s after zapping the disk', d)
+                                else:
+                                    log.error('failed to create osd on %s', d)
+    else:
+        log.info('no monitor nodes found, skipping cluster setup')
+
+    log.info('Setting up client nodes...')
+
+    conf_path = '/etc/ceph/ceph.conf'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
+    conf_data = teuthology.get_file(
+        remote=mon0_remote,
+        path=conf_path,
+        sudo=True,
+        )
+
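+    # for each client role, create its keyring on the first mon and push it,
+    # along with ceph.conf, to the client node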
+    clients = ctx.cluster.only(teuthology.is_type('client'))
+    for remot, roles_for_host in clients.remotes.iteritems():
+        for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
+            client_keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
+            mon0_remote.run(
+                args=[
+                    'cd',
+                    '{tdir}'.format(tdir=testdir),
+                    run.Raw('&&'),
+                    'sudo','bash','-c',
+                    run.Raw('"'),'ceph',
+                    'auth',
+                    'get-or-create',
+                    'client.{id}'.format(id=id_),
+                    'mds', 'allow',
+                    'mon', 'allow *',
+                    'osd', 'allow *',
+                    run.Raw('>'),
+                    client_keyring,
+                    run.Raw('"'),
+                    ],
+                )
+            key_data = teuthology.get_file(
+                remote=mon0_remote,
+                path=client_keyring,
+                sudo=True,
+                )
+            teuthology.sudo_write_file(
+                remote=remot,
+                path=client_keyring,
+                data=key_data,
+                perms='0644'
+            )
+            teuthology.sudo_write_file(
+                remote=remot,
+                path=conf_path,
+                data=conf_data,
+                perms='0644'
+            )
+    try:
+        yield
+
+    finally:
+        if ctx.archive is not None:
+            # archive mon data, too
+            log.info('Archiving mon data...')
+            path = os.path.join(ctx.archive, 'data')
+            os.makedirs(path)
+            mons = ctx.cluster.only(teuthology.is_type('mon'))
+            for remote, roles in mons.remotes.iteritems():
+                for role in roles:
+                    if role.startswith('mon.'):
+                        teuthology.pull_directory_tarball(
+                            remote,
+                            '/var/lib/ceph/mon',
+                            path + '/' + role + '.tgz')
+
+            log.info('Compressing logs...')
+            run.wait(
+                ctx.cluster.run(
+                    args=[
+                        'sudo',
+                        'find',
+                        '/var/log/ceph',
+                        '-name',
+                        '*.log',
+                        '-print0',
+                        run.Raw('|'),
+                        'sudo',
+                        'xargs',
+                        '-0',
+                        '--no-run-if-empty',
+                        '--',
+                        'gzip',
+                        '--',
+                        ],
+                    wait=False,
+                    ),
+                )
+
+            log.info('Archiving logs...')
+            path = os.path.join(ctx.archive, 'remote')
+            os.makedirs(path)
+            for remote in ctx.cluster.remotes.iterkeys():
+                sub = os.path.join(path, remote.shortname)
+                os.makedirs(sub)
+                teuthology.pull_directory(remote, '/var/log/ceph',
+                                          os.path.join(sub, 'log'))
+
+            log.info('Purging and Uninstalling ceph on test machines')
+            execute_ceph_deploy(ctx, config, purge_nodes)
+            execute_ceph_deploy(ctx, config, uninstall_nodes)
+
+@contextlib.contextmanager
+def task(ctx, config):
+    """
+    Set up and tear down a Ceph cluster.
+
+    For example::
+
+        tasks:
+        - install:
+             extras: yes
+        - ssh_keys:
+        - ceph-deploy:
+             branch:
+                stable: bobtail
+
+        tasks:
+        - install:
+             extras: yes
+        - ssh_keys:
+        - ceph-deploy:
+             branch:
+                dev: master
+
+        tasks:
+        - install:
+             extras: yes
+        - ssh_keys:
+        - ceph-deploy:
+             branch:
+                testing:
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        "task ceph-deploy only supports a dictionary for configuration"
+
+    if config.get('branch') is not None:
+        assert isinstance(config['branch'], dict), 'branch must be a dictionary'
+
+    with contextutil.nested(
+         lambda: ceph_fn.ship_utilities(ctx=ctx, config=None),
+         lambda: download_ceph_deploy(ctx=ctx, config=config),
+         lambda: build_ceph_cluster(ctx=ctx, config=dict(
+                 branch=config.get('branch',{}),
+                 )),
+        ):
+        # if config.get('wait-for-healthy', True):
+        #     is_healthy(ctx=ctx, config=None)
+        yield