# From: Sage Weil
# Date: Wed, 1 Feb 2012 00:25:53 +0000 (-0800)
# Subject: add backfill task
# X-Git-Tag: 1.1.0~2653
# X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=0236dc0f5eb4604cf841a08ba46f0619fc741cde;p=teuthology.git
#
# add backfill task
#
# This does a basic test of backfill functionality, including a
# divergent log on a backfill target (#1983).
#
# Added as a new file: teuthology/task/backfill.py (index c8c30fc22b).
"""
Teuthology task that does a basic test of backfill functionality,
including a divergent log on a backfill target (#1983).
"""
import logging
import ceph_manager
import time
from teuthology import misc as teuthology


log = logging.getLogger(__name__)


def rados_start(remote, cmd):
    """
    Launch the ``rados`` tool on ``remote`` without waiting for it to exit.

    :param remote: object exposing ``run(args=..., wait=...)``; the command
                   is started through it
    :param cmd: list of argument strings appended after
                ``rados -c /tmp/cephtest/ceph.conf``
    :returns: the object returned by ``remote.run(..., wait=False)``; the
              caller is responsible for collecting its exit status
    """
    log.info("rados %s", ' '.join(cmd))
    args = [
        'LD_LIBRARY_PATH=/tmp/cephtest/binary/usr/local/lib',
        '/tmp/cephtest/enable-coredump',
        '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
        '/tmp/cephtest/archive/coverage',
        '/tmp/cephtest/binary/usr/local/bin/rados',
        '-c', '/tmp/cephtest/ceph.conf',
    ]
    args.extend(cmd)
    return remote.run(
        args=args,
        wait=False,
    )


def _flush_pg_stats(manager, osd_ids):
    """
    Send ``tell osd.<id> flush_pg_stats`` through ``manager`` for each id.
    """
    for osd_id in osd_ids:
        manager.raw_cluster_cmd('tell', 'osd.%d' % osd_id, 'flush_pg_stats')


def task(ctx, config):
    """
    Test backfill

    Sequence, on a cluster that must have exactly three osds:

    1. wait for the cluster to be clean and write some data;
    2. mark osd.0 out, kill it and mark it down;
    3. start a second, longer write and, while it is running, blackhole-kill
       and revive osd.1 (this triggers a divergent backfill target);
    4. wait for the write to finish and for the cluster to become clean;
    5. revive osd.0 and wait for the cluster to become clean again.

    :param ctx: teuthology run context; ``ctx.cluster`` is used to locate
                the first monitor and to count osds
    :param config: task configuration; ``None`` is treated as ``{}``,
                   anything else must be a dict
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'backfill task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s', num_osds)
    assert num_osds == 3, 'backfill task requires exactly 3 osds'

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    # NOTE(review): this compares the 'up' entry itself with 3; if
    # get_osd_status() reports 'up' as a collection of osd ids rather than
    # a count, this should be len(...) -- confirm against ceph_manager.
    while manager.get_osd_status()['up'] < 3:
        manager.sleep(10)
    _flush_pg_stats(manager, [0, 1, 2])
    manager.wait_till_clean()

    # write some data
    p = rados_start(mon, ['-p', 'data', 'bench', '15', 'write', '-b', '4096'])
    err = p.exitstatus.get()
    log.info('err is %d', err)

    # mark osd.0 out to trigger a rebalance/backfill
    manager.mark_out_osd(0)

    # also mark it down so it won't be included in pg_temps
    manager.kill_osd(0)
    manager.mark_down_osd(0)

    # write some new data
    p = rados_start(mon, ['-p', 'data', 'bench', '30', 'write', '-b', '4096'])

    # let the second write run for a while before disturbing osd.1
    time.sleep(15)

    # blackhole + restart osd.1
    # this triggers a divergent backfill target
    manager.blackhole_kill_osd(1)
    time.sleep(2)
    manager.revive_osd(1)

    # wait for our writes to complete + succeed
    err = p.exitstatus.get()
    log.info('err is %d', err)

    # cluster must recover
    _flush_pg_stats(manager, [1, 2])
    manager.wait_till_clean()

    # re-add osd.0
    manager.revive_osd(0)
    _flush_pg_stats(manager, [1, 2])
    manager.wait_till_clean()