--- /dev/null
+from cStringIO import StringIO
+
+import logging
+import os
+import time
+
+import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def rados_start(remote, cmd):
+    """
+    Run the rados command-line tool through ``remote`` and wait for it.
+
+    :param remote: object whose ``run(args=..., wait=...)`` executes a command
+    :param cmd: iterable of string arguments appended after the rados prefix
+    :returns: the value returned by ``remote.run``
+    """
+    log.info("rados %s" % ' '.join(cmd))
+
+    # Fixed prefix: LD_LIBRARY_PATH assignment, the enable-coredump and
+    # ceph-coverage entries with the coverage archive directory, then the
+    # rados binary and its '-c' config-file argument.
+    rados_prefix = [
+        'LD_LIBRARY_PATH=/tmp/cephtest/binary/usr/local/lib',
+        '/tmp/cephtest/enable-coredump',
+        '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
+        '/tmp/cephtest/archive/coverage',
+        '/tmp/cephtest/binary/usr/local/bin/rados',
+        '-c', '/tmp/cephtest/ceph.conf',
+    ]
+    full_args = rados_prefix + list(cmd)
+
+    # wait=True: the call returns only after the remote command has finished.
+    return remote.run(args=full_args, wait=True)
+
+def _find_pg_with_data(manager):
+    """Poll PG stats until a PG reports num_bytes > 0.
+
+    :returns: tuple ``(pgid, osd)`` where ``osd`` is the first entry of that
+              PG's ``acting`` list
+    """
+    while True:
+        for pg in manager.get_pg_stats():
+            if pg['stat_sum']['num_bytes'] > 0:
+                return pg['pgid'], pg['acting'][0]
+        time.sleep(3)
+
+
+def _pick_object_file(listing):
+    """Return the first line of an ``ls`` listing containing 'object', or None."""
+    for name in listing.splitlines():
+        # Substring test, not str.find(): find() returns -1 (truthy) on a
+        # miss and 0 (falsy) on a match at the start of the string.
+        if 'object' in name:
+            return name
+    return None
+
+
+def _wait_for_scrub(manager, pgid):
+    """Poll until the PG state no longer contains 'scrubbing'; return that state.
+
+    NOTE(review): if this is polled before the requested scrub/repair has
+    actually started, the pre-scrub state is returned -- confirm whether a
+    scrub-stamp comparison is needed to close that window.
+    """
+    while True:
+        state = manager.get_single_pg_stats(pgid)['state']
+        if 'scrubbing' not in state:
+            return state
+        time.sleep(3)
+
+
+def task(ctx, config):
+    """
+    Test [deep] scrub
+
+    Steps performed:
+
+    1. wait for all OSDs to be up and the cluster to be clean
+    2. write data into the 'rbd' pool with ``rados bench``
+    3. pick a PG that holds data and overwrite the first byte of one of its
+       on-disk object files with a zero byte
+    4. deep-scrub the PG and assert its state contains '+inconsistent'
+    5. repair the PG and assert its state no longer contains '+inconsistent'
+
+    :param ctx: teuthology run context (provides ``ctx.cluster``)
+    :param config: task configuration; ``None`` or a dict
+    :raises AssertionError: on bad configuration, when no object file is
+                            found, or when a scrub/repair expectation fails
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'scrub_test task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+    log.info('num_osds is %s' % num_osds)
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    # wait until every OSD reports up
+    while len(manager.get_osd_status()['up']) < num_osds:
+        time.sleep(10)
+
+    for i in range(num_osds):
+        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'flush_pg_stats')
+    manager.wait_for_clean()
+
+    # write some data
+    p = rados_start(mon, ['-p', 'rbd', 'bench', '1', 'write', '-b', '4096'])
+    err = p.exitstatus
+    log.info('err is %d' % err)
+
+    # wait for some PG to have data that we can mess with
+    victim, osd = _find_pg_with_data(manager)
+    log.info('messing with PG %s on osd %d' % (victim, osd))
+
+    (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.iterkeys()
+    data_path = os.path.join('/tmp/cephtest/data',
+                             'osd.{id}.data'.format(id=osd),
+                             'current',
+                             '{pg}_head'.format(pg=victim)
+                             )
+
+    # fuzz time: list the PG directory on the OSD host
+    ls_fp = StringIO()
+    try:
+        osd_remote.run(
+            args=['ls', data_path],
+            stdout=ls_fp,
+            )
+        ls_out = ls_fp.getvalue()
+    finally:
+        ls_fp.close()
+
+    # find an object file we can mess with
+    victim_file = _pick_object_file(ls_out)
+    assert victim_file is not None, \
+        'no object file found in %s' % data_path
+
+    log.info('fuzzing %s' % victim_file)
+
+    # put a single \0 at the beginning of the file (notrunc keeps the rest)
+    osd_remote.run(
+        args=['dd',
+              'if=/dev/zero',
+              'of=%s' % os.path.join(data_path, victim_file),
+              'bs=1', 'count=1', 'conv=notrunc'
+              ]
+        )
+
+    # scrub, verify inconsistent
+    manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
+    state = _wait_for_scrub(manager, victim)
+    assert '+inconsistent' in state, \
+        'PG %s not inconsistent after deep-scrub: %s' % (victim, state)
+
+    # repair, verify no longer inconsistent
+    manager.raw_cluster_cmd('pg', 'repair', victim)
+    state = _wait_for_scrub(manager, victim)
+    assert '+inconsistent' not in state, \
+        'PG %s still inconsistent after repair: %s' % (victim, state)
+
+    log.info('test successful!')