From 3b85b2311b95dd665920c3b91ca5d74ba09ce08e Mon Sep 17 00:00:00 2001
From: Mike Ryan
Date: Thu, 2 Aug 2012 10:58:08 -0700
Subject: [PATCH] task: verify scrub detects files whose contents changed

Signed-off-by: Mike Ryan
---
 teuthology/task/ceph_manager.py |  12 ++
 teuthology/task/scrub_test.py   | 156 ++++++++++++++++++++++++++++++++
 2 files changed, 168 insertions(+)
 create mode 100644 teuthology/task/scrub_test.py

diff --git a/teuthology/task/ceph_manager.py b/teuthology/task/ceph_manager.py
index 62d6d2139c3f6..c7697d423268f 100644
--- a/teuthology/task/ceph_manager.py
+++ b/teuthology/task/ceph_manager.py
@@ -221,6 +221,18 @@ class CephManager:
         j = json.loads('\n'.join(out.split('\n')[1:]))
         return j['pg_stats']
 
+    def get_single_pg_stats(self, pgid):
+        """
+        Return the pg_stats entry whose 'pgid' equals pgid, or None.
+        """
+        all_stats = self.get_pg_stats()
+
+        for pg in all_stats:
+            if pg['pgid'] == pgid:
+                return pg
+
+        return None
+
     def get_osd_dump(self):
         out = self.raw_cluster_cmd('--', 'osd','dump','--format=json')
         j = json.loads('\n'.join(out.split('\n')[1:]))
diff --git a/teuthology/task/scrub_test.py b/teuthology/task/scrub_test.py
new file mode 100644
index 0000000000000..1c80174de1725
--- /dev/null
+++ b/teuthology/task/scrub_test.py
@@ -0,0 +1,156 @@
+from cStringIO import StringIO
+
+import logging
+import os
+import time
+
+import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def rados_start(remote, cmd):
+    """
+    Run the rados tool on a remote and wait for it to complete.
+
+    :param remote: remote on which to run rados
+    :param cmd: list of arguments appended after the rados '-c <conf>' options
+    :returns: whatever remote.run() returns
+    """
+    log.info("rados %s" % ' '.join(cmd))
+    pre = [
+        'LD_LIBRARY_PATH=/tmp/cephtest/binary/usr/local/lib',
+        '/tmp/cephtest/enable-coredump',
+        '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
+        '/tmp/cephtest/archive/coverage',
+        '/tmp/cephtest/binary/usr/local/bin/rados',
+        '-c', '/tmp/cephtest/ceph.conf',
+    ]
+    pre.extend(cmd)
+    proc = remote.run(
+        args=pre,
+        wait=True,
+    )
+    return proc
+
+def _wait_for_scrub(manager, pgid):
+    """
+    Poll until the state of the given PG no longer contains 'scrubbing'.
+
+    NOTE(review): a poll that lands before the requested scrub has started
+    also sees no 'scrubbing' flag -- confirm whether callers need to wait
+    for the scrub to begin first.
+
+    :param manager: CephManager used to poll the PG stats
+    :param pgid: id of the PG to watch
+    :returns: the PG state string seen once 'scrubbing' is absent from it
+    """
+    while True:
+        stats = manager.get_single_pg_stats(pgid)
+        assert stats is not None, 'no stats found for PG %s' % pgid
+        state = stats['state']
+        if 'scrubbing' not in state:
+            return state
+        time.sleep(3)
+
+def task(ctx, config):
+    """
+    Test [deep] scrub
+
+    Writes data, overwrites the first byte of one object file in an OSD's
+    data directory, then checks that a deep scrub marks the PG inconsistent
+    and that a repair clears the inconsistent state.
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'scrub_test task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+    log.info('num_osds is %s' % num_osds)
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+    )
+
+    while len(manager.get_osd_status()['up']) < num_osds:
+        time.sleep(10)
+
+    for i in range(num_osds):
+        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'flush_pg_stats')
+    manager.wait_for_clean()
+
+    # write some data
+    p = rados_start(mon, ['-p', 'rbd', 'bench', '1', 'write', '-b', '4096'])
+    err = p.exitstatus
+    log.info('err is %d' % err)
+
+    # wait for some PG to have data that we can mess with
+    victim = None
+    osd = None
+    while victim is None:
+        stats = manager.get_pg_stats()
+        for pg in stats:
+            size = pg['stat_sum']['num_bytes']
+            if size > 0:
+                victim = pg['pgid']
+                osd = pg['acting'][0]
+                break
+
+        if victim is None:
+            time.sleep(3)
+
+    log.info('messing with PG %s on osd %d' % (victim, osd))
+
+    (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.iterkeys()
+    data_path = os.path.join(
+        '/tmp/cephtest/data',
+        'osd.{id}.data'.format(id=osd),
+        'current',
+        '{pg}_head'.format(pg=victim),
+    )
+
+    # fuzz time
+    ls_fp = StringIO()
+    osd_remote.run(
+        args=['ls', data_path],
+        stdout=ls_fp,
+    )
+    ls_out = ls_fp.getvalue()
+    ls_fp.close()
+
+    # find an object file we can mess with
+    victim_file = None
+    for line in ls_out.split('\n'):
+        if 'object' in line:
+            victim_file = line
+            break
+    assert victim_file is not None
+
+    log.info('fuzzing %s' % victim_file)
+
+    # put a single \0 at the beginning of the file
+    osd_remote.run(
+        args=[
+            'dd',
+            'if=/dev/zero',
+            'of=%s' % os.path.join(data_path, victim_file),
+            'bs=1', 'count=1', 'conv=notrunc',
+        ]
+    )
+
+    # scrub, verify inconsistent
+    manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
+    state = _wait_for_scrub(manager, victim)
+    assert '+inconsistent' in state
+
+    # repair, verify no longer inconsistent
+    manager.raw_cluster_cmd('pg', 'repair', victim)
+    state = _wait_for_scrub(manager, victim)
+    assert '+inconsistent' not in state
+
+    log.info('test successful!')
-- 
2.39.5