From e714c77812807148d38dd90704e54cd8420ff2bf Mon Sep 17 00:00:00 2001
From: David Zafman
Date: Fri, 18 Jan 2013 17:11:09 -0800
Subject: [PATCH] osd: Testing of deep-scrub omap changes

Fix scrub_test.py and add omap corruption test

Signed-off-by: David Zafman
Reviewed-by: Samuel Just
---
 teuthology/task/ceph_manager.py | 40 +++++++++++++++
 teuthology/task/scrub_test.py   | 91 +++++++++++++++++++++++----------
 2 files changed, 103 insertions(+), 28 deletions(-)

diff --git a/teuthology/task/ceph_manager.py b/teuthology/task/ceph_manager.py
index e0bff247b3..5748eba801 100644
--- a/teuthology/task/ceph_manager.py
+++ b/teuthology/task/ceph_manager.py
@@ -208,6 +208,46 @@ class CephManager:
         )
         return proc.stdout.getvalue()
 
+    def do_rados(self, remote, cmd):
+        pre = [
+            'LD_LIBRARY_PATH=/tmp/cephtest/binary/usr/local/lib',
+            '/tmp/cephtest/enable-coredump',
+            '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
+            '/tmp/cephtest/archive/coverage',
+            '/tmp/cephtest/binary/usr/local/bin/rados',
+            '-c', '/tmp/cephtest/ceph.conf',
+            ]
+        pre.extend(cmd)
+        proc = remote.run(
+            args=pre,
+            wait=True,
+            )
+        return proc
+
+    def osd_admin_socket(self, osdnum, command):
+        remote = None
+        for _remote, roles_for_host in self.ctx.cluster.remotes.iteritems():
+            for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
+                if int(id_) == osdnum:
+                    remote = _remote
+        assert remote is not None
+        args = [
+            'LD_LIBRARY_PATH=/tmp/cephtest/binary/usr/local/lib',
+            '/tmp/cephtest/enable-coredump',
+            '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
+            '/tmp/cephtest/archive/coverage',
+            '/tmp/cephtest/binary/usr/local/bin/ceph',
+            '-k', '/tmp/cephtest/ceph.keyring',
+            '-c', '/tmp/cephtest/ceph.conf',
+            '--admin-daemon',
+            '/tmp/cephtest/asok.osd.%s' % (str(osdnum),)]
+        args.extend(command)
+        return remote.run(
+            args=args,
+            stdout=StringIO(),
+            wait=True,
+            )
+
     def get_pg_primary(self, pool, pgnum):
         """
         get primary for pool, pgnum (e.g. (data, 0)->0
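
Note for reviewers on the two new CephManager helpers above: do_rados() runs
the rados CLI on an arbitrary remote, while osd_admin_socket() locates the
host carrying a given OSD and issues a command over that daemon's admin
socket. A minimal usage sketch, not part of the patch: it assumes `manager`
is a CephManager bound to a running cluster and `mon` is the mon remote, as
in scrub_test.py below; the function name and the object 'foo' are
hypothetical.

    # Sketch only: `manager` and `mon` come from task setup as in
    # scrub_test.py; 'foo' is a hypothetical object in the 'rbd' pool.
    def make_omap_inconsistency(manager, mon, osdnum):
        # Set an omap key through the normal client path ...
        manager.do_rados(mon, ['-p', 'rbd', 'setomapval', 'foo', 'key', 'val'])
        # ... then alter the same object's omap behind the cluster's back on
        # one OSD, so the next deep-scrub flags the PG inconsistent.
        manager.osd_admin_socket(osdnum,
                                 ['setomapval', 'rbd', 'foo', 'key', 'badval'])
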
diff --git a/teuthology/task/scrub_test.py b/teuthology/task/scrub_test.py
index 1c80174de1..3375c56f5d 100644
--- a/teuthology/task/scrub_test.py
+++ b/teuthology/task/scrub_test.py
@@ -9,23 +9,6 @@ from teuthology import misc as teuthology
 
 log = logging.getLogger(__name__)
 
-def rados_start(remote, cmd):
-    log.info("rados %s" % ' '.join(cmd))
-    pre = [
-        'LD_LIBRARY_PATH=/tmp/cephtest/binary/usr/local/lib',
-        '/tmp/cephtest/enable-coredump',
-        '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
-        '/tmp/cephtest/archive/coverage',
-        '/tmp/cephtest/binary/usr/local/bin/rados',
-        '-c', '/tmp/cephtest/ceph.conf',
-        ];
-    pre.extend(cmd)
-    proc = remote.run(
-        args=pre,
-        wait=True,
-        )
-    return proc
-
 def task(ctx, config):
     """
     Test [deep] scrub
@@ -54,7 +37,7 @@ def task(ctx, config):
     manager.wait_for_clean()
 
     # write some data
-    p = rados_start(mon, ['-p', 'rbd', 'bench', '1', 'write', '-b', '4096'])
+    p = manager.do_rados(mon, ['-p', 'rbd', 'bench', '--no-cleanup', '1', 'write', '-b', '4096'])
     err = p.exitstatus
     log.info('err is %d' % err)
 
@@ -75,7 +58,6 @@ def task(ctx, config):
 
     log.info('messing with PG %s on osd %d' % (victim, osd))
 
-
     (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.iterkeys()
     data_path = os.path.join('/tmp/cephtest/data',
                              'osd.{id}.data'.format(id=osd),
@@ -83,7 +65,6 @@ def task(ctx, config):
                              '{pg}_head'.format(pg=victim)
                              )
 
-    # fuzz time
 
     ls_fp = StringIO()
     osd_remote.run(
@@ -94,33 +75,39 @@ def task(ctx, config):
     ls_out = ls_fp.getvalue()
     ls_fp.close()
     # find an object file we can mess with
-    file = None
+    osdfilename = None
     for line in ls_out.split('\n'):
-        if line.find('object'):
-            file = line
+        if 'object' in line:
+            osdfilename = line
             break
-    assert file is not None
+    assert osdfilename is not None
 
-    log.info('fuzzing %s' % file)
+    # Get actual object name from osd stored filename
+    tmp = osdfilename.split('__')
+    objname = tmp[0]
+    objname = objname.replace('\u', '_')
+    log.info('fuzzing %s' % objname)
 
     # put a single \0 at the beginning of the file
     osd_remote.run(
         args=[ 'dd',
                'if=/dev/zero',
-               'of=%s' % os.path.join(data_path, file),
+               'of=%s' % os.path.join(data_path, osdfilename),
                'bs=1', 'count=1', 'conv=notrunc'
              ]
        )
 
     # scrub, verify inconsistent
     manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
+    # Give deep-scrub a chance to start
+    time.sleep(60)
 
     while True:
         stats = manager.get_single_pg_stats(victim)
         state = stats['state']
 
         # wait for the scrub to finish
-        if state.find('scrubbing'):
+        if 'scrubbing' in state:
             time.sleep(3)
             continue
 
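
Note for reviewers: the str.find() replacements in the hunks above and below
fix a real logic bug, not just style. find() returns an offset, so used as a
truth value it is False for a match at position 0 and True when the substring
is absent (-1 is truthy), the opposite of what the loops intend. A standalone
illustration, not part of the patch:

    # str.find() returns an offset (or -1), so using it as a boolean
    # inverts the test the scrub loops mean to make.
    state = 'active+scrubbing+deep'
    assert state.find('scrubbing') == 7             # hit: a plain offset
    assert 'active+clean'.find('scrubbing') == -1   # miss: -1, truthy as bool
    assert 'scrubbing' in state                     # the intended membership test
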
@@ -131,13 +118,61 @@ def task(ctx, config):
 
     # repair, verify no longer inconsistent
     manager.raw_cluster_cmd('pg', 'repair', victim)
+    # Give repair a chance to start
+    time.sleep(60)
+
+    while True:
+        stats = manager.get_single_pg_stats(victim)
+        state = stats['state']
+
+        # wait for the scrub to finish
+        if 'scrubbing' in state:
+            time.sleep(3)
+            continue
+
+        inconsistent = stats['state'].find('+inconsistent') != -1
+        assert not inconsistent
+        break
+
+    # Test deep-scrub with various omap modifications
+    manager.do_rados(mon, ['-p', 'rbd', 'setomapval', objname, 'key', 'val'])
+    manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', objname, 'hdr'])
+
+    # Modify omap on specific osd
+    log.info('fuzzing omap of %s' % objname)
+    manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'key'])
+    manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname, 'badkey', 'badval'])
+    manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'badhdr'])
+
+    # scrub, verify inconsistent
+    manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
+    # Give deep-scrub a chance to start
+    time.sleep(60)
+
+    while True:
+        stats = manager.get_single_pg_stats(victim)
+        state = stats['state']
+
+        # wait for the scrub to finish
+        if 'scrubbing' in state:
+            time.sleep(3)
+            continue
+
+        inconsistent = stats['state'].find('+inconsistent') != -1
+        assert inconsistent
+        break
+
+    # repair, verify no longer inconsistent
+    manager.raw_cluster_cmd('pg', 'repair', victim)
+    # Give repair a chance to start
+    time.sleep(60)
 
     while True:
         stats = manager.get_single_pg_stats(victim)
         state = stats['state']
 
         # wait for the scrub to finish
-        if state.find('scrubbing'):
+        if 'scrubbing' in state:
             time.sleep(3)
             continue
 
-- 
2.39.5
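
A possible follow-up, outside this patch: the fixed time.sleep(60) waits and
the four copies of the polling loop could collapse into one helper with an
explicit timeout. A sketch under that assumption; the helper name and the
timeout parameter are hypothetical, and only CephManager's
get_single_pg_stats() is relied on:

    import time

    def wait_for_scrub(manager, victim, want_inconsistent, timeout=300):
        # Poll the PG until scrubbing ends, then check the inconsistent flag.
        deadline = time.time() + timeout
        while time.time() < deadline:
            state = manager.get_single_pg_stats(victim)['state']
            if 'scrubbing' in state:
                time.sleep(3)
                continue
            assert ('+inconsistent' in state) == want_inconsistent
            return
        raise RuntimeError('timed out waiting for scrub of pg %s' % victim)
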