git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
osd: Testing of deep-scrub omap changes
author David Zafman <david.zafman@inktank.com>
Sat, 19 Jan 2013 01:11:09 +0000 (17:11 -0800)
committer David Zafman <david.zafman@inktank.com>
Tue, 22 Jan 2013 23:48:45 +0000 (15:48 -0800)
Fix scrub_test.py and add omap corruption test

Signed-off-by: David Zafman <david.zafman@inktank.com>
Reviewed-by: Samuel Just <sam.just@inktank.com>
teuthology/task/ceph_manager.py
teuthology/task/scrub_test.py

index e0bff247b3eac1bcf7befcbfebc58e23424c7cd8..5748eba801dd79228472bd6ef968b77db9e19abb 100644 (file)
@@ -208,6 +208,46 @@ class CephManager:
             )
         return proc.stdout.getvalue()
 
+    def do_rados(self, remote, cmd):
+        """Run the rados CLI on remote with cmd appended; return remote.run()'s result."""
+        rados_args = [
+            'LD_LIBRARY_PATH=/tmp/cephtest/binary/usr/local/lib',
+            '/tmp/cephtest/enable-coredump',
+            '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
+            '/tmp/cephtest/archive/coverage',
+            '/tmp/cephtest/binary/usr/local/bin/rados',
+            '-c', '/tmp/cephtest/ceph.conf',
+            ]
+        rados_args.extend(cmd)
+        return remote.run(
+            args=rados_args,
+            wait=True,
+            )
+
+    def osd_admin_socket(self, osdnum, command):
+        """Run command against osd.<osdnum>'s admin socket; stdout is captured in a StringIO."""
+        # Locate the remote whose 'osd' roles include osdnum.
+        remote = None
+        for _remote, roles_for_host in self.ctx.cluster.remotes.iteritems():
+            for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
+                if int(id_) == osdnum:
+                    remote = _remote
+        assert remote is not None
+        args = [
+            # LD_LIBRARY_PATH, matching do_rados; LD_LIBRARY_PRELOAD is not a loader variable.
+            'LD_LIBRARY_PATH=/tmp/cephtest/binary/usr/local/lib',
+            '/tmp/cephtest/enable-coredump',
+            '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
+            '/tmp/cephtest/archive/coverage',
+            '/tmp/cephtest/binary/usr/local/bin/ceph',
+            '-k', '/tmp/cephtest/ceph.keyring',
+            '-c', '/tmp/cephtest/ceph.conf',
+            '--admin-daemon',
+            "/tmp/cephtest/asok.osd.%s" % (str(osdnum),),
+            ]
+        args.extend(command)
+        return remote.run(args=args, stdout=StringIO(), wait=True)
+
     def get_pg_primary(self, pool, pgnum):
         """
         get primary for pool, pgnum (e.g. (data, 0)->0
index 1c80174de17253a360b1dbd13e4d717ec58019bc..3375c56f5d5f5ae810dc3266c1bc699e342b9d9b 100644 (file)
@@ -9,23 +9,6 @@ from teuthology import misc as teuthology
 
 log = logging.getLogger(__name__)
 
-def rados_start(remote, cmd):
-    log.info("rados %s" % ' '.join(cmd))
-    pre = [
-        'LD_LIBRARY_PATH=/tmp/cephtest/binary/usr/local/lib',
-        '/tmp/cephtest/enable-coredump',
-        '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
-        '/tmp/cephtest/archive/coverage',
-        '/tmp/cephtest/binary/usr/local/bin/rados',
-        '-c', '/tmp/cephtest/ceph.conf',
-        ];
-    pre.extend(cmd)
-    proc = remote.run(
-        args=pre,
-        wait=True,
-        )
-    return proc
-
 def task(ctx, config):
     """
     Test [deep] scrub
@@ -54,7 +37,7 @@ def task(ctx, config):
     manager.wait_for_clean()
 
     # write some data
-    p = rados_start(mon, ['-p', 'rbd', 'bench', '1', 'write', '-b', '4096'])
+    p = manager.do_rados(mon, ['-p', 'rbd', 'bench', '--no-cleanup', '1', 'write', '-b', '4096'])
     err = p.exitstatus
     log.info('err is %d' % err)
 
@@ -75,7 +58,6 @@ def task(ctx, config):
 
     log.info('messing with PG %s on osd %d' % (victim, osd))
 
-
     (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.iterkeys()
     data_path = os.path.join('/tmp/cephtest/data',
                              'osd.{id}.data'.format(id=osd),
@@ -83,7 +65,6 @@ def task(ctx, config):
                              '{pg}_head'.format(pg=victim)
                             )
 
-
     # fuzz time
     ls_fp = StringIO()
     osd_remote.run(
@@ -94,33 +75,39 @@ def task(ctx, config):
     ls_fp.close()
 
     # find an object file we can mess with
-    file = None
+    osdfilename = None
     for line in ls_out.split('\n'):
-        if line.find('object'):
-            file = line
+        if 'object' in line:
+            osdfilename = line
             break
-    assert file is not None
+    assert osdfilename is not None
 
-    log.info('fuzzing %s' % file)
+    # Get actual object name from osd stored filename
+    tmp=osdfilename.split('__')
+    objname=tmp[0]
+    objname=objname.replace('\u', '_')
+    log.info('fuzzing %s' % objname)
 
     # put a single \0 at the beginning of the file
     osd_remote.run(
         args=[ 'dd',
                'if=/dev/zero',
-               'of=%s' % os.path.join(data_path, file),
+               'of=%s' % os.path.join(data_path, osdfilename),
                'bs=1', 'count=1', 'conv=notrunc'
              ]
     )
 
     # scrub, verify inconsistent
     manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
+    # Give deep-scrub a chance to start
+    time.sleep(60)
 
     while True:
         stats = manager.get_single_pg_stats(victim)
         state = stats['state']
 
         # wait for the scrub to finish
-        if state.find('scrubbing'):
+        if 'scrubbing' in state:
             time.sleep(3)
             continue
 
@@ -131,13 +118,61 @@ def task(ctx, config):
 
     # repair, verify no longer inconsistent
     manager.raw_cluster_cmd('pg', 'repair', victim)
+    # Give repair a chance to start
+    time.sleep(60)
+
+    while True:
+        stats = manager.get_single_pg_stats(victim)
+        state = stats['state']
+
+        # wait for the scrub to finish
+        if 'scrubbing' in state:
+            time.sleep(3)
+            continue
+
+        inconsistent = stats['state'].find('+inconsistent') != -1
+        assert not inconsistent
+        break
+
+    # Test deep-scrub with various omap modifications
+    manager.do_rados(mon, ['-p', 'rbd', 'setomapval', objname, 'key', 'val'])
+    manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', objname, 'hdr'])
+
+    # Modify omap on specific osd
+    log.info('fuzzing omap of %s' % objname)
+    manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'key']);
+    manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname, 'badkey', 'badval']);
+    manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'badhdr']);
+
+    # scrub, verify inconsistent
+    manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
+    # Give deep-scrub a chance to start
+    time.sleep(60)
+
+    while True:
+        stats = manager.get_single_pg_stats(victim)
+        state = stats['state']
+
+        # wait for the scrub to finish
+        if 'scrubbing' in state:
+            time.sleep(3)
+            continue
+
+        inconsistent = stats['state'].find('+inconsistent') != -1
+        assert inconsistent
+        break
+
+    # repair, verify no longer inconsistent
+    manager.raw_cluster_cmd('pg', 'repair', victim)
+    # Give repair a chance to start
+    time.sleep(60)
 
     while True:
         stats = manager.get_single_pg_stats(victim)
         state = stats['state']
 
         # wait for the scrub to finish
-        if state.find('scrubbing'):
+        if 'scrubbing' in state:
             time.sleep(3)
             continue