]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
task: verify scrub detects files whose contents changed
author Mike Ryan <mike.ryan@inktank.com>
Thu, 2 Aug 2012 17:58:08 +0000 (10:58 -0700)
committer Mike Ryan <mike.ryan@inktank.com>
Thu, 2 Aug 2012 18:14:51 +0000 (11:14 -0700)
Signed-off-by: Mike Ryan <mike.ryan@inktank.com>
teuthology/task/ceph_manager.py
teuthology/task/scrub_test.py [new file with mode: 0644]

index 62d6d2139c3f6d0aaf03380f16b4170066bd9757..c7697d423268ff9b4dcb39816c457ae2ff54788f 100644 (file)
@@ -221,6 +221,15 @@ class CephManager:
         j = json.loads('\n'.join(out.split('\n')[1:]))
         return j['pg_stats']
 
+    def get_single_pg_stats(self, pgid):
+        """
+        Look up the stats entry of one placement group.
+
+        :param pgid: value compared against the 'pgid' key of each entry
+                     returned by get_pg_stats()
+        :returns: the first matching entry, or None when nothing matches
+        """
+        matches = (
+            entry for entry in self.get_pg_stats()
+            if entry['pgid'] == pgid
+            )
+        # next() with a default yields None when the generator is empty
+        return next(matches, None)
+
     def get_osd_dump(self):
         out = self.raw_cluster_cmd('--', 'osd','dump','--format=json')
         j = json.loads('\n'.join(out.split('\n')[1:]))
diff --git a/teuthology/task/scrub_test.py b/teuthology/task/scrub_test.py
new file mode 100644 (file)
index 0000000..1c80174
--- /dev/null
@@ -0,0 +1,148 @@
+from cStringIO import StringIO
+
+import logging
+import os
+import time
+
+import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def rados_start(remote, cmd):
+    """
+    Run the rados tool from the test install on a remote.
+
+    The given arguments are appended to a fixed prefix (library path,
+    coredump and coverage wrappers, the rados binary and its config file)
+    and the result is handed to ``remote.run`` with ``wait=True``.
+
+    :param remote: object whose ``run`` method executes the command
+    :param cmd: sequence of argument strings placed after the rados prefix
+    :returns: whatever ``remote.run`` returns
+    """
+    joined = ' '.join(cmd)
+    log.info("rados %s" % joined)
+    rados_prefix = [
+        'LD_LIBRARY_PATH=/tmp/cephtest/binary/usr/local/lib',
+        '/tmp/cephtest/enable-coredump',
+        '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
+        '/tmp/cephtest/archive/coverage',
+        '/tmp/cephtest/binary/usr/local/bin/rados',
+        '-c', '/tmp/cephtest/ceph.conf',
+        ]
+    return remote.run(
+        args=rados_prefix + list(cmd),
+        wait=True,
+        )
+
+def _find_object_file(listing):
+    """
+    Return the first entry of an ``ls`` listing whose name contains 'object'.
+
+    :param listing: newline-separated output of ``ls``
+    :returns: the matching file name, or None if no entry matches
+    """
+    for name in listing.split('\n'):
+        # str.find() returns -1 (truthy) on a miss and 0 (falsy) on a match
+        # at the start of the string, so a membership test is required here.
+        if 'object' in name:
+            return name
+    return None
+
+
+def _wait_for_scrub(manager, pgid):
+    """
+    Poll a PG's stats until its state no longer contains 'scrubbing'.
+
+    :param manager: CephManager used to query the PG stats
+    :param pgid: id of the PG to watch
+    :returns: the state string seen once 'scrubbing' is not reported
+    """
+    # NOTE(review): if the first poll happens before the OSD has started the
+    # requested scrub, the state will not contain 'scrubbing' yet and this
+    # returns early -- confirm whether a start-up delay is needed.
+    while True:
+        stats = manager.get_single_pg_stats(pgid)
+        assert stats is not None, 'PG %s is missing from pg stats' % pgid
+        state = stats['state']
+        if 'scrubbing' not in state:
+            return state
+        time.sleep(3)
+
+
+def task(ctx, config):
+    """
+    Test [deep] scrub
+
+    Steps: wait for every OSD to be up and for wait_for_clean() to return,
+    write data with ``rados bench``, pick a PG that reports a non-zero byte
+    count, overwrite the first byte of one of its object files on the first
+    OSD of its acting set, then check that ``pg deep-scrub`` leaves the PG
+    '+inconsistent' and that ``pg repair`` clears that flag again.
+
+    :param ctx: teuthology run context; ``ctx.cluster`` is used to find the
+                monitor and OSD remotes
+    :param config: dict or None; only passed on to get_first_mon()
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'scrub_test task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+    log.info('num_osds is %s' % num_osds)
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    while len(manager.get_osd_status()['up']) < num_osds:
+        time.sleep(10)
+
+    for i in range(num_osds):
+        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'flush_pg_stats')
+    manager.wait_for_clean()
+
+    # write some data
+    p = rados_start(mon, ['-p', 'rbd', 'bench', '1', 'write', '-b', '4096'])
+    err = p.exitstatus
+    log.info('err is %d' % err)
+
+    # wait for some PG to have data that we can mess with
+    victim = None
+    osd = None
+    while victim is None:
+        stats = manager.get_pg_stats()
+        for pg in stats:
+            size = pg['stat_sum']['num_bytes']
+            if size > 0:
+                victim = pg['pgid']
+                osd = pg['acting'][0]
+                break
+
+        if victim is None:
+            time.sleep(3)
+
+    log.info('messing with PG %s on osd %d' % (victim, osd))
+
+    (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.iterkeys()
+    data_path = os.path.join('/tmp/cephtest/data',
+                             'osd.{id}.data'.format(id=osd),
+                             'current',
+                             '{pg}_head'.format(pg=victim)
+                            )
+
+    # fuzz time
+    ls_fp = StringIO()
+    osd_remote.run(
+        args=[ 'ls', data_path ],
+        stdout=ls_fp,
+    )
+    ls_out = ls_fp.getvalue()
+    ls_fp.close()
+
+    # find an object file we can mess with
+    victim_file = _find_object_file(ls_out)
+    assert victim_file is not None, \
+        'no object file found in %s' % data_path
+
+    log.info('fuzzing %s' % victim_file)
+
+    # put a single \0 at the beginning of the file
+    osd_remote.run(
+        args=[ 'dd',
+               'if=/dev/zero',
+               'of=%s' % os.path.join(data_path, victim_file),
+               'bs=1', 'count=1', 'conv=notrunc'
+             ]
+    )
+
+    # scrub, verify inconsistent
+    manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
+    state = _wait_for_scrub(manager, victim)
+    assert '+inconsistent' in state, \
+        'PG %s not inconsistent after deep-scrub: %s' % (victim, state)
+
+    # repair, verify no longer inconsistent
+    manager.raw_cluster_cmd('pg', 'repair', victim)
+    state = _wait_for_scrub(manager, victim)
+    assert '+inconsistent' not in state, \
+        'PG %s still inconsistent after repair: %s' % (victim, state)
+
+    log.info('test successful!')