task/peering_speed_test.py: add test which summarizes pg peering speed
author    Samuel Just <sam.just@inktank.com>
          Tue, 4 Jun 2013 21:12:07 +0000 (14:12 -0700)
committer Samuel Just <sam.just@inktank.com>
          Wed, 19 Jun 2013 22:16:28 +0000 (15:16 -0700)
Running this regularly may warn us about slow peering.

Signed-off-by: Samuel Just <sam.just@inktank.com>
teuthology/task/ceph_manager.py
teuthology/task/peering_speed_test.py [new file with mode: 0644]

diff --git a/teuthology/task/ceph_manager.py b/teuthology/task/ceph_manager.py
index bfd37d0d510adaffe36635af2a0252814b383a62..86924848369e45cb056ebc17e3d90285dab3c670 100644
@@ -268,8 +268,10 @@ class CephManager:
             self.log = tmp
         if self.config is None:
             self.config = dict()
+        pools = self.list_pools()
         self.pools = {}
-        self.pools['data'] = self.get_pool_property('data', 'pg_num')
+        for pool in pools:
+            self.pools[pool] = self.get_pool_property(pool, 'pg_num')
 
     def raw_cluster_cmd(self, *args):
         testdir = teuthology.get_testdir(self.ctx)
@@ -316,6 +318,18 @@ class CephManager:
             )
         return proc
 
+    def rados_write_objects(
+        self, pool, num_objects, size, timelimit, threads, cleanup=False):
+        args = [
+            '-p', pool,
+            '--num-objects', num_objects,
+            '-b', size,
+            'bench', timelimit,
+            'write'
+            ]
+        if not cleanup: args.append('--no-cleanup')
+        return self.do_rados(self.controller, map(str, args))
+
     def do_put(self, pool, obj, fname):
         return self.do_rados(
             self.controller,
@@ -407,6 +421,21 @@ class CephManager:
                 return int(i['pool'])
         assert False
 
+    def list_pools(self):
+        """
+        list all pool names
+        """
+        out = self.raw_cluster_cmd('osd', 'dump', '--format=json')
+        j = json.loads('\n'.join(out.split('\n')[1:]))
+        self.log(j['pools'])
+        return [str(i['pool_name']) for i in j['pools']]
+
+    def clear_pools(self):
+        """
+        remove all pools
+        """
+        [self.remove_pool(i) for i in self.list_pools()]
+
     def kick_recovery_wq(self, osdnum):
         return self.raw_cluster_cmd(
             'tell', "osd.%d" % (int(osdnum),),
diff --git a/teuthology/task/peering_speed_test.py b/teuthology/task/peering_speed_test.py
new file mode 100644
index 0000000..d70b5a4
--- /dev/null
@@ -0,0 +1,84 @@
+import logging
+import time
+from teuthology import misc as teuthology
+import ceph_manager
+
+log = logging.getLogger(__name__)
+
+from args import argify
+
+POOLNAME = "POOLNAME"
+ARGS = [
+    ('num_pgs', 'number of pgs to create', 256, int),
+    ('max_time', 'seconds to complete peering', 0, int),
+    ('runs', 'trials to run', 10, int),
+    ('num_objects', 'objects to create', 256 * 1024, int),
+    ('object_size', 'size in bytes for objects', 64, int),
+    ('creation_time_limit', 'time limit for pool population', 60*60, int),
+    ('create_threads', 'concurrent writes for create', 256, int)
+    ]
+
+def setup(ctx, config):
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+    ctx.manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+    ctx.manager.clear_pools()
+    ctx.manager.create_pool(POOLNAME, config.num_pgs)
+    log.info("populating pool")
+    ctx.manager.rados_write_objects(
+        POOLNAME,
+        config.num_objects,
+        config.object_size,
+        config.creation_time_limit,
+        config.create_threads)
+    log.info("done populating pool")
+
+def do_run(ctx, config):
+    start = time.time()
+    # mark in osd
+    ctx.manager.mark_in_osd(0)
+    log.info("writing out objects")
+    ctx.manager.rados_write_objects(
+        POOLNAME,
+        config.num_pgs, # write 1 object per pg or so
+        1,
+        config.creation_time_limit,
+        config.num_pgs, # lots of concurrency
+        cleanup=True)
+    peering_end = time.time()
+
+    log.info("peering done, waiting on recovery")
+    ctx.manager.wait_for_clean()
+
+    log.info("recovery done")
+    recovery_end = time.time()
+    if config.max_time:
+        assert peering_end - start < config.max_time
+    ctx.manager.mark_out_osd(0)
+    ctx.manager.wait_for_clean()
+    return {
+        'time_to_active': peering_end - start,
+        'time_to_clean': recovery_end - start
+        }
+
+@argify("peering_speed_test", ARGS)
+def task(ctx, config):
+    """
+    Peering speed test
+    """
+    setup(ctx, config)
+    ctx.manager.mark_out_osd(0)
+    ctx.manager.wait_for_clean()
+    ret = []
+    for i in range(config.runs):
+        log.info("Run {i}".format(i=i))
+        ret.append(do_run(ctx, config))
+
+    ctx.manager.mark_in_osd(0)
+    ctx.summary['recovery_times'] = {
+        'runs': ret
+        }
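Each run records how long writes took to succeed after osd.0 was marked back in (time_to_active, effectively the peering time) and how long until the cluster was clean again (time_to_clean), storing one dict per run under ctx.summary['recovery_times']['runs']. A minimal sketch of how a regularly scheduled consumer could read that summary to flag slow peering; the dict literal is a hypothetical example of the shape the task produces, with placeholder timings rather than real measurements:

    # Hypothetical summary shaped like this task's output; timings are placeholders.
    summary = {
        'recovery_times': {
            'runs': [
                {'time_to_active': 3.2, 'time_to_clean': 21.7},
                {'time_to_active': 2.9, 'time_to_clean': 19.4},
            ],
        },
    }

    runs = summary['recovery_times']['runs']
    worst_peering = max(r['time_to_active'] for r in runs)
    worst_recovery = max(r['time_to_clean'] for r in runs)
    # A monitoring job could alert when worst_peering creeps upward; within the
    # job itself, the max_time assertion in do_run() already enforces a bound.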