def rados_write_objects(
        self, pool, num_objects, size, timelimit, threads, cleanup=False):
    """
    Write objects into a pool with ``rados bench``.

    :param pool: pool name to write into
    :param num_objects: number of objects to create
    :param size: object size in bytes (rados bench ``-b``)
    :param timelimit: seconds allowed for the bench run
    :param threads: number of concurrent bench writers (rados bench ``-t``)
    :param cleanup: if False (default), pass ``--no-cleanup`` so the
                    written objects are left in the pool
    :returns: the completed remote process from do_rados
    """
    args = [
        '-p', pool,
        '--num-objects', num_objects,
        '-b', size,
        # bug fix: the threads argument was previously accepted but never
        # passed through; without -t, rados bench uses its default
        # concurrency regardless of what the caller requested
        '-t', threads,
        'bench', timelimit,
        'write',
        ]
    if not cleanup:
        args.append('--no-cleanup')
    # materialize the string list explicitly (map() is lazy on python 3)
    return self.do_rados(self.controller, [str(a) for a in args])

def list_pools(self):
    """
    List all pool names in the cluster.

    :returns: list of pool name strings
    """
    out = self.raw_cluster_cmd('osd', 'dump', '--format=json')
    # the first line of output is a status line; the JSON body follows
    j = json.loads('\n'.join(out.split('\n')[1:]))
    self.log(j['pools'])
    return [str(i['pool_name']) for i in j['pools']]

def clear_pools(self):
    """
    Remove all pools in the cluster.
    """
    # plain loop instead of a side-effect-only list comprehension
    for pool in self.list_pools():
        self.remove_pool(pool)
import logging
import time

from teuthology import misc as teuthology
import ceph_manager
# moved to the top of the module with the other imports (was mid-file)
from args import argify

log = logging.getLogger(__name__)

POOLNAME = "POOLNAME"
ARGS = [
    ('num_pgs', 'number of pgs to create', 256, int),
    ('max_time', 'seconds to complete peering', 0, int),
    ('runs', 'trials to run', 10, int),
    ('num_objects', 'objects to create', 256 * 1024, int),
    ('object_size', 'size in bytes for objects', 64, int),
    ('creation_time_limit', 'time limit for pool population', 60 * 60, int),
    ('create_threads', 'concurrent writes for create', 256, int),
    ]


def setup(ctx, config):
    """
    Prepare the cluster for the peering trials: attach a CephManager to
    ctx, remove every existing pool, create the test pool with the
    configured pg count, and populate it with objects.
    """
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    ctx.manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )
    ctx.manager.clear_pools()
    ctx.manager.create_pool(POOLNAME, config.num_pgs)
    log.info("populating pool")
    ctx.manager.rados_write_objects(
        POOLNAME,
        config.num_objects,
        config.object_size,
        config.creation_time_limit,
        config.create_threads)
    log.info("done populating pool")


def do_run(ctx, config):
    """
    Run one peering-speed trial.

    Marks osd.0 in, then times how long it takes until writes to every pg
    succeed (a proxy for peering completion) and until the cluster reports
    clean.  osd.0 is marked back out before returning so the next trial
    starts from the same state.

    :returns: dict with 'time_to_active' and 'time_to_clean' in seconds
    :raises AssertionError: if config.max_time is set and peering took
                            longer than that many seconds
    """
    start = time.time()
    ctx.manager.mark_in_osd(0)
    log.info("writing out objects")
    ctx.manager.rados_write_objects(
        POOLNAME,
        config.num_pgs,  # write about one object per pg
        1,
        config.creation_time_limit,
        config.num_pgs,  # lots of concurrency
        cleanup=True)
    peering_end = time.time()

    log.info("peering done, waiting on recovery")
    ctx.manager.wait_for_clean()

    log.info("recovery done")
    recovery_end = time.time()

    # Restore the cluster state (osd.0 back out, cluster clean) BEFORE
    # enforcing the time limit, so a failed trial does not leave osd.0
    # marked in for the remaining runs.  Previously the assert fired
    # before mark_out_osd, corrupting the state of every later trial.
    ctx.manager.mark_out_osd(0)
    ctx.manager.wait_for_clean()
    if config.max_time:
        assert peering_end - start < config.max_time, \
            "peering took %.2fs, limit is %ds" % (
                peering_end - start, config.max_time)
    return {
        'time_to_active': peering_end - start,
        'time_to_clean': recovery_end - start,
        }


@argify("peering_speed_test", ARGS)
def task(ctx, config):
    """
    Summarize pg peering speed over several trials and record the
    per-run timings in ctx.summary['recovery_times'].
    """
    setup(ctx, config)
    # baseline state for the trials: osd.0 out, cluster clean
    ctx.manager.mark_out_osd(0)
    ctx.manager.wait_for_clean()
    ret = []
    for i in range(config.runs):
        log.info("Run {i}".format(i=i))
        ret.append(do_run(ctx, config))

    ctx.manager.mark_in_osd(0)
    ctx.summary['recovery_times'] = {
        'runs': ret,
        }