From: David Zafman Date: Tue, 13 Jun 2017 22:17:57 +0000 (-0700) Subject: test: Add two new singleton test yamls random-eio and thrash-eio X-Git-Tag: ses5-milestone8~1^2~19^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=33edfe3a0f3bb6b4d58546603b4c8f2cbf1b80d6;p=ceph.git test: Add two new singleton test yamls random-eio and thrash-eio New option "random_eio" to Thrasher, sets the random read error rate on 1 osd New option "objectsize" to radosbench task (-o bench option) New option "type" to radosbench to specify write, seq or rand Signed-off-by: David Zafman --- diff --git a/qa/suites/rados/singleton/all/random-eio.yaml b/qa/suites/rados/singleton/all/random-eio.yaml new file mode 100644 index 000000000000..954730b11296 --- /dev/null +++ b/qa/suites/rados/singleton/all/random-eio.yaml @@ -0,0 +1,39 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + log-whitelist: + - missing primary copy of + - objects unfound and apparently lost +- full_sequential: + - exec: + client.0: + - sudo ceph tell osd.1 injectargs -- --filestore_debug_random_read_err=0.33 + - sudo ceph tell osd.1 injectargs -- --bluestore_debug_random_read_err=0.33 + - sudo ceph osd pool create test 16 16 + - sudo ceph osd pool set test size 3 + - sudo ceph pg dump pgs --format=json-pretty + - radosbench: + clients: [client.0] + time: 360 + type: rand + objectsize: 1048576 + pool: test + create_pool: false + - exec: + client.0: + - sudo ceph tell osd.1 injectargs -- --filestore_debug_random_read_err=0.0 + - sudo ceph tell osd.1 injectargs -- --bluestore_debug_random_read_err=0.0 diff --git a/qa/suites/rados/singleton/all/thrash-eio.yaml b/qa/suites/rados/singleton/all/thrash-eio.yaml new file mode 100644 index 000000000000..a70636549cd5 --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash-eio.yaml @@ -0,0 +1,38 @@ 
+roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +override: + ceph: + conf: + mon: + osd default pool size: 3 +tasks: +- install: +- ceph: + log-whitelist: + - wrongly marked me down + - missing primary copy of + - objects unfound and apparently lost +- thrashosds: + op_delay: 30 + clean_interval: 120 + chance_down: .5 + random_eio: .33 + min_live: 5 + min_in: 5 +- radosbench: + clients: [client.0] + time: 720 + type: rand + objectsize: 1048576 diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py index 465da73d2358..a8082a1cbb48 100644 --- a/qa/tasks/ceph_manager.py +++ b/qa/tasks/ceph_manager.py @@ -125,6 +125,7 @@ class Thrasher: self.chance_thrash_cluster_full = self.config.get('chance_thrash_cluster_full', .05) self.chance_thrash_pg_upmap = self.config.get('chance_thrash_pg_upmap', 1.0) self.chance_thrash_pg_upmap_items = self.config.get('chance_thrash_pg_upmap', 1.0) + self.random_eio = self.config.get('random_eio') num_osds = self.in_osds + self.out_osds self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * num_osds @@ -435,6 +436,12 @@ class Thrasher: skip_admin_check=skip_admin_check) self.dead_osds.remove(osd) self.live_osds.append(osd) + if self.random_eio > 0 and osd is self.rerrosd: + self.ceph_manager.raw_cluster_cmd('tell', 'osd.'+str(self.rerrosd), + 'injectargs', '--', '--filestore_debug_random_read_err='+str(self.random_eio)) + self.ceph_manager.raw_cluster_cmd('tell', 'osd.'+str(self.rerrosd), + 'injectargs', '--', '--bluestore_debug_random_read_err='+str(self.random_eio)) + def out_osd(self, osd=None): """ @@ -955,6 +962,12 @@ class Thrasher: scrubint = self.config.get("scrub_interval", -1) maxdead = self.config.get("max_dead", 0) delay = self.config.get("op_delay", 5) + self.rerrosd = self.live_osds[0] + if self.random_eio > 0: + self.ceph_manager.raw_cluster_cmd('tell', 
'osd.'+str(self.rerrosd), + 'injectargs', '--', '--filestore_debug_random_read_err='+str(self.random_eio)) + self.ceph_manager.raw_cluster_cmd('tell', 'osd.'+str(self.rerrosd), + 'injectargs', '--', '--bluestore_debug_random_read_err='+str(self.random_eio)) self.log("starting do_thrash") while not self.stopping: to_log = [str(x) for x in ["in_osds: ", self.in_osds, @@ -982,6 +995,11 @@ class Thrasher: Scrubber(self.ceph_manager, self.config) self.choose_action()() time.sleep(delay) + if self.random_eio > 0: + self.ceph_manager.raw_cluster_cmd('tell', 'osd.'+str(self.rerrosd), + 'injectargs', '--', '--filestore_debug_random_read_err=0.0') + self.ceph_manager.raw_cluster_cmd('tell', 'osd.'+str(self.rerrosd), + 'injectargs', '--', '--bluestore_debug_random_read_err=0.0') for pool in list(self.pools_to_fix_pgp_num): if self.ceph_manager.get_pool_pg_num(pool) > 0: self.fix_pgp_num(pool) diff --git a/qa/tasks/radosbench.py b/qa/tasks/radosbench.py index 3db57af83f81..af21c866ec5a 100644 --- a/qa/tasks/radosbench.py +++ b/qa/tasks/radosbench.py @@ -21,15 +21,17 @@ def task(ctx, config): time: pool: size: write size to use + objectsize: object size to use unique_pool: use a unique pool, defaults to False ec_pool: create an ec pool, defaults to False - create_pool: create pool, defaults to False + create_pool: create pool, defaults to True erasure_code_profile: name: teuthologyprofile k: 2 m: 1 ruleset-failure-domain: osd cleanup: false (defaults to true) + type: (defaults to write) example: tasks: @@ -46,6 +48,7 @@ def task(ctx, config): testdir = teuthology.get_testdir(ctx) manager = ctx.managers['ceph'] + runtype = config.get('type', 'write') create_pool = config.get('create_pool', True) for role in config.get('clients', ['client.0']): @@ -73,6 +76,34 @@ def task(ctx, config): else: pool = manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name) + osize = config.get('objectsize', 0) + if osize is 0: + objectsize = [] + else: + objectsize = ['-o', 
str(osize)] + size = ['-b', str(config.get('size', 4<<20))] + # If doing a reading run then populate data + if runtype != "write": + proc = remote.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', role] + + size + objectsize + + ['-p' , pool, + 'bench', str(60), "write", "--no-cleanup" + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id=id_)), + wait=True + ) + size = [] + objectsize = [] + proc = remote.run( args=[ "/bin/sh", "-c", @@ -81,10 +112,10 @@ def task(ctx, config): '{tdir}/archive/coverage', 'rados', '--no-log-to-stderr', - '--name', role, - '-b', str(config.get('size', 4<<20)), - '-p' , pool, - 'bench', str(config.get('time', 360)), 'write', + '--name', role] + + size + objectsize + + ['-p' , pool, + 'bench', str(config.get('time', 360)), runtype, ] + cleanup).format(tdir=testdir), ], logger=log.getChild('radosbench.{id}'.format(id=id_)), diff --git a/qa/tasks/thrashosds.py b/qa/tasks/thrashosds.py index 2cd98145e05a..8e09dd6a20b8 100644 --- a/qa/tasks/thrashosds.py +++ b/qa/tasks/thrashosds.py @@ -149,6 +149,8 @@ def task(ctx, config): config['dump_ops_enable'] = config.get('dump_ops_enable', "true") # add default value for noscrub_toggle_delay config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0) + # add default value for random_eio + config['random_eio'] = config.get('random_eio', 0.0) log.info("config is {config}".format(config=str(config)))