From 566ae5332e75f742e4748953b33f5fa217c47cd0 Mon Sep 17 00:00:00 2001
From: Samuel Just
Date: Wed, 23 Jan 2013 16:13:22 -0800
Subject: [PATCH] ceph_manager: add filestore and heartbeat stalls

Signed-off-by: Samuel Just
---
 teuthology/task/ceph_manager.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 teuthology/task/thrashosds.py   |  6 ++++++
 2 files changed, 54 insertions(+)

diff --git a/teuthology/task/ceph_manager.py b/teuthology/task/ceph_manager.py
index 5748eba801..49f82f07b4 100644
--- a/teuthology/task/ceph_manager.py
+++ b/teuthology/task/ceph_manager.py
@@ -124,6 +124,39 @@ class Thrasher:
             timeout=self.config.get('timeout')
             )
 
+    def inject_pause(self, conf_key, duration, check_after, should_be_down):
+        """
+        Set conf_key to duration on one randomly chosen in osd.
+
+        :param conf_key: name of the pause injection config option
+        :param duration: value to set; also the pause length in seconds
+        :param check_after: seconds to wait before checking for down
+        :param should_be_down: if false, return right after injecting;
+            if true, assert the osd is down after check_after and is
+            no longer down 20 seconds after the pause was due to end
+        """
+        the_one = random.choice(self.in_osds)
+        self.log("inject_pause on {osd}".format(osd = the_one))
+        self.log(
+            "Testing {key} pause injection for duration {duration}".format(
+                key = conf_key,
+                duration = duration
+                ))
+        self.log(
+            "Checking after {after}, should_be_down={shouldbedown}".format(
+                after = check_after,
+                shouldbedown = should_be_down
+                ))
+        self.ceph_manager.set_config(the_one, **{conf_key:duration})
+        if not should_be_down:
+            return
+        time.sleep(check_after)
+        status = self.ceph_manager.get_osd_status()
+        assert the_one in status['down']
+        time.sleep(duration - check_after + 20)
+        status = self.ceph_manager.get_osd_status()
+        assert the_one not in status['down']
+
     def choose_action(self):
         chance_down = self.config.get('chance_down', 0)
         chance_test_min_size = self.config.get('chance_test_min_size', 0)
@@ -148,6 +181,21 @@ class Thrasher:
         actions.append((self.grow_pool, self.config.get('chance_pgnum_grow', 0),))
         actions.append((self.fix_pgp_num, self.config.get('chance_pgpnum_fix', 0),))
         actions.append((self.test_pool_min_size, chance_test_min_size,))
+        for key in ['heartbeat_inject_failure', 'filestore_inject_stall']:
+            # Bind key as a default argument; a plain closure would make
+            # every action use the last key once this loop has finished.
+            for scenario in [
+                (lambda key=key: self.inject_pause(key,
+                                                   self.config.get('pause_short', 3),
+                                                   0,
+                                                   False),
+                 self.config.get('chance_inject_pause_short', 1),),
+                (lambda key=key: self.inject_pause(key,
+                                                   self.config.get('pause_long', 150),
+                                                   self.config.get('pause_check_after', 120),
+                                                   True),
+                 self.config.get('chance_inject_pause_long', 0.2),)]:
+                actions.append(scenario)
 
         total = sum([y for (x,y) in actions])
         val = random.uniform(0, total)
diff --git a/teuthology/task/thrashosds.py b/teuthology/task/thrashosds.py
index 3325cef3cd..a945dbc475 100644
--- a/teuthology/task/thrashosds.py
+++ b/teuthology/task/thrashosds.py
@@ -62,6 +62,12 @@ def task(ctx, config):
     pool_grow_by: (10) amount to increase pgnum by
     max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd
 
+    pause_short: (3) duration of short pause
+    pause_long: (150) duration of long pause
+    pause_check_after: (120) assert osd down after this long
+    chance_inject_pause_short: (1) chance of injecting short stall
+    chance_inject_pause_long: (0.2) chance of injecting long stall
+
     example:
 
     tasks:
-- 
2.39.5