From bd83ed70dc2e369e8e75bd550bd0bad4c8fe7724 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Wed, 7 Nov 2012 12:36:37 -0800 Subject: [PATCH] ceph_manager: add test_min_size action Thrasher can now with configurable frequency test min_size by taking down all but one osd, waiting, killing that osd and bringing back the others, and verifying that the cluster goes clean. Signed-off-by: Samuel Just --- teuthology/task/ceph_manager.py | 26 ++++++++++++++++++++++++++ teuthology/task/thrashosds.py | 8 ++++++++ 2 files changed, 34 insertions(+) diff --git a/teuthology/task/ceph_manager.py b/teuthology/task/ceph_manager.py index c7697d423268f..4266a870f669c 100644 --- a/teuthology/task/ceph_manager.py +++ b/teuthology/task/ceph_manager.py @@ -82,8 +82,33 @@ class Thrasher: self.stopping = True self.thread.get() + def test_pool_min_size(self): + self.log("test_pool_min_size") + self.all_up() + self.ceph_manager.wait_for_recovery( + timeout=self.config.get('timeout') + ) + the_one = random.choice(self.in_osds) + self.log("Killing everyone but %s", the_one) + to_kill = filter(lambda x: x != the_one, self.in_osds) + [self.kill_osd(i) for i in to_kill] + [self.out_osd(i) for i in to_kill] + time.sleep(self.config.get("test_pool_min_size_time", 10)) + self.log("Killing %s"%(the_one,)) + self.kill_osd(the_one) + self.out_osd(the_one) + self.log("Reviving everyone but %s"%(the_one,)) + [self.revive_osd(i) for i in to_kill] + [self.in_osd(i) for i in to_kill] + self.log("Revived everyone but %s"%(the_one,)) + self.log("Waiting for clean") + self.ceph_manager.wait_for_recovery( + timeout=self.config.get('timeout') + ) + def choose_action(self): chance_down = self.config.get("chance_down", 0) + chance_test_min_size = self.config.get("chance_test_min_size", 0) if isinstance(chance_down, int): chance_down = float(chance_down) / 100 minin = self.config.get("min_in", 2) @@ -102,6 +127,7 @@ class Thrasher: actions.append((self.in_osd, 1.7,)) if len(self.dead_osds) > mindead: actions.append((self.revive_osd, 1.0,)) + actions.append((self.test_pool_min_size, chance_test_min_size,)) total = sum([y for (x,y) in actions]) val = random.uniform(0, total) diff --git a/teuthology/task/thrashosds.py b/teuthology/task/thrashosds.py index 2678e60e56b5e..2548c0a2b8e34 100644 --- a/teuthology/task/thrashosds.py +++ b/teuthology/task/thrashosds.py @@ -45,6 +45,14 @@ def task(ctx, config): can be either an integer (eg, 75) or a float probability (eg 0.75). + chance_test_min_size: (0) chance to run test_pool_min_size, + which: + - kills all but one osd + - waits + - kills that osd + - revives all other osds + - verifies that the osds fully recover + timeout: (360) the number of seconds to wait for the cluster to become clean after each cluster change. If this doesn't happen within the timeout, an exception will be raised. -- 2.39.5