ceph_manager: add test_pool_min_size action

author Samuel Just <sam.just@inktank.com>

Wed, 7 Nov 2012 20:36:37 +0000 (12:36 -0800)

committer Samuel Just <sam.just@inktank.com>

Wed, 7 Nov 2012 20:56:31 +0000 (12:56 -0800)
author Samuel Just <sam.just@inktank.com>
Wed, 7 Nov 2012 20:36:37 +0000 (12:36 -0800)
committer Samuel Just <sam.just@inktank.com>
Wed, 7 Nov 2012 20:56:31 +0000 (12:56 -0800)
diff --git a/teuthology/task/ceph_manager.py b/teuthology/task/ceph_manager.py

index c7697d423268ff9b4dcb39816c457ae2ff54788f..4266a870f669ce47de61ddff8b0bf912966d18da 100644 (file)
--- a/teuthology/task/ceph_manager.py
+++ b/teuthology/task/ceph_manager.py
@@ -82,8 +82,33 @@ class Thrasher:
          self.stopping = True
          self.thread.get()
  
+    def test_pool_min_size(self):
+        """Kill all in osds but one, then that one; revive the rest and recover."""
+        self.log("test_pool_min_size")
+        self.all_up()
+        self.ceph_manager.wait_for_recovery(timeout=self.config.get('timeout'))
+        the_one = random.choice(self.in_osds)
+        self.log("Killing everyone but %s"%(the_one,))
+        # A real list, not a lazy filter object: to_kill is traversed four times.
+        to_kill = [x for x in self.in_osds if x != the_one]
+        [self.kill_osd(i) for i in to_kill]
+        [self.out_osd(i) for i in to_kill]
+        # Leave the_one as the only surviving osd for the configured interval.
+        time.sleep(self.config.get("test_pool_min_size_time", 10))
+        self.log("Killing %s"%(the_one,))
+        self.kill_osd(the_one)
+        self.out_osd(the_one)
+        self.log("Reviving everyone but %s"%(the_one,))
+        # the_one is not revived here; only the osds killed first come back.
+        [self.revive_osd(i) for i in to_kill]
+        [self.in_osd(i) for i in to_kill]
+        self.log("Revived everyone but %s"%(the_one,))
+        self.log("Waiting for clean")
+        self.ceph_manager.wait_for_recovery(timeout=self.config.get('timeout'))
+
      def choose_action(self):
          chance_down = self.config.get("chance_down", 0)
+        chance_test_min_size = self.config.get("chance_test_min_size", 0)
          if isinstance(chance_down, int):
              chance_down = float(chance_down) / 100
          minin = self.config.get("min_in", 2)
@@ -102,6 +127,7 @@ class Thrasher:
              actions.append((self.in_osd, 1.7,))
          if len(self.dead_osds) > mindead:
              actions.append((self.revive_osd, 1.0,))
+        actions.append((self.test_pool_min_size, chance_test_min_size,))
  
          total = sum([y for (x,y) in actions])
          val = random.uniform(0, total)
diff --git a/teuthology/task/thrashosds.py b/teuthology/task/thrashosds.py

index 2678e60e56b5eed6aecb58133b1e0648cc1a2fd2..2548c0a2b8e34fc2a4df16a40810723064372dee 100644 (file)
--- a/teuthology/task/thrashosds.py
+++ b/teuthology/task/thrashosds.py
@@ -45,6 +45,14 @@ def task(ctx, config):
         can be either an integer (eg, 75) or a float probability (eg
         0.75).
  
+    chance_test_min_size: (0) chance to run test_pool_min_size,
+       which:
+       - kills all but one osd
+       - waits
+       - kills that osd
+       - revives all other osds
+       - verifies that the osds fully recover
+
      timeout: (360) the number of seconds to wait for the cluster
         to become clean after each cluster change. If this doesn't
         happen within the timeout, an exception will be raised.
author	Samuel Just <sam.just@inktank.com>
	Wed, 7 Nov 2012 20:36:37 +0000 (12:36 -0800)
committer	Samuel Just <sam.just@inktank.com>
	Wed, 7 Nov 2012 20:56:31 +0000 (12:56 -0800)
teuthology/task/ceph_manager.py		patch \| blob \| history
teuthology/task/thrashosds.py		patch \| blob \| history