git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
thrasher: allow a config to set values
author Greg Farnum <gregory.farnum@dreamhost.com>
Thu, 25 Aug 2011 22:18:42 +0000 (15:18 -0700)
committer Greg Farnum <gregory.farnum@dreamhost.com>
Thu, 25 Aug 2011 22:18:42 +0000 (15:18 -0700)
Signed-off-by: Greg Farnum <gregory.farnum@dreamhost.com>
teuthology/task/ceph_manager.py
teuthology/task/thrashosds.py

index 60944f1277e05909e0f7365daaeb1719aa872fad..9e1924e0eeec531ebe6d479c9f4e1219d518b5af 100644 (file)
@@ -4,8 +4,11 @@ import re
 import gevent
 from orchestra import run
 
+CLEANINT=60
+DELAY=5
+
 class Thrasher(gevent.Greenlet):
-    def __init__(self, manager, logger=None):
+    def __init__(self, manager, config, logger=None):
         self.ceph_manager = manager
         self.ceph_manager.wait_till_clean()
         osd_status = self.ceph_manager.get_osd_status()
@@ -13,12 +16,15 @@ class Thrasher(gevent.Greenlet):
         self.out_osds = osd_status['out']
         self.stopping = False
         self.logger = logger
+        self.config = config
         if self.logger != None:
             self.log = lambda x: self.logger.info(x)
         else:
             def tmp(x):
                 print x
             self.log = tmp
+        if self.config is None:
+            self.config = dict()
         gevent.Greenlet.__init__(self, self.do_thrash)
         self.start()
 
@@ -45,21 +51,31 @@ class Thrasher(gevent.Greenlet):
         self.get()
 
     def do_thrash(self):
-        CLEANINT=60
-        DELAY=5
+        cleanint = CLEANINT
+        delay = DELAY
+        minin = 2
+        minout = 0
+        if self.config.get("cleanInterval"):
+            cleanint = self.config["cleanInterval"]
+        if self.config.get("opDelay"):
+            delay = self.config["opDelay"]
+        if self.config.get("minIn"):
+            minin = self.config["minIn"]
+        if self.config.get("minOut"):
+            minout = self.config["minOut"]
         self.log("starting do_thrash")
         while not self.stopping:
             self.log(" ".join([str(x) for x in ["in_osds: ", self.in_osds, " out_osds: ", self.out_osds]]))
-            if random.uniform(0,1) < (float(DELAY)/CLEANINT):
+            if random.uniform(0,1) < (float(delay)/cleanint):
                 self.ceph_manager.wait_till_clean()
-            if (len(self.out_osds) == 0):
+            if (len(self.out_osds) == minout):
                 self.remove_osd()
-            elif (len(self.in_osds) <= 2):
+            elif (len(self.in_osds) <= minin):
                 self.add_osd()
             else:
                 x = random.choice([self.remove_osd, self.add_osd])
                 x()
-            time.sleep(DELAY)
+            time.sleep(delay)
 
 class CephManager:
     def __init__(self, controller, logger=None):
index 387ca6e7b887e1afc91c11f71f161dd6f6869887..83be97bbcef33927e67dc8552259fbb8bc221f97 100644 (file)
@@ -7,16 +7,31 @@ log = logging.getLogger(__name__)
 @contextlib.contextmanager
 def task(ctx, config):
     """
-    Run thrashosds
+    "Thrash" the OSDs by randomly marking them out/down (and then back
+    in) until the task is ended.
 
-    There is no configuration, all commands are run on mon0 and it stops when
-    __exit__ is called.
+    All commands are run on mon0 and it stops when __exit__ is called.
+    The config is optional, and is a dict containing some or all of:
+    minIn: (default 2) the minimum number of OSDs to keep in the cluster
+    minOut: (default 0) the minimum number of OSDs to keep out of the cluster
+    opDelay: (default 5) seconds to sleep between changing an OSD's status
+    cleanInterval: (default 60) the approximate number of seconds to loop before
+    waiting until the cluster goes clean. (In reality this is used to
+    probabilistically choose when to wait, and the method used makes it closer
+    to -- but not identical to -- the half-life.)
+    chanceDown: (0) the probability that the thrasher will mark an OSD down
+    rather than marking it out. (The thrasher will not consider that OSD
+    out of the cluster, since presently an OSD wrongly marked down will
+    mark itself back up again.) This value can be either an integer (eg, 75)
+    or a float probability (eg 0.75).
+    
 
     example:
 
     tasks:
     - ceph:
     - thrashosds:
+        {chanceDown: 10, opDelay: 3, minIn: 1}
     - interactive:
     """
     log.info('Beginning thrashosds...')
@@ -27,7 +42,8 @@ def task(ctx, config):
         )
     thrash_proc = ceph_manager.Thrasher(
         manager,
-        logger=log.getChild('thrasher'),
+        config,
+        logger=log.getChild('thrasher')
         )
     try:
         yield