import gevent
from orchestra import run
+CLEANINT=60
+DELAY=5
+
class Thrasher(gevent.Greenlet):
- def __init__(self, manager, logger=None):
+ def __init__(self, manager, config, logger=None):
self.ceph_manager = manager
self.ceph_manager.wait_till_clean()
osd_status = self.ceph_manager.get_osd_status()
self.out_osds = osd_status['out']
self.stopping = False
self.logger = logger
+ self.config = config
if self.logger != None:
self.log = lambda x: self.logger.info(x)
else:
def tmp(x):
print x
self.log = tmp
+ if self.config is None:
+ self.config = dict()
gevent.Greenlet.__init__(self, self.do_thrash)
self.start()
self.get()
def do_thrash(self):
+ # Loop calling remove_osd()/add_osd() until self.stopping is set.
+ # Tunables come from self.config; a missing or falsy value keeps the
+ # default: cleanInterval (CLEANINT), opDelay (DELAY), minIn (2), minOut (0).
- CLEANINT=60
- DELAY=5
+ cleanint = CLEANINT
+ delay = DELAY
+ minin = 2
+ minout = 0
+ if self.config.get("cleanInterval"):
+ cleanint = self.config["cleanInterval"]
+ if self.config.get("opDelay"):
+ delay = self.config["opDelay"]
+ if self.config.get("minIn"):
+ minin = self.config["minIn"]
+ if self.config.get("minOut"):
+ minout = self.config["minOut"]
self.log("starting do_thrash")
while not self.stopping:
self.log(" ".join([str(x) for x in ["in_osds: ", self.in_osds, " out_osds: ", self.out_osds]]))
- if random.uniform(0,1) < (float(DELAY)/CLEANINT):
+ # Each pass waits for a clean cluster with probability delay/cleanint,
+ # i.e. roughly once per cleanint seconds of sleeping.
+ if random.uniform(0,1) < (float(delay)/cleanint):
self.ceph_manager.wait_till_clean()
- if (len(self.out_osds) == 0):
+ # "<=" rather than "==": if fewer than minout OSDs are out to begin
+ # with, keep removing until the floor is exceeded instead of falling
+ # through to the random branch. Identical to "== 0" for the default.
+ if (len(self.out_osds) <= minout):
self.remove_osd()
- elif (len(self.in_osds) <= 2):
+ elif (len(self.in_osds) <= minin):
self.add_osd()
else:
x = random.choice([self.remove_osd, self.add_osd])
x()
- time.sleep(DELAY)
+ time.sleep(delay)
class CephManager:
def __init__(self, controller, logger=None):
@contextlib.contextmanager
def task(ctx, config):
"""
- Run thrashosds
+ "Thrash" the OSDs by randomly marking them out/down (and then back
+ in) until the task is ended.
- There is no configuration, all commands are run on mon0 and it stops when
- __exit__ is called.
+ All commands are run on mon0 and it stops when __exit__ is called.
+ The config is optional, and is a dict containing some or all of:
+ minIn: (default 2) the minimum number of OSDs to keep in the cluster
+ minOut: (default 0) the minimum number of OSDs to keep out of the cluster
+ opDelay: (default 5) the number of seconds to sleep between changing an OSD's status
+ cleanInterval: (default 60) the approximate number of seconds to loop before waiting
+ until the cluster goes clean. (In reality this is used to probabilistically
+ choose when to wait, and the method used makes it closer to -- but not
+ identical to -- the half-life.)
+ chanceDown: (default 0) the probability that the thrasher will mark an OSD down
+ rather than marking it out. (The thrasher will not consider that OSD
+ out of the cluster, since presently an OSD wrongly marked down will
+ mark itself back up again.) This value can be either an integer (eg, 75)
+ or a float probability (eg 0.75).
+
example:
tasks:
- ceph:
- thrashosds:
+ {chanceDown: 10, opDelay: 3, minIn: 1}
- interactive:
"""
log.info('Beginning thrashosds...')
)
thrash_proc = ceph_manager.Thrasher(
manager,
- logger=log.getChild('thrasher'),
+ config,
+ logger=log.getChild('thrasher')
)
try:
yield