self.minin = self.config.get("min_in", 3)
self.chance_move_pg = self.config.get('chance_move_pg', 1.0)
self.sighup_delay = self.config.get('sighup_delay')
+ self.optrack_toggle_delay = self.config.get('optrack_toggle_delay')
+ self.dump_ops_enable = self.config.get('dump_ops_enable')
num_osds = self.in_osds + self.out_osds
self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * num_osds
self.thread = gevent.spawn(self.do_thrash)
if self.sighup_delay:
self.sighup_thread = gevent.spawn(self.do_sighup)
+ if self.optrack_toggle_delay:
+ self.optrack_toggle_thread = gevent.spawn(self.do_optrack_toggle)
+ if self.dump_ops_enable == "true":
+ self.dump_ops_thread = gevent.spawn(self.do_dump_ops)
if self.config.get('powercycle') or not self.cmd_exists_on_osds("ceph-objectstore-tool"):
self.ceph_objectstore_tool = False
self.test_rm_past_intervals = False
if self.sighup_delay:
self.log("joining the do_sighup greenlet")
self.sighup_thread.get()
+ if self.optrack_toggle_delay:
+ self.log("joining the do_optrack_toggle greenlet")
+ self.optrack_toggle_thread.join()
+ if self.dump_ops_enable == "true":
+ self.log("joining the do_dump_ops greenlet")
+ self.dump_ops_thread.join()
def grow_pool(self):
"""
self.ceph_manager.signal_osd(osd, signal.SIGHUP, silent=True)
time.sleep(delay)
+ @log_exc
+ def do_optrack_toggle(self):
+ """
+ Loops and toggles op tracking on all osds.
+
+ Loop delay is controlled by the config value optrack_toggle_delay.
+ """
+ delay = float(self.optrack_toggle_delay)
+ osd_state = "true"
+ self.log("starting do_optrack_toggle with a delay of {0}".format(delay))
+ while not self.stopping:
+ if osd_state == "true":
+ osd_state = "false"
+ else:
+ osd_state = "true"
+ self.ceph_manager.raw_cluster_cmd_result('tell', 'osd.*',
+ 'injectargs', '--osd_enable_op_tracker=%s' % osd_state)
+ gevent.sleep(delay)
+
+ @log_exc
+ def do_dump_ops(self):
+ """
+ Loops and dumps in-flight, blocked, and historic ops on all live osds.
+ """
+ self.log("starting do_dump_ops")
+ while not self.stopping:
+ for osd in self.live_osds:
+ # Ignore errors because live_osds is in flux
+ self.ceph_manager.osd_admin_socket(osd, command=['dump_ops_in_flight'],
+ check_status=False, timeout=30)
+ self.ceph_manager.osd_admin_socket(osd, command=['dump_blocked_ops'],
+ check_status=False, timeout=30)
+ self.ceph_manager.osd_admin_socket(osd, command=['dump_historic_ops'],
+ check_status=False, timeout=30)
+ gevent.sleep(0)
+
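For reference, the do_optrack_toggle loop above amounts to issuing "ceph tell osd.* injectargs --osd_enable_op_tracker=<true|false>" on a timer. A minimal standalone sketch of the same idea outside the teuthology harness, assuming a reachable cluster and a ceph binary on PATH; the toggle_op_tracker helper name and its defaults are illustrative, not part of this change:

import subprocess
import time

def toggle_op_tracker(interval=2.0, iterations=10):
    # Alternately disable and re-enable op tracking on every osd.
    state = "true"
    for _ in range(iterations):
        state = "false" if state == "true" else "true"
        # Same command the thrasher sends via raw_cluster_cmd_result.
        subprocess.call(["ceph", "tell", "osd.*", "injectargs",
                         "--osd_enable_op_tracker=%s" % state])
        time.sleep(interval)

if __name__ == "__main__":
    toggle_op_tracker()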
@log_exc
def do_thrash(self):
"""
ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down
chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%)
+ optrack_toggle_delay: (2.0) duration to delay between toggling op tracker
+ enablement on all osds
+
+ dump_ops_enable: (true) continuously dump in-flight, blocked, and historic
+ ops on all live osds
+
example:
tasks:
'thrashosds task only accepts a dict for configuration'
# add default value for sighup_delay
config['sighup_delay'] = config.get('sighup_delay', 0.1)
+ # add default value for optrack_toggle_delay
+ config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0)
+ # add default value for dump_ops_enable
+ config['dump_ops_enable'] = config.get('dump_ops_enable', "true")
overrides = ctx.config.get('overrides', {})
teuthology.deep_merge(config, overrides.get('thrashosds', {}))
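For illustration, a job yaml overriding the new knobs might look like the following; this is a sketch only, and the values shown (and the bare ceph task) are assumptions rather than part of this change. Note that dump_ops_enable must be the string "true", since the thrasher compares it against "true" rather than a boolean:

tasks:
- ceph:
- thrashosds:
    optrack_toggle_delay: 5.0
    dump_ops_enable: "true"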