From 107e9c358acbc75f212f0bc6fcf83075933035b3 Mon Sep 17 00:00:00 2001 From: David Zafman Date: Tue, 15 Mar 2016 11:29:17 -0700 Subject: [PATCH] thrasher: Add dump_ops_enable options Add dump_ops_enable which continuously dumps ops using 3 commands Signed-off-by: David Zafman (Partial cherry picked from commit 7a528763d126eaca90e40b02aad02ab9c3c92a32) Excluded dynamic optracker testing with optrack_toggle_delay option Conflicts: tasks/ceph_manager.py (trivial) tasks/thrashosds.py (trivial) --- tasks/ceph_manager.py | 23 +++++++++++++++++++++++ tasks/thrashosds.py | 4 ++++ 2 files changed, 27 insertions(+) diff --git a/tasks/ceph_manager.py b/tasks/ceph_manager.py index 366ab327dfbf0..d7f3c10afa684 100644 --- a/tasks/ceph_manager.py +++ b/tasks/ceph_manager.py @@ -112,6 +112,7 @@ class Thrasher: self.clean_wait = self.config.get('clean_wait', 0) self.minin = self.config.get("min_in", 3) self.chance_move_pg = self.config.get('chance_move_pg', 1.0) + self.dump_ops_enable = self.config.get('dump_ops_enable') num_osds = self.in_osds + self.out_osds self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * num_osds @@ -135,6 +136,8 @@ class Thrasher: manager.raw_cluster_cmd('--', 'mon', 'tell', '*', 'injectargs', '--mon-osd-down-out-interval 0') self.thread = gevent.spawn(self.do_thrash) + if self.dump_ops_enable == "true": + self.dump_ops_thread = gevent.spawn(self.do_dump_ops) if self.config.get('powercycle') or not self.cmd_exists_on_osds("ceph-objectstore-tool"): self.ceph_objectstore_tool = False self.test_rm_past_intervals = False @@ -439,6 +442,9 @@ class Thrasher: """ self.stopping = True self.thread.get() + if self.dump_ops_enable == "true": + self.log("joining the do_dump_ops greenlet") + self.dump_ops_thread.join() def grow_pool(self): """ @@ -656,6 +662,23 @@ class Thrasher: raise return wrapper + @log_exc + def do_dump_ops(self): + """ + Loop until self.stopping is set, running dump_ops_in_flight, dump_blocked_ops and dump_historic_ops on each osd in self.live_osds + """ + self.log("starting do_dump_ops") + while not self.stopping: + for osd in
self.live_osds: + # Ignore errors because live_osds is in flux + self.ceph_manager.osd_admin_socket(osd, command=['dump_ops_in_flight'], + check_status=False, timeout=30) + self.ceph_manager.osd_admin_socket(osd, command=['dump_blocked_ops'], + check_status=False, timeout=30) + self.ceph_manager.osd_admin_socket(osd, command=['dump_historic_ops'], + check_status=False, timeout=30) + gevent.sleep(0) + @log_exc def do_thrash(self): """ diff --git a/tasks/thrashosds.py b/tasks/thrashosds.py index a63243d2d9119..710a36908c17f 100644 --- a/tasks/thrashosds.py +++ b/tasks/thrashosds.py @@ -97,6 +97,8 @@ def task(ctx, config): ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%) + dump_ops_enable: ("true") continuously dump ops on all live osds; only the string "true" enables it + example: tasks: @@ -112,6 +114,8 @@ def task(ctx, config): config = {} assert isinstance(config, dict), \ 'thrashosds task only accepts a dict for configuration' + # default dump_ops_enable to the string "true" when the config does not set it + config['dump_ops_enable'] = config.get('dump_ops_enable', "true") overrides = ctx.config.get('overrides', {}) teuthology.deep_merge(config, overrides.get('thrashosds', {})) -- 2.39.5