thrasher: Add dump_ops_enable option

author David Zafman <dzafman@redhat.com>

Tue, 15 Mar 2016 18:29:17 +0000 (11:29 -0700)

committer David Zafman <dzafman@redhat.com>

Tue, 22 Nov 2016 02:35:13 +0000 (18:35 -0800)
author David Zafman <dzafman@redhat.com>
Tue, 15 Mar 2016 18:29:17 +0000 (11:29 -0700)
committer David Zafman <dzafman@redhat.com>
Tue, 22 Nov 2016 02:35:13 +0000 (18:35 -0800)
diff --git a/tasks/ceph_manager.py b/tasks/ceph_manager.py

index 366ab327dfbf02b34a2a874f60ea255869831ced..d7f3c10afa684b4f6a06e690816e4b2da70fca1e 100644 (file)
--- a/tasks/ceph_manager.py
+++ b/tasks/ceph_manager.py
@@ -112,6 +112,7 @@ class Thrasher:
          self.clean_wait = self.config.get('clean_wait', 0)
          self.minin = self.config.get("min_in", 3)
          self.chance_move_pg = self.config.get('chance_move_pg', 1.0)
+        self.dump_ops_enable = self.config.get('dump_ops_enable')
  
          num_osds = self.in_osds + self.out_osds
          self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * num_osds
@@ -135,6 +136,8 @@ class Thrasher:
              manager.raw_cluster_cmd('--', 'mon', 'tell', '*', 'injectargs',
                                      '--mon-osd-down-out-interval 0')
          self.thread = gevent.spawn(self.do_thrash)
+        if self.dump_ops_enable == "true":
+            self.dump_ops_thread = gevent.spawn(self.do_dump_ops)
          if self.config.get('powercycle') or not self.cmd_exists_on_osds("ceph-objectstore-tool"):
              self.ceph_objectstore_tool = False
              self.test_rm_past_intervals = False
@@ -439,6 +442,9 @@ class Thrasher:
          """
          self.stopping = True
          self.thread.get()
+        if self.dump_ops_enable == "true":
+            self.log("joining the do_dump_ops greenlet")
+            self.dump_ops_thread.join()
  
      def grow_pool(self):
          """
@@ -656,6 +662,23 @@ class Thrasher:
                  raise
          return wrapper
  
+    @log_exc
+    def do_dump_ops(self):
+        """
+        Until self.stopping is set, dump in-flight, blocked and historic ops on each live osd
+        """
+        self.log("starting do_dump_ops")
+        while not self.stopping:
+            for osd in self.live_osds:
+                # check_status=False: ignore errors because live_osds is in flux
+                self.ceph_manager.osd_admin_socket(osd, command=['dump_ops_in_flight'],
+                                     check_status=False, timeout=30)
+                self.ceph_manager.osd_admin_socket(osd, command=['dump_blocked_ops'],
+                                     check_status=False, timeout=30)
+                self.ceph_manager.osd_admin_socket(osd, command=['dump_historic_ops'],
+                                     check_status=False, timeout=30)
+            gevent.sleep(0)  # yield to other greenlets between passes
+
      @log_exc
      def do_thrash(self):
          """
diff --git a/tasks/thrashosds.py b/tasks/thrashosds.py

index a63243d2d9119743456cb6cc1ec2927c7c11ca67..710a36908c17fb5d60b612aeffbbe67b125a0556 100644 (file)
--- a/tasks/thrashosds.py
+++ b/tasks/thrashosds.py
@@ -97,6 +97,8 @@ def task(ctx, config):
      ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down
      chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%)
  
+    dump_ops_enable: ("true") continuously dump ops on all live osds; only the string "true" enables it
+
      example:
  
      tasks:
@@ -112,6 +114,8 @@ def task(ctx, config):
          config = {}
      assert isinstance(config, dict), \
          'thrashosds task only accepts a dict for configuration'
+    # add default value for dump_ops_enable
+    config['dump_ops_enable'] = config.get('dump_ops_enable', "true")
      overrides = ctx.config.get('overrides', {})
      teuthology.deep_merge(config, overrides.get('thrashosds', {}))
author	David Zafman <dzafman@redhat.com>
	Tue, 15 Mar 2016 18:29:17 +0000 (11:29 -0700)
committer	David Zafman <dzafman@redhat.com>
	Tue, 22 Nov 2016 02:35:13 +0000 (18:35 -0800)
tasks/ceph_manager.py		patch \| blob \| history
tasks/thrashosds.py		patch \| blob \| history