]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
thrasher: Add dump_ops_enable and optrack_toggle_delay options
author David Zafman <dzafman@redhat.com>
Tue, 15 Mar 2016 18:29:17 +0000 (11:29 -0700)
committer David Zafman <dzafman@redhat.com>
Fri, 29 Apr 2016 16:43:38 +0000 (09:43 -0700)
Add dump_ops_enable which continuously dumps ops using 3 commands
Add optrack_toggle_delay to alternate op tracking enablement

Signed-off-by: David Zafman <dzafman@redhat.com>
tasks/ceph_manager.py
tasks/thrashosds.py

index 13ba9fc151190e23cdefebed82c75cfce2ea32e6..39c17faa83e5db80e1b1a5fa963114029a399f2c 100644 (file)
@@ -104,6 +104,8 @@ class Thrasher:
         self.minin = self.config.get("min_in", 3)
         self.chance_move_pg = self.config.get('chance_move_pg', 1.0)
         self.sighup_delay = self.config.get('sighup_delay')
+        self.optrack_toggle_delay = self.config.get('optrack_toggle_delay')
+        self.dump_ops_enable = self.config.get('dump_ops_enable')
 
         num_osds = self.in_osds + self.out_osds
         self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * num_osds
@@ -129,6 +131,10 @@ class Thrasher:
         self.thread = gevent.spawn(self.do_thrash)
         if self.sighup_delay:
             self.sighup_thread = gevent.spawn(self.do_sighup)
+        if self.optrack_toggle_delay:
+            self.optrack_toggle_thread = gevent.spawn(self.do_optrack_toggle)
+        if self.dump_ops_enable == "true":
+            self.dump_ops_thread = gevent.spawn(self.do_dump_ops)
         if self.config.get('powercycle') or not self.cmd_exists_on_osds("ceph-objectstore-tool"):
             self.ceph_objectstore_tool = False
             self.test_rm_past_intervals = False
@@ -425,6 +431,12 @@ class Thrasher:
         if self.sighup_delay:
             self.log("joining the do_sighup greenlet")
             self.sighup_thread.get()
+        if self.optrack_toggle_delay:
+            self.log("joining the do_optrack_toggle greenlet")
+            self.optrack_toggle_thread.join()
+        if self.dump_ops_enable == "true":
+            self.log("joining the do_dump_ops greenlet")
+            self.dump_ops_thread.join()
 
     def grow_pool(self):
         """
@@ -656,6 +668,42 @@ class Thrasher:
             self.ceph_manager.signal_osd(osd, signal.SIGHUP, silent=True)
             time.sleep(delay)
 
+    @log_exc
+    def do_optrack_toggle(self):
+        """
+        Loop until self.stopping is set, toggling op tracking on all osds.
+
+        Loop delay is controlled by the config value optrack_toggle_delay.
+        """
+        delay = float(self.optrack_toggle_delay)
+        osd_state = "true"  # flipped before each injectargs, so the first value sent is "false"
+        self.log("starting do_optrack_toggle with a delay of {0}".format(delay))
+        while not self.stopping:
+            if osd_state == "true":
+                osd_state = "false"
+            else:
+                osd_state = "true"
+            self.ceph_manager.raw_cluster_cmd_result('tell', 'osd.*',
+                             'injectargs', '--osd_enable_op_tracker=%s' % osd_state)
+            gevent.sleep(delay)
+
+    @log_exc
+    def do_dump_ops(self):
+        """
+        Loop until self.stopping is set, dumping ops on each osd in self.live_osds
+        """
+        self.log("starting do_dump_ops")
+        while not self.stopping:
+            for osd in self.live_osds:
+                # Ignore errors because live_osds is in flux
+                self.ceph_manager.osd_admin_socket(osd, command=['dump_ops_in_flight'],
+                                     check_status=False, timeout=30)
+                self.ceph_manager.osd_admin_socket(osd, command=['dump_blocked_ops'],
+                                     check_status=False, timeout=30)
+                self.ceph_manager.osd_admin_socket(osd, command=['dump_historic_ops'],
+                                     check_status=False, timeout=30)
+            gevent.sleep(0)  # NOTE(review): zero delay; presumably only yields to other greenlets - confirm
+
     @log_exc
     def do_thrash(self):
         """
index cf0a65759fa515f62e67ac75432a08c3631d5e39..9f56fbe7d1a35e52aff0d168f4b2c65d2e2de818 100644 (file)
@@ -100,6 +100,11 @@ def task(ctx, config):
     ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down
     chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%)
 
+    optrack_toggle_delay: (2.0) seconds to wait between toggles of op tracker
+                  enablement on all osds
+
+    dump_ops_enable: (true) continuously dump ops on all live osds
+
     example:
 
     tasks:
@@ -117,6 +122,10 @@ def task(ctx, config):
         'thrashosds task only accepts a dict for configuration'
     # add default value for sighup_delay
     config['sighup_delay'] = config.get('sighup_delay', 0.1)
+    # add default value for optrack_toggle_delay
+    config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0)
+    # add default value for dump_ops_enable
+    config['dump_ops_enable'] = config.get('dump_ops_enable', "true")
     overrides = ctx.config.get('overrides', {})
     teuthology.deep_merge(config, overrides.get('thrashosds', {}))