git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
thrasher: Add dump_ops_enable options
author: David Zafman <dzafman@redhat.com>
Tue, 15 Mar 2016 18:29:17 +0000 (11:29 -0700)
committer: David Zafman <dzafman@redhat.com>
Tue, 22 Nov 2016 02:35:13 +0000 (18:35 -0800)
Add dump_ops_enable which continuously dumps ops using 3 commands

Signed-off-by: David Zafman <dzafman@redhat.com>
(Partial cherry picked from commit 7a528763d126eaca90e40b02aad02ab9c3c92a32)

Excluded dynamic optracker testing with optrack_toggle_delay option

Conflicts:
tasks/ceph_manager.py (trivial)
tasks/thrashosds.py (trivial)

tasks/ceph_manager.py
tasks/thrashosds.py

index 366ab327dfbf02b34a2a874f60ea255869831ced..d7f3c10afa684b4f6a06e690816e4b2da70fca1e 100644 (file)
@@ -112,6 +112,7 @@ class Thrasher:
         self.clean_wait = self.config.get('clean_wait', 0)
         self.minin = self.config.get("min_in", 3)
         self.chance_move_pg = self.config.get('chance_move_pg', 1.0)
+        self.dump_ops_enable = self.config.get('dump_ops_enable')
 
         num_osds = self.in_osds + self.out_osds
         self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * num_osds
@@ -135,6 +136,8 @@ class Thrasher:
             manager.raw_cluster_cmd('--', 'mon', 'tell', '*', 'injectargs',
                                     '--mon-osd-down-out-interval 0')
         self.thread = gevent.spawn(self.do_thrash)
+        if self.dump_ops_enable == "true":
+            self.dump_ops_thread = gevent.spawn(self.do_dump_ops)
         if self.config.get('powercycle') or not self.cmd_exists_on_osds("ceph-objectstore-tool"):
             self.ceph_objectstore_tool = False
             self.test_rm_past_intervals = False
@@ -439,6 +442,9 @@ class Thrasher:
         """
         self.stopping = True
         self.thread.get()
+        if self.dump_ops_enable == "true":
+            self.log("joining the do_dump_ops greenlet")
+            self.dump_ops_thread.join()
 
     def grow_pool(self):
         """
@@ -656,6 +662,23 @@ class Thrasher:
                 raise
         return wrapper
 
+    @log_exc
+    def do_dump_ops(self):
+        """
+        Loops and does op dumps on all osds
+        """
+        self.log("starting do_dump_ops")
+        while not self.stopping:
+            for osd in self.live_osds:
+                # Ignore errors because live_osds is in flux
+                self.ceph_manager.osd_admin_socket(osd, command=['dump_ops_in_flight'],
+                                     check_status=False, timeout=30)
+                self.ceph_manager.osd_admin_socket(osd, command=['dump_blocked_ops'],
+                                     check_status=False, timeout=30)
+                self.ceph_manager.osd_admin_socket(osd, command=['dump_historic_ops'],
+                                     check_status=False, timeout=30)
+            gevent.sleep(0)
+
     @log_exc
     def do_thrash(self):
         """
index a63243d2d9119743456cb6cc1ec2927c7c11ca67..710a36908c17fb5d60b612aeffbbe67b125a0556 100644 (file)
@@ -97,6 +97,8 @@ def task(ctx, config):
     ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down
     chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%)
 
+    dump_ops_enable: (true) continuously dump ops on all live osds
+
     example:
 
     tasks:
@@ -112,6 +114,8 @@ def task(ctx, config):
         config = {}
     assert isinstance(config, dict), \
         'thrashosds task only accepts a dict for configuration'
+    # add default value for dump_ops_enable
+    config['dump_ops_enable'] = config.get('dump_ops_enable', "true")
     overrides = ctx.config.get('overrides', {})
     teuthology.deep_merge(config, overrides.get('thrashosds', {}))