git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
tests: Thrasher: eliminate a race between kill_osd and __init__ 13609/head
author Nathan Cutler <ncutler@suse.com>
Thu, 2 Feb 2017 22:23:54 +0000 (23:23 +0100)
committer Nathan Cutler <ncutler@suse.com>
Thu, 23 Feb 2017 16:07:56 +0000 (17:07 +0100)
If Thrasher.__init__() spawns the do_thrash thread before initializing the
ceph_objectstore_tool property, do_thrash races with the rest
of Thrasher.__init__() and in some cases do_thrash can call kill_osd() before
Thrasher.__init__() progresses much further. This can lead to an exception
("AttributeError: Thrasher instance has no attribute 'ceph_objectstore_tool'")
being thrown in kill_osd().

This commit eliminates the race by making sure the ceph_objectstore_tool
attribute is initialized before the do_thrash thread is spawned.

Fixes: http://tracker.ceph.com/issues/18799
Signed-off-by: Nathan Cutler <ncutler@suse.com>
(cherry picked from commit b519d38fb1967628ad8a1c46fcfb3f984de58790)

qa/tasks/ceph_manager.py

index 11bc824d9dd7c3284cf16678ea2a29df9fb3bfd4..8e08439e702bb1e5bb73d376bddcf7c07c83b3df 100644 (file)
@@ -143,15 +143,8 @@ class Thrasher:
         except Exception:
             manager.raw_cluster_cmd('--', 'mon', 'tell', '*', 'injectargs',
                                     '--mon-osd-down-out-interval 0')
-        self.thread = gevent.spawn(self.do_thrash)
-        if self.sighup_delay:
-            self.sighup_thread = gevent.spawn(self.do_sighup)
-        if self.optrack_toggle_delay:
-            self.optrack_toggle_thread = gevent.spawn(self.do_optrack_toggle)
-        if self.dump_ops_enable == "true":
-            self.dump_ops_thread = gevent.spawn(self.do_dump_ops)
-        if self.noscrub_toggle_delay:
-            self.noscrub_toggle_thread = gevent.spawn(self.do_noscrub_toggle)
+        # initialize ceph_objectstore_tool property - must be done before
+        # do_thrash is spawned - http://tracker.ceph.com/issues/18799
         if (self.config.get('powercycle') or
             not self.cmd_exists_on_osds("ceph-objectstore-tool") or
             self.config.get('disable_objectstore_tool_tests', False)):
@@ -168,6 +161,16 @@ class Thrasher:
                 self.config.get('ceph_objectstore_tool', True)
             self.test_rm_past_intervals = \
                 self.config.get('test_rm_past_intervals', True)
+        # spawn do_thrash
+        self.thread = gevent.spawn(self.do_thrash)
+        if self.sighup_delay:
+            self.sighup_thread = gevent.spawn(self.do_sighup)
+        if self.optrack_toggle_delay:
+            self.optrack_toggle_thread = gevent.spawn(self.do_optrack_toggle)
+        if self.dump_ops_enable == "true":
+            self.dump_ops_thread = gevent.spawn(self.do_dump_ops)
+        if self.noscrub_toggle_delay:
+            self.noscrub_toggle_thread = gevent.spawn(self.do_noscrub_toggle)
 
     def cmd_exists_on_osds(self, cmd):
         allremotes = self.ceph_manager.ctx.cluster.only(\