From: Nathan Cutler Date: Thu, 2 Feb 2017 22:23:54 +0000 (+0100) Subject: tests: Thrasher: eliminate a race between kill_osd and __init__ X-Git-Tag: v12.0.0~9^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=b519d38fb1967628ad8a1c46fcfb3f984de58790;p=ceph-ci.git tests: Thrasher: eliminate a race between kill_osd and __init__ If Thrasher.__init__() spawns the do_thrash thread before initializing the ceph_objectstore_tool property, do_thrash races with the rest of Thrasher.__init__() and in some cases do_thrash can call kill_osd() before Thrasher.__init__() progresses much further. This can lead to an exception ("AttributeError: Thrasher instance has no attribute 'ceph_objectstore_tool'") being thrown in kill_osd(). This commit eliminates the race by making sure the ceph_objectstore_tool attribute is initialized before the do_thrash thread is spawned. Fixes: http://tracker.ceph.com/issues/18799 Signed-off-by: Nathan Cutler --- diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py index 17cf2cb30be..e73562678b8 100644 --- a/qa/tasks/ceph_manager.py +++ b/qa/tasks/ceph_manager.py @@ -143,15 +143,8 @@ class Thrasher: except Exception: manager.raw_cluster_cmd('--', 'mon', 'tell', '*', 'injectargs', '--mon-osd-down-out-interval 0') - self.thread = gevent.spawn(self.do_thrash) - if self.sighup_delay: - self.sighup_thread = gevent.spawn(self.do_sighup) - if self.optrack_toggle_delay: - self.optrack_toggle_thread = gevent.spawn(self.do_optrack_toggle) - if self.dump_ops_enable == "true": - self.dump_ops_thread = gevent.spawn(self.do_dump_ops) - if self.noscrub_toggle_delay: - self.noscrub_toggle_thread = gevent.spawn(self.do_noscrub_toggle) + # initialize ceph_objectstore_tool property - must be done before + # do_thrash is spawned - http://tracker.ceph.com/issues/18799 if (self.config.get('powercycle') or not self.cmd_exists_on_osds("ceph-objectstore-tool") or self.config.get('disable_objectstore_tool_tests', False)): @@ -168,6 +161,16 @@ class 
Thrasher: self.config.get('ceph_objectstore_tool', True) self.test_rm_past_intervals = \ self.config.get('test_rm_past_intervals', True) + # spawn do_thrash + self.thread = gevent.spawn(self.do_thrash) + if self.sighup_delay: + self.sighup_thread = gevent.spawn(self.do_sighup) + if self.optrack_toggle_delay: + self.optrack_toggle_thread = gevent.spawn(self.do_optrack_toggle) + if self.dump_ops_enable == "true": + self.dump_ops_thread = gevent.spawn(self.do_dump_ops) + if self.noscrub_toggle_delay: + self.noscrub_toggle_thread = gevent.spawn(self.do_noscrub_toggle) def cmd_exists_on_osds(self, cmd): allremotes = self.ceph_manager.ctx.cluster.only(\