From 625d9b257c61fc922d1761ef6c9e8228fdb000fa Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Wed, 23 Oct 2013 10:52:55 -0700 Subject: [PATCH] ceph_manager: workaround for 6116 This is an annoying race: we really should delay going clean until the backfill peer has acknowledged the clean info, but we currently don't. In order to prevent this bug from messing up the nightlies, we'll delay killing the peer for 20s to make it likely that the backfill peer has gotten the clean info. Workaround: #6116 Signed-off-by: Samuel Just --- teuthology/task/ceph_manager.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/teuthology/task/ceph_manager.py b/teuthology/task/ceph_manager.py index 18f897641c..286d0c8f85 100644 --- a/teuthology/task/ceph_manager.py +++ b/teuthology/task/ceph_manager.py @@ -219,6 +219,10 @@ class Thrasher: self.ceph_manager.wait_for_clean( timeout=self.config.get('timeout') ) + # now we wait 20s to ensure that any backfill peers have heard about + # the cleanness + time.sleep(20) + self.log("Recovered, killing an osd") self.kill_osd(mark_down=True, mark_out=True) self.log("Waiting for clean again") -- 2.39.5