From 625d9b257c61fc922d1761ef6c9e8228fdb000fa Mon Sep 17 00:00:00 2001
From: Samuel Just <sam.just@inktank.com>
Date: Wed, 23 Oct 2013 10:52:55 -0700
Subject: [PATCH] ceph_manager: workaround for 6116

This is an annoying race: we really should delay going
clean until the backfill peer has acknowledged the clean
info, but we currently don't.  In order to prevent this
bug from messing up the nightlies, we'll delay killing
the peer for 20s to make it likely that the backfill
peer has gotten the clean info.

Workaround: #6116
Signed-off-by: Samuel Just <sam.just@inktank.com>
---
 teuthology/task/ceph_manager.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/teuthology/task/ceph_manager.py b/teuthology/task/ceph_manager.py
index 18f897641..286d0c8f8 100644
--- a/teuthology/task/ceph_manager.py
+++ b/teuthology/task/ceph_manager.py
@@ -219,6 +219,10 @@ class Thrasher:
         self.ceph_manager.wait_for_clean(
             timeout=self.config.get('timeout')
             )
+        # now we wait 20s to ensure that any backfill peers have heard about
+        # the cleanness
+        time.sleep(20)
+
         self.log("Recovered, killing an osd")
         self.kill_osd(mark_down=True, mark_out=True)
         self.log("Waiting for clean again")
-- 
2.47.3