From 28aaca58e7bbebc9bdd8b5d5611304d6687542a3 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Wed, 6 Sep 2017 13:27:33 -0400
Subject: [PATCH] qa/tasks/ceph_manager: avoid test_map_discontinuity stall with too few up osds

Some tests have m=2,k=2 and this will break them. Sometimes even if we
have 5 up osds, we end up with 4 and CRUSH gets picky, so build in a
buffer and only do this if we have 6 up. We don't have an easy way from
here to see what the min up osds for healthy is... basically this map
discontinuity test just sucks.

Signed-off-by: Sage Weil
---
 qa/tasks/ceph_manager.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py
index 5b579937d15..37aff1b262a 100644
--- a/qa/tasks/ceph_manager.py
+++ b/qa/tasks/ceph_manager.py
@@ -998,7 +998,8 @@ class Thrasher:
                 self.ceph_manager.raw_cluster_cmd('osd', 'reweight',
                                                   str(osd), str(1))
             if random.uniform(0, 1) < float(
-                    self.config.get('chance_test_map_discontinuity', 0)):
+                    self.config.get('chance_test_map_discontinuity', 0)) \
+                    and len(self.live_osds) > 5: # avoid m=2,k=2 stall, w/ some buffer for crush being picky
                 self.test_map_discontinuity()
             else:
                 self.ceph_manager.wait_for_recovery(
-- 
2.39.5