From: Sage Weil
Date: Wed, 6 Sep 2017 17:27:33 +0000 (-0400)
Subject: qa/tasks/ceph_manager: avoid test_map_discontinuity stall with too few up osds
X-Git-Tag: v14.1.0~823^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=28aaca58e7bbebc9bdd8b5d5611304d6687542a3;p=ceph.git

qa/tasks/ceph_manager: avoid test_map_discontinuity stall with too few up osds

Some tests have m=2,k=2 and this will break them. Sometimes even if we
have 5 up osds, we end up with 4 and CRUSH gets picky, so build in a buffer
and only do this if we have 6 up.

We don't have an easy way from here to see what the min up osds for healthy
is... basically this map discontinuity test just sucks.

Signed-off-by: Sage Weil
---

diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py
index 5b579937d15..37aff1b262a 100644
--- a/qa/tasks/ceph_manager.py
+++ b/qa/tasks/ceph_manager.py
@@ -998,7 +998,8 @@ class Thrasher:
             self.ceph_manager.raw_cluster_cmd('osd', 'reweight',
                                               str(osd), str(1))
             if random.uniform(0, 1) < float(
-                    self.config.get('chance_test_map_discontinuity', 0)):
+                    self.config.get('chance_test_map_discontinuity', 0)) \
+                    and len(self.live_osds) > 5: # avoid m=2,k=2 stall, w/ some buffer for crush being picky
                 self.test_map_discontinuity()
             else:
                 self.ceph_manager.wait_for_recovery(