From: Nathan Cutler
Date: Sun, 9 Apr 2017 18:11:27 +0000 (+0200)
Subject: tests: Thrasher: handle "OSD has the store locked" gracefully
X-Git-Tag: v10.2.8~65^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F14416%2Fhead;p=ceph.git

tests: Thrasher: handle "OSD has the store locked" gracefully

On slower machines (VPS, OVH) it takes time for the OSD to go down.

Fixes: http://tracker.ceph.com/issues/19556
Signed-off-by: Nathan Cutler
(cherry picked from commit a5b19d2d73540b730392f8001c8601f2cecc1b51)
---

diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py
index b82b3fc54f5a..b659b15c42e9 100644
--- a/qa/tasks/ceph_manager.py
+++ b/qa/tasks/ceph_manager.py
@@ -222,12 +222,22 @@ class Thrasher:
                         break
                     log.debug("ceph-objectstore-tool binary not present, trying again")
 
-            proc = exp_remote.run(args=cmd, wait=True,
-                                  check_status=False, stdout=StringIO())
-            if proc.exitstatus:
-                raise Exception("ceph-objectstore-tool: "
-                                "exp list-pgs failure with status {ret}".
-                                format(ret=proc.exitstatus))
+            # ceph-objectstore-tool might bogusly fail with "OSD has the store locked"
+            # see http://tracker.ceph.com/issues/19556
+            with safe_while(sleep=15, tries=40, action="ceph-objectstore-tool --op list-pgs") as proceed:
+                while proceed():
+                    proc = exp_remote.run(args=cmd, wait=True,
+                                          check_status=False,
+                                          stdout=StringIO(), stderr=StringIO())
+                    if proc.exitstatus == 0:
+                        break
+                    elif proc.exitstatus == 1 and proc.stderr == "OSD has the store locked":
+                        continue
+                    else:
+                        raise Exception("ceph-objectstore-tool: "
+                                        "exp list-pgs failure with status {ret}".
+                                        format(ret=proc.exitstatus))
+
             pgs = proc.stdout.getvalue().split('\n')[:-1]
             if len(pgs) == 0:
                 self.log("No PGs found for osd.{osd}".format(osd=exp_osd))