From: Nathan Cutler Date: Thu, 24 Nov 2016 10:25:35 +0000 (+0100) Subject: thrashosds: try ceph-objectstore-tool for 10 minutes X-Git-Tag: v10.2.6~165^2^2~5^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=cec7441e29454c6bc90e92c937b12b89c21d0d76;p=ceph.git thrashosds: try ceph-objectstore-tool for 10 minutes If the ceph-objectstore-tool binary is not present, it's likely because we're in the middle of an upgrade. Do not try to run the binary until we verify that it's really present. If it is absent, spend up to 10 minutes waiting for it to appear. Before this patch there was quite a large window for a race to occur. This patch doesn't entirely eliminate that window, but drastically reduces it. Fixes: http://tracker.ceph.com/issues/18014 Signed-off-by: Nathan Cutler (cherry picked from commit 862b47faac1fc9f05ee3322ee4b65cf3d3d666c5) --- diff --git a/tasks/ceph_manager.py b/tasks/ceph_manager.py index 2b88b4aa116..746a532de47 100644 --- a/tasks/ceph_manager.py +++ b/tasks/ceph_manager.py @@ -18,6 +18,7 @@ from teuthology import misc as teuthology from tasks.scrub import Scrubber from util.rados import cmd_erasure_code_profile from util import get_remote +from teuthology.contextutil import safe_while from teuthology.orchestra.remote import Remote from teuthology.orchestra import run from teuthology.exceptions import CommandFailedError @@ -209,6 +210,18 @@ class Thrasher: "/var/log/ceph/objectstore_tool.\\$pid.log ". 
format(fpath=FSPATH, jpath=JPATH)) cmd = (prefix + "--op list-pgs").format(id=exp_osd) + + # ceph-objectstore-tool might be temporarily absent during an + # upgrade - see http://tracker.ceph.com/issues/18014 + with safe_while(sleep=15, tries=40, action="type ceph-objectstore-tool") as proceed: + while proceed(): + proc = exp_remote.run(args=['type', 'ceph-objectstore-tool'], + wait=True, check_status=False, stdout=StringIO(), + stderr=StringIO()) + if proc.exitstatus == 0: + break + log.debug("ceph-objectstore-tool binary not present, trying again") + proc = exp_remote.run(args=cmd, wait=True, check_status=False, stdout=StringIO()) if proc.exitstatus: