git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
thrashosds: try ceph-objectstore-tool for 10 minutes
author: Nathan Cutler <ncutler@suse.com>
Thu, 24 Nov 2016 10:25:35 +0000 (11:25 +0100)
committer: Nathan Cutler <ncutler@suse.com>
Fri, 25 Nov 2016 21:50:31 +0000 (22:50 +0100)
If ceph-objectstore-tool binary is not present, it's likely because we're in
the middle of an upgrade. Do not try to run the binary until we verify that
it's really present. If it is absent, spend up to 10 minutes waiting for it to
appear.

Before this patch there was quite a large window for a race to occur. This
patch doesn't entirely eliminate it, but drastically reduces it.

Fixes: http://tracker.ceph.com/issues/18014
Signed-off-by: Nathan Cutler <ncutler@suse.com>
tasks/ceph_manager.py

index f2928d81c9243a87e7dc6e7952ed5f64d52b5a0f..17cf2cb30be40a40e31326cd28dab53d8c502184 100644 (file)
@@ -18,6 +18,7 @@ from teuthology import misc as teuthology
 from tasks.scrub import Scrubber
 from util.rados import cmd_erasure_code_profile
 from util import get_remote
+from teuthology.contextutil import safe_while
 from teuthology.orchestra.remote import Remote
 from teuthology.orchestra import run
 from teuthology.exceptions import CommandFailedError
@@ -224,6 +225,18 @@ class Thrasher:
                           "/var/log/ceph/objectstore_tool.\\$pid.log ".
                           format(fpath=FSPATH, jpath=JPATH))
             cmd = (prefix + "--op list-pgs").format(id=exp_osd)
+
+            # ceph-objectstore-tool might be temporarily absent during an 
+            # upgrade - see http://tracker.ceph.com/issues/18014
+            with safe_while(sleep=15, tries=40, action="type ceph-objectstore-tool") as proceed:
+                while proceed():
+                    proc = exp_remote.run(args=['type', 'ceph-objectstore-tool'], 
+                               wait=True, check_status=False, stdout=StringIO(),
+                               stderr=StringIO())
+                    if proc.exitstatus == 0:
+                        break
+                    log.debug("ceph-objectstore-tool binary not present, trying again")
+
             proc = exp_remote.run(args=cmd, wait=True,
                                   check_status=False, stdout=StringIO())
             if proc.exitstatus: