git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
thrashosds: try ceph-objectstore-tool for 10 minutes
author: Nathan Cutler <ncutler@suse.com>
Thu, 24 Nov 2016 10:25:35 +0000 (11:25 +0100)
committer: Nathan Cutler <ncutler@suse.com>
Sun, 4 Dec 2016 08:55:56 +0000 (09:55 +0100)
If the ceph-objectstore-tool binary is not present, it's likely because we're in
the middle of an upgrade. Do not try to run the binary until we verify that
it's really present. If it is absent, spend up to 10 minutes waiting for it to
appear.

Before this patch there was quite a large window for a race to occur. This
patch doesn't entirely eliminate it, but drastically reduces it.

Fixes: http://tracker.ceph.com/issues/18014
Signed-off-by: Nathan Cutler <ncutler@suse.com>
(cherry picked from commit 862b47faac1fc9f05ee3322ee4b65cf3d3d666c5)

tasks/ceph_manager.py

index 2b88b4aa11663a76723233846881654961f2827b..746a532de47f00b3a2728675f70ee2178781ffff 100644 (file)
@@ -18,6 +18,7 @@ from teuthology import misc as teuthology
 from tasks.scrub import Scrubber
 from util.rados import cmd_erasure_code_profile
 from util import get_remote
+from teuthology.contextutil import safe_while
 from teuthology.orchestra.remote import Remote
 from teuthology.orchestra import run
 from teuthology.exceptions import CommandFailedError
@@ -209,6 +210,18 @@ class Thrasher:
                           "/var/log/ceph/objectstore_tool.\\$pid.log ".
                           format(fpath=FSPATH, jpath=JPATH))
             cmd = (prefix + "--op list-pgs").format(id=exp_osd)
+
+            # ceph-objectstore-tool might be temporarily absent during an 
+            # upgrade - see http://tracker.ceph.com/issues/18014
+            with safe_while(sleep=15, tries=40, action="type ceph-objectstore-tool") as proceed:
+                while proceed():
+                    proc = exp_remote.run(args=['type', 'ceph-objectstore-tool'], 
+                               wait=True, check_status=False, stdout=StringIO(),
+                               stderr=StringIO())
+                    if proc.exitstatus == 0:
+                        break
+                    log.debug("ceph-objectstore-tool binary not present, trying again")
+
             proc = exp_remote.run(args=cmd, wait=True,
                                   check_status=False, stdout=StringIO())
             if proc.exitstatus: