git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
Fix bug #10096 (ceph-disk umount race condition) 2947/head
author: Blaine Gardner <blaine.gardner@hp.com>
Mon, 17 Nov 2014 23:17:15 +0000 (17:17 -0600)
committer: Blaine Gardner <blaine.gardner@hp.com>
Mon, 17 Nov 2014 23:17:15 +0000 (17:17 -0600)
Bug: http://tracker.ceph.com/issues/10096

Brief: Unmounting the temporary mount point failed because a file was
'busy'. The root cause could not easily be determined because debugging
attempts changed the timing. A race condition exists.

Solution: Implement a retry with incremental backoff as a viable
workaround. This workaround is acceptable because (1) finding the root
cause would take a significant amount of time and effort, and (2) the
workaround is a more general fix for any process that might cause the
exhibited behavior.

Signed-off-by: Blaine Gardner <blaine.gardner@hp.com>
src/ceph-disk

index 012d9e57e0a09d66cf13469d948c383c485d488d..20fd5b3a4f8f0f89e20819e1f55f64b51f46d218 100755 (executable)
@@ -29,6 +29,7 @@ import stat
 import sys
 import tempfile
 import uuid
+import time
 
 """
 Prepare:
@@ -900,17 +901,25 @@ def unmount(
     """
     Unmount and removes the given mount point.
     """
-    try:
-        LOG.debug('Unmounting %s', path)
-        command_check_call(
-            [
-                '/bin/umount',
-                '--',
-                path,
-                ],
-            )
-    except subprocess.CalledProcessError as e:
-        raise UnmountError(e)
+    retries = 0
+    while True:
+        try:
+            LOG.debug('Unmounting %s', path)
+            command_check_call(
+                [
+                    '/bin/umount',
+                    '--',
+                    path,
+                    ],
+                )
+            break
+        except subprocess.CalledProcessError as e:
+            # on failure, retry 3 times with incremental backoff
+            if retries == 3:
+                raise UnmountError(e)
+            else:
+                time.sleep(0.5 + retries * 1.0)
+                retries += 1
 
     os.rmdir(path)