git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
ceph-volume lvm.zap: retry wipefs several times to prevent race condition failures
author Alfredo Deza <adeza@redhat.com>
Thu, 11 Jul 2019 13:34:10 +0000 (09:34 -0400)
committer Alfredo Deza <adeza@redhat.com>
Tue, 10 Sep 2019 18:54:52 +0000 (14:54 -0400)
Signed-off-by: Alfredo Deza <adeza@redhat.com>
(cherry picked from commit b7b8b106fa485d3b8345b583fbc7f74e30cbc03e)

src/ceph-volume/ceph_volume/devices/lvm/zap.py

index 328a036152e2c1ebb5242938524247e84d36c160..f62b0cc61f1b1c3a0e9b11d6bb852fa7d09fdd62 100644 (file)
@@ -1,12 +1,13 @@
 import argparse
 import os
 import logging
+import time
 
 from textwrap import dedent
 
 from ceph_volume import decorators, terminal, process
 from ceph_volume.api import lvm as api
-from ceph_volume.util import system, encryption, disk, arg_validators
+from ceph_volume.util import system, encryption, disk, arg_validators, str_to_int
 from ceph_volume.util.device import Device
 from ceph_volume.systemd import systemctl
 
@@ -17,12 +18,38 @@ mlogger = terminal.MultiLogger(__name__)
 def wipefs(path):
     """
     Removes the filesystem from an lv or partition.
+
+    Runs ``wipefs --all`` against ``path``. When the command exits with
+    a non-zero code a warning is printed, the function sleeps, and the command
+    is attempted again. Returns ``None`` as soon as one attempt exits with 0.
+
+    :param path: path of the lv or partition, passed as the last argument to
+                 ``wipefs``
+    :raises RuntimeError: when none of the attempts exits with 0 (this includes
+                          the case where the number of tries resolves to 0 and
+                          the command is never run)
+
+    Environment variables supported::
+
+    * ``CEPH_VOLUME_WIPEFS_TRIES``: Defaults to 8
+    * ``CEPH_VOLUME_WIPEFS_INTERVAL``: Defaults to 5
+
+    ``CEPH_VOLUME_WIPEFS_TRIES`` is the maximum number of attempts and
+    ``CEPH_VOLUME_WIPEFS_INTERVAL`` is the value handed to ``time.sleep``
+    (seconds) after each failed attempt.
+
     """
-    process.run([
-        'wipefs',
-        '--all',
-        path
-    ])
+    # Environment values arrive as strings while the fallbacks here are ints;
+    # presumably ``str_to_int`` accepts both -- verify against its definition.
+    tries = str_to_int(
+        os.environ.get('CEPH_VOLUME_WIPEFS_TRIES', 8)
+    )
+    interval = str_to_int(
+        os.environ.get('CEPH_VOLUME_WIPEFS_INTERVAL', 5)
+    )
+
+    for trying in range(tries):
+        # only the exit code is inspected below; stdout and stderr are unused
+        stdout, stderr, exit_code = process.call([
+            'wipefs',
+            '--all',
+            path
+        ])
+        if exit_code != 0:
+            # this could narrow the retry by poking in the stderr of the output
+            # to verify that 'probing initialization failed' appears, but
+            # better to be broad in this retry to prevent missing on
+            # a different message that needs to be retried as well
+            terminal.warning(
+                'failed to wipefs device, will try again to workaround probable race condition'
+            )
+            # NOTE(review): this sleep (and the "will try again" warning above)
+            # also happens after the final failed attempt, right before the
+            # RuntimeError below is raised -- confirm whether that is intended.
+            time.sleep(interval)
+        else:
+            # wipefs exited with 0: nothing left to do
+            return
+    # every attempt failed, or the loop never ran because ``tries`` was 0
+    raise RuntimeError("could not complete wipefs on device: %s" % path)
 
 
 def zap_data(path):