From: Alfredo Deza Date: Thu, 11 Jul 2019 13:34:10 +0000 (-0400) Subject: ceph-volume lvm.zap: retry wipefs several times to prevent race condition failures X-Git-Tag: v15.1.0~2217^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b7b8b106fa485d3b8345b583fbc7f74e30cbc03e;p=ceph.git ceph-volume lvm.zap: retry wipefs several times to prevent race condition failures Signed-off-by: Alfredo Deza --- diff --git a/src/ceph-volume/ceph_volume/devices/lvm/zap.py b/src/ceph-volume/ceph_volume/devices/lvm/zap.py index 9a7a103adab..de9030c0a88 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/zap.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/zap.py @@ -1,12 +1,13 @@ import argparse import os import logging +import time from textwrap import dedent from ceph_volume import decorators, terminal, process from ceph_volume.api import lvm as api -from ceph_volume.util import system, encryption, disk, arg_validators +from ceph_volume.util import system, encryption, disk, arg_validators, str_to_int from ceph_volume.util.device import Device from ceph_volume.systemd import systemctl @@ -17,12 +18,38 @@ mlogger = terminal.MultiLogger(__name__) def wipefs(path): """ Removes the filesystem from an lv or partition. + + Environment variables supported:: + + * ``CEPH_VOLUME_WIPEFS_TRIES``: Defaults to 8 + * ``CEPH_VOLUME_WIPEFS_INTERVAL``: Defaults to 5 + """ - process.run([ - 'wipefs', - '--all', - path - ]) + tries = str_to_int( + os.environ.get('CEPH_VOLUME_WIPEFS_TRIES', 8) + ) + interval = str_to_int( + os.environ.get('CEPH_VOLUME_WIPEFS_INTERVAL', 5) + ) + + for trying in range(tries): + stdout, stderr, exit_code = process.call([ + 'wipefs', + '--all', + path + ]) + if exit_code != 0: + # this could narrow the retry by poking in the stderr of the output + # to verify that 'probing initialization failed' appears, but + # better to be broad in this retry to prevent missing on + # a different message that needs to be retried as well + terminal.warning( + 'failed to wipefs device, will try again to workaround probable race condition' + ) + time.sleep(interval) + else: + return + raise RuntimeError("could not complete wipefs on device: %s" % path) def zap_data(path):