From 730b5d62d3cda7de4076bafa6e9e35f1eb8e2190 Mon Sep 17 00:00:00 2001 From: Loic Dachary Date: Wed, 16 Dec 2015 12:33:25 +0100 Subject: [PATCH] ceph-disk: increase partprobe / udevadm settle timeouts The default of 120 seconds may be exceeded when the disk is very slow, which can happen in cloud environments. Increase it to 600 seconds instead. The partprobe command may fail for the same reason but it does not have a timeout parameter. Instead, try a few times before failing. The udevadm settle calls guarding partprobe are not necessary because partprobe already does the same. However, partprobe does not provide a way to control the timeout. Running one udevadm settle after another is a no-op most of the time and does not add any delay. It matters when the udevadm settle run by partprobe fails with a timeout, because partprobe silently ignores the failure. http://tracker.ceph.com/issues/14080 Fixes: #14080 Signed-off-by: Loic Dachary --- src/ceph-disk | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/ceph-disk b/src/ceph-disk index bddf643dd5eef..21c70b876b5a1 100755 --- a/src/ceph-disk +++ b/src/ceph-disk @@ -1245,9 +1245,24 @@ def update_partition(dev, description): group changes etc. are complete. 
""" LOG.debug('Calling partprobe on %s device %s', description, dev) - command_check_call(['udevadm', 'settle']) - command_check_call(['partprobe', dev]) - command_check_call(['udevadm', 'settle']) + partprobe_ok = False + error = 'unknown error' + for i in (1, 2, 3, 4, 5): + command_check_call(['udevadm', 'settle', '--timeout=600']) + try: + _check_output(['partprobe', dev]) + partprobe_ok = True + break + except subprocess.CalledProcessError as e: + error = e.output + if ('unable to inform the kernel' not in error and + 'Device or resource busy' not in error): + raise + LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)' % (dev, error)) + time.sleep(60) + if not partprobe_ok: + raise Error('partprobe %s failed : %s' % (dev, error)) + command_check_call(['udevadm', 'settle', '--timeout=600']) def zap(dev): """ -- 2.39.5