From 3fedce6c34fb3acfa57b5a028cb750041ee8fbd6 Mon Sep 17 00:00:00 2001
From: Guillaume Abrioux
Date: Tue, 16 Aug 2022 14:13:41 +0200
Subject: [PATCH] ceph-volume: add a retry in util.disk.remove_partition

This fixes a possible race condition when zapping a device.
Due to some udev events, that race condition makes the key
`ID_PART_ENTRY_NUMBER` show up too late.
The idea here is to retry multiple times before actually failing.

Fixes: https://tracker.ceph.com/issues/57144
Signed-off-by: Guillaume Abrioux
---
 src/ceph-volume/ceph_volume/util/disk.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/ceph-volume/ceph_volume/util/disk.py b/src/ceph-volume/ceph_volume/util/disk.py
index d2459e12086..4f388742d8d 100644
--- a/src/ceph-volume/ceph_volume/util/disk.py
+++ b/src/ceph-volume/ceph_volume/util/disk.py
@@ -2,6 +2,7 @@ import logging
 import os
 import re
 import stat
+import time
 from ceph_volume import process
 from ceph_volume.api import lvm
 from ceph_volume.util.system import get_file_contents
@@ -134,8 +135,16 @@ def remove_partition(device):
 
     :param device: A ``Device()`` object
     """
-    udev_info = udevadm_property(device.path)
-    partition_number = udev_info.get('ID_PART_ENTRY_NUMBER')
+    # Sometimes there's a race condition that makes 'ID_PART_ENTRY_NUMBER' be not present
+    # in the output of `udevadm info --query=property`.
+    # Probably not ideal and not the best fix but this allows to get around that issue.
+    # The idea is to make it retry multiple times before actually failing.
+    for i in range(10):
+        udev_info = udevadm_property(device.path)
+        partition_number = udev_info.get('ID_PART_ENTRY_NUMBER')
+        if partition_number:
+            break
+        time.sleep(0.2)
     if not partition_number:
         raise RuntimeError('Unable to detect the partition number for device: %s' % device.path)
 
-- 
2.39.5