git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
ceph-volume: add a retry in util.disk.remove_partition 47989/head
author Guillaume Abrioux <gabrioux@redhat.com>
Tue, 16 Aug 2022 12:13:41 +0000 (14:13 +0200)
committer Guillaume Abrioux <gabrioux@redhat.com>
Tue, 6 Sep 2022 09:04:30 +0000 (11:04 +0200)
This fixes a possible race condition when zapping a device.
Due to some udev events, that race condition makes the key
`ID_PART_ENTRY_NUMBER` show up too late.

The idea here is to retry multiple times before actually failing.

Fixes: https://tracker.ceph.com/issues/57144
Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
(cherry picked from commit 3fedce6c34fb3acfa57b5a028cb750041ee8fbd6)

src/ceph-volume/ceph_volume/util/disk.py

index d2459e1208674d6b49e63348b29138f4350d315e..4f388742d8dee5f05fac66e5b6d9bb488dc46b60 100644 (file)
@@ -2,6 +2,7 @@ import logging
 import os
 import re
 import stat
+import time
 from ceph_volume import process
 from ceph_volume.api import lvm
 from ceph_volume.util.system import get_file_contents
@@ -134,8 +135,16 @@ def remove_partition(device):
 
     :param device: A ``Device()`` object
     """
-    udev_info = udevadm_property(device.path)
-    partition_number = udev_info.get('ID_PART_ENTRY_NUMBER')
+    # Sometimes there's a race condition that makes 'ID_PART_ENTRY_NUMBER' be not present
+    # in the output of `udevadm info --query=property`.
+    # Probably not ideal and not the best fix but this allows to get around that issue.
+    # The idea is to make it retry multiple times before actually failing.
+    for i in range(10):
+        udev_info = udevadm_property(device.path)
+        partition_number = udev_info.get('ID_PART_ENTRY_NUMBER')
+        if partition_number:
+            break
+        time.sleep(0.2)
     if not partition_number:
         raise RuntimeError('Unable to detect the partition number for device: %s' % device.path)