From d501c7b33a6418c525cde62dedb8348d718ff2b4 Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Fri, 13 Apr 2018 12:32:22 +0200 Subject: [PATCH] ceph-disk: get_partition_dev() should fail until get_dev_path(partname) is populated get_partition_dev() is a function called to get a block device path. The @retry decorator implements a timeout mechanism because there is a race between the partition creation command and the actual presence of the partition. This function has a semantic issue. The function is asked whether /dev/sda6 exists but checks whether /sys/block/sda/sda6 exists. Once /sys/block/sda/sda6 is populated within the timeout, the function returns "/dev/sda6". But this doesn't check whether "/dev/sda6" really exists, so a function using this value falls into a race condition and fails too. We do hit this case in the CI, where timings vary widely depending on the workload and the VMs' performance, and sometimes even nested VMs. This patch double-checks, before returning, that the /dev/ entry actually exists once the BLOCKDIR (/sys) is populated. If not, the @retry is triggered to allow some time for this temporary situation to resolve. If this state is permanent, an explicit error message is reported, like: /dev/sdb1 is not populated while /sys/block/sdb has it Signed-off-by: Erwan Velu --- src/ceph-disk/ceph_disk/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ceph-disk/ceph_disk/main.py b/src/ceph-disk/ceph_disk/main.py index e9c1a514806..b01573bd138 100644 --- a/src/ceph-disk/ceph_disk/main.py +++ b/src/ceph-disk/ceph_disk/main.py @@ -781,6 +781,10 @@ def get_partition_dev(dev, pnum): if not partname or len(f) < len(partname): partname = f if partname: + # BLOCKDIR is populated but is it the case of get_dev_path(partname) ? 
+ if not os.path.exists(get_dev_path(partname)): + raise Error('%s is not populated while %s has it' % + (get_dev_path(partname), sys_entry)) return get_dev_path(partname) else: raise Error('partition %d for %s does not appear to exist%s' % -- 2.39.5