From e5921ef4a89f497a0bff6510fce0bb5c242d6172 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 7 Oct 2019 15:32:39 +0200 Subject: [PATCH] krbd: retry on transient errors from udev_enumerate_scan_devices() udev_enumerate_scan_devices() doesn't handle disappearing devices well. If called while some devices are being removed, it sometimes propagates ENOENT and ENODEV errors encountered while operating on directory entries in /sys that no longer exist. Some of these errors are suppressed, but this isn't reliable and varies across versions. In particular, systemd 239 suppresses ENODEV from sd_device_new_from_syspath() but doesn't suppress ENODEV from sd_device_get_devnum(). In systemd 243 the call to sd_device_get_devnum() has been moved, but it still leaks ENOENT from sd_device_get_is_initialized() (referring to the body of the FOREACH_DIRENT_ALL loop in enumerator_scan_dir_and_add_devices()). Assume that all ENOENT and ENODEV errors are transient and retry the call to udev_enumerate_scan_devices(). Don't limit the number of retries, but log each retry. 
Fixes: https://tracker.ceph.com/issues/41036 Signed-off-by: Ilya Dryomov --- src/krbd.cc | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/krbd.cc b/src/krbd.cc index 10034691726..3a548233b0f 100644 --- a/src/krbd.cc +++ b/src/krbd.cc @@ -477,6 +477,7 @@ static int devno_to_krbd_id(struct udev *udev, dev_t devno, string *pid) struct udev_device *dev; int r; +retry: enm = udev_enumerate_new(udev); if (!enm) return -ENOMEM; @@ -498,8 +499,14 @@ static int devno_to_krbd_id(struct udev *udev, dev_t devno, string *pid) } r = udev_enumerate_scan_devices(enm); - if (r < 0) + if (r < 0) { + if (r == -ENOENT || r == -ENODEV) { + std::cerr << "rbd: udev enumerate failed, retrying" << std::endl; + udev_enumerate_unref(enm); + goto retry; + } goto out_enm; + } l = udev_enumerate_get_list_entry(enm); if (!l) { @@ -530,6 +537,7 @@ static int __enumerate_devices(struct udev *udev, const krbd_spec& spec, struct udev_enumerate *enm; int r; +retry: enm = udev_enumerate_new(udev); if (!enm) return -ENOMEM; @@ -565,8 +573,14 @@ static int __enumerate_devices(struct udev *udev, const krbd_spec& spec, goto out_enm; r = udev_enumerate_scan_devices(enm); - if (r < 0) + if (r < 0) { + if (r == -ENOENT || r == -ENODEV) { + std::cerr << "rbd: udev enumerate failed, retrying" << std::endl; + udev_enumerate_unref(enm); + goto retry; + } goto out_enm; + } *penm = enm; return 0; @@ -866,6 +880,7 @@ static int do_dump(struct udev *udev, Formatter *f, TextTable *tbl) bool have_output = false; int r; +retry: enm = udev_enumerate_new(udev); if (!enm) return -ENOMEM; @@ -875,8 +890,14 @@ static int do_dump(struct udev *udev, Formatter *f, TextTable *tbl) goto out_enm; r = udev_enumerate_scan_devices(enm); - if (r < 0) + if (r < 0) { + if (r == -ENOENT || r == -ENODEV) { + std::cerr << "rbd: udev enumerate failed, retrying" << std::endl; + udev_enumerate_unref(enm); + goto retry; + } goto out_enm; + } udev_list_entry_foreach(l, 
udev_enumerate_get_list_entry(enm)) { struct udev_device *dev; -- 2.39.5