From e4657cb2a2b087af0eae87ef32808b503e0e2051 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 29 Jan 2025 12:56:34 +0100 Subject: [PATCH] librbd: stop filtering async request error codes The roots of this go back to 2015 when snap create was changed to filter EEXIST in commit 63f6c9bac9a4 ("librbd: fixed snap create race conditions") and flatten respectively EINVAL in commit ef7e210c3f74 ("librbd: better handling for duplicate flatten requests"). From there this pattern made it to most other operations that can be proxied including "rbd migration execute". The motivation was to suppress generation of an "expected" error in response to a duplicate async request notification for the operation. However, doing this at the top of the handler (right before returning to the caller) and for an error as generic as EINVAL is super fragile. It's trivial for an error that is being filtered to sneak in with a lower level change completely unnoticed. For example, live migration recently added NBD stream which is implemented on top of libnbd and it turns out that some libnbd APIs return EINVAL on various occasions when the NBD endpoint disappears and an error like ENOTCONN would make more sense. If this occurs during "rbd migration execute" operation, the rest of librbd never learns that migration was disrupted and the image is transitioned to MIGRATION_STATE_EXECUTED, thus handing a partially imported (read: corrupted) image to the user. Luckily, with commits 07fbc4b71df4 ("librbd: track complete async operation requests") and 96bc20445afb ("librbd: track complete async operation return code"), the scenario which originally prompted error code filtering isn't an issue anymore. Despite a few shortcomings (e.g. when an async request notification is acked with result 0, it's impossible to tell whether a) a new operation was kicked off, b) there is an operation that is still in progress or c) it's for an operation that completed earlier but hasn't "expired" yet), even just commit 07fbc4b71df4 by itself prevents a duplicate notification from kicking off a second operation that could generate an error for something that actually succeeded. With that in mind, eradicate error code filtering from Operations class. Fixes: https://tracker.ceph.com/issues/58185 Signed-off-by: Ilya Dryomov --- src/librbd/Operations.cc | 68 +++++++++------------------------------- 1 file changed, 15 insertions(+), 53 deletions(-) diff --git a/src/librbd/Operations.cc b/src/librbd/Operations.cc index ad6e5bcf6a08a..e3cf3df3d95e1 100644 --- a/src/librbd/Operations.cc +++ b/src/librbd/Operations.cc @@ -185,7 +185,6 @@ struct C_InvokeAsyncRequest : public Context { bool permit_snapshot; boost::function local; boost::function remote; - std::set filter_error_codes; Context *on_finish; bool request_lock = false; @@ -194,11 +193,10 @@ struct C_InvokeAsyncRequest : public Context { bool permit_snapshot, const boost::function& local, const boost::function& remote, - const std::set &filter_error_codes, Context *on_finish) : image_ctx(image_ctx), operation(operation), request_type(request_type), permit_snapshot(permit_snapshot), local(local), remote(remote), - filter_error_codes(filter_error_codes), on_finish(on_finish) { + on_finish(on_finish) { } void send() { @@ -382,9 +380,6 @@ struct C_InvokeAsyncRequest : public Context { } void finish(int r) override { - if (filter_error_codes.count(r) != 0) { - r = 0; - } on_finish->complete(r); } }; @@ -503,11 +498,8 @@ int Operations::flatten(ProgressContext &prog_ctx) { m_image_ctx.image_watcher, request_id, boost::ref(prog_ctx), _1)); - if (r < 0 && r != -EINVAL) { - return r; - } ldout(cct, 20) << "flatten finished" << dendl; - return 0; + return r; } template @@ -582,10 +574,7 @@ int Operations::rebuild_object_map(ProgressContext &prog_ctx) { boost::ref(prog_ctx), _1)); ldout(cct, 10) << "rebuild object map finished" << dendl; - if (r < 0) { - return r; - } - return 0; + return r; } template @@ -686,12 +675,9 @@ int Operations::rename(const char *dstname) { boost::bind(&ImageWatcher::notify_rename, m_image_ctx.image_watcher, request_id, dstname, _1)); - if (r < 0 && r != -EEXIST) { - return r; - } m_image_ctx.set_image_name(dstname); - return 0; + return r; } template @@ -874,7 +860,7 @@ void Operations::snap_create(const cls::rbd::SnapshotNamespace &snap_namespac boost::bind(&ImageWatcher::notify_snap_create, m_image_ctx.image_watcher, request_id, snap_namespace, snap_name, flags, boost::ref(prog_ctx), _1), - {-EEXIST}, on_finish); + on_finish); req->send(); } @@ -1077,7 +1063,7 @@ void Operations::snap_remove(const cls::rbd::SnapshotNamespace& snap_namespac boost::bind(&ImageWatcher::notify_snap_remove, m_image_ctx.image_watcher, request_id, snap_namespace, snap_name, _1), - {-ENOENT}, on_finish); + on_finish); req->send(); } else { std::shared_lock owner_lock{m_image_ctx.owner_lock}; @@ -1173,9 +1159,6 @@ int Operations::snap_rename(const char *srcname, const char *dstname) { boost::bind(&ImageWatcher::notify_snap_rename, m_image_ctx.image_watcher, request_id, snap_id, dstname, _1)); - if (r < 0 && r != -EEXIST) { - return r; - } } else { C_SaferCond cond_ctx; { @@ -1184,13 +1167,10 @@ int Operations::snap_rename(const char *srcname, const char *dstname) { } r = cond_ctx.wait(); - if (r < 0) { - return r; - } } m_image_ctx.perfcounter->inc(l_librbd_snap_rename); - return 0; + return r; } template @@ -1275,9 +1255,6 @@ int Operations::snap_protect(const cls::rbd::SnapshotNamespace& snap_namespac boost::bind(&ImageWatcher::notify_snap_protect, m_image_ctx.image_watcher, request_id, snap_namespace, snap_name, _1)); - if (r < 0 && r != -EBUSY) { - return r; - } } else { C_SaferCond cond_ctx; { @@ -1286,11 +1263,9 @@ int Operations::snap_protect(const cls::rbd::SnapshotNamespace& snap_namespac } r = cond_ctx.wait(); - if (r < 0) { - return r; - } } - return 0; + + return r; } template @@ -1373,9 +1348,6 @@ int Operations::snap_unprotect(const cls::rbd::SnapshotNamespace& snap_namesp boost::bind(&ImageWatcher::notify_snap_unprotect, m_image_ctx.image_watcher, request_id, snap_namespace, snap_name, _1)); - if (r < 0 && r != -EINVAL) { - return r; - } } else { C_SaferCond cond_ctx; { @@ -1384,11 +1356,9 @@ int Operations::snap_unprotect(const cls::rbd::SnapshotNamespace& snap_namesp } r = cond_ctx.wait(); - if (r < 0) { - return r; - } } - return 0; + + return r; } template @@ -1709,9 +1679,6 @@ int Operations::metadata_remove(const std::string &key) { boost::bind(&ImageWatcher::notify_metadata_remove, m_image_ctx.image_watcher, request_id, key, _1)); - if (r == -ENOENT) { - r = 0; - } std::string config_key; if (util::is_metadata_config_override(key, &config_key) && r >= 0) { @@ -1775,11 +1742,8 @@ int Operations::migrate(ProgressContext &prog_ctx) { m_image_ctx.image_watcher, request_id, boost::ref(prog_ctx), _1)); - if (r < 0 && r != -EINVAL) { - return r; - } ldout(cct, 20) << "migrate finished" << dendl; - return 0; + return r; } template @@ -1842,11 +1806,9 @@ int Operations::sparsify(size_t sparse_size, ProgressContext &prog_ctx) { m_image_ctx.image_watcher, request_id, sparse_size, boost::ref(prog_ctx), _1)); - if (r < 0 && r != -EINVAL) { - return r; - } + ldout(cct, 20) << "resparsify finished" << dendl; - return 0; + return r; } template @@ -1934,7 +1896,7 @@ int Operations::invoke_async_request( permit_snapshot, local_request, remote_request, - {}, &ctx); + &ctx); req->send(); return ctx.wait(); } -- 2.39.5