From: Ilya Dryomov Date: Mon, 15 Aug 2022 07:44:09 +0000 (+0200) Subject: librbd: discard cache state if compare-and-write writes to disk X-Git-Tag: v18.0.0~256^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ec0fc71cdb579e31ff4e599c43f3918322e70ddf;p=ceph.git librbd: discard cache state if compare-and-write writes to disk Otherwise this is obviously broken with rbd_cache_policy = writeback or rbd_cache_policy = writethrough as the write is done on the OSD, beneath ObjectCacher. This went unnoticed because the original (and so far the only known) compare-and-write user, tcmu-runner rbd driver, disables the cache altogether. Signed-off-by: Ilya Dryomov --- diff --git a/src/librbd/cache/ObjectCacherObjectDispatch.cc b/src/librbd/cache/ObjectCacherObjectDispatch.cc index 81d5a71885b0..822a053e1431 100644 --- a/src/librbd/cache/ObjectCacherObjectDispatch.cc +++ b/src/librbd/cache/ObjectCacherObjectDispatch.cc @@ -389,18 +389,37 @@ bool ObjectCacherObjectDispatch::compare_and_write( // pass-through the compare-and-write request since it's not a supported // operation of the ObjectCacher + ObjectExtents object_extents; + object_extents.emplace_back(data_object_name(m_image_ctx, object_no), + object_no, object_off, cmp_data.length(), 0); + + // if compare succeeds, discard the cache state after changes are + // committed to disk + auto ctx = *on_finish; + *on_finish = new LambdaContext( + [this, object_extents, ctx](int r) { + // ObjectCacher doesn't provide a way to reliably invalidate + // extents: in case of a racing read (if the bh is in RX state), + // release_set() just returns while discard_set() populates the + // extent with zeroes. Neither is OK but the latter is better + // because it is at least deterministic... + if (r == 0) { + m_cache_lock.lock(); + m_object_cacher->discard_set(m_object_set, object_extents); + m_cache_lock.unlock(); + } + + ctx->complete(r); + }); + // ensure we aren't holding the cache lock post-flush on_dispatched = util::create_async_context_callback(*m_image_ctx, on_dispatched); - // flush any pending writes from the cache + // flush any pending writes from the cache before compare ZTracer::Trace trace(parent_trace); *dispatch_result = io::DISPATCH_RESULT_CONTINUE; - ObjectExtents object_extents; - object_extents.emplace_back(data_object_name(m_image_ctx, object_no), - object_no, object_off, cmp_data.length(), 0); - std::lock_guard cache_locker{m_cache_lock}; m_object_cacher->flush_set(m_object_set, object_extents, &trace, on_dispatched);