From: Ilya Dryomov Date: Sat, 6 Jan 2024 16:08:04 +0000 (+0100) Subject: librbd: try to preserve object map for diff-iterate in fast-diff mode X-Git-Tag: v18.2.4~277^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=ddd8ae7be42e95168a5aaef117ef5fb208540303;p=ceph.git librbd: try to preserve object map for diff-iterate in fast-diff mode As an optimization, try to ensure that the object map for the end version is preloaded through the acquisition of exclusive lock and as a consequence remains around until exclusive lock is released. If it's not around, DiffRequest would (re)load it on each call. Signed-off-by: Ilya Dryomov (cherry picked from commit 89b0d9e7b40a5f962094428e613315d3697d261f) --- diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 9a432c764d542..066651ba4c53b 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -148,6 +148,7 @@ namespace librbd { // encryption_format ceph::shared_mutex timestamp_lock; // protects (create/access/modify)_timestamp + // and internal diff_iterate_lock_timestamp ceph::mutex async_ops_lock; // protects async_ops and async_requests ceph::mutex copyup_list_lock; // protects copyup_waiting_list @@ -173,6 +174,7 @@ namespace librbd { utime_t create_timestamp; utime_t access_timestamp; utime_t modify_timestamp; + utime_t diff_iterate_lock_timestamp; file_layout_t layout; diff --git a/src/librbd/api/DiffIterate.cc b/src/librbd/api/DiffIterate.cc index b114c32fabad0..717110bd38a13 100644 --- a/src/librbd/api/DiffIterate.cc +++ b/src/librbd/api/DiffIterate.cc @@ -2,6 +2,7 @@ // vim: ts=8 sw=2 smarttab #include "librbd/api/DiffIterate.h" +#include "librbd/ExclusiveLock.h" #include "librbd/ImageCtx.h" #include "librbd/ImageState.h" #include "librbd/ObjectMap.h" @@ -30,6 +31,8 @@ namespace api { namespace { +constexpr uint32_t LOCK_INTERVAL_SECONDS = 5; + struct DiffContext { DiffIterate<>::Callback callback; void *callback_arg; @@ -149,6 +152,35 @@ private: } }; +template +bool should_try_acquire_lock(I* image_ctx) { + if (image_ctx->exclusive_lock == nullptr || + image_ctx->exclusive_lock->is_lock_owner()) { + return false; + } + if ((image_ctx->features & RBD_FEATURE_FAST_DIFF) == 0) { + return false; + } + + utime_t now = ceph_clock_now(); + utime_t cutoff = now - utime_t(LOCK_INTERVAL_SECONDS, 0); + + { + std::shared_lock timestamp_locker{image_ctx->timestamp_lock}; + if (image_ctx->diff_iterate_lock_timestamp > cutoff) { + return false; + } + } + + std::unique_lock timestamp_locker{image_ctx->timestamp_lock}; + if (image_ctx->diff_iterate_lock_timestamp > cutoff) { + return false; + } + + image_ctx->diff_iterate_lock_timestamp = now; + return true; +} + int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) { // This reads the existing extents in a parent from the beginning // of time. Since images are thin-provisioned, the extents will @@ -168,10 +200,14 @@ int DiffIterate::diff_iterate(I *ictx, uint64_t off, uint64_t len, bool include_parent, bool whole_object, int (*cb)(uint64_t, size_t, int, void *), - void *arg) -{ - ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off - << " len = " << len << dendl; + void *arg) { + ldout(ictx->cct, 10) << "from_snap_namespace=" << from_snap_namespace + << ", fromsnapname=" << (fromsnapname ?: "") + << ", off=" << off + << ", len=" << len + << ", include_parent=" << include_parent + << ", whole_object=" << whole_object + << dendl; if (!ictx->data_ctx.is_valid()) { return -ENODEV; @@ -198,11 +234,28 @@ int DiffIterate::diff_iterate(I *ictx, return r; } - ictx->image_lock.lock_shared(); - r = clip_io(ictx, off, &len, io::ImageArea::DATA); - ictx->image_lock.unlock_shared(); - if (r < 0) { - return r; + { + std::shared_lock owner_locker{ictx->owner_lock}; + std::shared_lock image_locker{ictx->image_lock}; + + r = clip_io(ictx, off, &len, io::ImageArea::DATA); + if (r < 0) { + return r; + } + + // optimization: hang onto the only object map needed to run fast + // diff against the beginning of time -- it's loaded when exclusive + // lock is acquired + // acquire exclusive lock only if not busy (i.e. don't request), + // throttle acquisition attempts and ignore errors + if (fromsnapname == nullptr && whole_object && + should_try_acquire_lock(ictx)) { + C_SaferCond lock_ctx; + ictx->exclusive_lock->try_acquire_lock(&lock_ctx); + image_locker.unlock(); + owner_locker.unlock(); + lock_ctx.wait(); + } } DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len, diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc index 3ff284c3d47ea..121616d373da9 100644 --- a/src/test/librbd/test_librbd.cc +++ b/src/test/librbd/test_librbd.cc @@ -8205,6 +8205,83 @@ TYPED_TEST(DiffIterateTest, DiffIterateUnaligned) ioctx.close(); } +TYPED_TEST(DiffIterateTest, DiffIterateTryAcquireLock) +{ + REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK); + + librados::IoCtx ioctx; + ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx)); + + { + librbd::RBD rbd; + int order = 22; + std::string name = this->get_temp_image_name(); + ssize_t size = 20 << 20; + + uint64_t object_size = 0; + if (this->whole_object) { + object_size = 1 << order; + } + + ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order)); + + librbd::Image image1; + ASSERT_EQ(0, rbd.open(ioctx, image1, name.c_str(), NULL)); + + ceph::bufferlist bl; + bl.append(std::string(256, '1')); + ASSERT_EQ(256, image1.write(0, 256, bl)); + ASSERT_EQ(0, image1.flush()); + + bool lock_owner; + ASSERT_EQ(0, image1.is_exclusive_lock_owner(&lock_owner)); + ASSERT_TRUE(lock_owner); + + librbd::Image image2; + ASSERT_EQ(0, rbd.open(ioctx, image2, name.c_str(), NULL)); + + std::vector extents; + ASSERT_EQ(0, image2.diff_iterate2(NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + extents.clear(); + + ASSERT_EQ(0, image2.is_exclusive_lock_owner(&lock_owner)); + ASSERT_FALSE(lock_owner); + + ASSERT_EQ(0, image1.close()); + ASSERT_EQ(0, image2.diff_iterate2(NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + extents.clear(); + + ASSERT_EQ(0, image2.is_exclusive_lock_owner(&lock_owner)); + ASSERT_FALSE(lock_owner); + + sleep(5); + ASSERT_EQ(0, image2.diff_iterate2(NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + extents.clear(); + + ASSERT_EQ(0, image2.is_exclusive_lock_owner(&lock_owner)); + if (this->whole_object && + (is_feature_enabled(RBD_FEATURE_OBJECT_MAP) || + is_feature_enabled(RBD_FEATURE_FAST_DIFF))) { + ASSERT_TRUE(lock_owner); + } else { + ASSERT_FALSE(lock_owner); + } + + ASSERT_PASSED(this->validate_object_map, image2); + } + + ioctx.close(); +} + TYPED_TEST(DiffIterateTest, DiffIterateStriping) { REQUIRE_FEATURE(RBD_FEATURE_STRIPINGV2);