]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: try to preserve object map for diff-iterate in fast-diff mode
authorIlya Dryomov <idryomov@gmail.com>
Sat, 6 Jan 2024 16:08:04 +0000 (17:08 +0100)
committerIlya Dryomov <idryomov@gmail.com>
Sat, 20 Jan 2024 15:06:54 +0000 (16:06 +0100)
As an optimization, try to ensure that the object map for the end
version is preloaded through the acquisition of exclusive lock and
as a consequence remains around until exclusive lock is released.
If it's not around, DiffRequest would (re)load it on each call.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
src/librbd/ImageCtx.h
src/librbd/api/DiffIterate.cc
src/test/librbd/test_librbd.cc

index 0f6f742fcbd9da2f1348a15ba2b03a9dc2470638..197fea305b024695c82a80041d84985ded483f0f 100644 (file)
@@ -148,6 +148,7 @@ namespace librbd {
                        // encryption_format
 
     ceph::shared_mutex timestamp_lock; // protects (create/access/modify)_timestamp
+                                       // and internal diff_iterate_lock_timestamp
     ceph::mutex async_ops_lock; // protects async_ops and async_requests
     ceph::mutex copyup_list_lock; // protects copyup_waiting_list
 
@@ -173,6 +174,7 @@ namespace librbd {
     utime_t create_timestamp;
     utime_t access_timestamp;
     utime_t modify_timestamp;
+    utime_t diff_iterate_lock_timestamp;
 
     file_layout_t layout;
 
index b114c32fabad044149b793ef38c50134a745c840..717110bd38a1340c9876905d67750b8fb0933f64 100644 (file)
@@ -2,6 +2,7 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "librbd/api/DiffIterate.h"
+#include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageState.h"
 #include "librbd/ObjectMap.h"
@@ -30,6 +31,8 @@ namespace api {
 
 namespace {
 
+constexpr uint32_t LOCK_INTERVAL_SECONDS = 5;
+
 struct DiffContext {
   DiffIterate<>::Callback callback;
   void *callback_arg;
@@ -149,6 +152,35 @@ private:
   }
 };
 
+template <typename I>
+bool should_try_acquire_lock(I* image_ctx) {
+  if (image_ctx->exclusive_lock == nullptr ||
+      image_ctx->exclusive_lock->is_lock_owner()) {
+    return false;
+  }
+  if ((image_ctx->features & RBD_FEATURE_FAST_DIFF) == 0) {
+    return false;
+  }
+
+  utime_t now = ceph_clock_now();
+  utime_t cutoff = now - utime_t(LOCK_INTERVAL_SECONDS, 0);
+
+  {
+    std::shared_lock timestamp_locker{image_ctx->timestamp_lock};
+    if (image_ctx->diff_iterate_lock_timestamp > cutoff) {
+      return false;
+    }
+  }
+
+  std::unique_lock timestamp_locker{image_ctx->timestamp_lock};
+  if (image_ctx->diff_iterate_lock_timestamp > cutoff) {
+    return false;
+  }
+
+  image_ctx->diff_iterate_lock_timestamp = now;
+  return true;
+}
+
 int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) {
   // This reads the existing extents in a parent from the beginning
   // of time.  Since images are thin-provisioned, the extents will
@@ -168,10 +200,14 @@ int DiffIterate<I>::diff_iterate(I *ictx,
                                  uint64_t off, uint64_t len,
                                  bool include_parent, bool whole_object,
                                  int (*cb)(uint64_t, size_t, int, void *),
-                                 void *arg)
-{
-  ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
-                << " len = " << len << dendl;
+                                 void *arg) {
+  ldout(ictx->cct, 10) << "from_snap_namespace=" << from_snap_namespace
+                       << ", fromsnapname=" << (fromsnapname ?: "")
+                       << ", off=" << off
+                       << ", len=" << len
+                       << ", include_parent=" << include_parent
+                       << ", whole_object=" << whole_object
+                       << dendl;
 
   if (!ictx->data_ctx.is_valid()) {
     return -ENODEV;
@@ -198,11 +234,28 @@ int DiffIterate<I>::diff_iterate(I *ictx,
     return r;
   }
 
-  ictx->image_lock.lock_shared();
-  r = clip_io(ictx, off, &len, io::ImageArea::DATA);
-  ictx->image_lock.unlock_shared();
-  if (r < 0) {
-    return r;
+  {
+    std::shared_lock owner_locker{ictx->owner_lock};
+    std::shared_lock image_locker{ictx->image_lock};
+
+    r = clip_io(ictx, off, &len, io::ImageArea::DATA);
+    if (r < 0) {
+      return r;
+    }
+
+    // optimization: hang onto the only object map needed to run fast
+    // diff against the beginning of time -- it's loaded when exclusive
+    // lock is acquired
+    // acquire exclusive lock only if not busy (i.e. don't request),
+    // throttle acquisition attempts and ignore errors
+    if (fromsnapname == nullptr && whole_object &&
+        should_try_acquire_lock(ictx)) {
+      C_SaferCond lock_ctx;
+      ictx->exclusive_lock->try_acquire_lock(&lock_ctx);
+      image_locker.unlock();
+      owner_locker.unlock();
+      lock_ctx.wait();
+    }
   }
 
   DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len,
index bd73ecb931330936e5274aaf44b8401901896df5..2b955779534884ce1a915e152a1199cd9e5f4bfa 100644 (file)
@@ -8205,6 +8205,83 @@ TYPED_TEST(DiffIterateTest, DiffIterateUnaligned)
   ioctx.close();
 }
 
+TYPED_TEST(DiffIterateTest, DiffIterateTryAcquireLock)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK);
+
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx));
+
+  {
+    librbd::RBD rbd;
+    int order = 22;
+    std::string name = this->get_temp_image_name();
+    ssize_t size = 20 << 20;
+
+    uint64_t object_size = 0;
+    if (this->whole_object) {
+      object_size = 1 << order;
+    }
+
+    ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order));
+
+    librbd::Image image1;
+    ASSERT_EQ(0, rbd.open(ioctx, image1, name.c_str(), NULL));
+
+    ceph::bufferlist bl;
+    bl.append(std::string(256, '1'));
+    ASSERT_EQ(256, image1.write(0, 256, bl));
+    ASSERT_EQ(0, image1.flush());
+
+    bool lock_owner;
+    ASSERT_EQ(0, image1.is_exclusive_lock_owner(&lock_owner));
+    ASSERT_TRUE(lock_owner);
+
+    librbd::Image image2;
+    ASSERT_EQ(0, rbd.open(ioctx, image2, name.c_str(), NULL));
+
+    std::vector<diff_extent> extents;
+    ASSERT_EQ(0, image2.diff_iterate2(NULL, 0, size, true, this->whole_object,
+                                      vector_iterate_cb, &extents));
+    ASSERT_EQ(1u, extents.size());
+    ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+    extents.clear();
+
+    ASSERT_EQ(0, image2.is_exclusive_lock_owner(&lock_owner));
+    ASSERT_FALSE(lock_owner);
+
+    ASSERT_EQ(0, image1.close());
+    ASSERT_EQ(0, image2.diff_iterate2(NULL, 0, size, true, this->whole_object,
+                                      vector_iterate_cb, &extents));
+    ASSERT_EQ(1u, extents.size());
+    ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+    extents.clear();
+
+    ASSERT_EQ(0, image2.is_exclusive_lock_owner(&lock_owner));
+    ASSERT_FALSE(lock_owner);
+
+    sleep(5);
+    ASSERT_EQ(0, image2.diff_iterate2(NULL, 0, size, true, this->whole_object,
+                                      vector_iterate_cb, &extents));
+    ASSERT_EQ(1u, extents.size());
+    ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+    extents.clear();
+
+    ASSERT_EQ(0, image2.is_exclusive_lock_owner(&lock_owner));
+    if (this->whole_object &&
+        (is_feature_enabled(RBD_FEATURE_OBJECT_MAP) ||
+         is_feature_enabled(RBD_FEATURE_FAST_DIFF))) {
+      ASSERT_TRUE(lock_owner);
+    } else {
+      ASSERT_FALSE(lock_owner);
+    }
+
+    ASSERT_PASSED(this->validate_object_map, image2);
+  }
+
+  ioctx.close();
+}
+
 TYPED_TEST(DiffIterateTest, DiffIterateStriping)
 {
   REQUIRE_FEATURE(RBD_FEATURE_STRIPINGV2);