]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: make diff-iterate in fast-diff mode sort and merge reported extents 45638/head
authorIlya Dryomov <idryomov@gmail.com>
Sun, 20 Mar 2022 11:10:52 +0000 (12:10 +0100)
committerIlya Dryomov <idryomov@gmail.com>
Fri, 25 Mar 2022 08:50:06 +0000 (09:50 +0100)
Various users, the most notable example being the QEMU driver, assume
that extents are reported in image offset order.

Fixes: https://tracker.ceph.com/issues/53885
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit 85e7075d5f021bd2d11024e6646d74a8a9f96e15)

src/librbd/api/DiffIterate.cc
src/test/librbd/test_librbd.cc

index b3812e1d5c39ec39d38dd1cc9b54fe754985fedc..042f5eafb3563270c1f209808d28bbf7ab5e9bb3 100644 (file)
@@ -300,12 +300,15 @@ int DiffIterate<I>::execute() {
                                &m_image_ctx.layout, off, read_len, 0,
                                object_extents, 0);
 
-      // get snap info for each object
+      // get diff info for each object and merge adjacent stripe units
+      // into an aggregate (this also sorts them)
+      io::SparseExtents aggregate_sparse_extents;
       for (auto& [object, extents] : object_extents) {
-        ldout(cct, 20) << "object " << object << dendl;
-
         const uint64_t object_no = extents.front().objectno;
         uint8_t diff_state = object_diff_state[object_no];
+        ldout(cct, 20) << "object " << object << ": diff_state="
+                       << (int)diff_state << dendl;
+
         if (diff_state == object_map::DIFF_STATE_HOLE &&
             from_snap_id == 0 && !parent_diff.empty()) {
           // no data in child object -- report parent diff instead
@@ -316,29 +319,36 @@ int DiffIterate<I>::execute() {
               o.intersection_of(parent_diff);
               ldout(cct, 20) << " reporting parent overlap " << o << dendl;
               for (auto e = o.begin(); e != o.end(); ++e) {
-                r = m_callback(e.get_start(), e.get_len(), true,
-                               m_callback_arg);
-                if (r < 0) {
-                  return r;
-                }
+                aggregate_sparse_extents.insert(e.get_start(), e.get_len(),
+                                                {io::SPARSE_EXTENT_STATE_DATA,
+                                                 e.get_len()});
               }
             }
           }
         } else if (diff_state == object_map::DIFF_STATE_HOLE_UPDATED ||
                    diff_state == object_map::DIFF_STATE_DATA_UPDATED) {
-          bool updated = (diff_state == object_map::DIFF_STATE_DATA_UPDATED);
+          auto state = (diff_state == object_map::DIFF_STATE_HOLE_UPDATED ?
+              io::SPARSE_EXTENT_STATE_ZEROED : io::SPARSE_EXTENT_STATE_DATA);
           for (auto& oe : extents) {
             for (auto& be : oe.buffer_extents) {
-              r = m_callback(off + be.first, be.second, updated,
-                             m_callback_arg);
-              if (r < 0) {
-                return r;
-              }
+              aggregate_sparse_extents.insert(off + be.first, be.second,
+                                              {state, be.second});
             }
           }
         }
       }
-    }  else {
+
+      for (const auto& se : aggregate_sparse_extents) {
+        ldout(cct, 20) << "off=" << se.get_off() << ", len=" << se.get_len()
+                       << ", state=" << se.get_val().state << dendl;
+        r = m_callback(se.get_off(), se.get_len(),
+                       se.get_val().state == io::SPARSE_EXTENT_STATE_DATA,
+                       m_callback_arg);
+        if (r < 0) {
+          return r;
+        }
+      }
+    } else {
       auto diff_object = new C_DiffObject<I>(m_image_ctx, diff_context, off,
                                              read_len);
       diff_object->send();
index 4bbc2ad869f6cd381783d0e358b0fd37d9e12fe6..c5b149d28579aeb698cf8a8d886d12f3f9fe0df8 100644 (file)
@@ -4787,6 +4787,77 @@ TYPED_TEST(DiffIterateTest, DiffIterateUnaligned)
   ioctx.close();
 }
 
+TYPED_TEST(DiffIterateTest, DiffIterateStriping)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_STRIPINGV2);
+
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx));
+
+  bool old_format;
+  uint64_t features;
+  ASSERT_EQ(0, get_features(&old_format, &features));
+  ASSERT_FALSE(old_format);
+
+  {
+    librbd::RBD rbd;
+    librbd::Image image;
+    int order = 22;
+    std::string name = this->get_temp_image_name();
+    ssize_t size = 24 << 20;
+
+    ASSERT_EQ(0, rbd.create3(ioctx, name.c_str(), size, features, &order,
+                             1 << 20, 3));
+    ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL));
+
+    ceph::bufferlist bl;
+    bl.append(std::string(size, '1'));
+    ASSERT_EQ(size, image.write(0, size, bl));
+
+    std::vector<diff_extent> extents;
+    ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object,
+                                     vector_iterate_cb, &extents));
+    ASSERT_EQ(2u, extents.size());
+    ASSERT_EQ(diff_extent(0, 12 << 20, true, 0), extents[0]);
+    ASSERT_EQ(diff_extent(12 << 20, 12 << 20, true, 0), extents[1]);
+    extents.clear();
+
+    ASSERT_EQ(0, image.snap_create("one"));
+    ASSERT_EQ(size, image.discard(0, size));
+
+    ASSERT_EQ(0, image.diff_iterate2("one", 0, size, true, this->whole_object,
+                                     vector_iterate_cb, &extents));
+    ASSERT_EQ(2u, extents.size());
+    ASSERT_EQ(diff_extent(0, 12 << 20, false, 0), extents[0]);
+    ASSERT_EQ(diff_extent(12 << 20, 12 << 20, false, 0), extents[1]);
+    extents.clear();
+
+    ASSERT_EQ(1 << 20, image.write(0, 1 << 20, bl));
+    ASSERT_EQ(2 << 20, image.write(2 << 20, 2 << 20, bl));
+    ASSERT_EQ(2 << 20, image.write(5 << 20, 2 << 20, bl));
+    ASSERT_EQ(2 << 20, image.write(8 << 20, 2 << 20, bl));
+    ASSERT_EQ(13 << 20, image.write(11 << 20, 13 << 20, bl));
+
+    ASSERT_EQ(0, image.diff_iterate2("one", 0, size, true, this->whole_object,
+                                     vector_iterate_cb, &extents));
+    ASSERT_EQ(10u, extents.size());
+    ASSERT_EQ(diff_extent(0, 1 << 20, true, 0), extents[0]);
+    ASSERT_EQ(diff_extent(1 << 20, 1 << 20, false, 0), extents[1]);
+    ASSERT_EQ(diff_extent(2 << 20, 2 << 20, true, 0), extents[2]);
+    ASSERT_EQ(diff_extent(4 << 20, 1 << 20, false, 0), extents[3]);
+    ASSERT_EQ(diff_extent(5 << 20, 2 << 20, true, 0), extents[4]);
+    ASSERT_EQ(diff_extent(7 << 20, 1 << 20, false, 0), extents[5]);
+    ASSERT_EQ(diff_extent(8 << 20, 2 << 20, true, 0), extents[6]);
+    ASSERT_EQ(diff_extent(10 << 20, 1 << 20, false, 0), extents[7]);
+    ASSERT_EQ(diff_extent(11 << 20, 1 << 20, true, 0), extents[8]);
+    ASSERT_EQ(diff_extent(12 << 20, 12 << 20, true, 0), extents[9]);
+
+    ASSERT_PASSED(this->validate_object_map, image);
+  }
+
+  ioctx.close();
+}
+
 TEST_F(TestLibRBD, ZeroLengthWrite)
 {
   rados_ioctx_t ioctx;