git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: avoid losing sparseness in read_parent()
author Ilya Dryomov <idryomov@gmail.com>
Thu, 15 Jan 2026 12:56:13 +0000 (13:56 +0100)
committer Ilya Dryomov <idryomov@gmail.com>
Thu, 16 Apr 2026 08:15:09 +0000 (10:15 +0200)
When read_parent() constructs a read for image_ctx->parent, it employs
a thick bufferlist (either re-using the bufferlist on the object extent
or creating a temporary one inside of C_ObjectReadMergedExtents).  This
forgoes any sparseness: even if the result obtained by ObjectRequest is
sparse, it's thickened by ReadResult's handler for Bufferlist type.

This behavior is very old and hasn't been a problem for regular clones
because the public API returns a thick bufferlist in the case of C++ or
equivalent char* buf/struct iovec iov[] buffers in the case of C anyway.
ObjectCacher isn't sparse-aware, but it's also not used for caching reads
by default, and reading from parent for the purposes of a copyup is done
in CopyupRequest in a way that preserves sparseness.  However, when it
comes to migration, source image reads go through read_parent() and the
destination image gets thickened as an inadvertent side effect.

Fix this by introducing a new ChildObject type for ReadResult whose
handler would plant the result obtained by parent's ObjectRequest into
child's ObjectRequest, as if read_parent() wasn't even called.

Fixes: https://tracker.ceph.com/issues/73831
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
src/librbd/io/ReadResult.cc
src/librbd/io/ReadResult.h
src/librbd/io/Utils.cc
src/test/librbd/test_internal.cc

index 54d57e7af848bb22156a27bf5bef3374a131b680..fc1d9bf361f81fb0cacc34d170ab7622e8598129 100644 (file)
@@ -33,6 +33,10 @@ struct ReadResult::SetImageExtentsVisitor {
     sbl.image_extents = image_extents;
   }
 
+  void operator()(ChildObject &child_object) const {
+    child_object.overlap_bytes = util::get_extents_length(image_extents);
+  }
+
   template <typename T>
   void operator()(T &t) const {
   }
@@ -137,6 +141,71 @@ struct ReadResult::AssembleResultVisitor {
                    << " bytes to bl "
                    << reinterpret_cast<void*>(sparse_bufferlist.bl) << dendl;
   }
+
+  void operator()(ChildObject &child_object) const {
+    bufferlist bl;
+    ExtentMap buffer_extent_map;
+    uint64_t buffer_extents_length = destriper.assemble_result(
+      cct, &buffer_extent_map, &bl);
+
+    ldout(cct, 20) << "buffer_extent_map=" << buffer_extent_map << dendl;
+
+    // buffer_extent_map is logically addressed by buffer extents not
+    // image or object extents. Translate buffer offsets (always 0-based)
+    // into object offsets since the buffer is tied to an object read
+    // (in child image, see read_parent()).
+    uint64_t child_buffer_offset = 0;
+    auto bem_it = buffer_extent_map.begin();
+    for (auto& read_extent : *child_object.read_extents) {
+      read_extent.bl.clear();
+      read_extent.extent_map.clear();
+
+      bool found_buffer_extent = false;
+      while (bem_it != buffer_extent_map.end()) {
+        auto [buffer_extent_offset, buffer_extent_length] = *bem_it;
+
+        if (child_buffer_offset + read_extent.length <= buffer_extent_offset) {
+          // no more buffer extents for the current object extent,
+          // current buffer extent belongs to the next object extent
+          break;
+        }
+
+        // current buffer extent should be within the current object extent
+        ceph_assert(child_buffer_offset <= buffer_extent_offset &&
+                    child_buffer_offset + read_extent.length >=
+                      buffer_extent_offset + buffer_extent_length);
+        found_buffer_extent = true;
+
+        uint64_t object_extent_offset =
+          read_extent.offset + (buffer_extent_offset - child_buffer_offset);
+        ldout(cct, 20) << "mapping buffer extent " << buffer_extent_offset
+                       << "~" << buffer_extent_length << " to object extent "
+                       << object_extent_offset << "~" << buffer_extent_length
+                       << " for " << read_extent.offset << "~"
+                       << read_extent.length << dendl;
+        bl.splice(0, buffer_extent_length, &read_extent.bl);
+        read_extent.extent_map.emplace_back(object_extent_offset,
+                                            buffer_extent_length);
+        ++bem_it;
+      }
+
+      // skip any object extent that is not included in the results
+      if (!found_buffer_extent) {
+        ldout(cct, 20) << "no buffer extents for object extent "
+                       << read_extent.offset << "~" << read_extent.length
+                       << dendl;
+      }
+
+      child_buffer_offset += read_extent.length;
+    }
+    ceph_assert(bl.length() == 0);
+    ceph_assert(child_buffer_offset >= buffer_extents_length);
+    ceph_assert(child_object.overlap_bytes == buffer_extents_length);
+    ceph_assert(bem_it == buffer_extent_map.end());
+
+    ldout(cct, 20) << "planted result in " << *child_object.read_extents
+                   << dendl;
+  }
 };
 
 ReadResult::C_ImageReadRequest::C_ImageReadRequest(
@@ -244,6 +313,10 @@ ReadResult::ReadResult(Extents* extent_map, ceph::bufferlist* bl)
   : m_buffer(SparseBufferlist(extent_map, bl)) {
 }
 
+ReadResult::ReadResult(ReadExtents* read_extents)
+  : m_buffer(ChildObject(read_extents)) {
+}
+
 void ReadResult::set_image_extents(const Extents& image_extents) {
   std::visit(SetImageExtentsVisitor(image_extents), m_buffer);
 }
index b0f78c285491e7727ab764e4ec1da676f161faa7..5c2f5c71629ecee3677d5ee5ccbb1860df6d9983 100644 (file)
@@ -65,6 +65,7 @@ public:
   ReadResult(const struct iovec *iov, int iov_count);
   ReadResult(ceph::bufferlist *bl);
   ReadResult(Extents* extent_map, ceph::bufferlist* bl);
+  ReadResult(ReadExtents* read_extents);
 
   void set_image_extents(const Extents& image_extents);
 
@@ -106,11 +107,21 @@ private:
     }
   };
 
+  struct ChildObject {
+    ReadExtents* read_extents;
+    uint64_t overlap_bytes = 0;
+
+    ChildObject(ReadExtents* read_extents)
+      : read_extents(read_extents) {
+    }
+  };
+
   typedef std::variant<std::monostate,
                       Linear,
                       Vector,
                       Bufferlist,
-                      SparseBufferlist> Buffer;
+                      SparseBufferlist,
+                      ChildObject> Buffer;
   struct SetImageExtentsVisitor;
   struct AssembleResultVisitor;
 
index 0a973a68626f77d1cbcfd2cd776f1128f7e2663f..0799270b09033952b088d46d91c289cf24eb50d7 100644 (file)
@@ -119,16 +119,6 @@ void read_parent(I *image_ctx, uint64_t object_no, ReadExtents* read_extents,
 
   ldout(cct, 20) << dendl;
 
-  ceph::bufferlist* parent_read_bl;
-  if (read_extents->size() > 1) {
-    auto parent_comp = new ReadResult::C_ObjectReadMergedExtents(
-        cct, read_extents, on_finish);
-    parent_read_bl = &parent_comp->bl;
-    on_finish = parent_comp;
-  } else {
-    parent_read_bl = &read_extents->front().bl;
-  }
-
   auto comp = AioCompletion::create_and_start(on_finish, image_ctx->parent,
                                               AIO_TYPE_READ);
   ldout(cct, 20) << "completion=" << comp
@@ -136,7 +126,7 @@ void read_parent(I *image_ctx, uint64_t object_no, ReadExtents* read_extents,
                  << " area=" << area << dendl;
   auto req = io::ImageDispatchSpec::create_read(
     *image_ctx->parent, io::IMAGE_DISPATCH_LAYER_INTERNAL_START, comp,
-    std::move(parent_extents), area, ReadResult{parent_read_bl},
+    std::move(parent_extents), area, ReadResult{read_extents},
     image_ctx->parent->get_data_io_context(), 0, 0, trace);
   req->send();
 }
index 0df558c4d8dd6a74868631714181df8d98fa70f1..4c4434d213984a189e8631a1f1157db0fca5cd10 100644 (file)
@@ -1110,6 +1110,464 @@ TEST_F(TestInternal, DiscardCopyup)
   }
 }
 
+TEST_F(TestInternal, SparseReadParent)
+{
+  // https://tracker.ceph.com/issues/72727
+  SKIP_IF_CRIMSON();
+  REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
+
+  // all reads should go to the OSDs
+  auto cct = reinterpret_cast<CephContext*>(_rados.cct());
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_cache") ||
+          cct->_conf.get_val<std::string>("rbd_cache_policy") == "writearound");
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_parent_cache_enabled"));
+  REQUIRE(!is_rbd_pwl_enabled(cct));
+
+  // clone should remain empty for the entire duration of the test
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_clone_copy_on_read"));
+
+  uint64_t features;
+  ASSERT_TRUE(::get_features(&features));
+
+  std::string parent_name = get_temp_image_name();
+  int order = 15;
+  ASSERT_EQ(0, m_rbd.create2(m_ioctx, parent_name.c_str(), 4096 * 16, features,
+                             &order));
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(parent_name, &ictx));
+  ASSERT_EQ(0, ictx->operations->metadata_set(
+      "conf_rbd_sparse_read_threshold_bytes", "4096"));
+
+  bool sparse_read_supported = is_sparse_read_supported(
+      ictx->data_ctx, ictx->get_object_name(10));
+
+  bufferlist bl1;
+  bl1.append(std::string(4096, '1'));
+  ASSERT_EQ(4096, api::Io<>::write(*ictx, 4096 * 10, 4096, std::move(bl1), 0));
+  bufferlist bl2;
+  bl2.append(std::string(4096 * 2, '2'));
+  ASSERT_EQ(4096 * 2, api::Io<>::write(*ictx, 4096 * 12, 4096 * 2,
+                                       std::move(bl2), 0));
+
+  ASSERT_EQ(0, snap_create(*ictx, "snap1"));
+  ASSERT_EQ(0, snap_protect(*ictx, "snap1"));
+
+  std::string clone_name = get_temp_image_name();
+  ASSERT_EQ(0, librbd::clone(m_ioctx, parent_name.c_str(), "snap1", m_ioctx,
+                            clone_name.c_str(), features, &order, 0, 0));
+
+  close_image(ictx);
+  ASSERT_EQ(0, open_image(clone_name, &ictx));
+
+  std::vector<std::pair<uint64_t, uint64_t>> read_m;
+  bufferlist read_bl;
+  librbd::io::ReadResult sparse_read_result{&read_m, &read_bl};
+
+  // 0. read entire clone at once
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+    if (sparse_read_supported) {
+      expected_m = {{4096 * 10, 4096}, {4096 * 12, 4096 * 2}};
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096 * 2, '2'));
+    } else {
+      expected_m = {{4096 * 8, 4096 * 6}};
+      expected_bl.append(std::string(4096 * 2, '\0'));
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096, '\0'));
+      expected_bl.append(std::string(4096 * 2, '2'));
+    }
+
+    ASSERT_EQ(4096 * 16,
+              api::Io<>::read(*ictx, 0, 4096 * 16,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+
+  // 1. parent object DNE
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+
+    ASSERT_EQ(4096 * 6,
+              api::Io<>::read(*ictx, 4096, 4096 * 6,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+
+  // 2. parent object has two extents
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+    if (sparse_read_supported) {
+      expected_m = {{4096 * 10, 4096}, {4096 * 12, 4096 * 2}};
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096 * 2, '2'));
+    } else {
+      expected_m = {{4096 * 9, 4096 * 5}};
+      expected_bl.append(std::string(4096, '\0'));
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096, '\0'));
+      expected_bl.append(std::string(4096 * 2, '2'));
+    }
+
+    ASSERT_EQ(4096 * 6,
+              api::Io<>::read(*ictx, 4096 * 9, 4096 * 6,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+
+  // 3. parent object has two extents but the second extent is
+  //    partially beyond parent overlap
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 13));
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 16));
+  ASSERT_EQ(0, ictx->state->refresh());
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+    if (sparse_read_supported) {
+      expected_m = {{4096 * 10, 4096}, {4096 * 12, 4096}};
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096, '2'));
+    } else {
+      expected_m = {{4096 * 9, 4096 * 4}};
+      expected_bl.append(std::string(4096, '\0'));
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096, '\0'));
+      expected_bl.append(std::string(4096, '2'));
+    }
+
+    ASSERT_EQ(4096 * 6,
+              api::Io<>::read(*ictx, 4096 * 9, 4096 * 6,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+
+  // 4. parent object has two extents but the second extent is
+  //    completely beyond parent overlap
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 12));
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 16));
+  ASSERT_EQ(0, ictx->state->refresh());
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+    if (sparse_read_supported) {
+      expected_m = {{4096 * 10, 4096}};
+      expected_bl.append(std::string(4096, '1'));
+    } else {
+      expected_m = {{4096 * 9, 4096 * 3}};
+      expected_bl.append(std::string(4096, '\0'));
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096, '\0'));
+    }
+
+    ASSERT_EQ(4096 * 6,
+              api::Io<>::read(*ictx, 4096 * 9, 4096 * 6,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+
+  // 5. parent object has two extents but both extents are beyond
+  //    parent overlap
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 10));
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 16));
+  ASSERT_EQ(0, ictx->state->refresh());
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+    if (!sparse_read_supported) {
+      expected_m = {{4096 * 9, 4096}};
+      expected_bl.append(std::string(4096, '\0'));
+    }
+
+    ASSERT_EQ(4096 * 6,
+              api::Io<>::read(*ictx, 4096 * 9, 4096 * 6,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+
+  // 6. parent object is beyond parent overlap
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 8));
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 16));
+  ASSERT_EQ(0, ictx->state->refresh());
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+
+    ASSERT_EQ(4096 * 6,
+              api::Io<>::read(*ictx, 4096 * 9, 4096 * 6,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+}
+
+TEST_F(TestInternal, SparseReadParentMultipleObjectExtents)
+{
+  // https://tracker.ceph.com/issues/72727
+  SKIP_IF_CRIMSON();
+  REQUIRE_FEATURE(RBD_FEATURE_LAYERING | RBD_FEATURE_STRIPINGV2);
+
+  // all reads should go to the OSDs
+  auto cct = reinterpret_cast<CephContext*>(_rados.cct());
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_cache") ||
+          cct->_conf.get_val<std::string>("rbd_cache_policy") == "writearound");
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_parent_cache_enabled"));
+  REQUIRE(!is_rbd_pwl_enabled(cct));
+
+  // clone should remain empty for the entire duration of the test
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_clone_copy_on_read"));
+
+  uint64_t features;
+  ASSERT_TRUE(::get_features(&features));
+
+  std::string parent_name = get_temp_image_name();
+  int order = 17;
+  ASSERT_EQ(0, m_rbd.create2(m_ioctx, parent_name.c_str(), 4096 * 64, features,
+                             &order));
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(parent_name, &ictx));
+  ASSERT_EQ(0, ictx->operations->metadata_set(
+      "conf_rbd_sparse_read_threshold_bytes", "4096"));
+
+  bool sparse_read_supported = is_sparse_read_supported(
+      ictx->data_ctx, ictx->get_object_name(10));
+
+  // parent obj1: ... .1 1. .2 .. 33 3. .. 4
+  // clone obj0:  ... 1. .. 3. 4
+  // clone obj1:  ... .1 .2 33
+
+  bufferlist bl1;
+  bl1.append(std::string(4096 * 2, '1'));
+  ASSERT_EQ(4096 * 2, api::Io<>::write(*ictx, 4096 * 39, 4096 * 2,
+                                       std::move(bl1), 0));
+  bufferlist bl2;
+  bl2.append(std::string(4096, '2'));
+  ASSERT_EQ(4096, api::Io<>::write(*ictx, 4096 * 43, 4096, std::move(bl2), 0));
+  bufferlist bl3;
+  bl3.append(std::string(4096 * 3, '3'));
+  ASSERT_EQ(4096 * 3, api::Io<>::write(*ictx, 4096 * 46, 4096 * 3,
+                                       std::move(bl3), 0));
+  bufferlist bl4;
+  bl4.append(std::string(4096, '4'));
+  ASSERT_EQ(4096, api::Io<>::write(*ictx, 4096 * 52, 4096, std::move(bl4), 0));
+
+  ASSERT_EQ(0, snap_create(*ictx, "snap1"));
+  ASSERT_EQ(0, snap_protect(*ictx, "snap1"));
+
+  std::string mid_clone_name = get_temp_image_name();
+  ASSERT_EQ(0, librbd::clone(m_ioctx, parent_name.c_str(), "snap1", m_ioctx,
+                            mid_clone_name.c_str(), features, &order, 0, 0));
+
+  close_image(ictx);
+  ASSERT_EQ(0, open_image(mid_clone_name, &ictx));
+
+  ASSERT_EQ(0, snap_create(*ictx, "snap1"));
+  ASSERT_EQ(0, snap_protect(*ictx, "snap1"));
+
+  std::string top_clone_name = get_temp_image_name();
+  ASSERT_EQ(0, librbd::clone(m_ioctx, mid_clone_name.c_str(), "snap1", m_ioctx,
+                            top_clone_name.c_str(), features, &order,
+                             4096 * 2, 2));
+
+  close_image(ictx);
+  ASSERT_EQ(0, open_image(top_clone_name, &ictx));
+
+  std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+  bufferlist expected_bl;
+  if (sparse_read_supported) {
+    expected_m = {{4096 * 39, 4096}, {4096 * 40, 4096},
+                  {4096 * 43, 4096}, {4096 * 46, 4096 * 2},
+                  {4096 * 48, 4096}, {4096 * 52, 4096}};
+    expected_bl.append(std::string(4096 * 2, '1'));
+    expected_bl.append(std::string(4096, '2'));
+    expected_bl.append(std::string(4096 * 3, '3'));
+    expected_bl.append(std::string(4096, '4'));
+  } else {
+    expected_m = {{4096 * 32, 4096 * 2}, {4096 * 34, 4096 * 2},
+                  {4096 * 36, 4096 * 2}, {4096 * 38, 4096 * 2},
+                  {4096 * 40, 4096 * 2}, {4096 * 42, 4096 * 2},
+                  {4096 * 44, 4096 * 2}, {4096 * 46, 4096 * 2},
+                  {4096 * 48, 4096 * 2}, {4096 * 50, 4096 * 2},
+                  {4096 * 52, 4096}};
+    expected_bl.append(std::string(4096 * 7, '\0'));
+    expected_bl.append(std::string(4096 * 2, '1'));
+    expected_bl.append(std::string(4096 * 2, '\0'));
+    expected_bl.append(std::string(4096, '2'));
+    expected_bl.append(std::string(4096 * 2, '\0'));
+    expected_bl.append(std::string(4096 * 3, '3'));
+    expected_bl.append(std::string(4096 * 3, '\0'));
+    expected_bl.append(std::string(4096, '4'));
+  }
+
+  std::vector<std::pair<uint64_t, uint64_t>> read_m;
+  bufferlist read_bl;
+  ASSERT_EQ(4096 * 64,
+            api::Io<>::read(*ictx, 0, 4096 * 64,
+                            librbd::io::ReadResult{&read_m, &read_bl}, 0));
+  ASSERT_EQ(expected_m, read_m);
+  ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+}
+
+TEST_F(TestInternal, SparseReadParentImportOnlyMigration)
+{
+  // https://tracker.ceph.com/issues/72727
+  SKIP_IF_CRIMSON();
+  REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
+
+  // all reads should go to the OSDs
+  auto cct = reinterpret_cast<CephContext*>(_rados.cct());
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_cache") ||
+          cct->_conf.get_val<std::string>("rbd_cache_policy") == "writearound");
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_parent_cache_enabled"));
+  REQUIRE(!is_rbd_pwl_enabled(cct));
+
+  // clone should remain empty for the entire duration of the test
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_clone_copy_on_read"));
+
+  uint64_t features;
+  ASSERT_TRUE(::get_features(&features));
+
+  std::string parent_name = get_temp_image_name();
+  int order = 22;
+  ASSERT_EQ(0, m_rbd.create2(m_ioctx, parent_name.c_str(), 32 << 20, features,
+                             &order));
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(parent_name, &ictx));
+  ASSERT_EQ(0, ictx->operations->metadata_set(
+      "conf_rbd_sparse_read_threshold_bytes", "4096"));
+
+  bool sparse_read_supported = is_sparse_read_supported(
+      ictx->data_ctx, ictx->get_object_name(10));
+
+  bufferlist bl1;
+  bl1.append(std::string((4 << 20) + (4 << 10), '1'));
+  ASSERT_EQ((4 << 20) + (4 << 10),
+            api::Io<>::write(*ictx, 4 << 20, (4 << 20) + (4 << 10),
+                             std::move(bl1), 0));
+  bufferlist bl2;
+  bl2.append(std::string(16 << 10, '2'));
+  ASSERT_EQ(16 << 10, api::Io<>::write(*ictx, 10 << 20, 16 << 10,
+                                       std::move(bl2), 0));
+  bufferlist bl3;
+  bl3.append(std::string(24 << 10, '3'));
+  ASSERT_EQ(24 << 10, api::Io<>::write(*ictx, (12 << 20) - (4 << 10), 24 << 10,
+                                       std::move(bl3), 0));
+  bufferlist bl4;
+  bl4.append(std::string(8 << 10, '4'));
+  ASSERT_EQ(8 << 10, api::Io<>::write(*ictx, (24 << 20) - (8 << 10), 8 << 10,
+                                      std::move(bl4), 0));
+  bufferlist bl5;
+  bl5.append(std::string(4 << 10, '5'));
+  ASSERT_EQ(4 << 10, api::Io<>::write(*ictx, 25 << 20, 4 << 10,
+                                      std::move(bl5), 0));
+  bufferlist bl6;
+  bl6.append(std::string(12 << 10, '6'));
+  ASSERT_EQ(12 << 10, api::Io<>::write(*ictx, 27 << 20, 12 << 10,
+                                       std::move(bl6), 0));
+
+  ASSERT_EQ(0, snap_create(*ictx, "snap1"));
+  ASSERT_EQ(0, snap_protect(*ictx, "snap1"));
+
+  std::string clone_name = get_temp_image_name();
+  ASSERT_EQ(0, librbd::clone(m_ioctx, parent_name.c_str(), "snap1", m_ioctx,
+                            clone_name.c_str(), features, &order, 0, 0));
+
+  close_image(ictx);
+  ASSERT_EQ(0, open_image(clone_name, &ictx));
+
+  std::vector<std::pair<uint64_t, uint64_t>> read_m;
+  bufferlist read_bl;
+  librbd::io::ReadResult sparse_read_result{&read_m, &read_bl};
+
+  std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+  bufferlist expected_bl;
+  if (sparse_read_supported) {
+    expected_m = {{4 << 20, 4 << 20}, {8 << 20, 4 << 10},
+                  {10 << 20, 16 << 10}, {(12 << 20) - (4 << 10), 4 << 10},
+                  {12 << 20, 20 << 10}, {(24 << 20) - (8 << 10), 8 << 10},
+                  {25 << 20, 4 << 10}, {27 << 20, 12 << 10}};
+    expected_bl.append(std::string((4 << 20) + (4 << 10), '1'));
+    expected_bl.append(std::string(16 << 10, '2'));
+    expected_bl.append(std::string(24 << 10, '3'));
+    expected_bl.append(std::string(8 << 10, '4'));
+    expected_bl.append(std::string(4 << 10, '5'));
+    expected_bl.append(std::string(12 << 10, '6'));
+  } else {
+    expected_m = {{4 << 20, 4 << 20}, {8 << 20, 4 << 20},
+                  {12 << 20, 20 << 10}, {20 << 20, 4 << 20},
+                  {24 << 20, (3 << 20) + (12 << 10)}};
+    expected_bl.append(std::string((4 << 20) + (4 << 10), '1'));
+    expected_bl.append(std::string((2 << 20) - (4 << 10), '\0'));
+    expected_bl.append(std::string(16 << 10, '2'));
+    expected_bl.append(std::string((2 << 20) - (20 << 10), '\0'));
+    expected_bl.append(std::string(24 << 10, '3'));
+    expected_bl.append(std::string((4 << 20) - (8 << 10), '\0'));
+    expected_bl.append(std::string(8 << 10, '4'));
+    expected_bl.append(std::string(1 << 20, '\0'));
+    expected_bl.append(std::string(4 << 10, '5'));
+    expected_bl.append(std::string((2 << 20) - (4 << 10), '\0'));
+    expected_bl.append(std::string(12 << 10, '6'));
+  }
+
+  ASSERT_EQ(32 << 20,
+            api::Io<>::read(*ictx, 0, 32 << 20,
+                            librbd::io::ReadResult{sparse_read_result}, 0));
+  ASSERT_EQ(expected_m, read_m);
+  ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+
+  ASSERT_EQ(0, snap_create(*ictx, "snap1"));
+  ASSERT_EQ(0, snap_protect(*ictx, "snap1"));
+
+  std::string dst_name = get_temp_image_name();
+  std::string source_spec = R"({)"
+      R"("type": "native", )"
+      R"("pool_name": ")" + m_ioctx.get_pool_name() + R"(", )"
+      R"("image_name": ")" + clone_name + R"(", )"
+      R"("snap_name": "snap1"})";
+  librbd::ImageOptions dst_opts;
+  dst_opts.set(RBD_IMAGE_OPTION_FEATURES, features);
+  ASSERT_EQ(0, api::Migration<>::prepare_import(source_spec.c_str(), m_ioctx,
+                                                dst_name.c_str(), dst_opts));
+
+  close_image(ictx);
+  ASSERT_EQ(0, open_image(dst_name, &ictx));
+
+  ASSERT_EQ(32 << 20,
+            api::Io<>::read(*ictx, 0, 32 << 20,
+                            librbd::io::ReadResult{sparse_read_result}, 0));
+  ASSERT_EQ(expected_m, read_m);
+  ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+
+  librbd::NoOpProgressContext no_op;
+  ASSERT_EQ(0, api::Migration<>::execute(m_ioctx, dst_name.c_str(), no_op));
+  ASSERT_EQ(0, api::Migration<>::commit(m_ioctx, dst_name.c_str(), no_op));
+
+  ASSERT_EQ(32 << 20,
+            api::Io<>::read(*ictx, 0, 32 << 20,
+                            librbd::io::ReadResult{sparse_read_result}, 0));
+  ASSERT_EQ(expected_m, read_m);
+  ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+}
+
 TEST_F(TestInternal, ImageOptions) {
   rbd_image_options_t opts1 = NULL, opts2 = NULL;
   uint64_t uint64_val1 = 10, uint64_val2 = 0;