From: Ilya Dryomov
Date: Thu, 15 Jan 2026 12:56:13 +0000 (+0100)
Subject: librbd: avoid losing sparseness in read_parent()
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=bd2204f7283a30d699778834280eed908440fd62;p=ceph.git

librbd: avoid losing sparseness in read_parent()

When read_parent() constructs a read for image_ctx->parent, it employs
a thick bufferlist (either re-using the bufferlist on the object extent
or creating a temporary one inside of C_ObjectReadMergedExtents). This
forgoes any sparseness: even if the result obtained by ObjectRequest is
sparse, it's thickened by ReadResult's handler for Bufferlist type.

This behavior is very old and hasn't been a problem for regular clones
because the public API returns a thick bufferlist in the case of C++ or
equivalent char* buf/struct iovec iov[] buffers in the case of C anyway.
ObjectCacher isn't sparse-aware but it's also not used for caching reads
by default and reading from parent for the purposes of a copyup is done
in CopyupRequest in a way that preserves sparseness.

However, when it comes to migration, source image reads go through
read_parent() and the destination image gets thickened as an inadvertent
side effect.

Fix this by introducing a new ChildObject type for ReadResult whose
handler would plant the result obtained by parent's ObjectRequest into
child's ObjectRequest, as if read_parent() wasn't even called.
Fixes: https://tracker.ceph.com/issues/73831
Signed-off-by: Ilya Dryomov
---

diff --git a/src/librbd/io/ReadResult.cc b/src/librbd/io/ReadResult.cc
index 54d57e7af848..fc1d9bf361f8 100644
--- a/src/librbd/io/ReadResult.cc
+++ b/src/librbd/io/ReadResult.cc
@@ -33,6 +33,10 @@ struct ReadResult::SetImageExtentsVisitor {
     sbl.image_extents = image_extents;
   }
 
+  void operator()(ChildObject &child_object) const {
+    child_object.overlap_bytes = util::get_extents_length(image_extents);
+  }
+
   template <typename T>
   void operator()(T &t) const {
   }
@@ -137,6 +141,71 @@ struct ReadResult::AssembleResultVisitor {
                    << " bytes to bl "
                    << reinterpret_cast<void*>(sparse_bufferlist.bl) << dendl;
   }
+
+  void operator()(ChildObject &child_object) const {
+    bufferlist bl;
+    ExtentMap buffer_extent_map;
+    uint64_t buffer_extents_length = destriper.assemble_result(
+      cct, &buffer_extent_map, &bl);
+
+    ldout(cct, 20) << "buffer_extent_map=" << buffer_extent_map << dendl;
+
+    // buffer_extent_map is logically addressed by buffer extents not
+    // image or object extents. Translate buffer offsets (always 0-based)
+    // into object offsets since the buffer is tied to an object read
+    // (in child image, see read_parent()).
+    uint64_t child_buffer_offset = 0;
+    auto bem_it = buffer_extent_map.begin();
+    for (auto& read_extent : *child_object.read_extents) {
+      read_extent.bl.clear();
+      read_extent.extent_map.clear();
+
+      bool found_buffer_extent = false;
+      while (bem_it != buffer_extent_map.end()) {
+        auto [buffer_extent_offset, buffer_extent_length] = *bem_it;
+
+        if (child_buffer_offset + read_extent.length <= buffer_extent_offset) {
+          // no more buffer extents for the current object extent,
+          // current buffer extent belongs to the next object extent
+          break;
+        }
+
+        // current buffer extent should be within the current object extent
+        ceph_assert(child_buffer_offset <= buffer_extent_offset &&
+                    child_buffer_offset + read_extent.length >=
+                      buffer_extent_offset + buffer_extent_length);
+        found_buffer_extent = true;
+
+        uint64_t object_extent_offset =
+          read_extent.offset + (buffer_extent_offset - child_buffer_offset);
+        ldout(cct, 20) << "mapping buffer extent " << buffer_extent_offset
+                       << "~" << buffer_extent_length << " to object extent "
+                       << object_extent_offset << "~" << buffer_extent_length
+                       << " for " << read_extent.offset << "~"
+                       << read_extent.length << dendl;
+        bl.splice(0, buffer_extent_length, &read_extent.bl);
+        read_extent.extent_map.emplace_back(object_extent_offset,
+                                            buffer_extent_length);
+        ++bem_it;
+      }
+
+      // skip any object extent that is not included in the results
+      if (!found_buffer_extent) {
+        ldout(cct, 20) << "no buffer extents for object extent "
+                       << read_extent.offset << "~" << read_extent.length
+                       << dendl;
+      }
+
+      child_buffer_offset += read_extent.length;
+    }
+    ceph_assert(bl.length() == 0);
+    ceph_assert(child_buffer_offset >= buffer_extents_length);
+    ceph_assert(child_object.overlap_bytes == buffer_extents_length);
+    ceph_assert(bem_it == buffer_extent_map.end());
+
+    ldout(cct, 20) << "planted result in " << *child_object.read_extents
+                   << dendl;
+  }
 };
 
 ReadResult::C_ImageReadRequest::C_ImageReadRequest(
@@ -244,6 +313,10 @@ ReadResult::ReadResult(Extents* extent_map, ceph::bufferlist* bl)
   : m_buffer(SparseBufferlist(extent_map, bl)) {
 }
 
+ReadResult::ReadResult(ReadExtents* read_extents)
+  : m_buffer(ChildObject(read_extents)) {
+}
+
 void ReadResult::set_image_extents(const Extents& image_extents) {
   std::visit(SetImageExtentsVisitor(image_extents), m_buffer);
 }
diff --git a/src/librbd/io/ReadResult.h b/src/librbd/io/ReadResult.h
index b0f78c285491..5c2f5c71629e 100644
--- a/src/librbd/io/ReadResult.h
+++ b/src/librbd/io/ReadResult.h
@@ -65,6 +65,7 @@ public:
   ReadResult(const struct iovec *iov, int iov_count);
   ReadResult(ceph::bufferlist *bl);
   ReadResult(Extents* extent_map, ceph::bufferlist* bl);
+  ReadResult(ReadExtents* read_extents);
 
   void set_image_extents(const Extents& image_extents);
 
@@ -106,11 +107,21 @@ private:
     }
   };
 
+  struct ChildObject {
+    ReadExtents* read_extents;
+    uint64_t overlap_bytes = 0;
+
+    ChildObject(ReadExtents* read_extents)
+      : read_extents(read_extents) {
+    }
+  };
+
   typedef std::variant<Empty,
                        Linear,
                        Vector,
                        Bufferlist,
-                       SparseBufferlist> Buffer;
+                       SparseBufferlist,
+                       ChildObject> Buffer;
 
   struct SetImageExtentsVisitor;
   struct AssembleResultVisitor;
diff --git a/src/librbd/io/Utils.cc b/src/librbd/io/Utils.cc
index 0a973a68626f..0799270b0903 100644
--- a/src/librbd/io/Utils.cc
+++ b/src/librbd/io/Utils.cc
@@ -119,16 +119,6 @@ void read_parent(I *image_ctx, uint64_t object_no, ReadExtents* read_extents,
 
   ldout(cct, 20) << dendl;
 
-  ceph::bufferlist* parent_read_bl;
-  if (read_extents->size() > 1) {
-    auto parent_comp = new ReadResult::C_ObjectReadMergedExtents(
-            cct, read_extents, on_finish);
-    parent_read_bl = &parent_comp->bl;
-    on_finish = parent_comp;
-  } else {
-    parent_read_bl = &read_extents->front().bl;
-  }
-
   auto comp = AioCompletion::create_and_start(on_finish, image_ctx->parent,
                                               AIO_TYPE_READ);
   ldout(cct, 20) << "completion=" << comp
@@ -136,7 +126,7 @@ void read_parent(I *image_ctx, uint64_t object_no, ReadExtents* read_extents,
                  << " area=" << area << dendl;
   auto req = io::ImageDispatchSpec::create_read(
     *image_ctx->parent, io::IMAGE_DISPATCH_LAYER_INTERNAL_START, comp,
-    std::move(parent_extents), area, ReadResult{parent_read_bl},
+    std::move(parent_extents), area, ReadResult{read_extents},
     image_ctx->parent->get_data_io_context(), 0, 0, trace);
   req->send();
 }
diff --git a/src/test/librbd/test_internal.cc b/src/test/librbd/test_internal.cc
index 0df558c4d8dd..4c4434d21398 100644
--- a/src/test/librbd/test_internal.cc
+++ b/src/test/librbd/test_internal.cc
@@ -1110,6 +1110,464 @@ TEST_F(TestInternal, DiscardCopyup)
   }
 }
 
+TEST_F(TestInternal, SparseReadParent)
+{
+  // https://tracker.ceph.com/issues/72727
+  SKIP_IF_CRIMSON();
+  REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
+
+  // all reads should go to the OSDs
+  auto cct = reinterpret_cast<CephContext*>(_rados.cct());
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_cache") ||
+          cct->_conf.get_val<std::string>("rbd_cache_policy") == "writearound");
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_parent_cache_enabled"));
+  REQUIRE(!is_rbd_pwl_enabled(cct));
+
+  // clone should remain empty for the entire duration of the test
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_clone_copy_on_read"));
+
+  uint64_t features;
+  ASSERT_TRUE(::get_features(&features));
+
+  std::string parent_name = get_temp_image_name();
+  int order = 15;
+  ASSERT_EQ(0, m_rbd.create2(m_ioctx, parent_name.c_str(), 4096 * 16, features,
+                             &order));
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(parent_name, &ictx));
+  ASSERT_EQ(0, ictx->operations->metadata_set(
+                 "conf_rbd_sparse_read_threshold_bytes", "4096"));
+
+  bool sparse_read_supported = is_sparse_read_supported(
+    ictx->data_ctx, ictx->get_object_name(10));
+
+  bufferlist bl1;
+  bl1.append(std::string(4096, '1'));
+  ASSERT_EQ(4096, api::Io<>::write(*ictx, 4096 * 10, 4096, std::move(bl1), 0));
+  bufferlist bl2;
+  bl2.append(std::string(4096 * 2, '2'));
+  ASSERT_EQ(4096 * 2, api::Io<>::write(*ictx, 4096 * 12, 4096 * 2,
+                                       std::move(bl2), 0));
+
+  ASSERT_EQ(0, snap_create(*ictx, "snap1"));
+  ASSERT_EQ(0, snap_protect(*ictx, "snap1"));
+
+  std::string clone_name = get_temp_image_name();
+  ASSERT_EQ(0, librbd::clone(m_ioctx, parent_name.c_str(), "snap1", m_ioctx,
+                             clone_name.c_str(), features, &order, 0, 0));
+
+  close_image(ictx);
+  ASSERT_EQ(0, open_image(clone_name, &ictx));
+
+  std::vector<std::pair<uint64_t, uint64_t>> read_m;
+  bufferlist read_bl;
+  librbd::io::ReadResult sparse_read_result{&read_m, &read_bl};
+
+  // 0. read entire clone at once
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+    if (sparse_read_supported) {
+      expected_m = {{4096 * 10, 4096}, {4096 * 12, 4096 * 2}};
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096 * 2, '2'));
+    } else {
+      expected_m = {{4096 * 8, 4096 * 6}};
+      expected_bl.append(std::string(4096 * 2, '\0'));
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096, '\0'));
+      expected_bl.append(std::string(4096 * 2, '2'));
+    }
+
+    ASSERT_EQ(4096 * 16,
+              api::Io<>::read(*ictx, 0, 4096 * 16,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+
+  // 1. parent object DNE
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+
+    ASSERT_EQ(4096 * 6,
+              api::Io<>::read(*ictx, 4096, 4096 * 6,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+
+  // 2. parent object has two extents
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+    if (sparse_read_supported) {
+      expected_m = {{4096 * 10, 4096}, {4096 * 12, 4096 * 2}};
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096 * 2, '2'));
+    } else {
+      expected_m = {{4096 * 9, 4096 * 5}};
+      expected_bl.append(std::string(4096, '\0'));
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096, '\0'));
+      expected_bl.append(std::string(4096 * 2, '2'));
+    }
+
+    ASSERT_EQ(4096 * 6,
+              api::Io<>::read(*ictx, 4096 * 9, 4096 * 6,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+
+  // 3. parent object has two extents but the second extent is
+  // partially beyond parent overlap
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 13));
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 16));
+  ASSERT_EQ(0, ictx->state->refresh());
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+    if (sparse_read_supported) {
+      expected_m = {{4096 * 10, 4096}, {4096 * 12, 4096}};
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096, '2'));
+    } else {
+      expected_m = {{4096 * 9, 4096 * 4}};
+      expected_bl.append(std::string(4096, '\0'));
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096, '\0'));
+      expected_bl.append(std::string(4096, '2'));
+    }
+
+    ASSERT_EQ(4096 * 6,
+              api::Io<>::read(*ictx, 4096 * 9, 4096 * 6,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+
+  // 4. parent object has two extents but the second extent is
+  // completely beyond parent overlap
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 12));
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 16));
+  ASSERT_EQ(0, ictx->state->refresh());
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+    if (sparse_read_supported) {
+      expected_m = {{4096 * 10, 4096}};
+      expected_bl.append(std::string(4096, '1'));
+    } else {
+      expected_m = {{4096 * 9, 4096 * 3}};
+      expected_bl.append(std::string(4096, '\0'));
+      expected_bl.append(std::string(4096, '1'));
+      expected_bl.append(std::string(4096, '\0'));
+    }
+
+    ASSERT_EQ(4096 * 6,
+              api::Io<>::read(*ictx, 4096 * 9, 4096 * 6,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+
+  // 5. parent object has two extents but both extents are beyond
+  // parent overlap
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 10));
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 16));
+  ASSERT_EQ(0, ictx->state->refresh());
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+    if (!sparse_read_supported) {
+      expected_m = {{4096 * 9, 4096}};
+      expected_bl.append(std::string(4096, '\0'));
+    }
+
+    ASSERT_EQ(4096 * 6,
+              api::Io<>::read(*ictx, 4096 * 9, 4096 * 6,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+
+  // 6. parent object is beyond parent overlap
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 8));
+  ASSERT_EQ(0, librbd::cls_client::set_size(&m_ioctx, ictx->header_oid,
+                                            4096 * 16));
+  ASSERT_EQ(0, ictx->state->refresh());
+  {
+    std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+    bufferlist expected_bl;
+
+    ASSERT_EQ(4096 * 6,
+              api::Io<>::read(*ictx, 4096 * 9, 4096 * 6,
+                              librbd::io::ReadResult{sparse_read_result}, 0));
+    ASSERT_EQ(expected_m, read_m);
+    ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+  }
+}
+
+TEST_F(TestInternal, SparseReadParentMultipleObjectExtents)
+{
+  // https://tracker.ceph.com/issues/72727
+  SKIP_IF_CRIMSON();
+  REQUIRE_FEATURE(RBD_FEATURE_LAYERING | RBD_FEATURE_STRIPINGV2);
+
+  // all reads should go to the OSDs
+  auto cct = reinterpret_cast<CephContext*>(_rados.cct());
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_cache") ||
+          cct->_conf.get_val<std::string>("rbd_cache_policy") == "writearound");
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_parent_cache_enabled"));
+  REQUIRE(!is_rbd_pwl_enabled(cct));
+
+  // clone should remain empty for the entire duration of the test
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_clone_copy_on_read"));
+
+  uint64_t features;
+  ASSERT_TRUE(::get_features(&features));
+
+  std::string parent_name = get_temp_image_name();
+  int order = 17;
+  ASSERT_EQ(0, m_rbd.create2(m_ioctx, parent_name.c_str(), 4096 * 64, features,
+                             &order));
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(parent_name, &ictx));
+  ASSERT_EQ(0, ictx->operations->metadata_set(
+                 "conf_rbd_sparse_read_threshold_bytes", "4096"));
+
+  bool sparse_read_supported = is_sparse_read_supported(
+    ictx->data_ctx, ictx->get_object_name(10));
+
+  // parent obj1: ... .1 1. .2 .. 33 3. .. 4
+  // clone obj0:  ...    1.    ..    3.    4
+  // clone obj1:  ... .1    .2    33
+
+  bufferlist bl1;
+  bl1.append(std::string(4096 * 2, '1'));
+  ASSERT_EQ(4096 * 2, api::Io<>::write(*ictx, 4096 * 39, 4096 * 2,
+                                       std::move(bl1), 0));
+  bufferlist bl2;
+  bl2.append(std::string(4096, '2'));
+  ASSERT_EQ(4096, api::Io<>::write(*ictx, 4096 * 43, 4096, std::move(bl2), 0));
+  bufferlist bl3;
+  bl3.append(std::string(4096 * 3, '3'));
+  ASSERT_EQ(4096 * 3, api::Io<>::write(*ictx, 4096 * 46, 4096 * 3,
+                                       std::move(bl3), 0));
+  bufferlist bl4;
+  bl4.append(std::string(4096, '4'));
+  ASSERT_EQ(4096, api::Io<>::write(*ictx, 4096 * 52, 4096, std::move(bl4), 0));
+
+  ASSERT_EQ(0, snap_create(*ictx, "snap1"));
+  ASSERT_EQ(0, snap_protect(*ictx, "snap1"));
+
+  std::string mid_clone_name = get_temp_image_name();
+  ASSERT_EQ(0, librbd::clone(m_ioctx, parent_name.c_str(), "snap1", m_ioctx,
+                             mid_clone_name.c_str(), features, &order, 0, 0));
+
+  close_image(ictx);
+  ASSERT_EQ(0, open_image(mid_clone_name, &ictx));
+
+  ASSERT_EQ(0, snap_create(*ictx, "snap1"));
+  ASSERT_EQ(0, snap_protect(*ictx, "snap1"));
+
+  std::string top_clone_name = get_temp_image_name();
+  ASSERT_EQ(0, librbd::clone(m_ioctx, mid_clone_name.c_str(), "snap1", m_ioctx,
+                             top_clone_name.c_str(), features, &order,
+                             4096 * 2, 2));
+
+  close_image(ictx);
+  ASSERT_EQ(0, open_image(top_clone_name, &ictx));
+
+  std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+  bufferlist expected_bl;
+  if (sparse_read_supported) {
+    expected_m = {{4096 * 39, 4096}, {4096 * 40, 4096},
+                  {4096 * 43, 4096}, {4096 * 46, 4096 * 2},
+                  {4096 * 48, 4096}, {4096 * 52, 4096}};
+    expected_bl.append(std::string(4096 * 2, '1'));
+    expected_bl.append(std::string(4096, '2'));
+    expected_bl.append(std::string(4096 * 3, '3'));
+    expected_bl.append(std::string(4096, '4'));
+  } else {
+    expected_m = {{4096 * 32, 4096 * 2}, {4096 * 34, 4096 * 2},
+                  {4096 * 36, 4096 * 2}, {4096 * 38, 4096 * 2},
+                  {4096 * 40, 4096 * 2}, {4096 * 42, 4096 * 2},
+                  {4096 * 44, 4096 * 2}, {4096 * 46, 4096 * 2},
+                  {4096 * 48, 4096 * 2}, {4096 * 50, 4096 * 2},
+                  {4096 * 52, 4096}};
+    expected_bl.append(std::string(4096 * 7, '\0'));
+    expected_bl.append(std::string(4096 * 2, '1'));
+    expected_bl.append(std::string(4096 * 2, '\0'));
+    expected_bl.append(std::string(4096, '2'));
+    expected_bl.append(std::string(4096 * 2, '\0'));
+    expected_bl.append(std::string(4096 * 3, '3'));
+    expected_bl.append(std::string(4096 * 3, '\0'));
+    expected_bl.append(std::string(4096, '4'));
+  }
+
+  std::vector<std::pair<uint64_t, uint64_t>> read_m;
+  bufferlist read_bl;
+  ASSERT_EQ(4096 * 64,
+            api::Io<>::read(*ictx, 0, 4096 * 64,
+                            librbd::io::ReadResult{&read_m, &read_bl}, 0));
+  ASSERT_EQ(expected_m, read_m);
+  ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+}
+
+TEST_F(TestInternal, SparseReadParentImportOnlyMigration)
+{
+  // https://tracker.ceph.com/issues/72727
+  SKIP_IF_CRIMSON();
+  REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
+
+  // all reads should go to the OSDs
+  auto cct = reinterpret_cast<CephContext*>(_rados.cct());
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_cache") ||
+          cct->_conf.get_val<std::string>("rbd_cache_policy") == "writearound");
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_parent_cache_enabled"));
+  REQUIRE(!is_rbd_pwl_enabled(cct));
+
+  // clone should remain empty for the entire duration of the test
+  REQUIRE(!cct->_conf.get_val<bool>("rbd_clone_copy_on_read"));
+
+  uint64_t features;
+  ASSERT_TRUE(::get_features(&features));
+
+  std::string parent_name = get_temp_image_name();
+  int order = 22;
+  ASSERT_EQ(0, m_rbd.create2(m_ioctx, parent_name.c_str(), 32 << 20, features,
+                             &order));
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(parent_name, &ictx));
+  ASSERT_EQ(0, ictx->operations->metadata_set(
+                 "conf_rbd_sparse_read_threshold_bytes", "4096"));
+
+  bool sparse_read_supported = is_sparse_read_supported(
+    ictx->data_ctx, ictx->get_object_name(10));
+
+  bufferlist bl1;
+  bl1.append(std::string((4 << 20) + (4 << 10), '1'));
+  ASSERT_EQ((4 << 20) + (4 << 10),
+            api::Io<>::write(*ictx, 4 << 20, (4 << 20) + (4 << 10),
+                             std::move(bl1), 0));
+  bufferlist bl2;
+  bl2.append(std::string(16 << 10, '2'));
+  ASSERT_EQ(16 << 10, api::Io<>::write(*ictx, 10 << 20, 16 << 10,
+                                       std::move(bl2), 0));
+  bufferlist bl3;
+  bl3.append(std::string(24 << 10, '3'));
+  ASSERT_EQ(24 << 10, api::Io<>::write(*ictx, (12 << 20) - (4 << 10), 24 << 10,
+                                       std::move(bl3), 0));
+  bufferlist bl4;
+  bl4.append(std::string(8 << 10, '4'));
+  ASSERT_EQ(8 << 10, api::Io<>::write(*ictx, (24 << 20) - (8 << 10), 8 << 10,
+                                      std::move(bl4), 0));
+  bufferlist bl5;
+  bl5.append(std::string(4 << 10, '5'));
+  ASSERT_EQ(4 << 10, api::Io<>::write(*ictx, 25 << 20, 4 << 10,
+                                      std::move(bl5), 0));
+  bufferlist bl6;
+  bl6.append(std::string(12 << 10, '6'));
+  ASSERT_EQ(12 << 10, api::Io<>::write(*ictx, 27 << 20, 12 << 10,
+                                       std::move(bl6), 0));
+
+  ASSERT_EQ(0, snap_create(*ictx, "snap1"));
+  ASSERT_EQ(0, snap_protect(*ictx, "snap1"));
+
+  std::string clone_name = get_temp_image_name();
+  ASSERT_EQ(0, librbd::clone(m_ioctx, parent_name.c_str(), "snap1", m_ioctx,
+                             clone_name.c_str(), features, &order, 0, 0));
+
+  close_image(ictx);
+  ASSERT_EQ(0, open_image(clone_name, &ictx));
+
+  std::vector<std::pair<uint64_t, uint64_t>> read_m;
+  bufferlist read_bl;
+  librbd::io::ReadResult sparse_read_result{&read_m, &read_bl};
+
+  std::vector<std::pair<uint64_t, uint64_t>> expected_m;
+  bufferlist expected_bl;
+  if (sparse_read_supported) {
+    expected_m = {{4 << 20, 4 << 20}, {8 << 20, 4 << 10},
+                  {10 << 20, 16 << 10}, {(12 << 20) - (4 << 10), 4 << 10},
+                  {12 << 20, 20 << 10}, {(24 << 20) - (8 << 10), 8 << 10},
+                  {25 << 20, 4 << 10}, {27 << 20, 12 << 10}};
+    expected_bl.append(std::string((4 << 20) + (4 << 10), '1'));
+    expected_bl.append(std::string(16 << 10, '2'));
+    expected_bl.append(std::string(24 << 10, '3'));
+    expected_bl.append(std::string(8 << 10, '4'));
+    expected_bl.append(std::string(4 << 10, '5'));
+    expected_bl.append(std::string(12 << 10, '6'));
+  } else {
+    expected_m = {{4 << 20, 4 << 20}, {8 << 20, 4 << 20},
+                  {12 << 20, 20 << 10}, {20 << 20, 4 << 20},
+                  {24 << 20, (3 << 20) + (12 << 10)}};
+    expected_bl.append(std::string((4 << 20) + (4 << 10), '1'));
+    expected_bl.append(std::string((2 << 20) - (4 << 10), '\0'));
+    expected_bl.append(std::string(16 << 10, '2'));
+    expected_bl.append(std::string((2 << 20) - (20 << 10), '\0'));
+    expected_bl.append(std::string(24 << 10, '3'));
+    expected_bl.append(std::string((4 << 20) - (8 << 10), '\0'));
+    expected_bl.append(std::string(8 << 10, '4'));
+    expected_bl.append(std::string(1 << 20, '\0'));
+    expected_bl.append(std::string(4 << 10, '5'));
+    expected_bl.append(std::string((2 << 20) - (4 << 10), '\0'));
+    expected_bl.append(std::string(12 << 10, '6'));
+  }
+
+  ASSERT_EQ(32 << 20,
+            api::Io<>::read(*ictx, 0, 32 << 20,
+                            librbd::io::ReadResult{sparse_read_result}, 0));
+  ASSERT_EQ(expected_m, read_m);
+  ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+
+  ASSERT_EQ(0, snap_create(*ictx, "snap1"));
+  ASSERT_EQ(0, snap_protect(*ictx, "snap1"));
+
+  std::string dst_name = get_temp_image_name();
+  std::string source_spec = R"({)"
+      R"("type": "native", )"
+      R"("pool_name": ")" + m_ioctx.get_pool_name() + R"(", )"
+      R"("image_name": ")" + clone_name + R"(", )"
+      R"("snap_name": "snap1"})";
+  librbd::ImageOptions dst_opts;
+  dst_opts.set(RBD_IMAGE_OPTION_FEATURES, features);
+  ASSERT_EQ(0, api::Migration<>::prepare_import(source_spec.c_str(), m_ioctx,
+                                                dst_name.c_str(), dst_opts));
+
+  close_image(ictx);
+  ASSERT_EQ(0, open_image(dst_name, &ictx));
+
+  ASSERT_EQ(32 << 20,
+            api::Io<>::read(*ictx, 0, 32 << 20,
+                            librbd::io::ReadResult{sparse_read_result}, 0));
+  ASSERT_EQ(expected_m, read_m);
+  ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+
+  librbd::NoOpProgressContext no_op;
+  ASSERT_EQ(0, api::Migration<>::execute(m_ioctx, dst_name.c_str(), no_op));
+  ASSERT_EQ(0, api::Migration<>::commit(m_ioctx, dst_name.c_str(), no_op));
+
+  ASSERT_EQ(32 << 20,
+            api::Io<>::read(*ictx, 0, 32 << 20,
+                            librbd::io::ReadResult{sparse_read_result}, 0));
+  ASSERT_EQ(expected_m, read_m);
+  ASSERT_TRUE(expected_bl.contents_equal(read_bl));
+}
+
 TEST_F(TestInternal, ImageOptions) {
   rbd_image_options_t opts1 = NULL, opts2 = NULL;
   uint64_t uint64_val1 = 10, uint64_val2 = 0;