From cc87a8bd697e77cfb9f3b549e6006aa44d9a2901 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 3 Sep 2020 11:59:01 -0400 Subject: [PATCH] librbd: deep-copy object utilizes image-extent IO methods The state machine now lists the snaps for the full overlap image-extent using the new list-snaps API. Additionally, read operations are handled via the image-extent read API. Signed-off-by: Jason Dillaman --- src/librbd/deep_copy/ObjectCopyRequest.cc | 855 ++++++------------ src/librbd/deep_copy/ObjectCopyRequest.h | 112 +-- src/librbd/io/ImageRequest.cc | 4 + src/librbd/io/Types.h | 1 + .../deep_copy/test_mock_ObjectCopyRequest.cc | 309 ++----- 5 files changed, 424 insertions(+), 857 deletions(-) diff --git a/src/librbd/deep_copy/ObjectCopyRequest.cc b/src/librbd/deep_copy/ObjectCopyRequest.cc index 0ece45aafa95f..3602a54f74d01 100644 --- a/src/librbd/deep_copy/ObjectCopyRequest.cc +++ b/src/librbd/deep_copy/ObjectCopyRequest.cc @@ -2,6 +2,7 @@ // vim: ts=8 sw=2 smarttab #include "ObjectCopyRequest.h" +#include "include/neorados/RADOS.hpp" #include "common/errno.h" #include "librados/snap_set_diff.h" #include "librbd/ExclusiveLock.h" @@ -10,7 +11,7 @@ #include "librbd/deep_copy/Handler.h" #include "librbd/io/AioCompletion.h" #include "librbd/io/AsyncOperation.h" -#include "librbd/io/ImageRequest.h" +#include "librbd/io/ImageDispatchSpec.h" #include "librbd/io/ReadResult.h" #include "osdc/Striper.h" @@ -19,22 +20,6 @@ #define dout_prefix *_dout << "librbd::deep_copy::ObjectCopyRequest: " \ << this << " " << __func__ << ": " -namespace librados { - -inline bool operator==(const clone_info_t& rhs, const clone_info_t& lhs) { - return (rhs.cloneid == lhs.cloneid && - rhs.snaps == lhs.snaps && - rhs.overlap == lhs.overlap && - rhs.size == lhs.size); -} - -inline bool operator==(const snap_set_t& rhs, const snap_set_t& lhs) { - return (rhs.clones == lhs.clones && - rhs.seq == lhs.seq); -} - -} // namespace librados - namespace librbd { namespace deep_copy { @@ -69,8 
+54,6 @@ ObjectCopyRequest::ObjectCopyRequest(I *src_image_ctx, m_dst_oid = m_dst_image_ctx->get_object_name(dst_object_number); ldout(m_cct, 20) << "dst_oid=" << m_dst_oid << dendl; - - compute_src_object_extents(); } template @@ -80,143 +63,112 @@ void ObjectCopyRequest::send() { template void ObjectCopyRequest::send_list_snaps() { - ceph_assert(!m_src_objects.empty()); - m_src_ono = *m_src_objects.begin(); - m_src_oid = m_src_image_ctx->get_object_name(m_src_ono); - - ldout(m_cct, 20) << "src_oid=" << m_src_oid << dendl; + // image extents are consistent across src and dst so compute once + Striper::extent_to_file(m_cct, &m_dst_image_ctx->layout, m_dst_object_number, + 0, m_dst_image_ctx->layout.object_size, + m_image_extents); + ldout(m_cct, 20) << "image_extents=" << m_image_extents << dendl; + + io::SnapIds snap_ids; + snap_ids.reserve(1 + m_snap_map.size()); + snap_ids.push_back(m_src_snap_id_start); + for (auto& [src_snap_id, _] : m_snap_map) { + if (src_snap_id != snap_ids.front()) { + snap_ids.push_back(src_snap_id); + } + } - librados::AioCompletion *rados_completion = create_rados_callback< - ObjectCopyRequest, &ObjectCopyRequest::handle_list_snaps>(this); + auto list_snaps_flags = io::LIST_SNAPS_FLAG_DISABLE_LIST_FROM_PARENT; - librados::ObjectReadOperation op; - m_snap_set = {}; - m_snap_ret = 0; - op.list_snaps(&m_snap_set, &m_snap_ret); + m_snapshot_delta.clear(); - m_src_io_ctx.snap_set_read(CEPH_SNAPDIR); - int r = m_src_io_ctx.aio_operate(m_src_oid, rados_completion, &op, - nullptr); - ceph_assert(r == 0); - rados_completion->release(); + auto ctx = create_context_callback< + ObjectCopyRequest, &ObjectCopyRequest::handle_list_snaps>(this); + auto aio_comp = io::AioCompletion::create_and_start( + ctx, util::get_image_ctx(m_src_image_ctx), io::AIO_TYPE_GENERIC); + auto req = io::ImageDispatchSpec::create_list_snaps( + *m_src_image_ctx, io::IMAGE_DISPATCH_LAYER_NONE, aio_comp, + io::Extents{m_image_extents}, std::move(snap_ids), list_snaps_flags, + 
&m_snapshot_delta, {}); + req->send(); } template void ObjectCopyRequest::handle_list_snaps(int r) { - if (r == 0 && m_snap_ret < 0) { - r = m_snap_ret; - } - ldout(m_cct, 20) << "r=" << r << dendl; - if (r < 0 && r != -ENOENT) { + if (r < 0) { lderr(m_cct) << "failed to list snaps: " << cpp_strerror(r) << dendl; finish(r); return; } - if (m_retry_missing_read) { - if (m_snap_set == m_retry_snap_set) { - lderr(m_cct) << "read encountered missing object using up-to-date snap set" - << dendl; - finish(-ENOENT); - return; - } - - ldout(m_cct, 20) << "retrying using updated snap set" << dendl; - m_retry_missing_read = false; - m_retry_snap_set = {}; - } + ldout(m_cct, 20) << "snapshot_delta=" << m_snapshot_delta << dendl; - if (r == -ENOENT) { - for (auto &it : m_src_object_extents) { - auto &e = it.second; - if (e.object_no == m_src_ono) { - e.noent = true; - } - } - m_read_ops = {}; - m_read_snaps = {}; - m_zero_interval = {}; - } else { - compute_read_ops(); - } - send_read_object(); + compute_read_ops(); + send_read(); } template -void ObjectCopyRequest::send_read_object() { - +void ObjectCopyRequest::send_read() { if (m_read_snaps.empty()) { // all snapshots have been read merge_write_ops(); + compute_zero_ops(); - ceph_assert(!m_src_objects.empty()); - m_src_objects.erase(m_src_objects.begin()); - - if (!m_src_objects.empty()) { - send_list_snaps(); + if (m_write_ops.empty()) { + // nothing to copy + finish(-ENOENT); return; } - // all objects have been read - send_read_from_parent(); + send_write_object(); return; } auto index = *m_read_snaps.begin(); - auto src_snap_seq = index.second; + auto& read_op = m_read_ops[index]; + if (read_op.image_interval.empty()) { + // nothing written to this object for this snapshot (must be trunc/remove) + handle_read(0); + return; + } - bool read_required = false; - librados::ObjectReadOperation op; + auto io_context = m_src_image_ctx->duplicate_data_io_context(); + io_context->read_snap(index.second); - for (auto &copy_op : 
m_read_ops[index]) { - if (!read_required) { - // map the copy op start snap id back to the necessary read snap id - m_src_io_ctx.snap_set_read(src_snap_seq); + io::Extents image_extents{read_op.image_interval.begin(), + read_op.image_interval.end()}; + io::ReadResult read_result{&read_op.image_extent_map, + &read_op.out_bl}; - ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << dendl; - read_required = true; - } - ldout(m_cct, 20) << "read op: " << copy_op.src_offset << "~" - << copy_op.length << dendl; - op.sparse_read(copy_op.src_offset, copy_op.length, &copy_op.src_extent_map, - &copy_op.out_bl, nullptr); - op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | - LIBRADOS_OP_FLAG_FADVISE_NOCACHE); - } + ldout(m_cct, 20) << "read: src_snap_seq=" << index.second << ", " + << "image_extents=" << image_extents << dendl; - if (!read_required) { - // nothing written to this object for this snapshot (must be trunc/remove) - handle_read_object(0); - return; + int op_flags = (LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | + LIBRADOS_OP_FLAG_FADVISE_NOCACHE); + + int read_flags = 0; + if (index.second != m_src_image_ctx->snap_id) { + read_flags |= io::READ_FLAG_DISABLE_CLIPPING; } auto ctx = create_context_callback< - ObjectCopyRequest, &ObjectCopyRequest::handle_read_object>(this); - auto comp = create_rados_callback(ctx); - - ldout(m_cct, 20) << "read " << m_src_oid << dendl; - - int r = m_src_io_ctx.aio_operate(m_src_oid, comp, &op, nullptr); - ceph_assert(r == 0); - comp->release(); + ObjectCopyRequest, &ObjectCopyRequest::handle_read>(this); + auto aio_comp = io::AioCompletion::create_and_start( + ctx, util::get_image_ctx(m_src_image_ctx), io::AIO_TYPE_READ); + + auto req = io::ImageDispatchSpec::create_read( + *m_src_image_ctx, io::IMAGE_DISPATCH_LAYER_INTERNAL_START, aio_comp, + std::move(image_extents), std::move(read_result), io_context, op_flags, + read_flags, {}); + req->send(); } template -void ObjectCopyRequest::handle_read_object(int r) { +void 
ObjectCopyRequest::handle_read(int r) { ldout(m_cct, 20) << "r=" << r << dendl; - if (r == -ENOENT) { - m_retry_snap_set = m_snap_set; - m_retry_missing_read = true; - - ldout(m_cct, 5) << "object missing potentially due to removed snapshot" - << dendl; - send_list_snaps(); - return; - } - if (r < 0) { lderr(m_cct) << "failed to read from source object: " << cpp_strerror(r) << dendl; @@ -225,97 +177,21 @@ void ObjectCopyRequest::handle_read_object(int r) { } if (m_handler != nullptr) { - uint64_t bytes_read = 0; - auto index = *m_read_snaps.begin(); - for (auto &copy_op : m_read_ops[index]) { - bytes_read += copy_op.out_bl.length(); - } - m_handler->handle_read(bytes_read); + auto& read_op = m_read_ops[index]; + m_handler->handle_read(read_op.out_bl.length()); } ceph_assert(!m_read_snaps.empty()); m_read_snaps.erase(m_read_snaps.begin()); - send_read_object(); -} - -template -void ObjectCopyRequest::send_read_from_parent() { - m_src_image_ctx->image_lock.lock_shared(); - io::Extents image_extents; - compute_read_from_parent_ops(&image_extents); - m_src_image_ctx->image_lock.unlock_shared(); - - if (image_extents.empty()) { - handle_read_from_parent(0); - return; - } - - ldout(m_cct, 20) << dendl; - - ceph_assert(m_src_image_ctx->parent != nullptr); - - auto ctx = create_context_callback< - ObjectCopyRequest, &ObjectCopyRequest::handle_read_from_parent>(this); - auto comp = io::AioCompletion::create_and_start( - ctx, util::get_image_ctx(m_src_image_ctx->parent), io::AIO_TYPE_READ); - ldout(m_cct, 20) << "completion " << comp << ", extents " << image_extents - << dendl; - - auto src_image_ctx = m_src_image_ctx; - io::ImageRequest::aio_read(src_image_ctx->parent, comp, - std::move(image_extents), - io::ReadResult{&m_read_from_parent_data}, - src_image_ctx->get_data_io_context(), 0, 0, - ZTracer::Trace()); -} - -template -void ObjectCopyRequest::handle_read_from_parent(int r) { - ldout(m_cct, 20) << "r=" << r << dendl; - - if (r < 0) { - lderr(m_cct) << "failed to read 
from parent: " << cpp_strerror(r) << dendl; - finish(r); - return; - } - - if (!m_read_ops.empty()) { - ceph_assert(m_read_ops.size() == 1); - auto src_snap_seq = m_read_ops.begin()->first.first; - auto &copy_ops = m_read_ops.begin()->second; - uint64_t offset = 0; - for (auto it = copy_ops.begin(); it != copy_ops.end(); ) { - it->out_bl.substr_of(m_read_from_parent_data, offset, it->length); - offset += it->length; - if (it->out_bl.is_zero()) { - m_zero_interval[src_snap_seq].insert(it->dst_offset, it->length); - it = copy_ops.erase(it); - } else { - it++; - } - } - merge_write_ops(); - } - - compute_dst_object_may_exist(); - compute_zero_ops(); - - if (m_write_ops.empty()) { - // nothing to copy - finish(-ENOENT); - return; - } - - send_write_object(); - return; + send_read(); } template void ObjectCopyRequest::send_write_object() { ceph_assert(!m_write_ops.empty()); - auto& copy_ops = m_write_ops.begin()->second; + auto& write_ops = m_write_ops.begin()->second; // retrieve the destination snap context for the op SnapIds dst_snap_ids; @@ -328,10 +204,10 @@ void ObjectCopyRequest::send_write_object() { auto dst_snap_id = snap_map_it->second.front(); auto dst_may_exist_it = m_dst_object_may_exist.find(dst_snap_id); ceph_assert(dst_may_exist_it != m_dst_object_may_exist.end()); - if (!dst_may_exist_it->second && !copy_ops.empty()) { + if (!dst_may_exist_it->second && !write_ops.empty()) { // if the object cannot exist, the only valid op is to remove it - ceph_assert(copy_ops.size() == 1U); - ceph_assert(copy_ops.begin()->type == COPY_OP_TYPE_REMOVE); + ceph_assert(write_ops.size() == 1U); + ceph_assert(write_ops.begin()->type == WRITE_OP_TYPE_REMOVE); } // write snapshot context should be before actual snapshot @@ -346,46 +222,39 @@ void ObjectCopyRequest::send_write_object() { ceph_assert(dst_snap_seq != CEPH_NOSNAP); } - ldout(m_cct, 20) << "dst_snap_seq=" << dst_snap_seq << ", " + ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << ", " + << "dst_snap_seq=" << 
dst_snap_seq << ", " << "dst_snaps=" << dst_snap_ids << dendl; librados::ObjectWriteOperation op; - uint64_t buffer_offset; - if (!m_dst_image_ctx->migration_info.empty()) { cls_client::assert_snapc_seq(&op, dst_snap_seq, cls::rbd::ASSERT_SNAPC_SEQ_GT_SNAPSET_SEQ); } - for (auto &copy_op : copy_ops) { - switch (copy_op.type) { - case COPY_OP_TYPE_WRITE: - buffer_offset = 0; - for (auto &e : copy_op.dst_extent_map) { - ldout(m_cct, 20) << "write op: " << e.first << "~" << e.second - << dendl; - bufferlist tmpbl; - tmpbl.substr_of(copy_op.out_bl, buffer_offset, e.second); - op.write(e.first, tmpbl); - op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | - LIBRADOS_OP_FLAG_FADVISE_NOCACHE); - buffer_offset += e.second; - } + for (auto& write_op : write_ops) { + switch (write_op.type) { + case WRITE_OP_TYPE_WRITE: + ldout(m_cct, 20) << "write op: " << write_op.object_offset << "~" + << write_op.object_length << dendl; + op.write(write_op.object_offset, write_op.bl); + op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | + LIBRADOS_OP_FLAG_FADVISE_NOCACHE); break; - case COPY_OP_TYPE_ZERO: - ldout(m_cct, 20) << "zero op: " << copy_op.dst_offset << "~" - << copy_op.length << dendl; - op.zero(copy_op.dst_offset, copy_op.length); + case WRITE_OP_TYPE_ZERO: + ldout(m_cct, 20) << "zero op: " << write_op.object_offset << "~" + << write_op.object_length << dendl; + op.zero(write_op.object_offset, write_op.object_length); break; - case COPY_OP_TYPE_REMOVE_TRUNC: + case WRITE_OP_TYPE_REMOVE_TRUNC: ldout(m_cct, 20) << "create op" << dendl; op.create(false); - // fall through - case COPY_OP_TYPE_TRUNC: - ldout(m_cct, 20) << "trunc op: " << copy_op.dst_offset << dendl; - op.truncate(copy_op.dst_offset); + [[fallthrough]]; + case WRITE_OP_TYPE_TRUNC: + ldout(m_cct, 20) << "trunc op: " << write_op.object_offset << dendl; + op.truncate(write_op.object_offset); break; - case COPY_OP_TYPE_REMOVE: + case WRITE_OP_TYPE_REMOVE: ldout(m_cct, 20) << "remove op" << dendl; op.remove(); break; 
@@ -534,348 +403,212 @@ Context *ObjectCopyRequest::start_lock_op(ceph::shared_mutex &owner_lock, return m_dst_image_ctx->exclusive_lock->start_op(r); } -template -uint64_t ObjectCopyRequest::src_to_dst_object_offset(uint64_t objectno, - uint64_t offset) { - std::vector> image_extents; - Striper::extent_to_file(m_cct, &m_src_image_ctx->layout, objectno, offset, 1, - image_extents); - ceph_assert(image_extents.size() == 1); - auto dst_object_offset = image_extents.begin()->first; - - std::map> dst_object_extents; - Striper::file_to_extents(m_cct, m_dst_image_ctx->format_string, - &m_dst_image_ctx->layout, dst_object_offset, 1, 0, - dst_object_extents); - ceph_assert(dst_object_extents.size() == 1); - ceph_assert(dst_object_extents.begin()->second.size() == 1); - auto &e = *dst_object_extents.begin()->second.begin(); - ceph_assert(e.objectno == m_dst_object_number); - - return e.offset; -} - -template -void ObjectCopyRequest::compute_src_object_extents() { - std::vector> image_extents; - Striper::extent_to_file(m_cct, &m_dst_image_ctx->layout, m_dst_object_number, - 0, m_dst_image_ctx->layout.object_size, image_extents); - - size_t total = 0; - for (auto &e : image_extents) { - std::map> src_object_extents; - Striper::file_to_extents(m_cct, m_src_image_ctx->format_string, - &m_src_image_ctx->layout, e.first, e.second, 0, - src_object_extents); - auto stripe_unit = std::min(m_src_image_ctx->layout.stripe_unit, - m_dst_image_ctx->layout.stripe_unit); - for (auto &p : src_object_extents) { - for (auto &s : p.second) { - m_src_objects.insert(s.objectno); - total += s.length; - while (s.length > 0) { - ceph_assert(s.length >= stripe_unit); - auto dst_object_offset = src_to_dst_object_offset(s.objectno, s.offset); - m_src_object_extents[dst_object_offset] = {s.objectno, s.offset, - stripe_unit}; - s.offset += stripe_unit; - s.length -= stripe_unit; - } - } - } - } - - ceph_assert(total == m_dst_image_ctx->layout.object_size); - - ldout(m_cct, 20) << 
m_src_object_extents.size() << " src extents" << dendl; -} - template void ObjectCopyRequest::compute_read_ops() { - m_read_ops = {}; - m_read_snaps = {}; - m_zero_interval = {}; + ldout(m_cct, 20) << dendl; m_src_image_ctx->image_lock.lock_shared(); - bool hide_parent = (m_src_image_ctx->parent != nullptr); + bool read_from_parent = (m_src_snap_id_start == 0 && + m_src_image_ctx->parent != nullptr); m_src_image_ctx->image_lock.unlock_shared(); - librados::snap_t src_copy_point_snap_id = m_snap_map.rbegin()->first; - bool prev_exists = (hide_parent || m_src_snap_id_start > 0); - uint64_t prev_end_size = prev_exists ? - m_src_image_ctx->layout.object_size : 0; - librados::snap_t start_src_snap_id = m_src_snap_id_start; - - for (auto &pair : m_snap_map) { - ceph_assert(!pair.second.empty()); - librados::snap_t end_src_snap_id = pair.first; - librados::snap_t end_dst_snap_id = pair.second.front(); - - interval_set diff; - uint64_t end_size; - bool exists; - librados::snap_t clone_end_snap_id; - calc_snap_set_diff(m_cct, m_snap_set, start_src_snap_id, - end_src_snap_id, &diff, &end_size, &exists, - &clone_end_snap_id, &m_read_whole_object); - - if (m_read_whole_object) { - ldout(m_cct, 1) << "need to read full object" << dendl; - diff.insert(0, m_src_image_ctx->layout.object_size); - exists = true; - end_size = m_src_image_ctx->layout.object_size; - clone_end_snap_id = end_src_snap_id; - } else if (!exists) { - end_size = 0; - if (hide_parent && end_src_snap_id == m_snap_map.begin()->first && - m_snap_set.clones.empty()) { - ldout(m_cct, 20) << "no clones for existing object" << dendl; - exists = true; - diff.insert(0, m_src_image_ctx->layout.object_size); - clone_end_snap_id = end_src_snap_id; - } - } - - ldout(m_cct, 20) << "start_src_snap_id=" << start_src_snap_id << ", " - << "end_src_snap_id=" << end_src_snap_id << ", " - << "clone_end_snap_id=" << clone_end_snap_id << ", " - << "end_dst_snap_id=" << end_dst_snap_id << ", " - << "diff=" << diff << ", " - << 
"end_size=" << end_size << ", " - << "exists=" << exists << dendl; - - m_zero_interval[end_src_snap_id] = {}; - - if (exists || prev_exists) { - // clip diff to size of object (in case it was truncated) - if (end_size < prev_end_size) { - interval_set trunc; - trunc.insert(end_size, prev_end_size - end_size); - trunc.intersection_of(diff); - diff.subtract(trunc); - ldout(m_cct, 20) << "clearing truncate diff: " << trunc << dendl; - } - - if (exists) { - // reads should be issued against the newest (existing) snapshot within - // the associated snapshot object clone. writes should be issued - // against the oldest snapshot in the snap_map. - ceph_assert(clone_end_snap_id >= end_src_snap_id); - if (clone_end_snap_id > src_copy_point_snap_id) { - // do not read past the copy point snapshot - clone_end_snap_id = src_copy_point_snap_id; + bool only_dne_extents = true; + interval_set dne_image_interval; + + // compute read ops for any data sections or for any extents that we need to + // read from our parent + for (auto& [key, image_intervals] : m_snapshot_delta) { + io::WriteReadSnapIds write_read_snap_ids{key}; + for (auto& image_interval : image_intervals) { + auto state = image_interval.get_val().state; + switch (state) { + case io::SNAPSHOT_EXTENT_STATE_DNE: + ceph_assert(write_read_snap_ids == io::INITIAL_WRITE_READ_SNAP_IDS); + if (read_from_parent) { + // special-case for DNE object-extents since when flattening we need + // to read data from the parent images extents + ldout(m_cct, 20) << "DNE extent: " + << image_interval.get_off() << "~" + << image_interval.get_len() << dendl; + dne_image_interval.insert( + image_interval.get_off(), image_interval.get_len()); } + break; + case io::SNAPSHOT_EXTENT_STATE_ZEROED: + only_dne_extents = false; + break; + case io::SNAPSHOT_EXTENT_STATE_DATA: + ldout(m_cct, 20) << "read op: " + << "snap_ids=" << write_read_snap_ids << " " + << image_interval.get_off() << "~" + << image_interval.get_len() << dendl; + 
m_read_ops[write_read_snap_ids].image_interval.union_insert( + image_interval.get_off(), image_interval.get_len()); + only_dne_extents = false; + break; + default: + ceph_abort(); + break; } + } + } - for (auto &it : m_src_object_extents) { - auto dst_object_offset = it.first; - auto &e = it.second; + if (!dne_image_interval.empty() && (!only_dne_extents || m_flatten)) { + auto snap_map_it = m_snap_map.begin(); + ceph_assert(snap_map_it != m_snap_map.end()); - if (e.object_no != m_src_ono) { + auto src_snap_seq = snap_map_it->first; + WriteReadSnapIds write_read_snap_ids{src_snap_seq, src_snap_seq}; + + // prepare to prune the extents to the maximum parent overlap + m_src_image_ctx->image_lock.lock_shared(); + uint64_t src_parent_overlap = 0; + int r = m_src_image_ctx->get_parent_overlap(src_snap_seq, + &src_parent_overlap); + m_src_image_ctx->image_lock.unlock_shared(); + + if (r < 0) { + ldout(m_cct, 5) << "failed getting parent overlap for snap_id: " + << src_snap_seq << ": " << cpp_strerror(r) << dendl; + } else { + ldout(m_cct, 20) << "parent overlap=" << src_parent_overlap << dendl; + for (auto& [image_offset, image_length] : dne_image_interval) { + auto end_image_offset = std::min( + image_offset + image_length, src_parent_overlap); + if (image_offset >= end_image_offset) { + // starting offset is beyond the end of the parent overlap continue; } - interval_set read_interval; - read_interval.insert(e.offset, e.length); - - if (end_size < prev_end_size) { - interval_set zero_interval; - zero_interval.insert(end_size, prev_end_size - end_size); - zero_interval.intersection_of(read_interval); - if (!zero_interval.empty()) { - auto it = zero_interval.begin(); - auto offset = it.get_start() - e.offset; - m_zero_interval[end_src_snap_id].insert(dst_object_offset + offset, - it.get_len()); - ldout(m_cct, 20) << "extent " << e.offset << "~" << e.length - << " intersects truncation " << end_size << "~" - << prev_end_size - end_size << ", inserting zero " - << 
dst_object_offset + offset << "~" - << it.get_len() << dendl; - } - } - - // limit read interval to diff - read_interval.intersection_of(diff); - - ldout(m_cct, 20) << "src_object_extent: " << e.offset << "~" << e.length - << ", dst_object_offset=" << dst_object_offset - << ", read: " << read_interval << dendl; - - ceph_assert(exists || read_interval.empty()); - - for (auto it = read_interval.begin(); it != read_interval.end(); - it++) { - ceph_assert(it.get_start() >= e.offset); - auto offset = it.get_start() - e.offset; - ldout(m_cct, 20) << "read/write op: " << it.get_start() << "~" - << it.get_len() << " dst: " - << dst_object_offset + offset << dendl; - m_read_ops[{end_src_snap_id, clone_end_snap_id}] - .emplace_back(COPY_OP_TYPE_WRITE, it.get_start(), - dst_object_offset + offset, it.get_len()); - } + image_length = end_image_offset - image_offset; + ldout(m_cct, 20) << "parent read op: " + << "snap_ids=" << write_read_snap_ids << " " + << image_offset << "~" << image_length << dendl; + m_read_ops[write_read_snap_ids].image_interval.union_insert( + image_offset, image_length); } } - - prev_end_size = end_size; - prev_exists = exists; - if (hide_parent && prev_exists && prev_end_size == 0) { - // hide parent - prev_end_size = m_src_image_ctx->layout.object_size; - } - start_src_snap_id = end_src_snap_id; } - for (auto &it : m_read_ops) { - m_read_snaps.push_back(it.first); + for (auto& [write_read_snap_ids, _] : m_read_ops) { + m_read_snaps.push_back(write_read_snap_ids); } } template -void ObjectCopyRequest::compute_read_from_parent_ops( - io::Extents *parent_image_extents) { - assert(ceph_mutex_is_locked(m_src_image_ctx->image_lock)); - - m_read_ops = {}; - m_zero_interval = {}; - parent_image_extents->clear(); - - if (m_src_image_ctx->parent == nullptr) { - ldout(m_cct, 20) << "no parent" << dendl; - return; - } - - size_t noent_count = 0; - for (auto &it : m_src_object_extents) { - if (it.second.noent) { - noent_count++; - } - } - - if (noent_count == 0) { 
- ldout(m_cct, 20) << "no extents need read from parent" << dendl; - return; - } - - if (noent_count == m_src_object_extents.size() && !m_flatten) { - ldout(m_cct, 20) << "reading all extents skipped when no flatten" - << dendl; - return; - } - +void ObjectCopyRequest::merge_write_ops() { ldout(m_cct, 20) << dendl; - auto src_snap_seq = m_snap_map.begin()->first; - - uint64_t parent_overlap; - int r = m_src_image_ctx->get_parent_overlap(src_snap_seq, &parent_overlap); - if (r < 0) { - ldout(m_cct, 5) << "failed getting parent overlap for snap_id: " - << src_snap_seq << ": " << cpp_strerror(r) << dendl; - return; - } - if (parent_overlap == 0) { - ldout(m_cct, 20) << "no parent overlap" << dendl; - return; - } - - for (auto &it : m_src_object_extents) { - auto dst_object_offset = it.first; - auto &e = it.second; + for (auto& [write_read_snap_ids, read_op] : m_read_ops) { + auto src_snap_seq = write_read_snap_ids.first; - if (!e.noent) { - continue; + // convert the resulting sparse image extent map to an interval ... + auto& image_data_interval = m_dst_data_interval[src_snap_seq]; + for (auto [image_offset, image_length] : read_op.image_extent_map) { + image_data_interval.union_insert(image_offset, image_length); } - std::vector> image_extents; - Striper::extent_to_file(m_cct, &m_src_image_ctx->layout, e.object_no, - e.offset, e.length, image_extents); - - uint64_t overlap = m_src_image_ctx->prune_parent_extents(image_extents, - parent_overlap); - if (overlap == 0) { - ldout(m_cct, 20) << "no parent overlap for object_no " << e.object_no - << " extent " << e.offset << "~" << e.length << dendl; - continue; + // ... 
and compute the difference between it and the image extents since + // that indicates zeroed extents + interval_set intersection; + intersection.intersection_of(read_op.image_interval, image_data_interval); + read_op.image_interval.subtract(intersection); + + for (auto& [image_offset, image_length] : read_op.image_interval) { + ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << ", " + << "inserting sparse-read zero " << image_offset << "~" + << image_length << dendl; + m_dst_zero_interval[src_snap_seq].union_insert( + image_offset, image_length); } - ldout(m_cct, 20) << "object_no " << e.object_no << " extent " << e.offset - << "~" << e.length << " overlap " << parent_overlap - << " parent extents " << image_extents << dendl; - - ceph_assert(image_extents.size() == 1); - - auto src_image_offset = image_extents.begin()->first; - auto length = image_extents.begin()->second; - m_read_ops[{src_snap_seq, 0}].emplace_back(COPY_OP_TYPE_WRITE, e.offset, - dst_object_offset, length); - m_read_ops[{src_snap_seq, 0}].rbegin()->src_extent_map[e.offset] = length; - parent_image_extents->emplace_back(src_image_offset, length); - } - - if (!parent_image_extents->empty()) { - m_dst_object_state[src_snap_seq] = OBJECT_EXISTS; + uint64_t buffer_offset = 0; + for (auto [image_offset, image_length] : read_op.image_extent_map) { + // convert image extents back to object extents for the write op + striper::LightweightObjectExtents object_extents; + Striper::file_to_extents(m_cct, &m_dst_image_ctx->layout, image_offset, + image_length, 0, buffer_offset, &object_extents); + for (auto& object_extent : object_extents) { + ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << ", " + << "object_offset=" << object_extent.offset << ", " + << "object_length=" << object_extent.length << dendl; + + bufferlist tmp_bl; + tmp_bl.substr_of(read_op.out_bl, buffer_offset, object_extent.length); + m_write_ops[src_snap_seq].emplace_back( + WRITE_OP_TYPE_WRITE, object_extent.offset, 
object_extent.length, + std::move(tmp_bl)); + + buffer_offset += object_extent.length; + } + } } } template -void ObjectCopyRequest::merge_write_ops() { +void ObjectCopyRequest::compute_zero_ops() { + compute_dst_object_may_exist(); + ldout(m_cct, 20) << dendl; - for (auto &it : m_zero_interval) { - m_dst_zero_interval[it.first].insert(it.second); - } + m_src_image_ctx->image_lock.lock_shared(); + bool hide_parent = (m_src_snap_id_start == 0 && + m_src_image_ctx->parent != nullptr); + m_src_image_ctx->image_lock.unlock_shared(); - for (auto &it : m_read_ops) { - auto src_snap_seq = it.first.first; - auto &copy_ops = it.second; - for (auto &copy_op : copy_ops) { - uint64_t src_offset = copy_op.src_offset; - uint64_t dst_offset = copy_op.dst_offset; - for (auto &e : copy_op.src_extent_map) { - uint64_t zero_len = e.first - src_offset; - if (zero_len > 0) { - ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq - << ", inserting zero " << dst_offset << "~" - << zero_len << dendl; - m_dst_zero_interval[src_snap_seq].insert(dst_offset, zero_len); - src_offset += zero_len; - dst_offset += zero_len; + // collect all known zeroed extents from the snapshot delta + for (auto& [write_read_snap_ids, image_intervals] : m_snapshot_delta) { + auto src_snap_seq = write_read_snap_ids.first; + for (auto& image_interval : image_intervals) { + auto state = image_interval.get_val().state; + switch (state) { + case io::SNAPSHOT_EXTENT_STATE_ZEROED: + if (write_read_snap_ids != io::WriteReadSnapIds{0, 0}) { + ldout(m_cct, 20) << "zeroed extent: " + << "src_snap_seq=" << src_snap_seq << " " + << image_interval.get_off() << "~" + << image_interval.get_len() << dendl; + m_dst_zero_interval[src_snap_seq].union_insert( + image_interval.get_off(), image_interval.get_len()); + } else if (hide_parent) { + auto first_src_snap_id = m_snap_map.begin()->first; + ldout(m_cct, 20) << "zeroed (hide parent) extent: " + << "src_snap_seq=" << first_src_snap_id << " " + << image_interval.get_off() << "~" + << 
image_interval.get_len() << dendl; + m_dst_zero_interval[first_src_snap_id].union_insert( + image_interval.get_off(), image_interval.get_len()); } - copy_op.dst_extent_map[dst_offset] = e.second; - src_offset += e.second; - dst_offset += e.second; - } - if (dst_offset < copy_op.dst_offset + copy_op.length) { - uint64_t zero_len = copy_op.dst_offset + copy_op.length - dst_offset; - ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq - << ", inserting zero " << dst_offset << "~" - << zero_len << dendl; - m_dst_zero_interval[src_snap_seq].insert(dst_offset, zero_len); - } else { - ceph_assert(dst_offset == copy_op.dst_offset + copy_op.length); + break; + case io::SNAPSHOT_EXTENT_STATE_DNE: + case io::SNAPSHOT_EXTENT_STATE_DATA: + break; + default: + ceph_abort(); + break; } - m_write_ops[src_snap_seq].emplace_back(std::move(copy_op)); } } -} - -template -void ObjectCopyRequest::compute_zero_ops() { - ldout(m_cct, 20) << dendl; bool fast_diff = m_dst_image_ctx->test_features(RBD_FEATURE_FAST_DIFF); uint64_t prev_end_size = 0; - m_src_image_ctx->image_lock.lock_shared(); - bool hide_parent = (m_src_image_ctx->parent != nullptr); - m_src_image_ctx->image_lock.unlock_shared(); + // ensure we have a zeroed interval for each snapshot + for (auto& [src_snap_seq, _] : m_snap_map) { + m_dst_zero_interval[src_snap_seq]; + } + // compute zero ops from the zeroed intervals for (auto &it : m_dst_zero_interval) { auto src_snap_seq = it.first; auto &zero_interval = it.second; + // subtract any data intervals from our zero intervals + auto& data_interval = m_dst_data_interval[src_snap_seq]; + interval_set intersection; + intersection.intersection_of(zero_interval, data_interval); + zero_interval.subtract(intersection); + auto snap_map_it = m_snap_map.find(src_snap_seq); ceph_assert(snap_map_it != m_snap_map.end()); auto dst_snap_seq = snap_map_it->second.front(); @@ -884,7 +617,7 @@ void ObjectCopyRequest::compute_zero_ops() { ceph_assert(dst_may_exist_it != 
m_dst_object_may_exist.end()); if (!dst_may_exist_it->second && prev_end_size > 0) { ldout(m_cct, 5) << "object DNE for snap_id: " << dst_snap_seq << dendl; - m_write_ops[src_snap_seq].emplace_back(COPY_OP_TYPE_REMOVE, 0, 0, 0); + m_write_ops[src_snap_seq].emplace_back(WRITE_OP_TYPE_REMOVE, 0, 0); prev_end_size = 0; continue; } @@ -902,13 +635,9 @@ void ObjectCopyRequest::compute_zero_ops() { ldout(m_cct, 20) << "no parent overlap" << dendl; hide_parent = false; } else { - std::vector> image_extents; - Striper::extent_to_file(m_cct, &m_dst_image_ctx->layout, - m_dst_object_number, 0, - m_dst_image_ctx->layout.object_size, - image_extents); - uint64_t overlap = m_dst_image_ctx->prune_parent_extents(image_extents, - parent_overlap); + auto image_extents = m_image_extents; + uint64_t overlap = m_dst_image_ctx->prune_parent_extents( + image_extents, parent_overlap); if (overlap == 0) { ldout(m_cct, 20) << "no parent overlap" << dendl; hide_parent = false; @@ -926,46 +655,58 @@ void ObjectCopyRequest::compute_zero_ops() { // update end_size if there are writes into higher offsets auto iter = m_write_ops.find(src_snap_seq); if (iter != m_write_ops.end()) { - for (auto &copy_op : iter->second) { - for (auto &e : copy_op.dst_extent_map) { - end_size = std::max(end_size, e.first + e.second); - } + for (auto &write_op : iter->second) { + end_size = std::max( + end_size, write_op.object_offset + write_op.object_length); } } - for (auto z = zero_interval.begin(); z != zero_interval.end(); z++) { - if (z.get_start() + z.get_len() >= end_size) { - // zero interval at the object end - if (z.get_start() == 0 && hide_parent) { - m_write_ops[src_snap_seq] - .emplace_back(COPY_OP_TYPE_REMOVE_TRUNC, 0, 0, 0); - ldout(m_cct, 20) << "COPY_OP_TYPE_REMOVE_TRUNC" << dendl; - } else if (z.get_start() < prev_end_size) { - if (z.get_start() == 0) { - m_write_ops[src_snap_seq] - .emplace_back(COPY_OP_TYPE_REMOVE, 0, 0, 0); - ldout(m_cct, 20) << "COPY_OP_TYPE_REMOVE" << dendl; - } else { - 
m_write_ops[src_snap_seq] - .emplace_back(COPY_OP_TYPE_TRUNC, 0, z.get_start(), 0); - ldout(m_cct, 20) << "COPY_OP_TYPE_TRUNC " << z.get_start() << dendl; + ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << ", " + << "dst_snap_seq=" << dst_snap_seq << ", " + << "zero_interval=" << zero_interval << ", " + << "end_size=" << end_size << dendl; + for (auto z = zero_interval.begin(); z != zero_interval.end(); ++z) { + // convert image extents back to object extents for the write op + striper::LightweightObjectExtents object_extents; + Striper::file_to_extents(m_cct, &m_dst_image_ctx->layout, z.get_start(), + z.get_len(), 0, 0, &object_extents); + for (auto& object_extent : object_extents) { + if (object_extent.offset + object_extent.length >= end_size) { + // zero interval at the object end + if (object_extent.offset == 0 && hide_parent) { + ldout(m_cct, 20) << "WRITE_OP_TYPE_REMOVE_TRUNC" << dendl; + m_write_ops[src_snap_seq].emplace_back( + WRITE_OP_TYPE_REMOVE_TRUNC, 0, 0); + } else if (object_extent.offset < prev_end_size) { + if (object_extent.offset == 0) { + ldout(m_cct, 20) << "WRITE_OP_TYPE_REMOVE" << dendl; + m_write_ops[src_snap_seq].emplace_back( + WRITE_OP_TYPE_REMOVE, 0, 0); + } else { + ldout(m_cct, 20) << "WRITE_OP_TYPE_TRUNC " << object_extent.offset + << dendl; + m_write_ops[src_snap_seq].emplace_back( + WRITE_OP_TYPE_TRUNC, object_extent.offset, 0); + } } + end_size = std::min(end_size, object_extent.offset); + } else { + // zero interval inside the object + ldout(m_cct, 20) << "WRITE_OP_TYPE_ZERO " + << object_extent.offset << "~" + << object_extent.length << dendl; + m_write_ops[src_snap_seq].emplace_back( + WRITE_OP_TYPE_ZERO, object_extent.offset, object_extent.length); } - end_size = std::min(end_size, z.get_start()); - } else { - // zero interval inside the object - m_write_ops[src_snap_seq] - .emplace_back(COPY_OP_TYPE_ZERO, 0, z.get_start(), z.get_len()); - ldout(m_cct, 20) << "COPY_OP_TYPE_ZERO " << z.get_start() << "~" - << z.get_len() 
<< dendl; } } - ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << ", end_size=" - << end_size << dendl; + + ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << ", " + << "end_size=" << end_size << dendl; if (end_size > 0 || hide_parent) { m_dst_object_state[src_snap_seq] = OBJECT_EXISTS; if (fast_diff && end_size == prev_end_size && - m_write_ops[src_snap_seq].empty()) { + m_write_ops.count(src_snap_seq) == 0) { m_dst_object_state[src_snap_seq] = OBJECT_EXISTS_CLEAN; } } diff --git a/src/librbd/deep_copy/ObjectCopyRequest.h b/src/librbd/deep_copy/ObjectCopyRequest.h index 5d57a8b32c222..e45685e796e5d 100644 --- a/src/librbd/deep_copy/ObjectCopyRequest.h +++ b/src/librbd/deep_copy/ObjectCopyRequest.h @@ -62,20 +62,14 @@ private: * @verbatim * * - * | /----------------------\ - * | | | - * v v | (repeat for each src object) - * LIST_SNAPS < * * * | - * | * (-ENOENT and snap set stale) - * | * * * * * * | - * | * /-----------\ | - * | * | | (repeat for each snapshot) - * v * v | | - * READ_OBJECT ---------/ | - * | | | - * | \----------------------/ + * | * v - * READ_FROM_PARENT (skip if not needed) + * LIST_SNAPS + * | + * |/---------\ + * | | (repeat for each snapshot) + * v | + * READ ---------/ * | * | /-----------\ * | | | (repeat for each snapshot) @@ -93,52 +87,41 @@ private: * @endverbatim */ - struct SrcObjectExtent { - uint64_t object_no = 0; - uint64_t offset = 0; - uint64_t length = 0; - bool noent = false; - - SrcObjectExtent() { - } - SrcObjectExtent(uint64_t object_no, uint64_t offset, uint64_t length) - : object_no(object_no), offset(offset), length(length) { - } + enum WriteOpType { + WRITE_OP_TYPE_WRITE, + WRITE_OP_TYPE_ZERO, + WRITE_OP_TYPE_TRUNC, + WRITE_OP_TYPE_REMOVE, + WRITE_OP_TYPE_REMOVE_TRUNC, }; - typedef std::map SrcObjectExtents; + typedef std::map ExtentMap; - enum CopyOpType { - COPY_OP_TYPE_WRITE, - COPY_OP_TYPE_ZERO, - COPY_OP_TYPE_TRUNC, - COPY_OP_TYPE_REMOVE, - COPY_OP_TYPE_REMOVE_TRUNC, + struct ReadOp { + interval_set 
image_interval; + ExtentMap image_extent_map; + bufferlist out_bl; }; - typedef std::map ExtentMap; - - struct CopyOp { - CopyOp(CopyOpType type, uint64_t src_offset, uint64_t dst_offset, - uint64_t length) - : type(type), src_offset(src_offset), dst_offset(dst_offset), - length(length) { + struct WriteOp { + WriteOp(WriteOpType type, uint64_t object_offset, uint64_t object_length) + : type(type), object_offset(object_offset), object_length(object_length) { + } + WriteOp(WriteOpType type, uint64_t object_offset, uint64_t object_length, + bufferlist&& bl) + : type(type), object_offset(object_offset), object_length(object_length), + bl(std::move(bl)) { } - CopyOpType type; - uint64_t src_offset; - uint64_t dst_offset; - uint64_t length; + WriteOpType type; + uint64_t object_offset; + uint64_t object_length; - ExtentMap src_extent_map; - ExtentMap dst_extent_map; - bufferlist out_bl; + bufferlist bl; }; - typedef std::list CopyOps; typedef std::pair WriteReadSnapIds; - typedef std::map SnapObjectStates; - typedef std::map> SnapObjectSizes; + typedef std::list WriteOps; ImageCtxT *m_src_image_ctx; ImageCtxT *m_dst_image_ctx; @@ -155,35 +138,26 @@ private: decltype(m_dst_image_ctx->data_ctx) m_dst_io_ctx; std::string m_dst_oid; - std::set m_src_objects; - uint64_t m_src_ono; - std::string m_src_oid; - SrcObjectExtents m_src_object_extents; - librados::snap_set_t m_snap_set; - int m_snap_ret = 0; - bool m_retry_missing_read = false; - librados::snap_set_t m_retry_snap_set; - bool m_read_whole_object = false; - - std::map m_read_ops; + io::Extents m_image_extents; + + io::SnapshotDelta m_snapshot_delta; + + std::map m_read_ops; std::list m_read_snaps; - std::map m_write_ops; - std::map> m_zero_interval; + std::map m_write_ops; + + std::map> m_dst_data_interval; std::map> m_dst_zero_interval; std::map m_dst_object_state; std::map m_dst_object_may_exist; - bufferlist m_read_from_parent_data; io::AsyncOperation* m_src_async_op = nullptr; void send_list_snaps(); void 
handle_list_snaps(int r); - void send_read_object(); - void handle_read_object(int r); - - void send_read_from_parent(); - void handle_read_from_parent(int r); + void send_read(); + void handle_read(int r); void send_write_object(); void handle_write_object(int r); @@ -193,11 +167,7 @@ private: Context *start_lock_op(ceph::shared_mutex &owner_lock, int* r); - uint64_t src_to_dst_object_offset(uint64_t objectno, uint64_t offset); - - void compute_src_object_extents(); void compute_read_ops(); - void compute_read_from_parent_ops(io::Extents *image_extents); void merge_write_ops(); void compute_zero_ops(); diff --git a/src/librbd/io/ImageRequest.cc b/src/librbd/io/ImageRequest.cc index 51f37dce0e8f7..09a37c50bf4d1 100644 --- a/src/librbd/io/ImageRequest.cc +++ b/src/librbd/io/ImageRequest.cc @@ -433,6 +433,10 @@ ImageReadRequest::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp, template int ImageReadRequest::clip_request() { + if ((m_read_flags & READ_FLAG_DISABLE_CLIPPING) != 0) { + return 0; + } + int r = ImageRequest::clip_request(); if (r < 0) { return r; diff --git a/src/librbd/io/Types.h b/src/librbd/io/Types.h index 663cd2399dea1..03d5f528baba4 100644 --- a/src/librbd/io/Types.h +++ b/src/librbd/io/Types.h @@ -108,6 +108,7 @@ enum ObjectDispatchLayer { enum { READ_FLAG_DISABLE_READ_FROM_PARENT = 1UL << 0, + READ_FLAG_DISABLE_CLIPPING = 1UL << 1, }; enum { diff --git a/src/test/librbd/deep_copy/test_mock_ObjectCopyRequest.cc b/src/test/librbd/deep_copy/test_mock_ObjectCopyRequest.cc index cd3e255144aa1..582875c81ee4a 100644 --- a/src/test/librbd/deep_copy/test_mock_ObjectCopyRequest.cc +++ b/src/test/librbd/deep_copy/test_mock_ObjectCopyRequest.cc @@ -3,6 +3,7 @@ #include "test/librbd/test_mock_fixture.h" #include "include/interval_set.h" +#include "include/neorados/RADOS.hpp" #include "include/rbd/librbd.hpp" #include "include/rbd/object_map_types.h" #include "librbd/AsioEngine.h" @@ -13,7 +14,6 @@ #include "librbd/api/Image.h" #include "librbd/api/Io.h" 
#include "librbd/deep_copy/ObjectCopyRequest.h" -#include "librbd/io/ImageRequest.h" #include "librbd/io/ReadResult.h" #include "test/librados_test_stub/MockTestMemIoCtxImpl.h" #include "test/librbd/mock/MockImageCtx.h" @@ -39,28 +39,6 @@ inline ImageCtx* get_image_ctx(MockTestImageCtx* image_ctx) { } } // namespace util - -namespace io { - -template <> -struct ImageRequest { - static ImageRequest *s_instance; - - static void aio_read(MockTestImageCtx *ictx, AioCompletion *c, - Extents &&image_extents, ReadResult &&read_result, - IOContext io_context, int op_flags, - int read_flags, - const ZTracer::Trace &parent_trace) { - ceph_assert(s_instance != nullptr); - s_instance->aio_read(c, image_extents); - } - MOCK_METHOD2(aio_read, void(AioCompletion *, const Extents&)); -}; - -ImageRequest *ImageRequest::s_instance = nullptr; - -} // namespace io - } // namespace librbd // template definitions @@ -109,6 +87,32 @@ void scribble(librbd::ImageCtx *image_ctx, int num_ops, size_t max_size, } // anonymous namespace + +MATCHER(IsListSnaps, "") { + auto req = boost::get(&arg->request); + return (req != nullptr); +} + +MATCHER_P2(IsRead, snap_id, image_interval, "") { + auto req = boost::get(&arg->request); + if (req == nullptr || + arg->io_context->read_snap().value_or(CEPH_NOSNAP) != snap_id) { + return false; + } + + // ensure the read request encloses the full snapshot delta + interval_set expected_interval(image_interval); + interval_set read_interval; + for (auto &image_extent : arg->image_extents) { + read_interval.insert(image_extent.first, image_extent.second); + } + + interval_set intersection; + intersection.intersection_of(expected_interval, read_interval); + expected_interval.subtract(intersection); + return expected_interval.empty(); +} + class TestMockDeepCopyObjectCopyRequest : public TestMockFixture { public: typedef ObjectCopyRequest MockObjectCopyRequest; @@ -175,31 +179,19 @@ public: EXPECT_CALL(mock_exclusive_lock, start_op(_)).WillOnce(Return(new 
LambdaContext([](int){}))); } - void expect_list_snaps(librbd::MockTestImageCtx &mock_image_ctx, - librados::MockTestMemIoCtxImpl &mock_io_ctx, - const librados::snap_set_t &snap_set) { - expect_get_object_name(mock_image_ctx); - expect_set_snap_read(mock_io_ctx, CEPH_SNAPDIR); - EXPECT_CALL(mock_io_ctx, - list_snaps(mock_image_ctx.image_ctx->get_object_name(0), _)) - .WillOnce(DoAll(WithArg<1>(Invoke([&snap_set](librados::snap_set_t *out_snap_set) { - *out_snap_set = snap_set; - })), - Return(0))); - } - - void expect_list_snaps(librbd::MockTestImageCtx &mock_image_ctx, - librados::MockTestMemIoCtxImpl &mock_io_ctx, int r) { - expect_get_object_name(mock_image_ctx); - expect_set_snap_read(mock_io_ctx, CEPH_SNAPDIR); - auto &expect = EXPECT_CALL(mock_io_ctx, - list_snaps(mock_image_ctx.image_ctx->get_object_name(0), - _)); - if (r < 0) { - expect.WillOnce(Return(r)); - } else { - expect.WillOnce(DoDefault()); - } + void expect_list_snaps(librbd::MockTestImageCtx &mock_image_ctx, int r) { + EXPECT_CALL(*mock_image_ctx.io_image_dispatcher, send(IsListSnaps())) + .WillOnce(Invoke( + [&mock_image_ctx, r](io::ImageDispatchSpec* spec) { + if (r < 0) { + spec->fail(r); + return; + } + + spec->image_dispatcher = + mock_image_ctx.image_ctx->io_image_dispatcher; + mock_image_ctx.image_ctx->io_image_dispatcher->send(spec); + })); } void expect_get_object_name(librbd::MockTestImageCtx &mock_image_ctx) { @@ -219,31 +211,28 @@ public: m_snap_map, 0, false, nullptr, on_finish); } - void expect_set_snap_read(librados::MockTestMemIoCtxImpl &mock_io_ctx, - uint64_t snap_id) { - EXPECT_CALL(mock_io_ctx, set_snap_read(snap_id)); - } - - void expect_sparse_read(librados::MockTestMemIoCtxImpl &mock_io_ctx, uint64_t offset, - uint64_t length, int r) { - - auto &expect = EXPECT_CALL(mock_io_ctx, sparse_read(_, offset, length, _, _, - _)); - if (r < 0) { - expect.WillOnce(Return(r)); - } else { - expect.WillOnce(DoDefault()); - } + void expect_read(librbd::MockTestImageCtx& mock_image_ctx, 
+ uint64_t snap_id, uint64_t offset, uint64_t length, int r) { + interval_set extents; + extents.insert(offset, length); + expect_read(mock_image_ctx, snap_id, extents, r); } - void expect_sparse_read(librados::MockTestMemIoCtxImpl &mock_io_ctx, + void expect_read(librbd::MockTestImageCtx& mock_image_ctx, uint64_t snap_id, const interval_set &extents, int r) { - for (auto extent : extents) { - expect_sparse_read(mock_io_ctx, extent.first, extent.second, r); - if (r < 0) { - break; - } - } + EXPECT_CALL(*mock_image_ctx.io_image_dispatcher, + send(IsRead(snap_id, extents))) + .WillOnce(Invoke( + [&mock_image_ctx, r](io::ImageDispatchSpec* spec) { + if (r < 0) { + spec->fail(r); + return; + } + + spec->image_dispatcher = + mock_image_ctx.image_ctx->io_image_dispatcher; + mock_image_ctx.image_ctx->io_image_dispatcher->send(spec); + })); } void expect_write(librados::MockTestMemIoCtxImpl &mock_io_ctx, @@ -490,11 +479,8 @@ TEST_F(TestMockDeepCopyObjectCopyRequest, DNE) { mock_dst_image_ctx, 0, 0, &ctx); - librados::MockTestMemIoCtxImpl &mock_src_io_ctx(get_mock_io_ctx( - request->get_src_io_ctx())); - InSequence seq; - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, -ENOENT); + expect_list_snaps(mock_src_image_ctx, -ENOENT); request->send(); ASSERT_EQ(-ENOENT, ctx.wait()); @@ -523,15 +509,12 @@ TEST_F(TestMockDeepCopyObjectCopyRequest, Write) { mock_dst_image_ctx, 0, 0, &ctx); - librados::MockTestMemIoCtxImpl &mock_src_io_ctx(get_mock_io_ctx( - request->get_src_io_ctx())); librados::MockTestMemIoCtxImpl &mock_dst_io_ctx(get_mock_io_ctx( request->get_dst_io_ctx())); InSequence seq; - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, 0); - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[0]); - expect_sparse_read(mock_src_io_ctx, 0, one.range_end(), 0); + expect_list_snaps(mock_src_image_ctx, 0); + expect_read(mock_src_image_ctx, m_src_snap_ids[0], 0, one.range_end(), 0); expect_start_op(mock_exclusive_lock); expect_write(mock_dst_io_ctx, 0, 
one.range_end(), {0, {}}, 0); expect_start_op(mock_exclusive_lock); @@ -543,112 +526,6 @@ TEST_F(TestMockDeepCopyObjectCopyRequest, Write) { ASSERT_EQ(0, compare_objects()); } -TEST_F(TestMockDeepCopyObjectCopyRequest, ReadMissingStaleSnapSet) { - ASSERT_EQ(0, create_snap("one")); - ASSERT_EQ(0, create_snap("two")); - - // scribble some data - interval_set one; - scribble(m_src_image_ctx, 10, 102400, &one); - ASSERT_EQ(0, create_snap("three")); - - ASSERT_EQ(0, create_snap("copy")); - librbd::MockTestImageCtx mock_src_image_ctx(*m_src_image_ctx); - librbd::MockTestImageCtx mock_dst_image_ctx(*m_dst_image_ctx); - - librbd::MockExclusiveLock mock_exclusive_lock; - prepare_exclusive_lock(mock_dst_image_ctx, mock_exclusive_lock); - - librbd::MockObjectMap mock_object_map; - mock_dst_image_ctx.object_map = &mock_object_map; - - expect_test_features(mock_dst_image_ctx); - expect_get_object_count(mock_dst_image_ctx); - - C_SaferCond ctx; - MockObjectCopyRequest *request = create_request(mock_src_image_ctx, - mock_dst_image_ctx, 0, 0, - &ctx); - - librados::MockTestMemIoCtxImpl &mock_src_io_ctx(get_mock_io_ctx( - request->get_src_io_ctx())); - librados::MockTestMemIoCtxImpl &mock_dst_io_ctx(get_mock_io_ctx( - request->get_dst_io_ctx())); - - librados::clone_info_t dummy_clone_info; - dummy_clone_info.cloneid = librados::SNAP_HEAD; - dummy_clone_info.size = 123; - - librados::snap_set_t dummy_snap_set1; - dummy_snap_set1.clones.push_back(dummy_clone_info); - - dummy_clone_info.size = 234; - librados::snap_set_t dummy_snap_set2; - dummy_snap_set2.clones.push_back(dummy_clone_info); - - InSequence seq; - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, dummy_snap_set1); - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[3]); - expect_sparse_read(mock_src_io_ctx, 0, 123, -ENOENT); - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, dummy_snap_set2); - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[3]); - expect_sparse_read(mock_src_io_ctx, 0, 234, 
-ENOENT); - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, 0); - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[3]); - expect_sparse_read(mock_src_io_ctx, 0, one.range_end(), 0); - expect_start_op(mock_exclusive_lock); - expect_write(mock_dst_io_ctx, 0, one.range_end(), - {m_dst_snap_ids[1], {m_dst_snap_ids[1], - m_dst_snap_ids[0]}}, - 0); - expect_start_op(mock_exclusive_lock); - expect_update_object_map(mock_dst_image_ctx, mock_object_map, - m_dst_snap_ids[2], OBJECT_EXISTS, 0); - expect_start_op(mock_exclusive_lock); - expect_update_object_map(mock_dst_image_ctx, mock_object_map, - m_dst_snap_ids[3], is_fast_diff(mock_dst_image_ctx) ? - OBJECT_EXISTS_CLEAN : OBJECT_EXISTS, 0); - - request->send(); - ASSERT_EQ(0, ctx.wait()); - ASSERT_EQ(0, compare_objects()); -} - -TEST_F(TestMockDeepCopyObjectCopyRequest, ReadMissingUpToDateSnapMap) { - // scribble some data - interval_set one; - scribble(m_src_image_ctx, 10, 102400, &one); - - ASSERT_EQ(0, create_snap("copy")); - librbd::MockTestImageCtx mock_src_image_ctx(*m_src_image_ctx); - librbd::MockTestImageCtx mock_dst_image_ctx(*m_dst_image_ctx); - - librbd::MockExclusiveLock mock_exclusive_lock; - prepare_exclusive_lock(mock_dst_image_ctx, mock_exclusive_lock); - - librbd::MockObjectMap mock_object_map; - mock_dst_image_ctx.object_map = &mock_object_map; - - expect_test_features(mock_dst_image_ctx); - - C_SaferCond ctx; - MockObjectCopyRequest *request = create_request(mock_src_image_ctx, - mock_dst_image_ctx, 0, 0, - &ctx); - - librados::MockTestMemIoCtxImpl &mock_src_io_ctx(get_mock_io_ctx( - request->get_src_io_ctx())); - - InSequence seq; - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, 0); - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[0]); - expect_sparse_read(mock_src_io_ctx, 0, one.range_end(), -ENOENT); - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, 0); - - request->send(); - ASSERT_EQ(-ENOENT, ctx.wait()); -} - TEST_F(TestMockDeepCopyObjectCopyRequest, ReadError) { // 
scribble some data interval_set one; @@ -671,13 +548,10 @@ TEST_F(TestMockDeepCopyObjectCopyRequest, ReadError) { mock_dst_image_ctx, 0, 0, &ctx); - librados::MockTestMemIoCtxImpl &mock_src_io_ctx(get_mock_io_ctx( - request->get_src_io_ctx())); - InSequence seq; - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, 0); - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[0]); - expect_sparse_read(mock_src_io_ctx, 0, one.range_end(), -EINVAL); + expect_list_snaps(mock_src_image_ctx, 0); + expect_read(mock_src_image_ctx, m_src_snap_ids[0], 0, one.range_end(), + -EINVAL); request->send(); ASSERT_EQ(-EINVAL, ctx.wait()); @@ -706,15 +580,12 @@ TEST_F(TestMockDeepCopyObjectCopyRequest, WriteError) { mock_dst_image_ctx, 0, 0, &ctx); - librados::MockTestMemIoCtxImpl &mock_src_io_ctx(get_mock_io_ctx( - request->get_src_io_ctx())); librados::MockTestMemIoCtxImpl &mock_dst_io_ctx(get_mock_io_ctx( request->get_dst_io_ctx())); InSequence seq; - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, 0); - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[0]); - expect_sparse_read(mock_src_io_ctx, 0, one.range_end(), 0); + expect_list_snaps(mock_src_image_ctx, 0); + expect_read(mock_src_image_ctx, m_src_snap_ids[0], 0, one.range_end(), 0); expect_start_op(mock_exclusive_lock); expect_write(mock_dst_io_ctx, 0, one.range_end(), {0, {}}, -EINVAL); @@ -756,17 +627,13 @@ TEST_F(TestMockDeepCopyObjectCopyRequest, WriteSnaps) { mock_dst_image_ctx, 0, 0, &ctx); - librados::MockTestMemIoCtxImpl &mock_src_io_ctx(get_mock_io_ctx( - request->get_src_io_ctx())); librados::MockTestMemIoCtxImpl &mock_dst_io_ctx(get_mock_io_ctx( request->get_dst_io_ctx())); InSequence seq; - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, 0); - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[0]); - expect_sparse_read(mock_src_io_ctx, 0, one.range_end(), 0); - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[2]); - expect_sparse_read(mock_src_io_ctx, two, 0); + 
expect_list_snaps(mock_src_image_ctx, 0); + expect_read(mock_src_image_ctx, m_src_snap_ids[0], 0, one.range_end(), 0); + expect_read(mock_src_image_ctx, m_src_snap_ids[2], two, 0); expect_start_op(mock_exclusive_lock); expect_write(mock_dst_io_ctx, 0, one.range_end(), {0, {}}, 0); expect_start_op(mock_exclusive_lock); @@ -822,15 +689,12 @@ TEST_F(TestMockDeepCopyObjectCopyRequest, Trim) { mock_dst_image_ctx, 0, 0, &ctx); - librados::MockTestMemIoCtxImpl &mock_src_io_ctx(get_mock_io_ctx( - request->get_src_io_ctx())); librados::MockTestMemIoCtxImpl &mock_dst_io_ctx(get_mock_io_ctx( request->get_dst_io_ctx())); InSequence seq; - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, 0); - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[0]); - expect_sparse_read(mock_src_io_ctx, 0, one.range_end(), 0); + expect_list_snaps(mock_src_image_ctx, 0); + expect_read(mock_src_image_ctx, m_src_snap_ids[0], 0, one.range_end(), 0); expect_start_op(mock_exclusive_lock); expect_write(mock_dst_io_ctx, 0, one.range_end(), {0, {}}, 0); expect_start_op(mock_exclusive_lock); @@ -877,15 +741,12 @@ TEST_F(TestMockDeepCopyObjectCopyRequest, Remove) { mock_dst_image_ctx, 0, 0, &ctx); - librados::MockTestMemIoCtxImpl &mock_src_io_ctx(get_mock_io_ctx( - request->get_src_io_ctx())); librados::MockTestMemIoCtxImpl &mock_dst_io_ctx(get_mock_io_ctx( request->get_dst_io_ctx())); InSequence seq; - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, 0); - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[1]); - expect_sparse_read(mock_src_io_ctx, 0, one.range_end(), 0); + expect_list_snaps(mock_src_image_ctx, 0); + expect_read(mock_src_image_ctx, m_src_snap_ids[1], 0, one.range_end(), 0); expect_start_op(mock_exclusive_lock); expect_write(mock_dst_io_ctx, 0, one.range_end(), {0, {}}, 0); expect_start_op(mock_exclusive_lock); @@ -929,15 +790,12 @@ TEST_F(TestMockDeepCopyObjectCopyRequest, ObjectMapUpdateError) { mock_dst_image_ctx, 0, 0, &ctx); - librados::MockTestMemIoCtxImpl 
&mock_src_io_ctx(get_mock_io_ctx( - request->get_src_io_ctx())); librados::MockTestMemIoCtxImpl &mock_dst_io_ctx(get_mock_io_ctx( request->get_dst_io_ctx())); InSequence seq; - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, 0); - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[0]); - expect_sparse_read(mock_src_io_ctx, 0, one.range_end(), 0); + expect_list_snaps(mock_src_image_ctx, 0); + expect_read(mock_src_image_ctx, m_src_snap_ids[0], 0, one.range_end(), 0); expect_start_op(mock_exclusive_lock); expect_write(mock_dst_io_ctx, 0, one.range_end(), {0, {}}, 0); expect_start_op(mock_exclusive_lock); @@ -983,9 +841,7 @@ TEST_F(TestMockDeepCopyObjectCopyRequest, WriteSnapsStart) { interval_set four; scribble(m_src_image_ctx, 10, 102400, &four); - // map should begin after src start and src end's dst snap seqs should - // point to HEAD revision - m_snap_map.erase(src_snap_id_start); + // src end's dst snap seqs should point to HEAD revision m_snap_map[m_src_image_ctx->snaps[0]][0] = CEPH_NOSNAP; librbd::MockTestImageCtx mock_src_image_ctx(*m_src_image_ctx); @@ -1007,19 +863,14 @@ TEST_F(TestMockDeepCopyObjectCopyRequest, WriteSnapsStart) { dst_snap_id_start, &ctx); - librados::MockTestMemIoCtxImpl &mock_src_io_ctx(get_mock_io_ctx( - request->get_src_io_ctx())); librados::MockTestMemIoCtxImpl &mock_dst_io_ctx(get_mock_io_ctx( request->get_dst_io_ctx())); InSequence seq; - expect_list_snaps(mock_src_image_ctx, mock_src_io_ctx, 0); - - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[1]); - expect_sparse_read(mock_src_io_ctx, two, 0); + expect_list_snaps(mock_src_image_ctx, 0); - expect_set_snap_read(mock_src_io_ctx, m_src_snap_ids[2]); - expect_sparse_read(mock_src_io_ctx, three, 0); + expect_read(mock_src_image_ctx, m_src_snap_ids[1], two, 0); + expect_read(mock_src_image_ctx, m_src_snap_ids[2], three, 0); expect_start_op(mock_exclusive_lock); expect_write(mock_dst_io_ctx, two, -- 2.47.3