From 7ed0cedfb8869920b0a23090cf1825736b1764ed Mon Sep 17 00:00:00 2001
From: Jason Dillaman
Date: Sun, 6 Mar 2016 13:06:42 -0500
Subject: [PATCH] rbd_mirror: deep-copy of object and its associated snapshots

Signed-off-by: Jason Dillaman
---
Review notes (free text placed after the "---" marker, outside the commit
message and outside the hunks):

 * Makefile-client.am: appends image_sync/ObjectCopyRequest.cc to
   librbd_mirror_internal_la_SOURCES and image_sync/ObjectCopyRequest.h to
   noinst_HEADERS.
 * ObjectCopyRequest::send() starts an asynchronous chain. It issues
   list_snaps against the remote object, then compute_diffs() walks the
   caller-supplied SnapMap and fills m_snap_sync_ops with write / trunc /
   remove ops keyed by the start snap id of each interval. Each key is then
   processed by one send_read_object() / send_write_object() round trip.
 * handle_list_snaps() reports -ENOENT as success (finish(0)) and
   handle_write_object() maps -ENOENT to 0 before checking for errors.
 * finish() completes m_on_finish and then deletes the request object.
 * NOTE(review): in compute_diffs(), trunc.insert(end_size, prev_end_size)
   passes prev_end_size as the second argument; confirm whether
   interval_set::insert expects a length or an end offset there.
 * NOTE(review): this copy of the patch had its line breaks collapsed and
   every "<identifier...>" token removed. The template headers, "<I>"
   qualifiers, interval_set<uint64_t>, the five standard #include names and
   the container typedef arguments were restored from how the code uses them.
   The "= librbd::ImageCtx" default argument, the explicit-instantiation
   argument and the <start>/<finish> diagram labels are inferred, and the
   whitespace is re-created; confirm against the upstream commit. The e-mail
   addresses on the From:/Signed-off-by: lines were not restored.

 src/tools/Makefile-client.am                  |   6 +-
 .../image_sync/ObjectCopyRequest.cc           | 299 ++++++++++++++++++
 .../rbd_mirror/image_sync/ObjectCopyRequest.h | 118 +++++++
 3 files changed, 421 insertions(+), 2 deletions(-)
 create mode 100644 src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc
 create mode 100644 src/tools/rbd_mirror/image_sync/ObjectCopyRequest.h

diff --git a/src/tools/Makefile-client.am b/src/tools/Makefile-client.am
index 534e536fe5d69..ed6da4e560f44 100644
--- a/src/tools/Makefile-client.am
+++ b/src/tools/Makefile-client.am
@@ -87,7 +87,8 @@ librbd_mirror_internal_la_SOURCES = \
 	tools/rbd_mirror/PoolWatcher.cc \
 	tools/rbd_mirror/Replayer.cc \
 	tools/rbd_mirror/Threads.cc \
-	tools/rbd_mirror/types.cc
+	tools/rbd_mirror/types.cc \
+	tools/rbd_mirror/image_sync/ObjectCopyRequest.cc
 noinst_LTLIBRARIES += librbd_mirror_internal.la
 noinst_HEADERS += \
 	tools/rbd_mirror/ClusterWatcher.h \
@@ -96,7 +97,8 @@ noinst_HEADERS += \
 	tools/rbd_mirror/PoolWatcher.h \
 	tools/rbd_mirror/Replayer.h \
 	tools/rbd_mirror/Threads.h \
-	tools/rbd_mirror/types.h
+	tools/rbd_mirror/types.h \
+	tools/rbd_mirror/image_sync/ObjectCopyRequest.h
 
 rbd_mirror_SOURCES = \
 	tools/rbd_mirror/main.cc
diff --git a/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc b/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc
new file mode 100644
index 0000000000000..eb669345c6ea7
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc
@@ -0,0 +1,299 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ObjectCopyRequest.h"
+#include "librados/snap_set_diff.h"
+#include "librbd/Utils.h"
+#include "common/errno.h"
+
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_sync::ObjectCopyRequest: " \
+                           << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+using librbd::util::create_rados_ack_callback;
+using librbd::util::create_rados_safe_callback;
+
+template <typename I>
+ObjectCopyRequest<I>::ObjectCopyRequest(I *local_image_ctx, I *remote_image_ctx,
+                                        const SnapMap *snap_map,
+                                        uint64_t object_number,
+                                        Context *on_finish)
+  : m_local_image_ctx(local_image_ctx), m_remote_image_ctx(remote_image_ctx),
+    m_snap_map(snap_map), m_object_number(object_number),
+    m_on_finish(on_finish) {
+  assert(!snap_map->empty());
+
+  m_local_io_ctx.dup(m_local_image_ctx->data_ctx);
+  m_local_oid = m_local_image_ctx->get_object_name(object_number);
+
+  m_remote_io_ctx.dup(m_remote_image_ctx->data_ctx);
+  m_remote_oid = m_remote_image_ctx->get_object_name(object_number);
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send() {
+  send_list_snaps();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send_list_snaps() {
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << dendl;
+
+  librados::AioCompletion *rados_completion = create_rados_ack_callback<
+    ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_list_snaps>(this);
+
+  librados::ObjectReadOperation op;
+  op.list_snaps(&m_snap_set, &m_snap_ret);
+
+  m_remote_io_ctx.snap_set_read(CEPH_SNAPDIR);
+  int r = m_remote_io_ctx.aio_operate(m_remote_oid, rados_completion, &op,
+                                      nullptr);
+  assert(r == 0);
+  rados_completion->release();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::handle_list_snaps(int r) {
+  if (r == 0 && m_snap_ret < 0) {
+    r = m_snap_ret;
+  }
+
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << ": r=" << r << dendl;
+
+  if (r == -ENOENT) {
+    finish(0);
+    return;
+  }
+  if (r < 0) {
+    lderr(cct) << "failed to list snaps: " << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  compute_diffs();
+  send_read_object();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send_read_object() {
+  CephContext *cct = m_local_image_ctx->cct;
+  if (m_snap_sync_ops.empty()) {
+    // no more snapshot diffs to read from remote
+    finish(0);
+    return;
+  }
+
+  // build the read request
+  auto &sync_ops = m_snap_sync_ops.begin()->second;
+  assert(!sync_ops.empty());
+
+  // map the sync op start snap id back to the necessary read snap id
+  auto snap_map_it = m_snap_map->upper_bound(
+    m_snap_sync_ops.begin()->first);
+  assert(snap_map_it != m_snap_map->end());
+  librados::snap_t snap_seq = snap_map_it->first;
+  m_remote_io_ctx.snap_set_read(snap_seq);
+
+  bool read_required = false;
+  librados::ObjectReadOperation op;
+  for (auto &sync_op : sync_ops) {
+    switch (std::get<0>(sync_op)) {
+    case SYNC_OP_TYPE_WRITE:
+      if (!read_required) {
+        ldout(cct, 20) << ": snap_seq=" << snap_seq << dendl;
+        read_required = true;
+      }
+
+      ldout(cct, 20) << ": read op: " << std::get<1>(sync_op) << "~"
+                     << std::get<2>(sync_op) << dendl;
+      op.read(std::get<1>(sync_op), std::get<2>(sync_op),
+              &std::get<3>(sync_op), nullptr);
+      break;
+    default:
+      break;
+    }
+  }
+
+  if (!read_required) {
+    // nothing written to this object for this snapshot (must be trunc/remove)
+    send_write_object();
+    return;
+  }
+
+  librados::AioCompletion *comp = create_rados_safe_callback<
+    ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_read_object>(this);
+  int r = m_remote_io_ctx.aio_operate(m_remote_oid, comp, &op, nullptr);
+  assert(r == 0);
+  comp->release();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::handle_read_object(int r) {
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to read from remote object: " << cpp_strerror(r)
+               << dendl;
+    finish(r);
+    return;
+  }
+
+  send_write_object();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send_write_object() {
+  // retrieve the local snap context for the op
+  SnapIds snap_ids;
+  librados::snap_t snap_seq = m_snap_sync_ops.begin()->first;
+  if (snap_seq != 0) {
+    auto snap_map_it = m_snap_map->find(snap_seq);
+    assert(snap_map_it != m_snap_map->end());
+    snap_ids = snap_map_it->second;
+  }
+
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << ": "
+                 << "snap_seq=" << snap_seq << ", "
+                 << "snaps=" << snap_ids << dendl;
+
+  auto &sync_ops = m_snap_sync_ops.begin()->second;
+  assert(!sync_ops.empty());
+
+  librados::ObjectWriteOperation op;
+  for (auto &sync_op : sync_ops) {
+    switch (std::get<0>(sync_op)) {
+    case SYNC_OP_TYPE_WRITE:
+      ldout(cct, 20) << ": write op: " << std::get<1>(sync_op) << "~"
+                     << std::get<3>(sync_op).length() << dendl;
+      op.write(std::get<1>(sync_op), std::get<3>(sync_op));
+      break;
+    case SYNC_OP_TYPE_TRUNC:
+      ldout(cct, 20) << ": trunc op: " << std::get<1>(sync_op) << dendl;
+      op.truncate(std::get<1>(sync_op));
+      break;
+    case SYNC_OP_TYPE_REMOVE:
+      ldout(cct, 20) << ": remove op" << dendl;
+      op.remove();
+      break;
+    default:
+      assert(false);
+    }
+  }
+
+  librados::AioCompletion *comp = create_rados_safe_callback<
+    ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_write_object>(this);
+  int r = m_local_io_ctx.aio_operate(m_local_oid, comp, &op, snap_seq,
+                                     snap_ids);
+  assert(r == 0);
+  comp->release();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::handle_write_object(int r) {
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << ": r=" << r << dendl;
+
+  if (r == -ENOENT) {
+    r = 0;
+  }
+  if (r < 0) {
+    lderr(cct) << "failed to write to local object: " << cpp_strerror(r)
+               << dendl;
+    finish(r);
+    return;
+  }
+
+  m_snap_sync_ops.erase(m_snap_sync_ops.begin());
+  if (!m_snap_sync_ops.empty()) {
+    send_read_object();
+    return;
+  }
+
+  finish(r);
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::compute_diffs() {
+  CephContext *cct = m_local_image_ctx->cct;
+
+  uint64_t prev_end_size = 0;
+  bool prev_exists = false;
+  librados::snap_t start_snap_id = 0;
+  librados::snap_t end_snap_id;
+  for (auto &pair : *m_snap_map) {
+    assert(!pair.second.empty());
+    end_snap_id = pair.second.front();
+
+    interval_set<uint64_t> diff;
+    uint64_t end_size;
+    bool exists;
+    calc_snap_set_diff(cct, m_snap_set, start_snap_id, end_snap_id, &diff,
+                       &end_size, &exists);
+
+    ldout(cct, 20) << ": "
+                   << "start_snap=" << start_snap_id << ", "
+                   << "end_snap_id=" << end_snap_id << ", "
+                   << "diff=" << diff << ", "
+                   << "end_size=" << end_size << ", "
+                   << "exists=" << exists << dendl;
+
+    if (exists) {
+      // clip diff to size of object (in case it was truncated)
+      if (end_size < prev_end_size) {
+        interval_set<uint64_t> trunc;
+        trunc.insert(end_size, prev_end_size);
+        trunc.intersection_of(diff);
+        diff.subtract(trunc);
+        ldout(cct, 20) << ": clearing truncate diff: " << trunc << dendl;
+      }
+
+      // object write/zero, or truncate
+      for (auto it = diff.begin(); it != diff.end(); ++it) {
+        ldout(cct, 20) << ": read/write op: " << it.get_start() << "~"
+                       << it.get_len() << dendl;
+        m_snap_sync_ops[start_snap_id].emplace_back(SYNC_OP_TYPE_WRITE,
+                                                    it.get_start(),
+                                                    it.get_len(),
+                                                    bufferlist());
+      }
+      if (end_size < prev_end_size) {
+        ldout(cct, 20) << ": trunc op: " << end_size << dendl;
+        m_snap_sync_ops[start_snap_id].emplace_back(SYNC_OP_TYPE_TRUNC,
+                                                    end_size, 0U, bufferlist());
+      }
+    } else if (prev_exists) {
+      // object remove
+      ldout(cct, 20) << ": remove op" << dendl;
+      m_snap_sync_ops[start_snap_id].emplace_back(SYNC_OP_TYPE_REMOVE, 0U, 0U,
+                                                  bufferlist());
+    }
+
+    prev_end_size = end_size;
+    prev_exists = exists;
+    start_snap_id = end_snap_id;
+  }
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::finish(int r) {
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << ": r=" << r << dendl;
+
+  m_on_finish->complete(r);
+  delete this;
+}
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_sync::ObjectCopyRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.h b/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.h
new file mode 100644
index 0000000000000..64d8e18cf0188
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.h
@@ -0,0 +1,118 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_OBJECT_COPY_REQUEST_H
+#define RBD_MIRROR_IMAGE_SYNC_OBJECT_COPY_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/rados/librados.hpp"
+#include "common/snap_types.h"
+#include "librbd/ImageCtx.h"
+#include <list>
+#include <map>
+#include <string>
+#include <tuple>
+#include <vector>
+
+class Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ObjectCopyRequest {
+public:
+  typedef std::vector<librados::snap_t> SnapIds;
+  typedef std::map<librados::snap_t, SnapIds> SnapMap;
+
+  static ObjectCopyRequest* create(ImageCtxT *local_image_ctx,
+                                   ImageCtxT *remote_image_ctx,
+                                   const SnapMap *snap_map,
+                                   uint64_t object_number, Context *on_finish) {
+    return new ObjectCopyRequest(local_image_ctx, remote_image_ctx, snap_map,
+                                 object_number, on_finish);
+  }
+
+  ObjectCopyRequest(ImageCtxT *local_image_ctx, ImageCtxT *remote_image_ctx,
+                    const SnapMap *snap_map, uint64_t object_number,
+                    Context *on_finish);
+
+  void send();
+
+  // testing support
+  inline librados::IoCtx &get_local_io_ctx() {
+    return m_local_io_ctx;
+  }
+  inline librados::IoCtx &get_remote_io_ctx() {
+    return m_remote_io_ctx;
+  }
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * LIST_SNAPS
+   *    |
+   *    v
+   * READ_OBJECT <----\
+   *    |             | (repeat for each snapshot)
+   *    v             |
+   * WRITE_OBJECT ----/
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  enum SyncOpType {
+    SYNC_OP_TYPE_WRITE,
+    SYNC_OP_TYPE_TRUNC,
+    SYNC_OP_TYPE_REMOVE
+  };
+
+  typedef std::tuple<SyncOpType, uint64_t, uint64_t, bufferlist> SyncOp;
+  typedef std::list<SyncOp> SyncOps;
+  typedef std::map<librados::snap_t, SyncOps> SnapSyncOps;
+
+  ImageCtxT *m_local_image_ctx;
+  ImageCtxT *m_remote_image_ctx;
+  const SnapMap *m_snap_map;
+  uint64_t m_object_number;
+  Context *m_on_finish;
+
+  decltype(m_local_image_ctx->data_ctx) m_local_io_ctx;
+  decltype(m_remote_image_ctx->data_ctx) m_remote_io_ctx;
+  std::string m_local_oid;
+  std::string m_remote_oid;
+
+  librados::snap_set_t m_snap_set;
+  int m_snap_ret;
+
+  SnapSyncOps m_snap_sync_ops;
+
+  void send_list_snaps();
+  void handle_list_snaps(int r);
+
+  void send_read_object();
+  void handle_read_object(int r);
+
+  void send_write_object();
+  void handle_write_object(int r);
+
+  void compute_diffs();
+  void finish(int r);
+
+};
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_sync::ObjectCopyRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_SYNC_OBJECT_COPY_REQUEST_H
-- 
2.39.5