]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rbd_mirror: deep-copy of object and its associated snapshots
authorJason Dillaman <dillaman@redhat.com>
Sun, 6 Mar 2016 18:06:42 +0000 (13:06 -0500)
committerJason Dillaman <dillaman@redhat.com>
Sun, 13 Mar 2016 03:40:15 +0000 (22:40 -0500)
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/tools/Makefile-client.am
src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc [new file with mode: 0644]
src/tools/rbd_mirror/image_sync/ObjectCopyRequest.h [new file with mode: 0644]

index 534e536fe5d6973be3f7bace5c20b299335489df..ed6da4e560f4481dfdfb6b4e812f67caec38261c 100644 (file)
@@ -87,7 +87,8 @@ librbd_mirror_internal_la_SOURCES = \
        tools/rbd_mirror/PoolWatcher.cc \
        tools/rbd_mirror/Replayer.cc \
        tools/rbd_mirror/Threads.cc \
-       tools/rbd_mirror/types.cc
+       tools/rbd_mirror/types.cc \
+       tools/rbd_mirror/image_sync/ObjectCopyRequest.cc
 noinst_LTLIBRARIES += librbd_mirror_internal.la
 noinst_HEADERS += \
        tools/rbd_mirror/ClusterWatcher.h \
@@ -96,7 +97,8 @@ noinst_HEADERS += \
        tools/rbd_mirror/PoolWatcher.h \
        tools/rbd_mirror/Replayer.h \
        tools/rbd_mirror/Threads.h \
-       tools/rbd_mirror/types.h
+       tools/rbd_mirror/types.h \
+       tools/rbd_mirror/image_sync/ObjectCopyRequest.h
 
 rbd_mirror_SOURCES = \
        tools/rbd_mirror/main.cc
diff --git a/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc b/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc
new file mode 100644 (file)
index 0000000..eb66934
--- /dev/null
@@ -0,0 +1,299 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ObjectCopyRequest.h"
+#include "librados/snap_set_diff.h"
+#include "librbd/Utils.h"
+#include "common/errno.h"
+
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_sync::ObjectCopyRequest: " \
+                           << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+using librbd::util::create_rados_ack_callback;
+using librbd::util::create_rados_safe_callback;
+
+template <typename I>
+ObjectCopyRequest<I>::ObjectCopyRequest(I *local_image_ctx, I *remote_image_ctx,
+                                        const SnapMap *snap_map,
+                                        uint64_t object_number,
+                                        Context *on_finish)
+  : m_local_image_ctx(local_image_ctx), m_remote_image_ctx(remote_image_ctx),
+    m_snap_map(snap_map), m_object_number(object_number),
+    m_on_finish(on_finish) {
+  assert(!snap_map->empty());
+
+  m_local_io_ctx.dup(m_local_image_ctx->data_ctx);
+  m_local_oid = m_local_image_ctx->get_object_name(object_number);
+
+  m_remote_io_ctx.dup(m_remote_image_ctx->data_ctx);
+  m_remote_oid = m_remote_image_ctx->get_object_name(object_number);
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send() {
+  send_list_snaps();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send_list_snaps() {
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << dendl;
+
+  librados::AioCompletion *rados_completion = create_rados_ack_callback<
+    ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_list_snaps>(this);
+
+  librados::ObjectReadOperation op;
+  op.list_snaps(&m_snap_set, &m_snap_ret);
+
+  m_remote_io_ctx.snap_set_read(CEPH_SNAPDIR);
+  int r = m_remote_io_ctx.aio_operate(m_remote_oid, rados_completion, &op,
+                                      nullptr);
+  assert(r == 0);
+  rados_completion->release();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::handle_list_snaps(int r) {
+  if (r == 0 && m_snap_ret < 0) {
+    r = m_snap_ret;
+  }
+
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << ": r=" << r << dendl;
+
+  if (r == -ENOENT) {
+    finish(0);
+    return;
+  }
+  if (r < 0) {
+    lderr(cct) << "failed to list snaps: " << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  compute_diffs();
+  send_read_object();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send_read_object() {
+  CephContext *cct = m_local_image_ctx->cct;
+  if (m_snap_sync_ops.empty()) {
+    // no more snapshot diffs to read from remote
+    finish(0);
+    return;
+  }
+
+  // build the read request
+  auto &sync_ops = m_snap_sync_ops.begin()->second;
+  assert(!sync_ops.empty());
+
+  // map the sync op start snap id back to the necessary read snap id
+  auto snap_map_it = m_snap_map->upper_bound(
+    m_snap_sync_ops.begin()->first);
+  assert(snap_map_it != m_snap_map->end());
+  librados::snap_t snap_seq = snap_map_it->first;
+  m_remote_io_ctx.snap_set_read(snap_seq);
+
+  bool read_required = false;
+  librados::ObjectReadOperation op;
+  for (auto &sync_op : sync_ops) {
+    switch (std::get<0>(sync_op)) {
+    case SYNC_OP_TYPE_WRITE:
+      if (!read_required) {
+        ldout(cct, 20) << ": snap_seq=" << snap_seq << dendl;
+        read_required = true;
+      }
+
+      ldout(cct, 20) << ": read op: " << std::get<1>(sync_op) << "~"
+                     << std::get<2>(sync_op) << dendl;
+      op.read(std::get<1>(sync_op), std::get<2>(sync_op),
+              &std::get<3>(sync_op), nullptr);
+      break;
+    default:
+      break;
+    }
+  }
+
+  if (!read_required) {
+    // nothing written to this object for this snapshot (must be trunc/remove)
+    send_write_object();
+    return;
+  }
+
+  librados::AioCompletion *comp = create_rados_safe_callback<
+    ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_read_object>(this);
+  int r = m_remote_io_ctx.aio_operate(m_remote_oid, comp, &op, nullptr);
+  assert(r == 0);
+  comp->release();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::handle_read_object(int r) {
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to read from remote object: " << cpp_strerror(r)
+               << dendl;
+    finish(r);
+    return;
+  }
+
+  send_write_object();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send_write_object() {
+  // retrieve the local snap context for the op
+  SnapIds snap_ids;
+  librados::snap_t snap_seq = m_snap_sync_ops.begin()->first;
+  if (snap_seq != 0) {
+    auto snap_map_it = m_snap_map->find(snap_seq);
+    assert(snap_map_it != m_snap_map->end());
+    snap_ids = snap_map_it->second;
+  }
+
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << ": "
+                 << "snap_seq=" << snap_seq << ", "
+                 << "snaps=" << snap_ids << dendl;
+
+  auto &sync_ops = m_snap_sync_ops.begin()->second;
+  assert(!sync_ops.empty());
+
+  librados::ObjectWriteOperation op;
+  for (auto &sync_op : sync_ops) {
+    switch (std::get<0>(sync_op)) {
+    case SYNC_OP_TYPE_WRITE:
+      ldout(cct, 20) << ": write op: " << std::get<1>(sync_op) << "~"
+                     << std::get<3>(sync_op).length() << dendl;
+      op.write(std::get<1>(sync_op), std::get<3>(sync_op));
+      break;
+    case SYNC_OP_TYPE_TRUNC:
+      ldout(cct, 20) << ": trunc op: " << std::get<1>(sync_op) << dendl;
+      op.truncate(std::get<1>(sync_op));
+      break;
+    case SYNC_OP_TYPE_REMOVE:
+      ldout(cct, 20) << ": remove op" << dendl;
+      op.remove();
+      break;
+    default:
+      assert(false);
+    }
+  }
+
+  librados::AioCompletion *comp = create_rados_safe_callback<
+    ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_write_object>(this);
+  int r = m_local_io_ctx.aio_operate(m_local_oid, comp, &op, snap_seq,
+                                     snap_ids);
+  assert(r == 0);
+  comp->release();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::handle_write_object(int r) {
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << ": r=" << r << dendl;
+
+  if (r == -ENOENT) {
+    r = 0;
+  }
+  if (r < 0) {
+    lderr(cct) << "failed to write to local object: " << cpp_strerror(r)
+               << dendl;
+    finish(r);
+    return;
+  }
+
+  m_snap_sync_ops.erase(m_snap_sync_ops.begin());
+  if (!m_snap_sync_ops.empty()) {
+    send_read_object();
+    return;
+  }
+
+  finish(r);
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::compute_diffs() {
+  CephContext *cct = m_local_image_ctx->cct;
+
+  uint64_t prev_end_size = 0;
+  bool prev_exists = false;
+  librados::snap_t start_snap_id = 0;
+  librados::snap_t end_snap_id;
+  for (auto &pair : *m_snap_map) {
+    assert(!pair.second.empty());
+    end_snap_id = pair.second.front();
+
+    interval_set<uint64_t> diff;
+    uint64_t end_size;
+    bool exists;
+    calc_snap_set_diff(cct, m_snap_set, start_snap_id, end_snap_id, &diff,
+                       &end_size, &exists);
+
+    ldout(cct, 20) << ": "
+                   << "start_snap=" << start_snap_id << ", "
+                   << "end_snap_id=" << end_snap_id << ", "
+                   << "diff=" << diff << ", "
+                   << "end_size=" << end_size << ", "
+                   << "exists=" << exists << dendl;
+
+    if (exists) {
+      // clip diff to size of object (in case it was truncated)
+      if (end_size < prev_end_size) {
+        interval_set<uint64_t> trunc;
+        trunc.insert(end_size, prev_end_size);
+        trunc.intersection_of(diff);
+        diff.subtract(trunc);
+        ldout(cct, 20) << ": clearing truncate diff: " << trunc << dendl;
+      }
+
+      // object write/zero, or truncate
+      for (auto it = diff.begin(); it != diff.end(); ++it) {
+        ldout(cct, 20) << ": read/write op: " << it.get_start() << "~"
+                       << it.get_len() << dendl;
+        m_snap_sync_ops[start_snap_id].emplace_back(SYNC_OP_TYPE_WRITE,
+                                                    it.get_start(),
+                                                    it.get_len(),
+                                                    bufferlist());
+      }
+      if (end_size < prev_end_size) {
+        ldout(cct, 20) << ": trunc op: " << end_size << dendl;
+        m_snap_sync_ops[start_snap_id].emplace_back(SYNC_OP_TYPE_TRUNC,
+                                                    end_size, 0U, bufferlist());
+      }
+    } else if (prev_exists) {
+      // object remove
+      ldout(cct, 20) << ": remove op" << dendl;
+      m_snap_sync_ops[start_snap_id].emplace_back(SYNC_OP_TYPE_REMOVE, 0U, 0U,
+                                                  bufferlist());
+    }
+
+    prev_end_size = end_size;
+    prev_exists = exists;
+    start_snap_id = end_snap_id;
+  }
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::finish(int r) {
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << ": r=" << r << dendl;
+
+  m_on_finish->complete(r);
+  delete this;
+}
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_sync::ObjectCopyRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.h b/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.h
new file mode 100644 (file)
index 0000000..64d8e18
--- /dev/null
@@ -0,0 +1,118 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_OBJECT_COPY_REQUEST_H
+#define RBD_MIRROR_IMAGE_SYNC_OBJECT_COPY_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/rados/librados.hpp"
+#include "common/snap_types.h"
+#include "librbd/ImageCtx.h"
+#include <list>
+#include <map>
+#include <string>
+#include <tuple>
+#include <vector>
+
+class Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ObjectCopyRequest {
+public:
+  typedef std::vector<librados::snap_t> SnapIds;
+  typedef std::map<librados::snap_t, SnapIds> SnapMap;
+
+  static ObjectCopyRequest* create(ImageCtxT *local_image_ctx,
+                                   ImageCtxT *remote_image_ctx,
+                                   const SnapMap *snap_map,
+                                   uint64_t object_number, Context *on_finish) {
+    return new ObjectCopyRequest(local_image_ctx, remote_image_ctx, snap_map,
+                                 object_number, on_finish);
+  }
+
+  ObjectCopyRequest(ImageCtxT *local_image_ctx, ImageCtxT *remote_image_ctx,
+                    const SnapMap *snap_map, uint64_t object_number,
+                    Context *on_finish);
+
+  void send();
+
+  // testing support
+  inline librados::IoCtx &get_local_io_ctx() {
+    return m_local_io_ctx;
+  }
+  inline librados::IoCtx &get_remote_io_ctx() {
+    return m_remote_io_ctx;
+  }
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * LIST_SNAPS
+   *    |
+   *    v
+   * READ_OBJECT <----\
+   *    |             | (repeat for each snapshot)
+   *    v             |
+   * WRITE_OBJECT ----/
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  enum SyncOpType {
+    SYNC_OP_TYPE_WRITE,
+    SYNC_OP_TYPE_TRUNC,
+    SYNC_OP_TYPE_REMOVE
+  };
+
+  typedef std::tuple<SyncOpType, uint64_t, uint64_t, bufferlist> SyncOp;
+  typedef std::list<SyncOp> SyncOps;
+  typedef std::map<librados::snap_t, SyncOps> SnapSyncOps;
+
+  ImageCtxT *m_local_image_ctx;
+  ImageCtxT *m_remote_image_ctx;
+  const SnapMap *m_snap_map;
+  uint64_t m_object_number;
+  Context *m_on_finish;
+
+  decltype(m_local_image_ctx->data_ctx) m_local_io_ctx;
+  decltype(m_remote_image_ctx->data_ctx) m_remote_io_ctx;
+  std::string m_local_oid;
+  std::string m_remote_oid;
+
+  librados::snap_set_t m_snap_set;
+  int m_snap_ret;
+
+  SnapSyncOps m_snap_sync_ops;
+
+  void send_list_snaps();
+  void handle_list_snaps(int r);
+
+  void send_read_object();
+  void handle_read_object(int r);
+
+  void send_write_object();
+  void handle_write_object(int r);
+
+  void compute_diffs();
+  void finish(int r);
+
+};
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_sync::ObjectCopyRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_SYNC_OBJECT_COPY_REQUEST_H