]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: API for image migration
authorMykola Golub <mgolub@suse.com>
Sat, 24 Feb 2018 09:43:12 +0000 (11:43 +0200)
committerJason Dillaman <dillaman@redhat.com>
Tue, 14 Aug 2018 22:29:45 +0000 (18:29 -0400)
Signed-off-by: Mykola Golub <mgolub@suse.com>
40 files changed:
src/include/rbd/librbd.h
src/include/rbd/librbd.hpp
src/include/rbd_types.h
src/librbd/CMakeLists.txt
src/librbd/ImageCtx.h
src/librbd/ImageWatcher.cc
src/librbd/ImageWatcher.h
src/librbd/Operations.cc
src/librbd/Operations.h
src/librbd/Types.h
src/librbd/WatchNotifyTypes.cc
src/librbd/WatchNotifyTypes.h
src/librbd/api/Migration.cc [new file with mode: 0644]
src/librbd/api/Migration.h [new file with mode: 0644]
src/librbd/deep_copy/ObjectCopyRequest.cc
src/librbd/deep_copy/Types.h
src/librbd/image/CloneRequest.cc
src/librbd/image/CloseRequest.cc
src/librbd/image/OpenRequest.cc
src/librbd/image/RefreshParentRequest.cc
src/librbd/image/RefreshParentRequest.h
src/librbd/image/RefreshRequest.cc
src/librbd/image/RefreshRequest.h
src/librbd/image/RemoveRequest.cc
src/librbd/image/SetSnapRequest.cc
src/librbd/internal.cc
src/librbd/io/CopyupRequest.cc
src/librbd/io/CopyupRequest.h
src/librbd/io/ObjectRequest.cc
src/librbd/librbd.cc
src/librbd/operation/MigrateRequest.cc [new file with mode: 0644]
src/librbd/operation/MigrateRequest.h [new file with mode: 0644]
src/test/librbd/CMakeLists.txt
src/test/librbd/image/test_mock_RefreshRequest.cc
src/test/librbd/image/test_mock_RemoveRequest.cc
src/test/librbd/mock/MockImageCtx.h
src/test/librbd/test_Migration.cc [new file with mode: 0644]
src/test/librbd/test_librbd.cc
src/test/librbd/test_main.cc
src/tracing/librbd.tp

index 174bece298ff601b955b9f1c388114aff9a1d4eb..045c0dfe15b9f87e1802503b377d92ee788906e3 100644 (file)
@@ -230,6 +230,26 @@ typedef struct {
   uint64_t cookie;
 } rbd_image_watcher_t;
 
+typedef enum {
+  RBD_IMAGE_MIGRATION_STATE_UNKNOWN = -1,
+  RBD_IMAGE_MIGRATION_STATE_ERROR = 0,
+  RBD_IMAGE_MIGRATION_STATE_PREPARING = 1,
+  RBD_IMAGE_MIGRATION_STATE_PREPARED = 2,
+  RBD_IMAGE_MIGRATION_STATE_EXECUTING = 3,
+  RBD_IMAGE_MIGRATION_STATE_EXECUTED = 4,
+} rbd_image_migration_state_t;
+
+typedef struct {
+  int64_t source_pool_id;
+  char *source_image_name;
+  char *source_image_id;
+  int64_t dest_pool_id;
+  char *dest_image_name;
+  char *dest_image_id;
+  rbd_image_migration_state_t state;
+  char *state_description;
+} rbd_image_migration_status_t;
+
 CEPH_RBD_API void rbd_image_options_create(rbd_image_options_t* opts);
 CEPH_RBD_API void rbd_image_options_destroy(rbd_image_options_t opts);
 CEPH_RBD_API int rbd_image_options_set_string(rbd_image_options_t opts,
@@ -309,6 +329,37 @@ CEPH_RBD_API int rbd_trash_remove_with_progress(rados_ioctx_t io, const char *id
 CEPH_RBD_API int rbd_trash_restore(rados_ioctx_t io, const char *id,
                                    const char *name);
 
+/* migration */
+CEPH_RBD_API int rbd_migration_prepare(rados_ioctx_t ioctx,
+                                       const char *image_name,
+                                       rados_ioctx_t dest_ioctx,
+                                       const char *dest_image_name,
+                                       rbd_image_options_t opts);
+CEPH_RBD_API int rbd_migration_execute(rados_ioctx_t ioctx,
+                                       const char *image_name);
+CEPH_RBD_API int rbd_migration_execute_with_progress(rados_ioctx_t ioctx,
+                                                     const char *image_name,
+                                                     librbd_progress_fn_t cb,
+                                                     void *cbdata);
+CEPH_RBD_API int rbd_migration_abort(rados_ioctx_t ioctx,
+                                     const char *image_name);
+CEPH_RBD_API int rbd_migration_abort_with_progress(rados_ioctx_t ioctx,
+                                                   const char *image_name,
+                                                   librbd_progress_fn_t cb,
+                                                   void *cbdata);
+CEPH_RBD_API int rbd_migration_commit(rados_ioctx_t ioctx,
+                                      const char *image_name);
+CEPH_RBD_API int rbd_migration_commit_with_progress(rados_ioctx_t ioctx,
+                                                    const char *image_name,
+                                                    librbd_progress_fn_t cb,
+                                                    void *cbdata);
+CEPH_RBD_API int rbd_migration_status(rados_ioctx_t ioctx,
+                                      const char *image_name,
+                                      rbd_image_migration_status_t *status,
+                                      size_t status_size);
+CEPH_RBD_API void rbd_migration_status_cleanup(
+    rbd_image_migration_status_t *status);
+
 /* pool mirroring */
 CEPH_RBD_API int rbd_mirror_mode_get(rados_ioctx_t io_ctx,
                                      rbd_mirror_mode_t *mirror_mode);
index bf150f47479770b374ef3f0c8d5c56219e05b7df..62b3081f750e36ef2a4b66112ad64269abecdfd7 100644 (file)
@@ -128,6 +128,19 @@ namespace librbd {
     uint64_t cookie;
   } image_watcher_t;
 
+  typedef rbd_image_migration_state_t image_migration_state_t;
+
+  typedef struct {
+    int64_t source_pool_id;
+    std::string source_image_name;
+    std::string source_image_id;
+    int64_t dest_pool_id;
+    std::string dest_image_name;
+    std::string dest_image_id;
+    image_migration_state_t state;
+    std::string state_description;
+  } image_migration_status_t;
+
 class CEPH_RBD_API RBD
 {
 public:
@@ -195,6 +208,22 @@ public:
                                  bool force, ProgressContext &pctx);
   int trash_restore(IoCtx &io_ctx, const char *id, const char *name);
 
+  // Migration
+  int migration_prepare(IoCtx& io_ctx, const char *image_name,
+                        IoCtx& dest_io_ctx, const char *dest_image_name,
+                        ImageOptions& opts);
+  int migration_execute(IoCtx& io_ctx, const char *image_name);
+  int migration_execute_with_progress(IoCtx& io_ctx, const char *image_name,
+                                      ProgressContext &prog_ctx);
+  int migration_abort(IoCtx& io_ctx, const char *image_name);
+  int migration_abort_with_progress(IoCtx& io_ctx, const char *image_name,
+                                    ProgressContext &prog_ctx);
+  int migration_commit(IoCtx& io_ctx, const char *image_name);
+  int migration_commit_with_progress(IoCtx& io_ctx, const char *image_name,
+                                     ProgressContext &prog_ctx);
+  int migration_status(IoCtx& io_ctx, const char *image_name,
+                       image_migration_status_t *status, size_t status_size);
+
   // RBD pool mirroring support functions
   int mirror_mode_get(IoCtx& io_ctx, rbd_mirror_mode_t *mirror_mode);
   int mirror_mode_set(IoCtx& io_ctx, rbd_mirror_mode_t mirror_mode);
index b7d680c109880ae314227a489dc2bb0883fd7108..65135bf8657eb7c647e8ecdc3c70e2ad846aad89 100644 (file)
@@ -99,6 +99,7 @@
 #define RBD_CRYPT_NONE         0
 
 #define RBD_HEADER_TEXT                "<<< Rados Block Device Image >>>\n"
+#define RBD_MIGRATE_HEADER_TEXT        "<<< Migrating RBD Image      >>>\n"
 #define RBD_HEADER_SIGNATURE   "RBD"
 #define RBD_HEADER_VERSION     "001.005"
 
index 2cd5779f7da3259bf4033f4ee91ed9d2e4197b88..9e88f69a18d789bce089bf0d94c9c1722134c199 100644 (file)
@@ -27,6 +27,7 @@ set(librbd_internal_srcs
   api/DiffIterate.cc
   api/Group.cc
   api/Image.cc
+  api/Migration.cc
   api/Mirror.cc
   api/Namespace.cc
   api/Snapshot.cc
@@ -108,6 +109,7 @@ set(librbd_internal_srcs
   operation/FlattenRequest.cc
   operation/MetadataRemoveRequest.cc
   operation/MetadataSetRequest.cc
+  operation/MigrateRequest.cc
   operation/ObjectMapIterate.cc
   operation/RebuildObjectMapRequest.cc
   operation/RenameRequest.cc
index d197c2498329fa4688b371ea0af075db209e38b6..0881e6129d118e6f646ee05a5cea28cd1ec5d51a 100644 (file)
@@ -122,6 +122,7 @@ namespace librbd {
     ParentInfo parent_md;
     ImageCtx *parent;
     ImageCtx *child = nullptr;
+    MigrationInfo migration_info;
     cls::rbd::GroupSpec group_spec;
     uint64_t stripe_unit, stripe_count;
     uint64_t flags;
@@ -158,6 +159,8 @@ namespace librbd {
 
     ContextWQ *op_work_queue;
 
+    bool ignore_migrating = false;
+
     // Configuration
     static const string METADATA_CONF_PREFIX;
     bool non_blocking_aio;
index d733814224e71f183c4d45962c895e0c0bbc29dc..946c781cb75cc8832b1389edaf32da6781b74186 100644 (file)
@@ -281,6 +281,20 @@ void ImageWatcher<I>::notify_update_features(uint64_t features, bool enabled,
   notify_lock_owner(UpdateFeaturesPayload(features, enabled), on_finish);
 }
 
+template <typename I>
+void ImageWatcher<I>::notify_migrate(uint64_t request_id,
+                                     ProgressContext &prog_ctx,
+                                     Context *on_finish) {
+  assert(m_image_ctx.owner_lock.is_locked());
+  assert(m_image_ctx.exclusive_lock &&
+         !m_image_ctx.exclusive_lock->is_lock_owner());
+
+  AsyncRequestId async_request_id(get_client_id(), request_id);
+
+  notify_async_request(async_request_id, MigratePayload(async_request_id),
+                       prog_ctx, on_finish);
+}
+
 template <typename I>
 void ImageWatcher<I>::notify_header_update(Context *on_finish) {
   ldout(m_image_ctx.cct, 10) << this << ": " << __func__ << dendl;
@@ -912,6 +926,33 @@ bool ImageWatcher<I>::handle_payload(const UpdateFeaturesPayload& payload,
   return true;
 }
 
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const MigratePayload &payload,
+                                    C_NotifyAck *ack_ctx) {
+
+  RWLock::RLocker l(m_image_ctx.owner_lock);
+  if (m_image_ctx.exclusive_lock != nullptr) {
+    int r;
+    if (m_image_ctx.exclusive_lock->accept_requests(&r)) {
+      bool new_request;
+      Context *ctx;
+      ProgressContext *prog_ctx;
+      r = prepare_async_request(payload.async_request_id, &new_request,
+                                &ctx, &prog_ctx);
+      if (r == 0 && new_request) {
+        ldout(m_image_ctx.cct, 10) << this << " remote migrate request: "
+                                  << payload.async_request_id << dendl;
+        m_image_ctx.operations->execute_migrate(*prog_ctx, ctx);
+      }
+
+      encode(ResponseMessage(r), ack_ctx->out);
+    } else if (r < 0) {
+      encode(ResponseMessage(r), ack_ctx->out);
+    }
+  }
+  return true;
+}
+
 template <typename I>
 bool ImageWatcher<I>::handle_payload(const UnknownPayload &payload,
                                     C_NotifyAck *ack_ctx) {
index 5e30c8e5b3beba851ec59e593558c69468a29706..33000c01271e39513415e3bd64400d19041bc74f 100644 (file)
@@ -65,6 +65,9 @@ public:
   void notify_update_features(uint64_t features, bool enabled,
                               Context *on_finish);
 
+  void notify_migrate(uint64_t request_id, ProgressContext &prog_ctx,
+                      Context *on_finish);
+
   void notify_acquired_lock();
   void notify_released_lock();
   void notify_request_lock();
@@ -233,6 +236,8 @@ private:
                       C_NotifyAck *ctx);
   bool handle_payload(const watch_notify::UpdateFeaturesPayload& payload,
                       C_NotifyAck *ctx);
+  bool handle_payload(const watch_notify::MigratePayload& payload,
+                      C_NotifyAck *ctx);
   bool handle_payload(const watch_notify::UnknownPayload& payload,
                       C_NotifyAck *ctx);
   void process_payload(uint64_t notify_id, uint64_t handle,
index 7c9e29ddcd840664c0deee39123a3e83631cd0bb..b723a304db4ce6516d86efd5bb5f792ef7704870 100644 (file)
@@ -23,6 +23,7 @@
 #include "librbd/operation/FlattenRequest.h"
 #include "librbd/operation/MetadataRemoveRequest.h"
 #include "librbd/operation/MetadataSetRequest.h"
+#include "librbd/operation/MigrateRequest.h"
 #include "librbd/operation/ObjectMapIterate.h"
 #include "librbd/operation/RebuildObjectMapRequest.h"
 #include "librbd/operation/RenameRequest.h"
@@ -249,9 +250,10 @@ struct C_InvokeAsyncRequest : public Context {
     CephContext *cct = image_ctx.cct;
     ldout(cct, 20) << __func__ << dendl;
 
-    Context *ctx = util::create_context_callback<
-      C_InvokeAsyncRequest<I>, &C_InvokeAsyncRequest<I>::handle_remote_request>(
-        this);
+    Context *ctx = util::create_async_context_callback(
+      image_ctx, util::create_context_callback<
+        C_InvokeAsyncRequest<I>,
+        &C_InvokeAsyncRequest<I>::handle_remote_request>(this));
     remote(ctx);
   }
 
@@ -1536,6 +1538,84 @@ void Operations<I>::execute_metadata_remove(const std::string &key,
   request->send();
 }
 
+template <typename I>
+int Operations<I>::migrate(ProgressContext &prog_ctx) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 20) << "migrate" << dendl;
+
+  int r = m_image_ctx.state->refresh_if_required();
+  if (r < 0) {
+    return r;
+  }
+
+  if (m_image_ctx.read_only) {
+    return -EROFS;
+  }
+
+  {
+    RWLock::RLocker parent_locker(m_image_ctx.parent_lock);
+    if (m_image_ctx.migration_info.empty()) {
+      lderr(cct) << "image has no migrating parent" << dendl;
+      return -EINVAL;
+    }
+  }
+
+  uint64_t request_id = ++m_async_request_seq;
+  r = invoke_async_request("migrate", false,
+                           boost::bind(&Operations<I>::execute_migrate, this,
+                                       boost::ref(prog_ctx), _1),
+                           boost::bind(&ImageWatcher<I>::notify_migrate,
+                                       m_image_ctx.image_watcher, request_id,
+                                       boost::ref(prog_ctx), _1));
+
+  if (r < 0 && r != -EINVAL) {
+    return r;
+  }
+  ldout(cct, 20) << "migrate finished" << dendl;
+  return 0;
+}
+
+template <typename I>
+void Operations<I>::execute_migrate(ProgressContext &prog_ctx,
+                                    Context *on_finish) {
+  assert(m_image_ctx.owner_lock.is_locked());
+  assert(m_image_ctx.exclusive_lock == nullptr ||
+         m_image_ctx.exclusive_lock->is_lock_owner());
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 20) << "migrate" << dendl;
+
+  if (m_image_ctx.read_only || m_image_ctx.operations_disabled) {
+    on_finish->complete(-EROFS);
+    return;
+  }
+
+  m_image_ctx.snap_lock.get_read();
+  m_image_ctx.parent_lock.get_read();
+
+  if (m_image_ctx.migration_info.empty()) {
+    lderr(cct) << "image has no migrating parent" << dendl;
+    m_image_ctx.parent_lock.put_read();
+    m_image_ctx.snap_lock.put_read();
+    on_finish->complete(-EINVAL);
+    return;
+  }
+  if (m_image_ctx.snap_id != CEPH_NOSNAP) {
+    lderr(cct) << "snapshots cannot be migrated" << dendl;
+    m_image_ctx.parent_lock.put_read();
+    m_image_ctx.snap_lock.put_read();
+    on_finish->complete(-EROFS);
+    return;
+  }
+
+  m_image_ctx.parent_lock.put_read();
+  m_image_ctx.snap_lock.put_read();
+
+  operation::MigrateRequest<I> *req = new operation::MigrateRequest<I>(
+    m_image_ctx, new C_NotifyUpdate<I>(m_image_ctx, on_finish), prog_ctx);
+  req->send();
+}
+
 template <typename I>
 int Operations<I>::prepare_image_update(bool request_lock) {
   assert(m_image_ctx.owner_lock.is_locked() &&
index ff1238ff50e549e28711c7284730a29723318cfb..0d70f10d482027ac46838e964ab44e61745c67a6 100644 (file)
@@ -100,6 +100,9 @@ public:
   int metadata_remove(const std::string &key);
   void execute_metadata_remove(const std::string &key, Context *on_finish);
 
+  int migrate(ProgressContext &prog_ctx);
+  void execute_migrate(ProgressContext &prog_ctx, Context *on_finish);
+
   int prepare_image_update(bool request_lock);
 
 private:
index afcb848f98a1500aab19d7c424c3e6a99ab44288..901bc56dee10a4d64310fe35e0fd202c0dd79b50 100644 (file)
@@ -6,6 +6,7 @@
 
 #include "include/types.h"
 #include "cls/rbd/cls_rbd_types.h"
+#include "deep_copy/Types.h"
 #include <map>
 #include <string>
 
@@ -117,6 +118,29 @@ struct SnapInfo {
 enum {
   OPEN_FLAG_SKIP_OPEN_PARENT = 1 << 0,
   OPEN_FLAG_OLD_FORMAT = 1 << 1,
+  OPEN_FLAG_IGNORE_MIGRATING = 1 << 2,
+};
+
+struct MigrationInfo {
+  int64_t pool_id = -1;
+  std::string image_name;
+  std::string image_id;
+  deep_copy::SnapMap snap_map;
+  uint64_t overlap = 0;
+  bool flatten = false;
+
+  MigrationInfo() {
+  }
+  MigrationInfo(int64_t pool_id, std::string image_name, std::string image_id,
+                const deep_copy::SnapMap &snap_map, uint64_t overlap,
+                bool flatten)
+    : pool_id(pool_id), image_name(image_name), image_id(image_id),
+      snap_map(snap_map), overlap(overlap), flatten(flatten) {
+  }
+
+  bool empty() const {
+    return pool_id == -1;
+  }
 };
 
 } // namespace librbd
index 432d5c75dad24165558e33982036790306e3dfe6..42d84dc59c27e0aee8b1817af0f59faf5e3cfb6f 100644 (file)
@@ -368,6 +368,9 @@ void NotifyMessage::decode(bufferlist::const_iterator& iter) {
   case NOTIFY_OP_UPDATE_FEATURES:
     payload = UpdateFeaturesPayload();
     break;
+  case NOTIFY_OP_MIGRATE:
+    payload = MigratePayload();
+    break;
   default:
     payload = UnknownPayload();
     break;
@@ -402,6 +405,7 @@ void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) {
   o.push_back(new NotifyMessage(RebuildObjectMapPayload(AsyncRequestId(ClientId(0, 1), 2))));
   o.push_back(new NotifyMessage(RenamePayload("foo")));
   o.push_back(new NotifyMessage(UpdateFeaturesPayload(1, true)));
+  o.push_back(new NotifyMessage(MigratePayload(AsyncRequestId(ClientId(0, 1), 2))));
 }
 
 void ResponseMessage::encode(bufferlist& bl) const {
@@ -477,6 +481,9 @@ std::ostream &operator<<(std::ostream &out,
   case NOTIFY_OP_UPDATE_FEATURES:
     out << "UpdateFeatures";
     break;
+  case NOTIFY_OP_MIGRATE:
+    out << "Migrate";
+    break;
   default:
     out << "Unknown (" << static_cast<uint32_t>(op) << ")";
     break;
index 6c13804d0705e2b140caed9d8b6a923eb9194185..79f232f0e57c220f860811c289557bea1330da69 100644 (file)
@@ -65,6 +65,7 @@ enum NotifyOp {
   NOTIFY_OP_SNAP_UNPROTECT     = 13,
   NOTIFY_OP_RENAME             = 14,
   NOTIFY_OP_UPDATE_FEATURES    = 15,
+  NOTIFY_OP_MIGRATE            = 16,
 };
 
 struct AcquiredLockPayload {
@@ -301,6 +302,14 @@ struct UpdateFeaturesPayload {
   void dump(Formatter *f) const;
 };
 
+struct MigratePayload : public AsyncRequestPayloadBase {
+  static const NotifyOp NOTIFY_OP = NOTIFY_OP_MIGRATE;
+  static const bool CHECK_FOR_REFRESH = true;
+
+  MigratePayload() {}
+  MigratePayload(const AsyncRequestId &id) : AsyncRequestPayloadBase(id) {}
+};
+
 struct UnknownPayload {
   static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1);
   static const bool CHECK_FOR_REFRESH = false;
@@ -326,6 +335,7 @@ typedef boost::variant<AcquiredLockPayload,
                        RebuildObjectMapPayload,
                        RenamePayload,
                        UpdateFeaturesPayload,
+                       MigratePayload,
                        UnknownPayload> Payload;
 
 struct NotifyMessage {
diff --git a/src/librbd/api/Migration.cc b/src/librbd/api/Migration.cc
new file mode 100644 (file)
index 0000000..8dd7e47
--- /dev/null
@@ -0,0 +1,1336 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Migration.h"
+#include "include/rados/librados.hpp"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Group.h"
+#include "librbd/deep_copy/MetadataCopyRequest.h"
+#include "librbd/deep_copy/SnapshotCopyRequest.h"
+#include "librbd/image/CreateRequest.h"
+#include "librbd/image/ListWatchersRequest.h"
+#include "librbd/image/RemoveRequest.h"
+#include "librbd/internal.h"
+#include "librbd/io/ImageRequestWQ.h"
+#include "librbd/mirror/DisableRequest.h"
+#include "librbd/mirror/EnableRequest.h"
+
+#include <boost/scope_exit.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::Migration: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+int trash_search(librados::IoCtx &io_ctx, rbd_trash_image_source_t source,
+                 const std::string &image_name, std::string *image_id) {
+  std::vector<trash_image_info_t> entries;
+
+  int r = trash_list(io_ctx, entries);
+  if (r < 0) {
+    return r;
+  }
+
+  for (auto &entry : entries) {
+    if (entry.source == source && entry.name == image_name) {
+      *image_id = entry.id;
+      return 0;
+    }
+  }
+
+  return -ENOENT;
+}
+
+template <typename I>
+int open_source_image(librados::IoCtx& io_ctx, const std::string &image_name,
+                      I **src_image_ctx, librados::IoCtx *dst_io_ctx,
+                      std::string *dst_image_name, std::string *dst_image_id,
+                      bool *flatten, bool *mirroring,
+                      cls::rbd::MigrationState *state,
+                      std::string *state_description) {
+  CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+  librados::IoCtx src_io_ctx;
+  std::string src_image_name;
+  std::string src_image_id;
+  cls::rbd::MigrationSpec migration_spec;
+  I *image_ctx = I::create(image_name, "", nullptr, io_ctx, false);
+
+  ldout(cct, 10) << "trying to open image by name " << io_ctx.get_pool_name()
+                 << "/" << image_name << dendl;
+
+  int r = image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING);
+  if (r < 0) {
+    if (r != -ENOENT) {
+      lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl;
+      return r;
+    }
+    image_ctx = nullptr;
+  }
+
+  BOOST_SCOPE_EXIT_TPL(&r, &image_ctx) {
+    if (r != 0 && image_ctx != nullptr) {
+      image_ctx->state->close();
+    }
+  } BOOST_SCOPE_EXIT_END;
+
+  if (r == 0) {
+    // The opened image is either a source (then just proceed) or a
+    // destination (then look for the source image id in the migration
+    // header).
+
+    r = cls_client::migration_get(&image_ctx->md_ctx, image_ctx->header_oid,
+                                  &migration_spec);
+
+    if (r < 0) {
+      lderr(cct) << "failed retrieving migration header: " << cpp_strerror(r)
+                 << dendl;
+      return r;
+    }
+
+    ldout(cct, 10) << "migration spec: " << migration_spec << dendl;
+
+    if (migration_spec.header_type != cls::rbd::MIGRATION_HEADER_TYPE_SRC &&
+        migration_spec.header_type != cls::rbd::MIGRATION_HEADER_TYPE_DST) {
+        lderr(cct) << "unexpected migration header type: "
+                   << migration_spec.header_type << dendl;
+        r = -EINVAL;
+        return r;
+    }
+
+    if (migration_spec.header_type == cls::rbd::MIGRATION_HEADER_TYPE_DST) {
+      ldout(cct, 10) << "the destination image is opened" << dendl;
+
+      // Close and look for the source image.
+      r = image_ctx->state->close();
+      image_ctx = nullptr;
+      if (r < 0) {
+        lderr(cct) << "failed closing image: " << cpp_strerror(r)
+                   << dendl;
+        return r;
+      }
+
+      if (io_ctx.get_id() == migration_spec.pool_id) {
+        src_io_ctx.dup(io_ctx);
+      } else {
+        r = librados::Rados(io_ctx).ioctx_create2(migration_spec.pool_id,
+                                                  src_io_ctx);
+        if (r < 0) {
+          lderr(cct) << "error accessing source pool "
+                     << migration_spec.pool_id << ": " << cpp_strerror(r)
+                     << dendl;
+          return r;
+        }
+      }
+
+      src_image_name = migration_spec.image_name;
+      src_image_id = migration_spec.image_id;
+    } else {
+      ldout(cct, 10) << "the source image is opened" << dendl;
+    }
+  } else {
+    assert (r == -ENOENT);
+
+    ldout(cct, 10) << "source image is not found. Trying trash" << dendl;
+
+    r = trash_search(io_ctx, RBD_TRASH_IMAGE_SOURCE_MIGRATION, image_name,
+                     &src_image_id);
+    if (r < 0) {
+      lderr(cct) << "failed to determine image id: " << cpp_strerror(r)
+                 << dendl;
+      return r;
+    }
+
+    ldout(cct, 10) << "source image id from trash: " << src_image_id << dendl;
+
+    src_io_ctx.dup(io_ctx);
+  }
+
+  if (image_ctx == nullptr) {
+    int flags = OPEN_FLAG_IGNORE_MIGRATING;
+
+    if (src_image_id.empty()) {
+      ldout(cct, 20) << "trying to open v1 image by name "
+                     << src_io_ctx.get_pool_name() << "/" << src_image_name
+                     << dendl;
+
+      flags |= OPEN_FLAG_OLD_FORMAT;
+    } else {
+      ldout(cct, 20) << "trying to open v2 image by id "
+                     << src_io_ctx.get_pool_name() << "/" << src_image_id
+                     << dendl;
+    }
+
+    image_ctx = I::create(src_image_name, src_image_id, nullptr, src_io_ctx,
+                          false);
+    r = image_ctx->state->open(flags);
+    if (r < 0) {
+      lderr(cct) << "failed to open source image " << src_io_ctx.get_pool_name()
+                 << "/" << (src_image_id.empty() ? src_image_name : src_image_id)
+                 << ": " << cpp_strerror(r) << dendl;
+      image_ctx = nullptr;
+      return r;
+    }
+
+    r = cls_client::migration_get(&image_ctx->md_ctx, image_ctx->header_oid,
+                                  &migration_spec);
+    if (r < 0) {
+      lderr(cct) << "failed retrieving migration header: " << cpp_strerror(r)
+                 << dendl;
+      return r;
+    }
+
+    ldout(cct, 20) << "migration spec: " << migration_spec << dendl;
+  }
+
+  if (image_ctx->md_ctx.get_id() == migration_spec.pool_id) {
+    dst_io_ctx->dup(io_ctx);
+  } else {
+    r = librados::Rados(image_ctx->md_ctx).ioctx_create2(migration_spec.pool_id,
+                                                         *dst_io_ctx);
+    if (r < 0) {
+      lderr(cct) << "error accessing destination pool "
+                 << migration_spec.pool_id << ": " << cpp_strerror(r) << dendl;
+      return r;
+    }
+  }
+
+  *src_image_ctx = image_ctx;
+  *dst_image_name = migration_spec.image_name;
+  *dst_image_id = migration_spec.image_id;
+  *flatten = migration_spec.flatten;
+  *mirroring = migration_spec.mirroring;
+  *state = migration_spec.state;
+  *state_description = migration_spec.state_description;
+
+  return 0;
+}
+
+} // anonymous namespace
+
+template <typename I>
+int Migration<I>::prepare(librados::IoCtx& io_ctx,
+                          const std::string &image_name,
+                          librados::IoCtx& dest_io_ctx,
+                          const std::string &dest_image_name_,
+                          ImageOptions& opts) {
+  CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+  std::string dest_image_name = dest_image_name_.empty() ? image_name :
+    dest_image_name_;
+
+  ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << " -> "
+                 << dest_io_ctx.get_pool_name() << "/" << dest_image_name
+                 << ", opts=" << opts << dendl;
+
+  auto image_ctx = I::create(image_name, "", nullptr, io_ctx, false);
+  int r = image_ctx->state->open(0);
+  if (r < 0) {
+    lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+  BOOST_SCOPE_EXIT_TPL(image_ctx) {
+    image_ctx->state->close();
+  } BOOST_SCOPE_EXIT_END;
+
+  std::list<obj_watch_t> watchers;
+  int flags = librbd::image::LIST_WATCHERS_FILTER_OUT_MY_INSTANCE |
+              librbd::image::LIST_WATCHERS_FILTER_OUT_MIRROR_INSTANCES;
+  C_SaferCond on_list_watchers;
+  auto list_watchers_request = librbd::image::ListWatchersRequest<I>::create(
+      *image_ctx, flags, &watchers, &on_list_watchers);
+  list_watchers_request->send();
+  r = on_list_watchers.wait();
+  if (r < 0) {
+    lderr(cct) << "failed listing watchers:" << cpp_strerror(r) << dendl;
+    return r;
+  }
+  if (!watchers.empty()) {
+    lderr(cct) << "image has watchers - not migrating" << dendl;
+    return -EBUSY;
+  }
+
+  uint64_t format = 2;
+  if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) {
+    opts.set(RBD_IMAGE_OPTION_FORMAT, format);
+  }
+  if (format != 2) {
+    lderr(cct) << "unsupported destination image format: " << format << dendl;
+    return -EINVAL;
+  }
+
+  uint64_t features;
+  {
+    RWLock::RLocker snap_locker(image_ctx->snap_lock);
+    features = image_ctx->features;
+  }
+  opts.get(RBD_IMAGE_OPTION_FEATURES, &features);
+  if ((features & ~RBD_FEATURES_ALL) != 0) {
+    lderr(cct) << "librbd does not support requested features" << dendl;
+    return -ENOSYS;
+  }
+  features &= ~RBD_FEATURES_INTERNAL;
+  features |= RBD_FEATURE_MIGRATING;
+  opts.set(RBD_IMAGE_OPTION_FEATURES, features);
+
+  uint64_t order = image_ctx->order;
+  if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) {
+    opts.set(RBD_IMAGE_OPTION_ORDER, order);
+  }
+  r = image::CreateRequest<I>::validate_order(cct, order);
+  if (r < 0) {
+    return r;
+  }
+
+  uint64_t stripe_unit = image_ctx->stripe_unit;
+  if (opts.get(RBD_IMAGE_OPTION_STRIPE_UNIT, &stripe_unit) != 0) {
+    opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
+  }
+  uint64_t stripe_count = image_ctx->stripe_count;
+  if (opts.get(RBD_IMAGE_OPTION_STRIPE_COUNT, &stripe_count) != 0) {
+    opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
+  }
+
+  uint64_t flatten = 0;
+  if (opts.get(RBD_IMAGE_OPTION_FLATTEN, &flatten) == 0) {
+    opts.unset(RBD_IMAGE_OPTION_FLATTEN);
+  }
+
+  ldout(cct, 20) << "updated opts=" << opts << dendl;
+
+  Migration migration(image_ctx, dest_io_ctx, dest_image_name, "", opts, flatten > 0,
+                      false, cls::rbd::MIGRATION_STATE_PREPARING, "", nullptr);
+  r = migration.prepare();
+
+  features &= ~RBD_FEATURE_MIGRATING;
+  opts.set(RBD_IMAGE_OPTION_FEATURES, features);
+
+  return r;
+}
+
+template <typename I>
+int Migration<I>::execute(librados::IoCtx& io_ctx,
+                          const std::string &image_name,
+                          ProgressContext &prog_ctx) {
+  CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+  ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl;
+
+  I *image_ctx;
+  librados::IoCtx dest_io_ctx;
+  std::string dest_image_name;
+  std::string dest_image_id;
+  bool flatten;
+  bool mirroring;
+  cls::rbd::MigrationState state;
+  std::string state_description;
+
+  int r = open_source_image(io_ctx, image_name, &image_ctx, &dest_io_ctx,
+                            &dest_image_name, &dest_image_id, &flatten,
+                            &mirroring, &state, &state_description);
+  if (r < 0) {
+    return r;
+  }
+
+  BOOST_SCOPE_EXIT_TPL(image_ctx) {
+    image_ctx->state->close();
+  } BOOST_SCOPE_EXIT_END;
+
+  if (state != cls::rbd::MIGRATION_STATE_PREPARED) {
+    lderr(cct) << "current migration state is '" << state << "'"
+               << " (should be 'prepared')" << dendl;
+    return -EINVAL;
+  }
+
+  ldout(cct, 5) << "migrating " << image_ctx->md_ctx.get_pool_name() << "/"
+                << image_ctx->name << " -> " << dest_io_ctx.get_pool_name()
+                << "/" << dest_image_name << dendl;
+
+  ImageOptions opts;
+  Migration migration(image_ctx, dest_io_ctx, dest_image_name, dest_image_id,
+                      opts, flatten, mirroring, state, state_description,
+                      &prog_ctx);
+  r = migration.execute();
+  if (r < 0) {
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::abort(librados::IoCtx& io_ctx, const std::string &image_name,
+                        ProgressContext &prog_ctx) {
+  CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+  ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl;
+
+  I *image_ctx;
+  librados::IoCtx dest_io_ctx;
+  std::string dest_image_name;
+  std::string dest_image_id;
+  bool flatten;
+  bool mirroring;
+  cls::rbd::MigrationState state;
+  std::string state_description;
+
+  int r = open_source_image(io_ctx, image_name, &image_ctx, &dest_io_ctx,
+                            &dest_image_name, &dest_image_id, &flatten,
+                            &mirroring, &state, &state_description);
+  if (r < 0) {
+    return r;
+  }
+
+  ldout(cct, 5) << "canceling incomplete migration "
+                << image_ctx->md_ctx.get_pool_name() << "/" << image_ctx->name
+                << " -> " << dest_io_ctx.get_pool_name() << "/" << dest_image_name
+                << dendl;
+
+  ImageOptions opts;
+  Migration migration(image_ctx, dest_io_ctx, dest_image_name, dest_image_id,
+                      opts, flatten, mirroring, state, state_description,
+                      &prog_ctx);
+  r = migration.abort();
+
+  image_ctx->state->close();
+
+  if (r < 0) {
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::commit(librados::IoCtx& io_ctx,
+                         const std::string &image_name,
+                         ProgressContext &prog_ctx) {
+  CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+  ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl;
+
+  I *image_ctx;
+  librados::IoCtx dest_io_ctx;
+  std::string dest_image_name;
+  std::string dest_image_id;
+  bool flatten;
+  bool mirroring;
+  cls::rbd::MigrationState state;
+  std::string state_description;
+
+  int r = open_source_image(io_ctx, image_name, &image_ctx, &dest_io_ctx,
+                            &dest_image_name, &dest_image_id, &flatten,
+                            &mirroring, &state, &state_description);
+  if (r < 0) {
+    return r;
+  }
+
+  if (state != cls::rbd::MIGRATION_STATE_EXECUTED) {
+    lderr(cct) << "current migration state is '" << state << "'"
+               << " (should be 'executed')" << dendl;
+    image_ctx->state->close();
+    return -EINVAL;
+  }
+
+  ldout(cct, 5) << "migrating " << image_ctx->md_ctx.get_pool_name() << "/"
+                << image_ctx->name << " -> " << dest_io_ctx.get_pool_name()
+                << "/" << dest_image_name << dendl;
+
+  ImageOptions opts;
+  Migration migration(image_ctx, dest_io_ctx, dest_image_name, dest_image_id,
+                      opts, flatten, mirroring, state, state_description,
+                      &prog_ctx);
+  r = migration.commit();
+
+  // image_ctx is closed in commit when removing src image
+
+  if (r < 0) {
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::status(librados::IoCtx& io_ctx,
+                         const std::string &image_name,
+                         image_migration_status_t *status) {
+  CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+  ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl;
+
+  I *image_ctx;
+  librados::IoCtx dest_io_ctx;
+  std::string dest_image_name;
+  std::string dest_image_id;
+  bool flatten;
+  bool mirroring;
+  cls::rbd::MigrationState state;
+  std::string state_description;
+
+  int r = open_source_image(io_ctx, image_name, &image_ctx, &dest_io_ctx,
+                            &dest_image_name, &dest_image_id, &flatten,
+                            &mirroring, &state, &state_description);
+  if (r < 0) {
+    return r;
+  }
+
+  ldout(cct, 5) << "migrating " << image_ctx->md_ctx.get_pool_name() << "/"
+                << image_ctx->name << " -> " << dest_io_ctx.get_pool_name()
+                << "/" << dest_image_name << dendl;
+
+  ImageOptions opts;
+  Migration migration(image_ctx, dest_io_ctx, dest_image_name, dest_image_id,
+                      opts, flatten, mirroring, state, state_description,
+                      nullptr);
+  r = migration.status(status);
+
+  image_ctx->state->close();
+
+  if (r < 0) {
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+Migration<I>::Migration(I *src_image_ctx, librados::IoCtx& dst_io_ctx,
+                        const std::string &dstname,
+                        const std::string &dst_image_id,
+                        ImageOptions& opts, bool flatten, bool mirroring,
+                        cls::rbd::MigrationState state,
+                        const std::string &state_description,
+                        ProgressContext *prog_ctx)
+  : m_cct(static_cast<CephContext *>(dst_io_ctx.cct())),
+    m_src_image_ctx(src_image_ctx), m_dst_io_ctx(dst_io_ctx),
+    m_src_old_format(m_src_image_ctx->old_format),
+    m_src_image_name(m_src_image_ctx->old_format ? m_src_image_ctx->name : ""),
+    m_src_image_id(m_src_image_ctx->id),
+    m_src_header_oid(m_src_image_ctx->header_oid), m_dst_image_name(dstname),
+    m_dst_image_id(dst_image_id.empty() ?
+                   util::generate_image_id(m_dst_io_ctx) : dst_image_id),
+    m_dst_header_oid(util::header_name(m_dst_image_id)), m_image_options(opts),
+    m_flatten(flatten), m_mirroring(mirroring), m_prog_ctx(prog_ctx),
+    m_src_migration_spec(cls::rbd::MIGRATION_HEADER_TYPE_SRC,
+                         m_dst_io_ctx.get_id(), m_dst_image_name,
+                         m_dst_image_id, {}, 0, flatten, mirroring, state,
+                         state_description),
+    m_dst_migration_spec(cls::rbd::MIGRATION_HEADER_TYPE_DST,
+                         src_image_ctx->md_ctx.get_id(), m_src_image_ctx->name,
+                         m_src_image_ctx->id, {}, 0, flatten, mirroring, state,
+                         state_description) {
+  m_src_io_ctx.dup(src_image_ctx->md_ctx);
+}
+
+template <typename I>
+int Migration<I>::prepare() {
+  ldout(m_cct, 10) << dendl;
+
+  int r = list_snaps();
+  if (r < 0) {
+    return r;
+  }
+
+  r = disable_mirroring(m_src_image_ctx, &m_mirroring);
+  if (r < 0) {
+    return r;
+  }
+
+  r = unlink_src_image();
+  if (r < 0) {
+    enable_mirroring(m_src_image_ctx, m_mirroring);
+    return r;
+  }
+
+  r = set_migration();
+  if (r < 0) {
+    relink_src_image();
+    enable_mirroring(m_src_image_ctx, m_mirroring);
+    return r;
+  }
+
+  r = create_dst_image();
+  if (r < 0) {
+    abort();
+    return r;
+  }
+
+  r = set_state(cls::rbd::MIGRATION_STATE_PREPARED, "");
+  if (r < 0) {
+    return r;
+  }
+
+  ldout(m_cct, 10) << "succeeded" << dendl;
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::execute() {
+  ldout(m_cct, 10) << dendl;
+
+  auto dst_image_ctx = I::create(m_dst_image_name, m_dst_image_id, nullptr,
+                                 m_dst_io_ctx, false);
+  int r = dst_image_ctx->state->open(0);
+  if (r < 0) {
+    lderr(m_cct) << "failed to open destination image: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  BOOST_SCOPE_EXIT_TPL(dst_image_ctx) {
+    dst_image_ctx->state->close();
+  } BOOST_SCOPE_EXIT_END;
+
+  r = set_state(cls::rbd::MIGRATION_STATE_EXECUTING, "");
+  if (r < 0) {
+    return r;
+  }
+
+  while (true) {
+    r = dst_image_ctx->operations->migrate(*m_prog_ctx);
+    if (r == -EROFS) {
+      RWLock::RLocker owner_locker(dst_image_ctx->owner_lock);
+      if (dst_image_ctx->exclusive_lock != nullptr &&
+          !dst_image_ctx->exclusive_lock->accept_ops()) {
+        ldout(m_cct, 5) << "lost exclusive lock, retrying remote" << dendl;
+        continue;
+      }
+    }
+    break;
+  }
+  if (r < 0) {
+    lderr(m_cct) << "migration failed: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  r = set_state(cls::rbd::MIGRATION_STATE_EXECUTED, "");
+  if (r < 0) {
+    return r;
+  }
+
+  dst_image_ctx->notify_update();
+
+  ldout(m_cct, 10) << "succeeded" << dendl;
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::abort() {
+  ldout(m_cct, 10) << dendl;
+
+  int r;
+
+  m_src_image_ctx->owner_lock.get_read();
+  if (m_src_image_ctx->exclusive_lock != nullptr &&
+      !m_src_image_ctx->exclusive_lock->is_lock_owner()) {
+    C_SaferCond ctx;
+    m_src_image_ctx->exclusive_lock->acquire_lock(&ctx);
+    m_src_image_ctx->owner_lock.put_read();
+    r = ctx.wait();
+    if (r < 0) {
+      lderr(m_cct) << "error acquiring exclusive lock: " << cpp_strerror(r)
+                   << dendl;
+      return r;
+    }
+  } else {
+    m_src_image_ctx->owner_lock.put_read();
+  }
+
+  group_info_t group_info;
+  group_info.pool = -1;
+
+  auto dst_image_ctx = I::create(m_dst_image_name, m_dst_image_id, nullptr,
+                                 m_dst_io_ctx, false);
+  r = dst_image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING);
+  if (r < 0) {
+    ldout(m_cct, 1) << "failed to open destination image: " << cpp_strerror(r)
+                    << dendl;
+  } else {
+    ldout(m_cct, 10) << "removing dst image snapshots" << dendl;
+
+    BOOST_SCOPE_EXIT_TPL(&dst_image_ctx) {
+      if (dst_image_ctx != nullptr) {
+        dst_image_ctx->state->close();
+      }
+    } BOOST_SCOPE_EXIT_END;
+
+    std::vector<librbd::snap_info_t> snaps;
+    r = snap_list(dst_image_ctx, snaps);
+    if (r < 0) {
+      lderr(m_cct) << "failed listing snapshots: " << cpp_strerror(r)
+                   << dendl;
+      return r;
+    }
+
+    for (auto &snap : snaps) {
+      librbd::NoOpProgressContext prog_ctx;
+      int r = snap_remove(dst_image_ctx, snap.name.c_str(), 0, prog_ctx);
+      if (r < 0) {
+        lderr(m_cct) << "failed removing snapshot: " << cpp_strerror(r)
+                     << dendl;
+        return r;
+      }
+    }
+
+    ldout(m_cct, 10) << "removing group" << dendl;
+
+    r = remove_group(dst_image_ctx, &group_info);
+    if (r < 0 && r != -ENOENT) {
+      return r;
+    }
+
+    ldout(m_cct, 10) << "removing dst image" << dendl;
+
+    assert(dst_image_ctx->ignore_migrating);
+
+    ThreadPool *thread_pool;
+    ContextWQ *op_work_queue;
+    ImageCtx::get_thread_pool_instance(m_cct, &thread_pool, &op_work_queue);
+    C_SaferCond on_remove;
+    auto req = librbd::image::RemoveRequest<>::create(
+      m_dst_io_ctx, dst_image_ctx, false, false, *m_prog_ctx, op_work_queue,
+      &on_remove);
+    req->send();
+    r = on_remove.wait();
+
+    dst_image_ctx = nullptr;
+
+    if (r < 0) {
+      lderr(m_cct) << "failed removing destination image '"
+                   << m_dst_io_ctx.get_pool_name() << "/" << m_dst_image_name
+                   << " (" << m_dst_image_id << ")': " << cpp_strerror(r)
+                   << dendl;
+      // not fatal
+    }
+  }
+
+  r = relink_src_image();
+  if (r < 0) {
+    return r;
+  }
+
+  r = add_group(m_src_image_ctx, group_info);
+  if (r < 0) {
+    return r;
+  }
+
+  r = remove_migration(m_src_image_ctx);
+  if (r < 0) {
+    return r;
+  }
+
+  r = enable_mirroring(m_src_image_ctx, m_mirroring);
+  if (r < 0) {
+    return r;
+  }
+
+  ldout(m_cct, 10) << "succeeded" << dendl;
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::commit() {
+  ldout(m_cct, 10) << dendl;
+
+  BOOST_SCOPE_EXIT_TPL(&m_src_image_ctx) {
+    if (m_src_image_ctx != nullptr) {
+      m_src_image_ctx->state->close();
+    }
+  } BOOST_SCOPE_EXIT_END;
+
+  auto dst_image_ctx = I::create(m_dst_image_name, m_dst_image_id, nullptr,
+                                 m_dst_io_ctx, false);
+  int r = dst_image_ctx->state->open(0);
+  if (r < 0) {
+    lderr(m_cct) << "failed to open destination image: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  BOOST_SCOPE_EXIT_TPL(dst_image_ctx) {
+    dst_image_ctx->state->close();
+  } BOOST_SCOPE_EXIT_END;
+
+  r = remove_migration(dst_image_ctx);
+  if (r < 0) {
+    return r;
+  }
+
+  r = remove_src_image();
+
+  if (r < 0) {
+    return r;
+  }
+
+  r = enable_mirroring(dst_image_ctx, m_mirroring);
+  if (r < 0) {
+    return r;
+  }
+
+  ldout(m_cct, 10) << "succeeded" << dendl;
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::status(image_migration_status_t *status) {
+  ldout(m_cct, 10) << dendl;
+
+  status->source_pool_id = m_dst_migration_spec.pool_id;
+  status->source_image_name = m_dst_migration_spec.image_name;
+  status->source_image_id = m_dst_migration_spec.image_id;
+  status->dest_pool_id = m_src_migration_spec.pool_id;
+  status->dest_image_name = m_src_migration_spec.image_name;
+  status->dest_image_id = m_src_migration_spec.image_id;
+
+  switch (m_src_migration_spec.state) {
+  case cls::rbd::MIGRATION_STATE_ERROR:
+    status->state = RBD_IMAGE_MIGRATION_STATE_ERROR;
+    break;
+  case cls::rbd::MIGRATION_STATE_PREPARING:
+    status->state = RBD_IMAGE_MIGRATION_STATE_PREPARING;
+    break;
+  case cls::rbd::MIGRATION_STATE_PREPARED:
+    status->state = RBD_IMAGE_MIGRATION_STATE_PREPARED;
+    break;
+  case cls::rbd::MIGRATION_STATE_EXECUTING:
+    status->state = RBD_IMAGE_MIGRATION_STATE_EXECUTING;
+    break;
+  case cls::rbd::MIGRATION_STATE_EXECUTED:
+    status->state = RBD_IMAGE_MIGRATION_STATE_EXECUTED;
+    break;
+  default:
+    status->state = RBD_IMAGE_MIGRATION_STATE_UNKNOWN;
+    break;
+  }
+
+  status->state_description = m_src_migration_spec.state_description;
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::set_state(cls::rbd::MigrationState state,
+                            const std::string &description) {
+  int r = cls_client::migration_set_state(&m_src_io_ctx, m_src_header_oid,
+                                          state, description);
+  if (r < 0) {
+    lderr(m_cct) << "failed to set source migration header: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  r = cls_client::migration_set_state(&m_dst_io_ctx, m_dst_header_oid, state,
+                                      description);
+  if (r < 0) {
+    lderr(m_cct) << "failed to set destination migration header: "
+                 << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::list_snaps(std::vector<librbd::snap_info_t> *snapsptr) {
+  ldout(m_cct, 10) << dendl;
+
+  std::vector<librbd::snap_info_t> snaps;
+
+  int r = snap_list(m_src_image_ctx, snaps);
+  if (r < 0) {
+    lderr(m_cct) << "failed listing snapshots: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  for (auto &snap : snaps) {
+    bool is_protected;
+    r = snap_is_protected(m_src_image_ctx, snap.name.c_str(), &is_protected);
+    if (r < 0) {
+      lderr(m_cct) << "failed retrieving snapshot status: " << cpp_strerror(r)
+                   << dendl;
+      return r;
+    }
+    if (is_protected) {
+      lderr(m_cct) << "image has protected snapshot '" << snap.name << "'"
+                   << dendl;
+      return -EBUSY;
+    }
+  }
+
+  if (snapsptr != nullptr) {
+    *snapsptr = snaps;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::set_migration() {
+  ldout(m_cct, 10) << dendl;
+
+  m_src_image_ctx->ignore_migrating = true;
+
+  int r = cls_client::migration_set(&m_src_io_ctx, m_src_header_oid,
+                                    m_src_migration_spec);
+  if (r < 0) {
+    lderr(m_cct) << "failed to set migration header: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  m_src_image_ctx->notify_update();
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::remove_migration(I *image_ctx) {
+  ldout(m_cct, 10) << dendl;
+
+  int r;
+
+  r = cls_client::migration_remove(&image_ctx->md_ctx, image_ctx->header_oid);
+  if (r == -ENOENT) {
+    r = 0;
+  }
+  if (r < 0) {
+    lderr(m_cct) << "failed removing migration header: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  image_ctx->notify_update();
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::unlink_src_image() {
+  if (m_src_old_format) {
+    return v1_unlink_src_image();
+  } else {
+    return v2_unlink_src_image();
+  }
+}
+
+template <typename I>
+int Migration<I>::v1_unlink_src_image() {
+  ldout(m_cct, 10) << dendl;
+
+  int r = tmap_rm(m_src_io_ctx, m_src_image_name);
+  if (r < 0) {
+    lderr(m_cct) << "failed removing " << m_src_image_name << " from tmap: "
+                 << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::v2_unlink_src_image() {
+  ldout(m_cct, 10) << dendl;
+
+  m_src_image_ctx->owner_lock.get_read();
+  if (m_src_image_ctx->exclusive_lock != nullptr &&
+      m_src_image_ctx->exclusive_lock->is_lock_owner()) {
+    C_SaferCond ctx;
+    m_src_image_ctx->exclusive_lock->release_lock(&ctx);
+    m_src_image_ctx->owner_lock.put_read();
+    int r = ctx.wait();
+     if (r < 0) {
+      lderr(m_cct) << "error releasing exclusive lock: " << cpp_strerror(r)
+                   << dendl;
+      return r;
+     }
+  } else {
+    m_src_image_ctx->owner_lock.put_read();
+  }
+
+  int r = trash_move(m_src_io_ctx, RBD_TRASH_IMAGE_SOURCE_MIGRATION,
+                     m_src_image_ctx->name, 0);
+  if (r < 0) {
+    lderr(m_cct) << "failed moving image to trash: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::relink_src_image() {
+  if (m_src_old_format) {
+    return v1_relink_src_image();
+  } else {
+    return v2_relink_src_image();
+  }
+}
+
+template <typename I>
+int Migration<I>::v1_relink_src_image() {
+  ldout(m_cct, 10) << dendl;
+
+  int r = tmap_set(m_src_io_ctx, m_src_image_name);
+  if (r < 0) {
+    lderr(m_cct) << "failed adding " << m_src_image_name << " to tmap: "
+                 << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::v2_relink_src_image() {
+  ldout(m_cct, 10) << dendl;
+
+  int r = trash_restore(m_src_io_ctx, m_src_image_ctx->id, m_src_image_ctx->name);
+  if (r < 0) {
+    lderr(m_cct) << "failed restoring image from trash: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::create_dst_image() {
+  ldout(m_cct, 10) << dendl;
+
+  uint64_t size;
+  {
+    RWLock::RLocker snap_locker(m_src_image_ctx->snap_lock);
+    size = m_src_image_ctx->size;
+  }
+
+  ThreadPool *thread_pool;
+  ContextWQ *op_work_queue;
+  ImageCtx::get_thread_pool_instance(m_cct, &thread_pool, &op_work_queue);
+
+  C_SaferCond on_create;
+  auto *req = image::CreateRequest<I>::create(
+      m_dst_io_ctx, m_dst_image_name, m_dst_image_id, size, m_image_options, "",
+      "", true /* skip_mirror_enable */, op_work_queue, &on_create);
+  req->send();
+  int r = on_create.wait();
+  if (r < 0) {
+    lderr(m_cct) << "header creation failed: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  auto dst_image_ctx = I::create(m_dst_image_name, m_dst_image_id, nullptr,
+                                 m_dst_io_ctx, false);
+
+  r = dst_image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING);
+  if (r < 0) {
+    lderr(m_cct) << "failed to open newly created header: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  BOOST_SCOPE_EXIT_TPL(dst_image_ctx) {
+    dst_image_ctx->state->close();
+  } BOOST_SCOPE_EXIT_END;
+
+  {
+    RWLock::RLocker owner_locker(dst_image_ctx->owner_lock);
+    r = dst_image_ctx->operations->prepare_image_update(true);
+    if (r < 0) {
+      lderr(m_cct) << "cannot obtain exclusive lock" << dendl;
+      return r;
+    }
+    if (dst_image_ctx->exclusive_lock != nullptr) {
+      dst_image_ctx->exclusive_lock->block_requests(0);
+    }
+  }
+
+  SnapSeqs snap_seqs;
+
+  C_SaferCond on_snapshot_copy;
+  auto snapshot_copy_req = librbd::deep_copy::SnapshotCopyRequest<I>::create(
+      m_src_image_ctx, dst_image_ctx, CEPH_NOSNAP, m_flatten,
+      m_src_image_ctx->op_work_queue, &snap_seqs, &on_snapshot_copy);
+  snapshot_copy_req->send();
+  r = on_snapshot_copy.wait();
+  if (r < 0) {
+    lderr(m_cct) << "failed to copy snapshots: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  C_SaferCond on_metadata_copy;
+  auto metadata_copy_req = librbd::deep_copy::MetadataCopyRequest<I>::create(
+      m_src_image_ctx, dst_image_ctx, &on_metadata_copy);
+  metadata_copy_req->send();
+  r = on_metadata_copy.wait();
+  if (r < 0) {
+    lderr(m_cct) << "failed to copy metadata: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  m_dst_migration_spec = {cls::rbd::MIGRATION_HEADER_TYPE_DST,
+                          m_src_io_ctx.get_id(), m_src_image_name,
+                          m_src_image_id, snap_seqs, size, m_flatten,
+                          m_mirroring, cls::rbd::MIGRATION_STATE_PREPARING, ""};
+
+  r = cls_client::migration_set(&m_dst_io_ctx, m_dst_header_oid,
+                                m_dst_migration_spec);
+  if (r < 0) {
+    lderr(m_cct) << "failed to set migration header: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  r = update_group(m_src_image_ctx, dst_image_ctx);
+  if (r < 0) {
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::remove_group(I *image_ctx, group_info_t *group_info) {
+  int r = librbd::api::Group<I>::image_get_group(image_ctx, group_info);
+  if (r < 0) {
+    lderr(m_cct) << "failed to get image group: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  if (group_info->pool == -1) {
+    return -ENOENT;
+  }
+
+  assert(!image_ctx->id.empty());
+
+  ldout(m_cct, 10) << dendl;
+
+  librados::Rados rados(image_ctx->md_ctx);
+  IoCtx group_ioctx;
+  r = rados.ioctx_create2(group_info->pool, group_ioctx);
+  if (r < 0) {
+    lderr(m_cct) << "failed to access pool by ID " << group_info->pool << ": "
+                 << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  r = librbd::api::Group<I>::image_remove_by_id(group_ioctx,
+                                                group_info->name.c_str(),
+                                                image_ctx->md_ctx,
+                                                image_ctx->id.c_str());
+  if (r < 0) {
+    lderr(m_cct) << "failed to remove image from group: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::add_group(I *image_ctx, group_info_t &group_info) {
+  if (group_info.pool == -1) {
+    return 0;
+  }
+
+  ldout(m_cct, 10) << dendl;
+
+  librados::Rados rados(image_ctx->md_ctx);
+  IoCtx group_ioctx;
+  int r = rados.ioctx_create2(group_info.pool, group_ioctx);
+  if (r < 0) {
+    lderr(m_cct) << "failed to access pool by ID " << group_info.pool << ": "
+                 << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  r = librbd::api::Group<I>::image_add(group_ioctx, group_info.name.c_str(),
+                                       image_ctx->md_ctx,
+                                       image_ctx->name.c_str());
+  if (r < 0) {
+    lderr(m_cct) << "failed to add image to group: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::update_group(I *from_image_ctx, I *to_image_ctx) {
+  ldout(m_cct, 10) << dendl;
+
+  group_info_t group_info;
+
+  int r = remove_group(from_image_ctx, &group_info);
+  if (r < 0) {
+    return r == -ENOENT ? 0 : r;
+  }
+
+  r = add_group(to_image_ctx, group_info);
+  if (r < 0) {
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::disable_mirroring(I *image_ctx, bool *was_enabled) {
+  *was_enabled = false;
+
+  if (!image_ctx->test_features(RBD_FEATURE_JOURNALING)) {
+    return 0;
+  }
+
+  cls::rbd::MirrorImage mirror_image;
+  int r = cls_client::mirror_image_get(&image_ctx->md_ctx, image_ctx->id,
+                                       &mirror_image);
+  if (r == -ENOENT) {
+    ldout(m_cct, 10) << "mirroring is not enabled for this image" << dendl;
+    return 0;
+  }
+
+  if (r < 0) {
+    lderr(m_cct) << "failed to retrieve mirror image: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) {
+    *was_enabled = true;
+  }
+
+  ldout(m_cct, 10) << dendl;
+
+  C_SaferCond ctx;
+  auto req = mirror::DisableRequest<I>::create(image_ctx, false, true, &ctx);
+  req->send();
+  r = ctx.wait();
+  if (r < 0) {
+    lderr(m_cct) << "failed to disable mirroring: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  m_src_migration_spec.mirroring = true;
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::enable_mirroring(I *image_ctx, bool was_enabled) {
+
+  if (!image_ctx->test_features(RBD_FEATURE_JOURNALING)) {
+    return 0;
+  }
+
+  cls::rbd::MirrorMode mirror_mode;
+  int r = cls_client::mirror_mode_get(&image_ctx->md_ctx, &mirror_mode);
+  if (r < 0 && r != -ENOENT) {
+    lderr(m_cct) << "failed to retrieve mirror mode: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+    ldout(m_cct, 10) << "mirroring is not enabled for destination pool"
+                     << dendl;
+    return 0;
+  }
+  if (mirror_mode == cls::rbd::MIRROR_MODE_IMAGE && !was_enabled) {
+    ldout(m_cct, 10) << "mirroring is not enabled for image" << dendl;
+    return 0;
+  }
+
+  ldout(m_cct, 10) << dendl;
+
+  C_SaferCond ctx;
+  auto req = mirror::EnableRequest<I>::create(image_ctx->md_ctx, image_ctx->id,
+                                              "", image_ctx->op_work_queue,
+                                              &ctx);
+  req->send();
+  r = ctx.wait();
+  if (r < 0) {
+    lderr(m_cct) << "failed to enable mirroring: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::remove_src_image() {
+  ldout(m_cct, 10) << dendl;
+
+  std::vector<librbd::snap_info_t> snaps;
+  int r = list_snaps(&snaps);
+  if (r < 0) {
+    return r;
+  }
+
+  for (auto &snap : snaps) {
+    librbd::NoOpProgressContext prog_ctx;
+    int r = snap_remove(m_src_image_ctx, snap.name.c_str(), 0, prog_ctx);
+    if (r < 0) {
+      lderr(m_cct) << "failed removing snapshot '" << snap.name << "': "
+                   << cpp_strerror(r) << dendl;
+      return r;
+    }
+  }
+
+  assert(m_src_image_ctx->ignore_migrating);
+
+  ThreadPool *thread_pool;
+  ContextWQ *op_work_queue;
+  ImageCtx::get_thread_pool_instance(m_cct, &thread_pool, &op_work_queue);
+  C_SaferCond on_remove;
+  auto req = librbd::image::RemoveRequest<I>::create(
+      m_src_io_ctx, m_src_image_ctx, false, true, *m_prog_ctx, op_work_queue,
+      &on_remove);
+  req->send();
+  r = on_remove.wait();
+
+  m_src_image_ctx = nullptr;
+
+  // For old format image it will return -ENOENT due to expected
+  // tmap_rm failure at the end.
+  if (r < 0 && r != -ENOENT) {
+    lderr(m_cct) << "failed removing source image: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  if (!m_src_image_id.empty()) {
+    r = cls_client::trash_remove(&m_src_io_ctx, m_src_image_id);
+    if (r < 0 && r != -ENOENT) {
+      lderr(m_cct) << "error removing image " << m_src_image_id
+                   << " from rbd_trash object" << dendl;
+    }
+  }
+
+  return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Migration<librbd::ImageCtx>;
diff --git a/src/librbd/api/Migration.h b/src/librbd/api/Migration.h
new file mode 100644 (file)
index 0000000..88c1c95
--- /dev/null
@@ -0,0 +1,100 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_MIGRATION_H
+#define CEPH_LIBRBD_API_MIGRATION_H
+
+#include "include/int_types.h"
+#include "include/rbd/librbd.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+
+#include <vector>
+
+namespace librados {
+
+class IoCtx;
+
+}
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace api {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class Migration {
+public:
+  static int prepare(librados::IoCtx& io_ctx, const std::string &image_name,
+                     librados::IoCtx& dest_io_ctx,
+                     const std::string &dest_image_name, ImageOptions& opts);
+  static int execute(librados::IoCtx& io_ctx, const std::string &image_name,
+                     ProgressContext &prog_ctx);
+  static int abort(librados::IoCtx& io_ctx, const std::string &image_name,
+                   ProgressContext &prog_ctx);
+  static int commit(librados::IoCtx& io_ctx, const std::string &image_name,
+                    ProgressContext &prog_ctx);
+  static int status(librados::IoCtx& io_ctx, const std::string &image_name,
+                    image_migration_status_t *status);
+
+private:
+  CephContext* m_cct;
+  ImageCtxT *m_src_image_ctx;
+  librados::IoCtx m_src_io_ctx;
+  librados::IoCtx &m_dst_io_ctx;
+  bool m_src_old_format;
+  std::string m_src_image_name;
+  std::string m_src_image_id;
+  std::string m_src_header_oid;
+  std::string m_dst_image_name;
+  std::string m_dst_image_id;
+  std::string m_dst_header_oid;
+  ImageOptions &m_image_options;
+  bool m_flatten;
+  bool m_mirroring;
+  ProgressContext *m_prog_ctx;
+
+  cls::rbd::MigrationSpec m_src_migration_spec;
+  cls::rbd::MigrationSpec m_dst_migration_spec;
+
+  Migration(ImageCtxT *image_ctx, librados::IoCtx& dest_io_ctx,
+            const std::string &dest_image_name, const std::string &dst_image_id,
+            ImageOptions& opts, bool flatten, bool mirroring,
+            cls::rbd::MigrationState state, const std::string &state_desc,
+            ProgressContext *prog_ctx);
+
+  int prepare();
+  int execute();
+  int abort();
+  int commit();
+  int status(image_migration_status_t *status);
+
+  int set_state(cls::rbd::MigrationState state, const std::string &description);
+
+  int list_snaps(std::vector<librbd::snap_info_t> *snaps = nullptr);
+  int disable_mirroring(ImageCtxT *image_ctx, bool *was_enabled);
+  int enable_mirroring(ImageCtxT *image_ctx, bool was_enabled);
+  int set_migration();
+  int unlink_src_image();
+  int relink_src_image();
+  int create_dst_image();
+  int remove_group(ImageCtxT *image_ctx, group_info_t *group_info);
+  int add_group(ImageCtxT *image_ctx, group_info_t &group_info);
+  int update_group(ImageCtxT *from_image_ctx, ImageCtxT *to_image_ctx);
+  int remove_migration(ImageCtxT *image_ctx);
+  int remove_src_image();
+
+  int v1_set_migration();
+  int v2_set_migration();
+  int v1_unlink_src_image();
+  int v2_unlink_src_image();
+  int v1_relink_src_image();
+  int v2_relink_src_image();
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Migration<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_MIGRATION_H
index 19072aee7eb1c34aa5a2425cfbf8adfc1e4a42de..3d6bbe8404e0a5e32bf10cc33e2624588230e0c5 100644 (file)
@@ -329,6 +329,11 @@ void ObjectCopyRequest<I>::send_write_object() {
   librados::ObjectWriteOperation op;
   uint64_t buffer_offset;
 
+  if (!m_dst_image_ctx->migration_info.empty()) {
+    cls_client::assert_snapc_seq(&op, dst_snap_seq,
+                                 cls::rbd::ASSERT_SNAPC_SEQ_GT_SNAPSET_SEQ);
+  }
+
   for (auto &copy_op : copy_ops) {
     switch (copy_op.type) {
     case COPY_OP_TYPE_WRITE:
@@ -366,7 +371,7 @@ void ObjectCopyRequest<I>::send_write_object() {
     }
   }
 
-  if (op.size() == 0) {
+  if (op.size() == (m_dst_image_ctx->migration_info.empty() ? 0 : 1)) {
     handle_write_object(0);
     return;
   }
@@ -388,7 +393,7 @@ void ObjectCopyRequest<I>::send_write_object() {
     });
   librados::AioCompletion *comp = create_rados_callback(ctx);
   int r = m_dst_io_ctx.aio_operate(m_dst_oid, comp, &op, dst_snap_seq,
-                                   dst_snap_ids);
+                                   dst_snap_ids, nullptr);
   assert(r == 0);
   comp->release();
 }
@@ -399,6 +404,9 @@ void ObjectCopyRequest<I>::handle_write_object(int r) {
 
   if (r == -ENOENT) {
     r = 0;
+  } else if (r == -ERANGE) {
+    ldout(m_cct, 10) << "concurrent deep copy" << dendl;
+    r = 0;
   }
   if (r < 0) {
     lderr(m_cct) << "failed to write to destination object: " << cpp_strerror(r)
index 1b513c35d45cdaf41222d5444702fdd5203a6e53..10d3c7c1a756749981707fba6c1a572ee28844cc 100644 (file)
@@ -5,6 +5,7 @@
 #define CEPH_LIBRBD_DEEP_COPY_TYPES_H
 
 #include "include/int_types.h"
+#include "include/rados/librados.hpp"
 #include <boost/optional.hpp>
 
 namespace librbd {
index b784b9ab0c362a20e7fe9886c60858ade73c85c4..5389abf34498df35b7b33f6210fe9ad422adc250 100644 (file)
@@ -238,7 +238,12 @@ void CloneRequest<I>::send_open() {
   using klass = CloneRequest<I>;
   Context *ctx = create_context_callback<klass, &klass::handle_open>(this);
 
-  m_imctx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT, ctx);
+  uint64_t flags = OPEN_FLAG_SKIP_OPEN_PARENT;
+  if ((m_features & RBD_FEATURE_MIGRATING) != 0) {
+    flags |= OPEN_FLAG_IGNORE_MIGRATING;
+  }
+
+  m_imctx->state->open(flags, ctx);
 }
 
 template <typename I>
index d3cca91b8d2475d7353c71f383e4908585acf7b6..de0928ee1819eccfc0f0c13079a1846f3b4ed23a 100644 (file)
@@ -290,6 +290,7 @@ void CloseRequest<I>::handle_close_parent(int r) {
   ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
 
   delete m_image_ctx->parent;
+  m_image_ctx->parent = nullptr;
   save_result(r);
   if (r < 0) {
     lderr(cct) << "error closing parent image: " << cpp_strerror(r) << dendl;
index 4032f4f2577ccd49ff3923d5408e97900c8ae78f..89f2a5d0d194732181cfd51a1fa68ce5b6c276d8 100644 (file)
@@ -33,6 +33,9 @@ OpenRequest<I>::OpenRequest(I *image_ctx, uint64_t flags,
   if ((flags & OPEN_FLAG_OLD_FORMAT) != 0) {
     m_image_ctx->old_format = true;
   }
+  if ((flags & OPEN_FLAG_IGNORE_MIGRATING) != 0) {
+    m_image_ctx->ignore_migrating = true;
+  }
 }
 
 template <typename I>
index 57d0c17655f065281287d3daf0301e2b8547da95..5bb00a66834708325d37d006985292640a517c72 100644 (file)
@@ -24,43 +24,55 @@ using util::create_async_context_callback;
 using util::create_context_callback;
 
 template <typename I>
-RefreshParentRequest<I>::RefreshParentRequest(I &child_image_ctx,
-                                              const ParentInfo &parent_md,
-                                              Context *on_finish)
+RefreshParentRequest<I>::RefreshParentRequest(
+    I &child_image_ctx, const ParentInfo &parent_md,
+    const MigrationInfo &migration_info, Context *on_finish)
   : m_child_image_ctx(child_image_ctx), m_parent_md(parent_md),
-    m_on_finish(on_finish), m_parent_image_ctx(nullptr),
-    m_parent_snap_id(CEPH_NOSNAP), m_error_result(0) {
+    m_migration_info(migration_info), m_on_finish(on_finish),
+    m_parent_image_ctx(nullptr), m_parent_snap_id(CEPH_NOSNAP),
+    m_error_result(0) {
 }
 
 template <typename I>
-bool RefreshParentRequest<I>::is_refresh_required(I &child_image_ctx,
-                                                  const ParentInfo &parent_md) {
+bool RefreshParentRequest<I>::is_refresh_required(
+    I &child_image_ctx, const ParentInfo &parent_md,
+    const MigrationInfo &migration_info) {
   assert(child_image_ctx.snap_lock.is_locked());
   assert(child_image_ctx.parent_lock.is_locked());
-  return (is_open_required(child_image_ctx, parent_md) ||
-          is_close_required(child_image_ctx, parent_md));
+  return (is_open_required(child_image_ctx, parent_md, migration_info) ||
+          is_close_required(child_image_ctx, parent_md, migration_info));
 }
 
 template <typename I>
-bool RefreshParentRequest<I>::is_close_required(I &child_image_ctx,
-                                                const ParentInfo &parent_md) {
+bool RefreshParentRequest<I>::is_close_required(
+    I &child_image_ctx, const ParentInfo &parent_md,
+    const MigrationInfo &migration_info) {
   return (child_image_ctx.parent != nullptr &&
-          (parent_md.spec.pool_id == -1 || parent_md.overlap == 0));
+          !does_parent_exist(child_image_ctx, parent_md, migration_info));
 }
 
 template <typename I>
-bool RefreshParentRequest<I>::is_open_required(I &child_image_ctx,
-                                               const ParentInfo &parent_md) {
-  return (parent_md.spec.pool_id > -1 && parent_md.overlap > 0 &&
+bool RefreshParentRequest<I>::is_open_required(
+    I &child_image_ctx, const ParentInfo &parent_md,
+    const MigrationInfo &migration_info) {
+  return (does_parent_exist(child_image_ctx, parent_md, migration_info) &&
           (child_image_ctx.parent == nullptr ||
            child_image_ctx.parent->md_ctx.get_id() != parent_md.spec.pool_id ||
            child_image_ctx.parent->id != parent_md.spec.image_id ||
            child_image_ctx.parent->snap_id != parent_md.spec.snap_id));
 }
 
+template <typename I>
+bool RefreshParentRequest<I>::does_parent_exist(
+    I &child_image_ctx, const ParentInfo &parent_md,
+    const MigrationInfo &migration_info) {
+  return (parent_md.spec.pool_id > -1 && parent_md.overlap > 0) ||
+      !migration_info.empty();
+}
+
 template <typename I>
 void RefreshParentRequest<I>::send() {
-  if (is_open_required(m_child_image_ctx, m_parent_md)) {
+  if (is_open_required(m_child_image_ctx, m_parent_md, m_migration_info)) {
     send_open_parent();
   } else {
     // parent will be closed (if necessary) during finalize
@@ -108,10 +120,15 @@ void RefreshParentRequest<I>::send_open_parent() {
   // TODO support clone v2 parent namespaces
   parent_io_ctx.set_namespace(m_child_image_ctx.md_ctx.get_namespace());
 
-  // since we don't know the image and snapshot name, set their ids and
-  // reset the snap_name and snap_exists fields after we read the header
-  m_parent_image_ctx = new I("", m_parent_md.spec.image_id, NULL, parent_io_ctx,
-                             true);
+  std::string image_name;
+  uint64_t flags = 0;
+  if (!m_migration_info.empty() && !m_migration_info.image_name.empty()) {
+    image_name = m_migration_info.image_name;
+    flags |= OPEN_FLAG_OLD_FORMAT;
+  }
+
+  m_parent_image_ctx = new I(image_name, m_parent_md.spec.image_id, nullptr,
+                             parent_io_ctx, true);
   m_parent_image_ctx->child = &m_child_image_ctx;
 
   // set rados flags for reading the parent image
@@ -121,10 +138,6 @@ void RefreshParentRequest<I>::send_open_parent() {
     m_parent_image_ctx->set_read_flag(librados::OPERATION_LOCALIZE_READS);
   }
 
-  uint64_t flags = 0;
-  if (m_parent_md.spec.image_id.empty()) {
-    flags |= OPEN_FLAG_OLD_FORMAT;
-  }
   using klass = RefreshParentRequest<I>;
   Context *ctx = create_async_context_callback(
     m_child_image_ctx, create_context_callback<
@@ -150,6 +163,10 @@ Context *RefreshParentRequest<I>::handle_open_parent(int *result) {
     return m_on_finish;
   }
 
+  if (m_parent_md.spec.snap_id == CEPH_NOSNAP) {
+    return m_on_finish;
+  }
+
   send_set_parent_snap();
   return nullptr;
 }
index cecec88341ad8f622045814a1d41cadcbd0954f6..aeedbf88c69f0207730524c6e94da1eed950d533 100644 (file)
@@ -20,12 +20,15 @@ class RefreshParentRequest {
 public:
   static RefreshParentRequest *create(ImageCtxT &child_image_ctx,
                                       const ParentInfo &parent_md,
+                                      const MigrationInfo &migration_info,
                                       Context *on_finish) {
-    return new RefreshParentRequest(child_image_ctx, parent_md, on_finish);
+    return new RefreshParentRequest(child_image_ctx, parent_md, migration_info,
+                                    on_finish);
   }
 
   static bool is_refresh_required(ImageCtxT &child_image_ctx,
-                                  const ParentInfo &parent_md);
+                                  const ParentInfo &parent_md,
+                                  const MigrationInfo &migration_info);
 
   void send();
   void apply();
@@ -59,10 +62,11 @@ private:
    */
 
   RefreshParentRequest(ImageCtxT &child_image_ctx, const ParentInfo &parent_md,
-                       Context *on_finish);
+                       const MigrationInfo &migration_info, Context *on_finish);
 
   ImageCtxT &m_child_image_ctx;
   ParentInfo m_parent_md;
+  MigrationInfo m_migration_info;
   Context *m_on_finish;
 
   ImageCtxT *m_parent_image_ctx;
@@ -71,9 +75,14 @@ private:
   int m_error_result;
 
   static bool is_close_required(ImageCtxT &child_image_ctx,
-                                const ParentInfo &parent_md);
+                                const ParentInfo &parent_md,
+                                const MigrationInfo &migration_info);
   static bool is_open_required(ImageCtxT &child_image_ctx,
-                               const ParentInfo &parent_md);
+                               const ParentInfo &parent_md,
+                               const MigrationInfo &migration_info);
+  static bool does_parent_exist(ImageCtxT &child_image_ctx,
+                                const ParentInfo &parent_md,
+                                const MigrationInfo &migration_info);
 
   void send_open_parent();
   Context *handle_open_parent(int *result);
index 2afee7a2aad5d5ef77597b632479d04db3921ca1..a53a3528bfc57d1e361cb050829f2c80dc2c352a 100644 (file)
@@ -15,6 +15,7 @@
 #include "librbd/Journal.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/Utils.h"
+#include "librbd/deep_copy/Utils.h"
 #include "librbd/image/RefreshParentRequest.h"
 #include "librbd/io/AioCompletion.h"
 #include "librbd/io/ImageDispatchSpec.h"
@@ -67,6 +68,90 @@ void RefreshRequest<I>::send() {
   }
 }
 
+template <typename I>
+void RefreshRequest<I>::send_get_migration_header() {
+  if (m_image_ctx.ignore_migrating) {
+    if (m_image_ctx.old_format) {
+      send_v1_get_snapshots();
+    } else {
+      send_v2_get_metadata();
+    }
+    return;
+  }
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << this << " " << __func__ << dendl;
+
+  librados::ObjectReadOperation op;
+  cls_client::migration_get_start(&op);
+
+  using klass = RefreshRequest<I>;
+  librados::AioCompletion *comp =
+    create_rados_callback<klass, &klass::handle_get_migration_header>(this);
+  m_out_bl.clear();
+  m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op,
+                                 &m_out_bl);
+  comp->release();
+}
+
+template <typename I>
+Context *RefreshRequest<I>::handle_get_migration_header(int *result) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  if (*result == 0) {
+    auto it = m_out_bl.cbegin();
+    *result = cls_client::migration_get_finish(&it, &m_migration_spec);
+  } else if (*result == -ENOENT) {
+    ldout(cct, 5) << this << " " << __func__ << ": no migration header found"
+                  << ", retrying" << dendl;
+    send();
+    return nullptr;
+  }
+
+  if (*result < 0) {
+    lderr(cct) << "failed to retrieve migration header: "
+               << cpp_strerror(*result) << dendl;
+    return m_on_finish;
+  }
+
+  switch(m_migration_spec.header_type) {
+  case cls::rbd::MIGRATION_HEADER_TYPE_SRC:
+    if (!m_image_ctx.read_only) {
+      lderr(cct) << "image being migrated" << dendl;
+      *result = -EROFS;
+      return m_on_finish;
+    }
+    ldout(cct, 1) << this << " " << __func__ << ": migrating to: "
+                  << m_migration_spec << dendl;
+    break;
+  case cls::rbd::MIGRATION_HEADER_TYPE_DST:
+    ldout(cct, 1) << this << " " << __func__ << ": migrating from: "
+                  << m_migration_spec << dendl;
+    if (m_migration_spec.state != cls::rbd::MIGRATION_STATE_PREPARED &&
+        m_migration_spec.state != cls::rbd::MIGRATION_STATE_EXECUTING &&
+        m_migration_spec.state != cls::rbd::MIGRATION_STATE_EXECUTED) {
+      ldout(cct, 5) << this << " " << __func__ << ": current migration state: "
+                    << m_migration_spec.state << ", retrying" << dendl;
+      send();
+      return nullptr;
+    }
+    break;
+  default:
+    ldout(cct, 1) << this << " " << __func__ << ": migration type "
+                  << m_migration_spec.header_type << dendl;
+    *result = -EBADMSG;
+    return m_on_finish;
+  }
+
+  if (m_image_ctx.old_format) {
+    send_v1_get_snapshots();
+  } else {
+    send_v2_get_metadata();
+  }
+  return nullptr;
+}
+
 template <typename I>
 void RefreshRequest<I>::send_v1_read_header() {
   CephContext *cct = m_image_ctx.cct;
@@ -91,6 +176,7 @@ Context *RefreshRequest<I>::handle_v1_read_header(int *result) {
   ldout(cct, 10) << this << " " << __func__ << ": " << "r=" << *result << dendl;
 
   rbd_obj_header_ondisk v1_header;
+  bool migrating = false;
   if (*result < 0) {
     return m_on_finish;
   } else if (m_out_bl.length() < sizeof(v1_header)) {
@@ -99,16 +185,27 @@ Context *RefreshRequest<I>::handle_v1_read_header(int *result) {
     return m_on_finish;
   } else if (memcmp(RBD_HEADER_TEXT, m_out_bl.c_str(),
                     sizeof(RBD_HEADER_TEXT)) != 0) {
-    lderr(cct) << "unrecognized v1 header" << dendl;
-    *result = -ENXIO;
-    return m_on_finish;
+    if (memcmp(RBD_MIGRATE_HEADER_TEXT, m_out_bl.c_str(),
+               sizeof(RBD_MIGRATE_HEADER_TEXT)) == 0) {
+      ldout(cct, 1) << this << " " << __func__ << ": migration v1 header detected"
+                    << dendl;
+      migrating = true;
+    } else {
+      lderr(cct) << "unrecognized v1 header" << dendl;
+      *result = -ENXIO;
+      return m_on_finish;
+    }
   }
 
   memcpy(&v1_header, m_out_bl.c_str(), sizeof(v1_header));
   m_order = v1_header.options.order;
   m_size = v1_header.image_size;
   m_object_prefix = v1_header.block_name;
-  send_v1_get_snapshots();
+  if (migrating) {
+    send_get_migration_header();
+  } else {
+    send_v1_get_snapshots();
+  }
   return nullptr;
 }
 
@@ -299,6 +396,12 @@ Context *RefreshRequest<I>::handle_v2_get_mutable_metadata(int *result) {
     m_incomplete_update = true;
   }
 
+  if ((m_features & RBD_FEATURE_MIGRATING) != 0) {
+    ldout(cct, 1) << "migrating feature set" << dendl;
+    send_get_migration_header();
+    return nullptr;
+  }
+
   send_v2_get_metadata();
   return nullptr;
 }
@@ -668,9 +771,11 @@ void RefreshRequest<I>::send_v2_refresh_parent() {
     RWLock::RLocker parent_locker(m_image_ctx.parent_lock);
 
     ParentInfo parent_md;
-    int r = get_parent_info(m_image_ctx.snap_id, &parent_md);
+    MigrationInfo migration_info;
+    int r = get_parent_info(m_image_ctx.snap_id, &parent_md, &migration_info);
     if (!m_skip_open_parent_image && (r < 0 ||
-        RefreshParentRequest<I>::is_refresh_required(m_image_ctx, parent_md))) {
+        RefreshParentRequest<I>::is_refresh_required(m_image_ctx, parent_md,
+                                                     migration_info))) {
       CephContext *cct = m_image_ctx.cct;
       ldout(cct, 10) << this << " " << __func__ << dendl;
 
@@ -678,7 +783,7 @@ void RefreshRequest<I>::send_v2_refresh_parent() {
       Context *ctx = create_context_callback<
         klass, &klass::handle_v2_refresh_parent>(this);
       m_refresh_parent = RefreshParentRequest<I>::create(
-        m_image_ctx, parent_md, ctx);
+        m_image_ctx, parent_md, migration_info, ctx);
     }
   }
 
@@ -1140,6 +1245,8 @@ void RefreshRequest<I>::apply() {
     m_image_ctx.lock_tag = m_lock_tag;
     m_image_ctx.exclusive_locked = m_exclusive_locked;
 
+    std::map<uint64_t, uint64_t> migration_reverse_snap_seq;
+
     if (m_image_ctx.old_format) {
       m_image_ctx.order = m_order;
       m_image_ctx.features = 0;
@@ -1155,7 +1262,15 @@ void RefreshRequest<I>::apply() {
       m_image_ctx.operations_disabled = (
         (m_op_features & ~RBD_OPERATION_FEATURES_ALL) != 0ULL);
       m_image_ctx.group_spec = m_group_spec;
-      m_image_ctx.parent_md = m_parent_md;
+      if (get_migration_info(&m_image_ctx.parent_md,
+                             &m_image_ctx.migration_info)) {
+        for (auto it : m_image_ctx.migration_info.snap_map) {
+          migration_reverse_snap_seq[it.second.front()] = it.first;
+        }
+      } else {
+        m_image_ctx.parent_md = m_parent_md;
+        m_image_ctx.migration_info = {};
+      }
     }
 
     for (size_t i = 0; i < m_snapc.snaps.size(); ++i) {
@@ -1174,6 +1289,7 @@ void RefreshRequest<I>::apply() {
     m_image_ctx.snaps.clear();
     m_image_ctx.snap_info.clear();
     m_image_ctx.snap_ids.clear();
+    auto overlap = m_image_ctx.parent_md.overlap;
     for (size_t i = 0; i < m_snapc.snaps.size(); ++i) {
       uint64_t flags = m_image_ctx.old_format ? 0 : m_snap_flags[i];
       uint8_t protection_status = m_image_ctx.old_format ?
@@ -1181,15 +1297,27 @@ void RefreshRequest<I>::apply() {
         m_snap_protection[i];
       ParentInfo parent;
       if (!m_image_ctx.old_format) {
-        parent = m_snap_parents[i];
+        if (!m_image_ctx.migration_info.empty()) {
+          parent = m_image_ctx.parent_md;
+          auto it = migration_reverse_snap_seq.find(m_snapc.snaps[i].val);
+          if (it != migration_reverse_snap_seq.end()) {
+            parent.spec.snap_id = it->second;
+            parent.overlap = m_snap_infos[i].image_size;
+          } else {
+            overlap = std::min(overlap, m_snap_infos[i].image_size);
+            parent.overlap = overlap;
+          }
+        } else {
+          parent = m_snap_parents[i];
+        }
       }
-
       m_image_ctx.add_snap(m_snap_infos[i].snapshot_namespace,
                            m_snap_infos[i].name, m_snapc.snaps[i].val,
                            m_snap_infos[i].image_size, parent,
                           protection_status, flags,
                            m_snap_infos[i].timestamp);
     }
+    m_image_ctx.parent_md.overlap = std::min(overlap, m_image_ctx.size);
     m_image_ctx.snapc = m_snapc;
 
     if (m_image_ctx.snap_id != CEPH_NOSNAP &&
@@ -1240,14 +1368,19 @@ void RefreshRequest<I>::apply() {
 
 template <typename I>
 int RefreshRequest<I>::get_parent_info(uint64_t snap_id,
-                                       ParentInfo *parent_md) {
-  if (snap_id == CEPH_NOSNAP) {
+                                       ParentInfo *parent_md,
+                                       MigrationInfo *migration_info) {
+  if (get_migration_info(parent_md, migration_info)) {
+    return 0;
+  } else if (snap_id == CEPH_NOSNAP) {
     *parent_md = m_parent_md;
+    *migration_info = {};
     return 0;
   } else {
     for (size_t i = 0; i < m_snapc.snaps.size(); ++i) {
       if (m_snapc.snaps[i].val == snap_id) {
         *parent_md = m_snap_parents[i];
+        *migration_info = {};
         return 0;
       }
     }
@@ -1255,6 +1388,46 @@ int RefreshRequest<I>::get_parent_info(uint64_t snap_id,
   return -ENOENT;
 }
 
+template <typename I>
+bool RefreshRequest<I>::get_migration_info(ParentInfo *parent_md,
+                                           MigrationInfo *migration_info) {
+  if (m_migration_spec.header_type != cls::rbd::MIGRATION_HEADER_TYPE_DST ||
+      (m_migration_spec.state != cls::rbd::MIGRATION_STATE_PREPARED &&
+       m_migration_spec.state != cls::rbd::MIGRATION_STATE_EXECUTING)) {
+    assert(m_migration_spec.header_type == cls::rbd::MIGRATION_HEADER_TYPE_SRC ||
+           m_migration_spec.pool_id == -1 ||
+           m_migration_spec.state == cls::rbd::MIGRATION_STATE_EXECUTED);
+
+    return false;
+  }
+
+  parent_md->spec.pool_id = m_migration_spec.pool_id;
+  parent_md->spec.image_id = m_migration_spec.image_id;
+  parent_md->spec.snap_id = CEPH_NOSNAP;
+  parent_md->overlap = m_migration_spec.overlap;
+
+  *migration_info = {m_migration_spec.pool_id, m_migration_spec.image_name,
+                     m_migration_spec.image_id, {}, m_migration_spec.overlap,
+                     m_migration_spec.flatten};
+
+  auto snap_seqs = m_migration_spec.snap_seqs;
+  // If new snapshots have been created on destination image after
+  // migration stared, map the source CEPH_NOSNAP to the earliest of
+  // these snapshots.
+  snapid_t snap_id = snap_seqs.empty() ? 0 : snap_seqs.rbegin()->second;
+  auto it = std::upper_bound(m_snapc.snaps.rbegin(), m_snapc.snaps.rend(),
+                             snap_id);
+  if (it != m_snapc.snaps.rend()) {
+    snap_seqs[CEPH_NOSNAP] = *it;
+  } else {
+    snap_seqs[CEPH_NOSNAP] = CEPH_NOSNAP;
+  }
+
+  deep_copy::util::compute_snap_map(0, CEPH_NOSNAP, snap_seqs,
+                                    &migration_info->snap_map);
+  return true;
+}
+
 } // namespace image
 } // namespace librbd
 
index 5728a5d1a5f4c9a5ff2946f04be24a2fc8b43504..86d7ea61e28ae7f741f018184a2f5a074b8d3740 100644 (file)
@@ -43,16 +43,19 @@ private:
   /**
    * @verbatim
    *
-   * <start>
-   *    |
-   *    | (v1)
-   *    |-----> V1_READ_HEADER ---> V1_GET_SNAPSHOTS ---> V1_GET_LOCKS
-   *    |                                                     |
-   *    | (v2)                                                v
-   *    \-----> V2_GET_MUTABLE_METADATA                    <apply>
-   *                |                                         |
-   *                v                                         |
-   *            V2_GET_METADATA                               |
+   * <start> < * * * * * * * * * * * * * * * * * * * * * * * * * * (ENOENT)
+   *  ^ |                                                        *
+   *  * | (v1)                                                   *
+   *  * |-----> V1_READ_HEADER -------------> GET_MIGRATION_HEADER (skip if not
+   *  * |                                                     |     migrating)
+   *  * | (v2)                                                v
+   *  * \-----> V2_GET_MUTABLE_METADATA                   V1_GET_SNAPSHOTS
+   *  *             |                                         |
+   *  *             v                                         v
+   *  * * * * * GET_MIGRATION_HEADER (skip if not         V1_GET_LOCKS
+   *  (ENOENT)      |                 migrating)              |
+   *                v                                         v
+   *            V2_GET_METADATA                            <apply>
    *                |                                         |
    *                v                                         |
    *            V2_GET_FLAGS                                  |
@@ -119,6 +122,7 @@ private:
   bool m_skip_open_parent_image;
   Context *m_on_finish;
 
+  cls::rbd::MigrationSpec m_migration_spec;
   int m_error_result;
   bool m_flush_aio;
   decltype(m_image_ctx.exclusive_lock) m_exclusive_lock;
@@ -156,6 +160,9 @@ private:
   bool m_blocked_writes = false;
   bool m_incomplete_update = false;
 
+  void send_get_migration_header();
+  Context *handle_get_migration_header(int *result);
+
   void send_v1_read_header();
   Context *handle_v1_read_header(int *result);
 
@@ -234,7 +241,9 @@ private:
   }
 
   void apply();
-  int get_parent_info(uint64_t snap_id, ParentInfo *parent_md);
+  int get_parent_info(uint64_t snap_id, ParentInfo *parent_md,
+                      MigrationInfo *migration_info);
+  bool get_migration_info(ParentInfo *parent_md, MigrationInfo *migration_info);
 };
 
 } // namespace image
index 76fbf8d0df57755cc59e38c7f96396b474f92aca..9140b42290de6db5abe1b73d9fd6a481093d8198 100644 (file)
@@ -202,6 +202,13 @@ template<typename I>
 void RemoveRequest<I>::validate_image_removal() {
   ldout(m_cct, 20) << dendl;
 
+  if (!m_image_ctx->ignore_migrating &&
+      m_image_ctx->test_features(RBD_FEATURE_MIGRATING)) {
+    lderr(m_cct) << "image in migration state - not removing" << dendl;
+    send_close_image(-EBUSY);
+    return;
+  }
+
   check_image_snaps();
 }
 
index cfe22992e784b784c9ff38b8acabb89ff8970812..40e25e3e945e88540c5f6e53d42256f530ce80fa 100644 (file)
@@ -190,8 +190,8 @@ Context *SetSnapRequest<I>::send_refresh_parent(int *result) {
     }
 
     parent_md = *parent_info;
-    refresh_parent = RefreshParentRequest<I>::is_refresh_required(m_image_ctx,
-                                                                  parent_md);
+    refresh_parent = RefreshParentRequest<I>::is_refresh_required(
+        m_image_ctx, parent_md, m_image_ctx.migration_info);
   }
 
   if (!refresh_parent) {
@@ -212,6 +212,7 @@ Context *SetSnapRequest<I>::send_refresh_parent(int *result) {
   Context *ctx = create_context_callback<
     klass, &klass::handle_refresh_parent>(this);
   m_refresh_parent = RefreshParentRequest<I>::create(m_image_ctx, parent_md,
+                                                     m_image_ctx.migration_info,
                                                      ctx);
   m_refresh_parent->send();
   return nullptr;
index cb7cf88f2fb2c5d281c1a04e7b69478c90b8c2f1..1433ce54fa70fd77177901e38bda29f427dde81f 100644 (file)
@@ -244,8 +244,14 @@ bool compare_by_name(const child_info_t& c1, const child_info_t& c2)
       off += r;
     } while (r == READ_SIZE);
 
+    static_assert(sizeof(RBD_HEADER_TEXT) == sizeof(RBD_MIGRATE_HEADER_TEXT),
+                  "length of rbd headers must be the same");
+
     if (header.length() < sizeof(RBD_HEADER_TEXT) ||
-       memcmp(RBD_HEADER_TEXT, header.c_str(), sizeof(RBD_HEADER_TEXT))) {
+        (memcmp(RBD_HEADER_TEXT, header.c_str(),
+                sizeof(RBD_HEADER_TEXT)) != 0 &&
+         memcmp(RBD_MIGRATE_HEADER_TEXT, header.c_str(),
+                sizeof(RBD_MIGRATE_HEADER_TEXT)) != 0)) {
       CephContext *cct = (CephContext *)io_ctx.cct();
       lderr(cct) << "unrecognized header format" << dendl;
       return -ENXIO;
@@ -1419,6 +1425,12 @@ bool compare_by_name(const child_info_t& c1, const child_info_t& c2)
     }
     ictx->owner_lock.put_read();
 
+    if (!ictx->migration_info.empty()) {
+      lderr(cct) << "cannot move migrating image to trash" << dendl;
+      ictx->state->close();
+      return -EINVAL;
+    }
+
     utime_t delete_time{ceph_clock_now()};
     utime_t deferment_end_time{delete_time};
     deferment_end_time += delay;
index ce7f087e8ca869977f7cca69281cc01c5b60e5f8..f33830b153053b0a1690f455539236d38e57e614 100644 (file)
@@ -12,6 +12,7 @@
 #include "librbd/ImageCtx.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/Utils.h"
+#include "librbd/deep_copy/ObjectCopyRequest.h"
 #include "librbd/io/AioCompletion.h"
 #include "librbd/io/ImageRequest.h"
 #include "librbd/io/ObjectRequest.h"
@@ -203,10 +204,48 @@ bool CopyupRequest<I>::is_copyup_required() {
   return false;
 }
 
+template <typename I>
+bool CopyupRequest<I>::is_update_object_map_required() {
+  RWLock::RLocker owner_locker(m_ictx->owner_lock);
+  RWLock::RLocker snap_locker(m_ictx->snap_lock);
+  if (m_ictx->object_map == nullptr) {
+    return false;
+  }
+
+  if (!is_deep_copy()) {
+    return false;
+  }
+
+  auto it = m_ictx->migration_info.snap_map.find(CEPH_NOSNAP);
+  assert(it != m_ictx->migration_info.snap_map.end());
+  return it->second[0] != CEPH_NOSNAP;
+}
+
+template <typename I>
+bool CopyupRequest<I>::is_deep_copy() const {
+  return !m_ictx->migration_info.empty() &&
+    m_ictx->migration_info.snap_map.size() > 1;
+}
+
 template <typename I>
 void CopyupRequest<I>::send()
 {
   m_state = STATE_READ_FROM_PARENT;
+
+  if (is_deep_copy()) {
+    bool flatten = is_copyup_required() ? true : m_ictx->migration_info.flatten;
+    auto req = deep_copy::ObjectCopyRequest<I>::create(
+        m_ictx->parent, m_ictx->parent->parent /* TODO */, m_ictx,
+        m_ictx->migration_info.snap_map, m_object_no, flatten,
+        util::create_context_callback(this));
+    ldout(m_ictx->cct, 20) << "deep copy object req " << req
+                           << ", object_no " << m_object_no
+                           << ", flatten " << flatten
+                           << dendl;
+    req->send();
+    return;
+  }
+
   AioCompletion *comp = AioCompletion::create_and_start(
     this, m_ictx, AIO_TYPE_READ);
 
@@ -240,8 +279,8 @@ bool CopyupRequest<I>::should_complete(int r)
     ldout(cct, 20) << "READ_FROM_PARENT" << dendl;
     remove_from_list();
     if (r >= 0 || r == -ENOENT) {
-      if (!is_copyup_required()) {
-        ldout(cct, 20) << "nop, skipping" << dendl;
+      if (!is_copyup_required() && !is_update_object_map_required()) {
+        ldout(cct, 20) << "skipping" << dendl;
         return true;
       }
 
@@ -257,6 +296,10 @@ bool CopyupRequest<I>::should_complete(int r)
   case STATE_OBJECT_MAP:
     ldout(cct, 20) << "OBJECT_MAP" << dendl;
     assert(r == 0);
+    if (!is_copyup_required()) {
+      ldout(cct, 20) << "skipping copyup" << dendl;
+      return true;
+    }
     return send_copyup();
 
   case STATE_COPYUP:
@@ -310,9 +353,25 @@ bool CopyupRequest<I>::send_object_map_head() {
       assert(m_ictx->exclusive_lock->is_lock_owner());
 
       RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
+
       if (!m_ictx->snaps.empty()) {
-        m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(),
-                          m_ictx->snaps.end());
+        if (is_deep_copy()) {
+          // don't copy ids for the snaps updated by object deep copy
+          std::set<uint64_t> deep_copied;
+          for (auto &it : m_ictx->migration_info.snap_map) {
+            if (it.first != CEPH_NOSNAP) {
+              deep_copied.insert(it.second.front());
+            }
+          }
+          std::copy_if(m_ictx->snaps.begin(), m_ictx->snaps.end(),
+                       std::back_inserter(m_snap_ids),
+                       [&deep_copied](uint64_t i) {
+                         return !deep_copied.count(i);
+                       });
+        } else {
+          m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(),
+                            m_ictx->snaps.end());
+        }
       }
       if (copy_on_read &&
           (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) {
index a45a48cd61b7da90646c3b8ebe4030cd96e70e35..c92494c2f998992c5e233ffb5004dc613e66efc3 100644 (file)
@@ -113,6 +113,8 @@ private:
   bool send_object_map();
   bool send_copyup();
   bool is_copyup_required();
+  bool is_update_object_map_required();
+  bool is_deep_copy() const;
 };
 
 } // namespace io
index 23f38ee6414169ef168bbbe64cad0c1c45261052..03dfbef951de1ae70409aee4bf27276f825886cd 100644 (file)
@@ -478,7 +478,12 @@ void AbstractObjectWriteRequest<I>::write_object() {
   librados::ObjectWriteOperation write;
   if (m_copyup_enabled) {
     ldout(image_ctx->cct, 20) << "guarding write" << dendl;
-    write.assert_exists();
+    if (!image_ctx->migration_info.empty()) {
+      cls_client::assert_snapc_seq(
+        &write, m_snap_seq, cls::rbd::ASSERT_SNAPC_SEQ_NOT_GT_SNAPSET_SEQ);
+    } else {
+      write.assert_exists();
+    }
   }
 
   add_write_hint(&write);
@@ -501,7 +506,7 @@ void AbstractObjectWriteRequest<I>::handle_write_object(int r) {
   ldout(image_ctx->cct, 20) << "r=" << r << dendl;
 
   r = filter_write_result(r);
-  if (r == -ENOENT) {
+  if (r == -ENOENT || (r == -ERANGE && !image_ctx->migration_info.empty())) {
     if (m_copyup_enabled) {
       copyup();
       return;
index fee8f1d8f005c61d0356ea7559207c19dffe1357..4b21ec5fe3cd9c7c955abc043b2d97d7d99a64b3 100644 (file)
@@ -29,6 +29,7 @@
 #include "librbd/api/DiffIterate.h"
 #include "librbd/api/Group.h"
 #include "librbd/api/Image.h"
+#include "librbd/api/Migration.h"
 #include "librbd/api/Mirror.h"
 #include "librbd/api/Namespace.h"
 #include "librbd/api/Snapshot.h"
@@ -652,6 +653,105 @@ namespace librbd {
     return r;
   }
 
+  int RBD::migration_prepare(IoCtx& io_ctx, const char *image_name,
+                             IoCtx& dest_io_ctx, const char *dest_image_name,
+                             ImageOptions& opts)
+  {
+    TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+    tracepoint(librbd, migration_prepare_enter, io_ctx.get_pool_name().c_str(),
+               io_ctx.get_id(), image_name, dest_io_ctx.get_pool_name().c_str(),
+               dest_io_ctx.get_id(), dest_image_name, opts.opts);
+    int r = librbd::api::Migration<>::prepare(io_ctx, image_name, dest_io_ctx,
+                                              dest_image_name, opts);
+    tracepoint(librbd, migration_prepare_exit, r);
+    return r;
+  }
+
+  int RBD::migration_execute(IoCtx& io_ctx, const char *image_name)
+  {
+    TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+    tracepoint(librbd, migration_execute_enter, io_ctx.get_pool_name().c_str(),
+               io_ctx.get_id(), image_name);
+    librbd::NoOpProgressContext prog_ctx;
+    int r = librbd::api::Migration<>::execute(io_ctx, image_name, prog_ctx);
+    tracepoint(librbd, migration_execute_exit, r);
+    return r;
+  }
+
+  int RBD::migration_execute_with_progress(IoCtx& io_ctx,
+                                           const char *image_name,
+                                           librbd::ProgressContext &prog_ctx)
+  {
+    TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+    tracepoint(librbd, migration_execute_enter, io_ctx.get_pool_name().c_str(),
+               io_ctx.get_id(), image_name);
+    int r = librbd::api::Migration<>::execute(io_ctx, image_name, prog_ctx);
+    tracepoint(librbd, migration_execute_exit, r);
+    return r;
+  }
+
+  int RBD::migration_abort(IoCtx& io_ctx, const char *image_name)
+  {
+    TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+    tracepoint(librbd, migration_abort_enter, io_ctx.get_pool_name().c_str(),
+               io_ctx.get_id(), image_name);
+    librbd::NoOpProgressContext prog_ctx;
+    int r = librbd::api::Migration<>::abort(io_ctx, image_name, prog_ctx);
+    tracepoint(librbd, migration_abort_exit, r);
+    return r;
+  }
+
+  int RBD::migration_abort_with_progress(IoCtx& io_ctx, const char *image_name,
+                                         librbd::ProgressContext &prog_ctx)
+  {
+    TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+    tracepoint(librbd, migration_abort_enter, io_ctx.get_pool_name().c_str(),
+               io_ctx.get_id(), image_name);
+    int r = librbd::api::Migration<>::abort(io_ctx, image_name, prog_ctx);
+    tracepoint(librbd, migration_abort_exit, r);
+    return r;
+  }
+
+  int RBD::migration_commit(IoCtx& io_ctx, const char *image_name)
+  {
+    TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+    tracepoint(librbd, migration_commit_enter, io_ctx.get_pool_name().c_str(),
+               io_ctx.get_id(), image_name);
+    librbd::NoOpProgressContext prog_ctx;
+    int r = librbd::api::Migration<>::commit(io_ctx, image_name, prog_ctx);
+    tracepoint(librbd, migration_commit_exit, r);
+    return r;
+  }
+
+  int RBD::migration_commit_with_progress(IoCtx& io_ctx, const char *image_name,
+                                          librbd::ProgressContext &prog_ctx)
+  {
+    TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+    tracepoint(librbd, migration_commit_enter, io_ctx.get_pool_name().c_str(),
+               io_ctx.get_id(), image_name);
+    int r = librbd::api::Migration<>::commit(io_ctx, image_name, prog_ctx);
+    tracepoint(librbd, migration_commit_exit, r);
+    return r;
+  }
+
+  int RBD::migration_status(IoCtx& io_ctx, const char *image_name,
+                            image_migration_status_t *status,
+                            size_t status_size)
+  {
+    TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+    tracepoint(librbd, migration_status_enter, io_ctx.get_pool_name().c_str(),
+               io_ctx.get_id(), image_name);
+
+    if (status_size != sizeof(image_migration_status_t)) {
+      tracepoint(librbd, migration_status_exit, -ERANGE);
+      return -ERANGE;
+    }
+
+    int r = librbd::api::Migration<>::status(io_ctx, image_name, status);
+    tracepoint(librbd, migration_status_exit, r);
+    return r;
+  }
+
   int RBD::mirror_mode_get(IoCtx& io_ctx, rbd_mirror_mode_t *mirror_mode) {
     return librbd::api::Mirror<>::mode_get(io_ctx, mirror_mode);
   }
@@ -2943,6 +3043,153 @@ extern "C" int rbd_rename(rados_ioctx_t src_p, const char *srcname,
   return r;
 }
 
+extern "C" int rbd_migration_prepare(rados_ioctx_t p, const char *image_name,
+                                     rados_ioctx_t dest_p,
+                                     const char *dest_image_name,
+                                     rbd_image_options_t opts_)
+{
+  librados::IoCtx io_ctx;
+  librados::IoCtx::from_rados_ioctx_t(p, io_ctx);
+  librados::IoCtx dest_io_ctx;
+  librados::IoCtx::from_rados_ioctx_t(dest_p, dest_io_ctx);
+  tracepoint(librbd, migration_prepare_enter, io_ctx.get_pool_name().c_str(),
+             io_ctx.get_id(), image_name, dest_io_ctx.get_pool_name().c_str(),
+             dest_io_ctx.get_id(), dest_image_name, opts_);
+  librbd::ImageOptions opts(opts_);
+  int r = librbd::api::Migration<>::prepare(io_ctx, image_name, dest_io_ctx,
+                                            dest_image_name, opts);
+  tracepoint(librbd, migration_prepare_exit, r);
+  return r;
+}
+
+extern "C" int rbd_migration_execute(rados_ioctx_t p, const char *image_name)
+{
+  librados::IoCtx io_ctx;
+  librados::IoCtx::from_rados_ioctx_t(p, io_ctx);
+  TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+  tracepoint(librbd, migration_execute_enter, io_ctx.get_pool_name().c_str(),
+             io_ctx.get_id(), image_name);
+  librbd::NoOpProgressContext prog_ctx;
+  int r = librbd::api::Migration<>::execute(io_ctx, image_name, prog_ctx);
+  tracepoint(librbd, migration_execute_exit, r);
+  return r;
+}
+
+extern "C" int rbd_migration_execute_with_progress(rados_ioctx_t p,
+                                                   const char *name,
+                                                   librbd_progress_fn_t fn,
+                                                   void *data)
+{
+  librados::IoCtx io_ctx;
+  librados::IoCtx::from_rados_ioctx_t(p, io_ctx);
+  TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+  tracepoint(librbd, migration_execute_enter, io_ctx.get_pool_name().c_str(),
+             io_ctx.get_id(), name);
+  librbd::CProgressContext prog_ctx(fn, data);
+  int r = librbd::api::Migration<>::execute(io_ctx, name, prog_ctx);
+  tracepoint(librbd, migration_execute_exit, r);
+  return r;
+}
+
+extern "C" int rbd_migration_abort(rados_ioctx_t p, const char *image_name)
+{
+  librados::IoCtx io_ctx;
+  librados::IoCtx::from_rados_ioctx_t(p, io_ctx);
+  TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+  tracepoint(librbd, migration_abort_enter, io_ctx.get_pool_name().c_str(),
+             io_ctx.get_id(), image_name);
+  librbd::NoOpProgressContext prog_ctx;
+  int r = librbd::api::Migration<>::abort(io_ctx, image_name, prog_ctx);
+  tracepoint(librbd, migration_abort_exit, r);
+  return r;
+}
+
+extern "C" int rbd_migration_abort_with_progress(rados_ioctx_t p,
+                                                 const char *name,
+                                                 librbd_progress_fn_t fn,
+                                                 void *data)
+{
+  librados::IoCtx io_ctx;
+  librados::IoCtx::from_rados_ioctx_t(p, io_ctx);
+  TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+  tracepoint(librbd, migration_abort_enter, io_ctx.get_pool_name().c_str(),
+             io_ctx.get_id(), name);
+  librbd::CProgressContext prog_ctx(fn, data);
+  int r = librbd::api::Migration<>::abort(io_ctx, name, prog_ctx);
+  tracepoint(librbd, migration_abort_exit, r);
+  return r;
+}
+
+extern "C" int rbd_migration_commit(rados_ioctx_t p, const char *image_name)
+{
+  librados::IoCtx io_ctx;
+  librados::IoCtx::from_rados_ioctx_t(p, io_ctx);
+  TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+  tracepoint(librbd, migration_commit_enter, io_ctx.get_pool_name().c_str(),
+             io_ctx.get_id(), image_name);
+  librbd::NoOpProgressContext prog_ctx;
+  int r = librbd::api::Migration<>::commit(io_ctx, image_name, prog_ctx);
+  tracepoint(librbd, migration_commit_exit, r);
+  return r;
+}
+
+extern "C" int rbd_migration_commit_with_progress(rados_ioctx_t p,
+                                                  const char *name,
+                                                  librbd_progress_fn_t fn,
+                                                  void *data)
+{
+  librados::IoCtx io_ctx;
+  librados::IoCtx::from_rados_ioctx_t(p, io_ctx);
+  TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+  tracepoint(librbd, migration_commit_enter, io_ctx.get_pool_name().c_str(),
+             io_ctx.get_id(), name);
+  librbd::CProgressContext prog_ctx(fn, data);
+  int r = librbd::api::Migration<>::commit(io_ctx, name, prog_ctx);
+  tracepoint(librbd, migration_commit_exit, r);
+  return r;
+}
+
+extern "C" int rbd_migration_status(rados_ioctx_t p, const char *image_name,
+                                    rbd_image_migration_status_t *status,
+                                    size_t status_size)
+{
+  librados::IoCtx io_ctx;
+  librados::IoCtx::from_rados_ioctx_t(p, io_ctx);
+  TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
+  tracepoint(librbd, migration_status_enter, io_ctx.get_pool_name().c_str(),
+             io_ctx.get_id(), image_name);
+
+  if (status_size != sizeof(rbd_image_migration_status_t)) {
+    tracepoint(librbd, migration_status_exit, -ERANGE);
+    return -ERANGE;
+  }
+
+  librbd::image_migration_status_t cpp_status;
+  int r = librbd::api::Migration<>::status(io_ctx, image_name, &cpp_status);
+  if (r >= 0) {
+    status->source_pool_id = cpp_status.source_pool_id;
+    status->source_image_name = strdup(cpp_status.source_image_name.c_str());
+    status->source_image_id = strdup(cpp_status.source_image_id.c_str());
+    status->dest_pool_id = cpp_status.dest_pool_id;
+    status->dest_image_name = strdup(cpp_status.dest_image_name.c_str());
+    status->dest_image_id = strdup(cpp_status.dest_image_id.c_str());
+    status->state = cpp_status.state;
+    status->state_description = strdup(cpp_status.state_description.c_str());
+  }
+
+  tracepoint(librbd, migration_status_exit, r);
+  return r;
+}
+
+extern "C" void rbd_migration_status_cleanup(rbd_image_migration_status_t *s)
+{
+  free(s->source_image_name);
+  free(s->source_image_id);
+  free(s->dest_image_name);
+  free(s->dest_image_id);
+  free(s->state_description);
+}
+
 extern "C" int rbd_open(rados_ioctx_t p, const char *name, rbd_image_t *image,
                        const char *snap_name)
 {
diff --git a/src/librbd/operation/MigrateRequest.cc b/src/librbd/operation/MigrateRequest.cc
new file mode 100644 (file)
index 0000000..5d51f33
--- /dev/null
@@ -0,0 +1,226 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/MigrateRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/deep_copy/ObjectCopyRequest.h"
+#include "librbd/io/AsyncOperation.h"
+#include "librbd/io/ImageRequestWQ.h"
+#include "librbd/io/ObjectRequest.h"
+#include "osdc/Striper.h"
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::MigrateRequest: " << this << " " \
+                           << __func__ << ": "
+
+namespace librbd {
+namespace operation {
+
+using util::create_context_callback;
+using util::create_async_context_callback;
+
+namespace {
+
+template <typename I>
+class C_MigrateObject : public C_AsyncObjectThrottle<I> {
+public:
+  C_MigrateObject(AsyncObjectThrottle<I> &throttle, I *image_ctx,
+                  ::SnapContext snapc, uint64_t object_no)
+    : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_snapc(snapc),
+      m_object_no(object_no) {
+  }
+
+  int send() override {
+    I &image_ctx = this->m_image_ctx;
+    assert(image_ctx.owner_lock.is_locked());
+    CephContext *cct = image_ctx.cct;
+
+    if (image_ctx.exclusive_lock != nullptr &&
+        !image_ctx.exclusive_lock->is_lock_owner()) {
+      ldout(cct, 1) << "lost exclusive lock during migrate" << dendl;
+      return -ERESTART;
+    }
+
+    start_async_op();
+    return 0;
+  }
+
+private:
+  uint64_t m_object_size;
+  ::SnapContext m_snapc;
+  uint64_t m_object_no;
+
+  io::AsyncOperation m_async_op;
+
+  void start_async_op() {
+    I &image_ctx = this->m_image_ctx;
+    assert(image_ctx.owner_lock.is_locked());
+    CephContext *cct = image_ctx.cct;
+    ldout(cct, 10) << dendl;
+
+    m_async_op.start_op(image_ctx);
+
+    if (!image_ctx.io_work_queue->writes_blocked()) {
+      migrate_object();
+      return;
+    }
+
+    auto ctx = create_async_context_callback(
+      image_ctx, create_context_callback<
+        C_MigrateObject<I>, &C_MigrateObject<I>::handle_start_async_op>(this));
+    m_async_op.finish_op();
+    image_ctx.io_work_queue->wait_on_writes_unblocked(ctx);
+  }
+
+  void handle_start_async_op(int r) {
+    I &image_ctx = this->m_image_ctx;
+    CephContext *cct = image_ctx.cct;
+    ldout(cct, 10) << "r=" << r << dendl;
+
+    if (r < 0) {
+      lderr(cct) << "failed to start async op: " << cpp_strerror(r) << dendl;
+      this->complete(r);
+      return;
+    }
+
+    RWLock::RLocker owner_locker(image_ctx.owner_lock);
+    start_async_op();
+  }
+
+  bool is_within_overlap_bounds() {
+    I &image_ctx = this->m_image_ctx;
+    RWLock::RLocker snap_locker(image_ctx.snap_lock);
+
+    auto overlap = std::min(image_ctx.size, image_ctx.migration_info.overlap);
+    return overlap > 0 &&
+      Striper::get_num_objects(image_ctx.layout, overlap) > m_object_no;
+  }
+
+  void migrate_object() {
+    I &image_ctx = this->m_image_ctx;
+    assert(image_ctx.owner_lock.is_locked());
+    CephContext *cct = image_ctx.cct;
+
+    auto ctx = create_context_callback<
+      C_MigrateObject<I>, &C_MigrateObject<I>::handle_migrate_object>(this);
+
+    if (is_within_overlap_bounds()) {
+      bufferlist bl;
+      string oid = image_ctx.get_object_name(m_object_no);
+      auto req = new io::ObjectWriteRequest<I>(&image_ctx, oid, m_object_no, 0,
+                                               std::move(bl), m_snapc, 0, {},
+                                               ctx);
+
+      ldout(cct, 20) << "copyup object req " << req << ", object_no "
+                     << m_object_no << dendl;
+
+      req->send();
+    } else {
+      assert(image_ctx.parent != nullptr);
+
+      auto req = deep_copy::ObjectCopyRequest<I>::create(
+        image_ctx.parent, image_ctx.parent->parent /* TODO */, &image_ctx,
+        image_ctx.migration_info.snap_map, m_object_no,
+        image_ctx.migration_info.flatten, ctx);
+
+      ldout(cct, 20) << "deep copy object req " << req << ", object_no "
+                     << m_object_no << dendl;
+      req->send();
+    }
+  }
+
+  void handle_migrate_object(int r) {
+    CephContext *cct = this->m_image_ctx.cct;
+    ldout(cct, 10) << "r=" << r << dendl;
+
+    m_async_op.finish_op();
+    this->complete(r);
+  }
+};
+
+} // anonymous namespace
+
+template <typename I>
+void MigrateRequest<I>::send_op() {
+  I &image_ctx = this->m_image_ctx;
+  assert(image_ctx.owner_lock.is_locked());
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 10) << dendl;
+
+  migrate_objects();
+}
+
+template <typename I>
+bool MigrateRequest<I>::should_complete(int r) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+  }
+
+  return true;
+}
+
+template <typename I>
+void MigrateRequest<I>::migrate_objects() {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  assert(image_ctx.owner_lock.is_locked());
+
+  uint64_t overlap_objects = get_num_overlap_objects();
+
+  ldout(cct, 10) << "from 0 to " << overlap_objects << dendl;
+
+  auto ctx = create_context_callback<
+    MigrateRequest<I>, &MigrateRequest<I>::handle_migrate_objects>(this);
+
+  typename AsyncObjectThrottle<I>::ContextFactory context_factory(
+    boost::lambda::bind(boost::lambda::new_ptr<C_MigrateObject<I> >(),
+      boost::lambda::_1, &image_ctx, image_ctx.snapc, boost::lambda::_2));
+  AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
+    this, image_ctx, context_factory, ctx, &m_prog_ctx, 0, overlap_objects);
+  throttle->start_ops(image_ctx.concurrent_management_ops);
+}
+
+template <typename I>
+void MigrateRequest<I>::handle_migrate_objects(int r) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << "r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to migrate objects: " << cpp_strerror(r) << dendl;
+  }
+
+  this->complete(r);
+}
+
+template <typename I>
+uint64_t MigrateRequest<I>::get_num_overlap_objects() {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 10) << dendl;
+
+  RWLock::RLocker snap_locker(image_ctx.snap_lock);
+  RWLock::RLocker parent_locker(image_ctx.parent_lock);
+
+  auto overlap = image_ctx.migration_info.overlap;
+
+  return overlap > 0 ?
+    Striper::get_num_objects(image_ctx.layout, overlap) : 0;
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::MigrateRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/MigrateRequest.h b/src/librbd/operation/MigrateRequest.h
new file mode 100644 (file)
index 0000000..a74dab3
--- /dev/null
@@ -0,0 +1,69 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_MIGRATE_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_MIGRATE_REQUEST_H
+
+#include "librbd/operation/Request.h"
+#include "common/snap_types.h"
+#include "librbd/Types.h"
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class MigrateRequest : public Request<ImageCtxT>
+{
+public:
+  MigrateRequest(ImageCtxT &image_ctx, Context *on_finish,
+                 ProgressContext &prog_ctx)
+    : Request<ImageCtxT>(image_ctx, on_finish), m_prog_ctx(prog_ctx) {
+  }
+
+protected:
+  void send_op() override;
+  bool should_complete(int r) override;
+  bool can_affect_io() const override {
+    return true;
+  }
+  journal::Event create_event(uint64_t op_tid) const override {
+    assert(0);
+    return journal::UnknownEvent();
+  }
+
+private:
+  /**
+   * Migrate goes through the following state machine to copy objects
+   * from the parent (migrating source) image:
+   *
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * MIGRATE_OBJECTS
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   *
+   */
+
+  ProgressContext &m_prog_ctx;
+
+  void migrate_objects();
+  void handle_migrate_objects(int r);
+
+  uint64_t get_num_overlap_objects();
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::MigrateRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_MIGRATE_REQUEST_H
index ced4f678e3ce612cf6f5c18fb9111ebfe6498cf0..2c685849259df4246a9daa1c527d86ca91c8b855 100644 (file)
@@ -8,6 +8,7 @@ set(librbd_test
   test_BlockGuard.cc
   test_DeepCopy.cc
   test_Groups.cc
+  test_Migration.cc
   test_MirroringWatcher.cc
   test_ObjectMap.cc
   test_Operations.cc
index ae8736a9dfb0882e53b05fe25af918a317d1e3ef..5e01b5c84745c200d87617933b8bfabb0b5e7eac 100644 (file)
@@ -40,14 +40,16 @@ template <>
 struct RefreshParentRequest<MockRefreshImageCtx> {
   static RefreshParentRequest* s_instance;
   static RefreshParentRequest* create(MockRefreshImageCtx &mock_image_ctx,
-                                      const ParentInfo& parent_md,
+                                      const ParentInfo &parent_md,
+                                      const MigrationInfo &migration_info,
                                       Context *on_finish) {
     assert(s_instance != nullptr);
     s_instance->on_finish = on_finish;
     return s_instance;
   }
   static bool is_refresh_required(MockRefreshImageCtx &mock_image_ctx,
-                                  const ParentInfo& parent_md) {
+                                  const ParentInfo& parent_md,
+                                  const MigrationInfo &migration_info) {
     assert(s_instance != nullptr);
     return s_instance->is_refresh_required();
   }
@@ -134,6 +136,17 @@ public:
   typedef RefreshParentRequest<MockRefreshImageCtx> MockRefreshParentRequest;
   typedef io::ImageDispatchSpec<librbd::MockRefreshImageCtx> MockIoImageDispatchSpec;
 
+  void set_v1_migration_header(ImageCtx *ictx) {
+    bufferlist hdr;
+    ASSERT_EQ(0, read_header_bl(ictx->md_ctx, ictx->header_oid, hdr, nullptr));
+    ASSERT_TRUE(hdr.length() >= sizeof(rbd_obj_header_ondisk));
+    ASSERT_EQ(0, memcmp(RBD_HEADER_TEXT, hdr.c_str(), sizeof(RBD_HEADER_TEXT)));
+
+    bufferlist::iterator it = hdr.begin();
+    it.copy_in(sizeof(RBD_MIGRATE_HEADER_TEXT), RBD_MIGRATE_HEADER_TEXT);
+    ASSERT_EQ(0, ictx->md_ctx.write(ictx->header_oid, hdr, hdr.length(), 0));
+  }
+
   void expect_set_require_lock(MockRefreshImageCtx &mock_image_ctx,
                                librbd::io::Direction direction, bool enabled) {
     EXPECT_CALL(*mock_image_ctx.io_work_queue, set_require_lock(direction,
@@ -201,6 +214,17 @@ public:
     }
   }
 
+  void expect_get_migration_header(MockRefreshImageCtx &mock_image_ctx, int r) {
+    auto &expect = EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
+                               exec(mock_image_ctx.header_oid, _, StrEq("rbd"),
+                                    StrEq("migration_get"), _, _, _));
+    if (r < 0) {
+      expect.WillOnce(Return(r));
+    } else {
+      expect.WillOnce(DoDefault());
+    }
+  }
+
   void expect_get_metadata(MockRefreshImageCtx &mock_image_ctx, int r) {
     auto &expect = EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
                                exec(mock_image_ctx.header_oid, _, StrEq("rbd"), StrEq("metadata_list"), _, _, _));
index 47a637740339124321c0c8eb518545f3247717aa..54b5acf3a26ac591a2a31f676b44e40532cd323e 100644 (file)
@@ -357,10 +357,8 @@ public:
   }
 
   void expect_test_features(MockTestImageCtx &mock_image_ctx) {
-    if (m_mock_imctx->exclusive_lock != nullptr) {
-      EXPECT_CALL(mock_image_ctx, test_features(_))
-        .WillRepeatedly(TestFeatures(&mock_image_ctx));
-    }
+    EXPECT_CALL(mock_image_ctx, test_features(_))
+      .WillRepeatedly(TestFeatures(&mock_image_ctx));
   }
 
   void expect_set_journal_policy(MockTestImageCtx &mock_image_ctx) {
@@ -391,6 +389,7 @@ TEST_F(TestMockImageRemoveRequest, SuccessV1) {
 
   InSequence seq;
   expect_state_open(*m_mock_imctx, 0);
+  expect_test_features(*m_mock_imctx);
 
   MockListWatchersRequest mock_list_watchers_request;
   expect_list_image_watchers(*m_mock_imctx, mock_list_watchers_request, 0);
@@ -446,11 +445,16 @@ TEST_F(TestMockImageRemoveRequest, SuccessV2CloneV1) {
 
   InSequence seq;
   expect_state_open(*m_mock_imctx, 0);
-
   expect_test_features(*m_mock_imctx);
+
+  if (m_mock_imctx->exclusive_lock != nullptr) {
+    expect_test_features(*m_mock_imctx);
+  }
   expect_set_journal_policy(*m_mock_imctx);
   expect_shut_down_exclusive_lock(*m_mock_imctx, *mock_exclusive_lock, 0);
 
+  expect_test_features(*m_mock_imctx);
+
   MockListWatchersRequest mock_list_watchers_request;
   expect_list_image_watchers(*m_mock_imctx, mock_list_watchers_request, 0);
 
@@ -499,11 +503,16 @@ TEST_F(TestMockImageRemoveRequest, SuccessV2CloneV2) {
 
   InSequence seq;
   expect_state_open(*m_mock_imctx, 0);
-
   expect_test_features(*m_mock_imctx);
+
+  if (m_mock_imctx->exclusive_lock != nullptr) {
+    expect_test_features(*m_mock_imctx);
+  }
   expect_set_journal_policy(*m_mock_imctx);
   expect_shut_down_exclusive_lock(*m_mock_imctx, *mock_exclusive_lock, 0);
 
+  expect_test_features(*m_mock_imctx);
+
   MockListWatchersRequest mock_list_watchers_request;
   expect_list_image_watchers(*m_mock_imctx, mock_list_watchers_request, 0);
 
@@ -552,11 +561,14 @@ TEST_F(TestMockImageRemoveRequest, NotExistsV2) {
 
   InSequence seq;
   expect_state_open(*m_mock_imctx, 0);
+  expect_test_features(*m_mock_imctx);
 
   expect_test_features(*m_mock_imctx);
   expect_set_journal_policy(*m_mock_imctx);
   expect_shut_down_exclusive_lock(*m_mock_imctx, *mock_exclusive_lock, 0);
 
+  expect_test_features(*m_mock_imctx);
+
   MockListWatchersRequest mock_list_watchers_request;
   expect_list_image_watchers(*m_mock_imctx, mock_list_watchers_request, 0);
 
@@ -598,6 +610,7 @@ TEST_F(TestMockImageRemoveRequest, OperationsDisabled) {
 
   InSequence seq;
   expect_state_open(*m_mock_imctx, 0);
+  expect_test_features(*m_mock_imctx);
   expect_state_close(*m_mock_imctx);
 
   C_SaferCond ctx;
@@ -610,12 +623,31 @@ TEST_F(TestMockImageRemoveRequest, OperationsDisabled) {
   ASSERT_EQ(-EROFS, ctx.wait());
 }
 
+TEST_F(TestMockImageRemoveRequest, Migration) {
+  m_mock_imctx->features |= RBD_FEATURE_MIGRATING;
+
+  InSequence seq;
+  expect_state_open(*m_mock_imctx, 0);
+  expect_test_features(*m_mock_imctx);
+  expect_state_close(*m_mock_imctx);
+
+  C_SaferCond ctx;
+  librbd::NoOpProgressContext no_op;
+  ContextWQ op_work_queue;
+  MockRemoveRequest *req = MockRemoveRequest::create(
+    m_ioctx, m_image_name, "", true, false, no_op, &op_work_queue, &ctx);
+  req->send();
+
+  ASSERT_EQ(-EBUSY, ctx.wait());
+}
+
 TEST_F(TestMockImageRemoveRequest, Snapshots) {
   m_mock_imctx->snap_info = {
     {123, {"snap1", {cls::rbd::UserSnapshotNamespace{}}, {}, {}, {}, {}, {}}}};
 
   InSequence seq;
   expect_state_open(*m_mock_imctx, 0);
+  expect_test_features(*m_mock_imctx);
   expect_state_close(*m_mock_imctx);
 
   C_SaferCond ctx;
@@ -643,11 +675,16 @@ TEST_F(TestMockImageRemoveRequest, AutoDeleteSnapshots) {
 
   InSequence seq;
   expect_state_open(*m_mock_imctx, 0);
-
   expect_test_features(*m_mock_imctx);
+
+  if (m_mock_imctx->exclusive_lock != nullptr) {
+    expect_test_features(*m_mock_imctx);
+  }
   expect_set_journal_policy(*m_mock_imctx);
   expect_shut_down_exclusive_lock(*m_mock_imctx, *mock_exclusive_lock, 0);
 
+  expect_test_features(*m_mock_imctx);
+
   MockListWatchersRequest mock_list_watchers_request;
   expect_list_image_watchers(*m_mock_imctx, mock_list_watchers_request, 0);
 
index 44ade806c755c0143aa5faeb5aee5db2bdaffd55..b08d408200d025611c0b48178af970235f6e8cba 100644 (file)
@@ -111,7 +111,8 @@ struct MockImageCtx {
       mirroring_replay_delay(image_ctx.mirroring_replay_delay),
       non_blocking_aio(image_ctx.non_blocking_aio),
       blkin_trace_all(image_ctx.blkin_trace_all),
-      enable_alloc_hint(image_ctx.enable_alloc_hint)
+      enable_alloc_hint(image_ctx.enable_alloc_hint),
+      ignore_migrating(image_ctx.ignore_migrating)
   {
     md_ctx.dup(image_ctx.md_ctx);
     data_ctx.dup(image_ctx.data_ctx);
@@ -264,6 +265,7 @@ struct MockImageCtx {
   std::string id;
   std::string name;
   ParentInfo parent_md;
+  MigrationInfo migration_info;
   char *format_string;
   cls::rbd::GroupSpec group_spec;
 
@@ -316,6 +318,7 @@ struct MockImageCtx {
   bool non_blocking_aio;
   bool blkin_trace_all;
   bool enable_alloc_hint;
+  bool ignore_migrating;
 };
 
 } // namespace librbd
diff --git a/src/test/librbd/test_Migration.cc b/src/test/librbd/test_Migration.cc
new file mode 100644 (file)
index 0000000..9cebb17
--- /dev/null
@@ -0,0 +1,1020 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/librados/test.h"
+#include "test/librbd/test_fixture.h"
+#include "test/librbd/test_support.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/api/Group.h"
+#include "librbd/api/Image.h"
+#include "librbd/api/Migration.h"
+#include "librbd/api/Mirror.h"
+#include "librbd/internal.h"
+#include "librbd/io/ImageRequestWQ.h"
+#include "librbd/io/ReadResult.h"
+
+void register_test_migration() {
+}
+
+struct TestMigration : public TestFixture {
+  void SetUp() override {
+    TestFixture::SetUp();
+
+    open_image(m_ioctx, m_image_name, &m_ictx);
+    m_image_id = m_ictx->id;
+
+    std::string ref_image_name = get_temp_image_name();
+    ASSERT_EQ(0, create_image_pp(m_rbd, m_ioctx, ref_image_name, m_ictx->size));
+    EXPECT_EQ(0, _rados.ioctx_create2(m_ioctx.get_id(), m_ref_ioctx));
+    open_image(m_ref_ioctx, ref_image_name, &m_ref_ictx);
+
+    resize(20 * (1 << 22));
+  }
+
+  void TearDown() override {
+    if (m_ref_ictx != nullptr) {
+      close_image(m_ref_ictx);
+    }
+    if (m_ictx != nullptr) {
+      close_image(m_ictx);
+    }
+
+    m_other_pool_ioctx.close();
+
+    TestFixture::TearDown();
+  }
+
+  void require_other_pool() {
+    std::string pool_name = get_temp_pool_name("test-librbd-");
+    ASSERT_EQ("", create_one_pool_pp(pool_name, m_other_pool_rados));
+    ASSERT_EQ(0, m_other_pool_rados.ioctx_create(pool_name.c_str(),
+                                                 m_other_pool_ioctx));
+  }
+
+  bool is_librados_test_stub() {
+    std::string fsid;
+    EXPECT_EQ(0, _rados.cluster_fsid(&fsid));
+    return fsid == "00000000-1111-2222-3333-444444444444";
+  }
+
+  void compare(const std::string &description = "") {
+    vector<librbd::snap_info_t> src_snaps, dst_snaps;
+
+    EXPECT_EQ(m_ref_ictx->size, m_ictx->size);
+    EXPECT_EQ(0, librbd::snap_list(m_ref_ictx, src_snaps));
+    EXPECT_EQ(0, librbd::snap_list(m_ictx, dst_snaps));
+    EXPECT_EQ(src_snaps.size(), dst_snaps.size());
+    for (size_t i = 0; i <= src_snaps.size(); i++) {
+      const char *src_snap_name = nullptr;
+      const char *dst_snap_name = nullptr;
+      if (i < src_snaps.size()) {
+        EXPECT_EQ(src_snaps[i].name, dst_snaps[i].name);
+        src_snap_name = src_snaps[i].name.c_str();
+        dst_snap_name = dst_snaps[i].name.c_str();
+      }
+      EXPECT_EQ(0, librbd::api::Image<>::snap_set(
+                     m_ref_ictx, cls::rbd::UserSnapshotNamespace(),
+                     src_snap_name));
+      EXPECT_EQ(0, librbd::api::Image<>::snap_set(
+                     m_ictx, cls::rbd::UserSnapshotNamespace(),
+                     dst_snap_name));
+      compare_snaps(
+        description + " snap: " + (src_snap_name ? src_snap_name : "null"),
+        m_ref_ictx, m_ictx);
+    }
+  }
+
+  void compare_snaps(const std::string &description, librbd::ImageCtx *src_ictx,
+                     librbd::ImageCtx *dst_ictx) {
+    uint64_t src_size, dst_size;
+    {
+      RWLock::RLocker src_locker(src_ictx->snap_lock);
+      RWLock::RLocker dst_locker(dst_ictx->snap_lock);
+      src_size = src_ictx->get_image_size(src_ictx->snap_id);
+      dst_size = dst_ictx->get_image_size(dst_ictx->snap_id);
+    }
+    if (src_size != dst_size) {
+      std::cout << description << ": size differs" << std::endl;
+      EXPECT_EQ(src_size, dst_size);
+    }
+
+    if (dst_ictx->test_features(RBD_FEATURE_LAYERING)) {
+      bool flags_set;
+      EXPECT_EQ(0, dst_ictx->test_flags(RBD_FLAG_OBJECT_MAP_INVALID,
+                                        &flags_set));
+      EXPECT_FALSE(flags_set);
+    }
+
+    ssize_t read_size = 1 << src_ictx->order;
+    uint64_t offset = 0;
+    while (offset < src_size) {
+      read_size = std::min(read_size, static_cast<ssize_t>(src_size - offset));
+
+      bufferptr src_ptr(read_size);
+      bufferlist src_bl;
+      src_bl.push_back(src_ptr);
+      librbd::io::ReadResult src_result{&src_bl};
+      EXPECT_EQ(read_size, src_ictx->io_work_queue->read(
+                  offset, read_size, librbd::io::ReadResult{src_result}, 0));
+
+      bufferptr dst_ptr(read_size);
+      bufferlist dst_bl;
+      dst_bl.push_back(dst_ptr);
+      librbd::io::ReadResult dst_result{&dst_bl};
+      EXPECT_EQ(read_size, dst_ictx->io_work_queue->read(
+                  offset, read_size, librbd::io::ReadResult{dst_result}, 0));
+
+      if (!src_bl.contents_equal(dst_bl)) {
+        std::cout << description
+                  << ", block " << offset << "~" << read_size << " differs"
+                  << std::endl;
+        char *c = getenv("TEST_RBD_MIGRATION_VERBOSE");
+        if (c != NULL && *c != '\0') {
+          std::cout << "src block: " << std::endl; src_bl.hexdump(std::cout);
+          std::cout << "dst block: " << std::endl; dst_bl.hexdump(std::cout);
+        }
+      }
+      EXPECT_TRUE(src_bl.contents_equal(dst_bl));
+      offset += read_size;
+    }
+  }
+
+  void open_image(librados::IoCtx& io_ctx, const std::string &name,
+                  librbd::ImageCtx **ictx) {
+    *ictx = new librbd::ImageCtx(name.c_str(), "", nullptr, io_ctx, false);
+    m_ictxs.insert(*ictx);
+
+    ASSERT_EQ(0, (*ictx)->state->open(0));
+  }
+
+  void migration_prepare(librados::IoCtx& dst_io_ctx,
+                         const std::string &dst_name, int r = 0) {
+    std::cout << __func__ << std::endl;
+
+    close_image(m_ictx);
+    m_ictx = nullptr;
+
+    EXPECT_EQ(r, librbd::api::Migration<>::prepare(m_ioctx, m_image_name,
+                                                   dst_io_ctx, dst_name,
+                                                   m_opts));
+    if (r == 0) {
+      open_image(dst_io_ctx, dst_name, &m_ictx);
+    } else {
+      open_image(m_ioctx, m_image_name, &m_ictx);
+    }
+    compare("after prepare");
+  }
+
+  void migration_execute(librados::IoCtx& io_ctx, const std::string &name,
+                         int r = 0) {
+    std::cout << __func__ << std::endl;
+
+    librbd::NoOpProgressContext no_op;
+    EXPECT_EQ(r, librbd::api::Migration<>::execute(io_ctx, name, no_op));
+  }
+
+  void migration_abort(librados::IoCtx& io_ctx, const std::string &name,
+                       int r = 0) {
+    std::cout << __func__ << std::endl;
+
+    std::string dst_name = m_ictx->name;
+    close_image(m_ictx);
+    m_ictx = nullptr;
+
+    librbd::NoOpProgressContext no_op;
+    EXPECT_EQ(r, librbd::api::Migration<>::abort(io_ctx, name, no_op));
+
+    if (r == 0) {
+      open_image(m_ioctx, m_image_name, &m_ictx);
+    } else {
+      open_image(m_ioctx, dst_name, &m_ictx);
+    }
+
+    compare("after abort");
+  }
+
+  void migration_commit(librados::IoCtx& io_ctx, const std::string &name) {
+    std::cout << __func__ << std::endl;
+
+    librbd::NoOpProgressContext no_op;
+    EXPECT_EQ(0, librbd::api::Migration<>::commit(io_ctx, name, no_op));
+
+    compare("after commit");
+  }
+
+  void migration_status(librbd::image_migration_state_t state) {
+    librbd::image_migration_status_t status;
+    EXPECT_EQ(0, librbd::api::Migration<>::status(m_ioctx, m_image_name,
+                                                  &status));
+    EXPECT_EQ(status.source_pool_id, m_ioctx.get_id());
+    EXPECT_EQ(status.source_image_name, m_image_name);
+    EXPECT_EQ(status.source_image_id, m_image_id);
+    EXPECT_EQ(status.dest_pool_id, m_ictx->md_ctx.get_id());
+    EXPECT_EQ(status.dest_image_name, m_ictx->name);
+    EXPECT_EQ(status.dest_image_id, m_ictx->id);
+    EXPECT_EQ(status.state, state);
+  }
+
+  void migrate(librados::IoCtx& dst_io_ctx, const std::string &dst_name) {
+    migration_prepare(dst_io_ctx, dst_name);
+    migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+    migration_execute(dst_io_ctx, dst_name);
+    migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+    migration_commit(dst_io_ctx, dst_name);
+  }
+
+  void write(uint64_t off, uint64_t len, char c) {
+    std::cout << "write: " << c << " " << off << "~" << len << std::endl;
+
+    bufferlist ref_bl;
+    ref_bl.append(std::string(len, c));
+    ASSERT_EQ(len, m_ref_ictx->io_work_queue->write(off, len, std::move(ref_bl),
+                                                    0));
+    bufferlist bl;
+    bl.append(std::string(len, c));
+    ASSERT_EQ(len, m_ictx->io_work_queue->write(off, len, std::move(bl), 0));
+  }
+
+  void discard(uint64_t off, uint64_t len) {
+    std::cout << "discard: " << off << "~" << len << std::endl;
+
+    ASSERT_EQ(static_cast<ssize_t>(len),
+              m_ref_ictx->io_work_queue->discard(off, len, false));
+    ASSERT_EQ(static_cast<ssize_t>(len),
+              m_ictx->io_work_queue->discard(off, len, false));
+  }
+
+  void flush() {
+    ASSERT_EQ(0, m_ref_ictx->io_work_queue->flush());
+    ASSERT_EQ(0, m_ictx->io_work_queue->flush());
+  }
+
+  void snap_create(const std::string &snap_name) {
+    std::cout << "snap_create: " << snap_name << std::endl;
+
+    flush();
+
+    ASSERT_EQ(0, TestFixture::snap_create(*m_ref_ictx, snap_name));
+    ASSERT_EQ(0, TestFixture::snap_create(*m_ictx, snap_name));
+  }
+
+  void snap_protect(const std::string &snap_name) {
+    std::cout << "snap_protect: " << snap_name << std::endl;
+
+    ASSERT_EQ(0, TestFixture::snap_protect(*m_ref_ictx, snap_name));
+    ASSERT_EQ(0, TestFixture::snap_protect(*m_ictx, snap_name));
+  }
+
+  void clone(const std::string &snap_name) {
+    snap_protect(snap_name);
+
+    int order = m_ref_ictx->order;
+    uint64_t features;
+    ASSERT_EQ(0, librbd::get_features(m_ref_ictx, &features));
+    features &= ~RBD_FEATURES_IMPLICIT_ENABLE;
+
+    std::string ref_clone_name = get_temp_image_name();
+    std::string clone_name = get_temp_image_name();
+
+    std::cout << "clone " << m_ictx->name << " -> " << clone_name
+              << std::endl;
+
+    ASSERT_EQ(0, librbd::clone(m_ref_ictx->md_ctx, m_ref_ictx->name.c_str(),
+                               snap_name.c_str(), m_ref_ioctx,
+                               ref_clone_name.c_str(), features, &order,
+                               m_ref_ictx->stripe_unit,
+                               m_ref_ictx->stripe_count));
+
+    ASSERT_EQ(0, librbd::clone(m_ictx->md_ctx, m_ictx->name.c_str(),
+                               snap_name.c_str(), m_ioctx,
+                               clone_name.c_str(), features, &order,
+                               m_ictx->stripe_unit,
+                               m_ictx->stripe_count));
+
+    close_image(m_ref_ictx);
+    open_image(m_ref_ioctx, ref_clone_name, &m_ref_ictx);
+
+    close_image(m_ictx);
+    open_image(m_ioctx, clone_name, &m_ictx);
+    m_image_name = m_ictx->name;
+    m_image_id = m_ictx->id;
+  }
+
+  void resize(uint64_t size) {
+    std::cout << "resize: " << size << std::endl;
+
+    librbd::NoOpProgressContext no_op;
+    ASSERT_EQ(0, m_ref_ictx->operations->resize(size, true, no_op));
+    ASSERT_EQ(0, m_ictx->operations->resize(size, true, no_op));
+  }
+
+  void test_no_snaps() {
+    uint64_t len = (1 << m_ictx->order) * 2 + 1;
+    write(0 * len, len, '1');
+    write(2 * len, len, '1');
+    flush();
+  }
+
+  void test_snaps() {
+    uint64_t len = (1 << m_ictx->order) * 2 + 1;
+    write(0 * len, len, '1');
+    snap_create("snap1");
+    write(1 * len, len, '1');
+
+    write(0 * len, 1000, 'X');
+    discard(1000 + 10, 1000);
+
+    snap_create("snap2");
+
+    write(1 * len, 1000, 'X');
+    discard(2 * len + 10, 1000);
+
+    uint64_t size = m_ictx->size;
+
+    resize(size << 1);
+
+    write(size - 1, len, '2');
+
+    snap_create("snap3");
+
+    resize(size);
+
+    discard(size - 1, 1);
+
+    flush();
+  }
+
+  void test_clone() {
+    uint64_t len = (1 << m_ictx->order) * 2 + 1;
+    write(0 * len, len, 'X');
+    write(2 * len, len, 'X');
+
+    snap_create("snap");
+    clone("snap");
+
+    write(0, 1000, 'X');
+    discard(1010, 1000);
+
+    snap_create("snap");
+    clone("snap");
+
+    write(1000, 1000, 'X');
+    discard(2010, 1000);
+
+    flush();
+  }
+
+  void test_stress(const std::string &snap_name_prefix = "snap",
+                   char start_char = 'A') {
+    uint64_t initial_size = m_ictx->size;
+
+    int nsnaps = 4;
+    const char *c = getenv("TEST_RBD_MIGRATION_STRESS_NSNAPS");
+    if (c != NULL) {
+      std::stringstream ss(c);
+      ASSERT_TRUE(ss >> nsnaps);
+    }
+
+    int nwrites = 4;
+    c = getenv("TEST_RBD_MIGRATION_STRESS_NWRITES");
+    if (c != NULL) {
+      std::stringstream ss(c);
+      ASSERT_TRUE(ss >> nwrites);
+    }
+
+    for (int i = 0; i < nsnaps; i++) {
+      for (int j = 0; j < nwrites; j++) {
+        size_t len = rand() % ((1 << m_ictx->order) * 2);
+        ASSERT_GT(m_ictx->size, len);
+        uint64_t off = std::min(static_cast<uint64_t>(rand() % m_ictx->size),
+                                static_cast<uint64_t>(m_ictx->size - len));
+        write(off, len, start_char + i);
+
+        len = rand() % ((1 << m_ictx->order) * 2);
+        ASSERT_GT(m_ictx->size, len);
+        off = std::min(static_cast<uint64_t>(rand() % m_ictx->size),
+                       static_cast<uint64_t>(m_ictx->size - len));
+        discard(off, len);
+      }
+
+      std::string snap_name = snap_name_prefix + stringify(i);
+      snap_create(snap_name);
+
+      if (m_ictx->test_features(RBD_FEATURE_LAYERING) &&
+          !m_ictx->test_features(RBD_FEATURE_MIGRATING) &&
+          rand() % 4) {
+        clone(snap_name);
+      }
+
+      if (rand() % 2) {
+        librbd::NoOpProgressContext no_op;
+        uint64_t new_size = initial_size + rand() % m_ictx->size;
+        resize(new_size);
+        ASSERT_EQ(new_size, m_ictx->size);
+      }
+    }
+    flush();
+  }
+
+  void test_stress2(bool concurrent) {
+    test_stress();
+
+    migration_prepare(m_ioctx, m_image_name);
+    migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+
+    thread user([this]() {
+        test_stress("user", 'a');
+        for (int i = 0; i < 5; i++) {
+          uint64_t off = (i + 1) * m_ictx->size / 10;
+          uint64_t len = m_ictx->size / 40;
+          write(off, len, '1' + i);
+
+          off += len / 4;
+          len /= 2;
+          discard(off, len);
+        }
+        flush();
+      });
+
+    if (concurrent) {
+      librados::IoCtx io_ctx;
+      EXPECT_EQ(0, _rados.ioctx_create2(m_ioctx.get_id(), io_ctx));
+      migration_execute(io_ctx, m_image_name);
+      io_ctx.close();
+      user.join();
+    } else {
+      user.join();
+      compare("before execute");
+      migration_execute(m_ioctx, m_image_name);
+    }
+
+    migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+    migration_commit(m_ioctx, m_image_name);
+  }
+
+  std::string m_image_id;
+  librbd::ImageCtx *m_ictx = nullptr;
+  librados::IoCtx m_ref_ioctx;
+  librbd::ImageCtx *m_ref_ictx = nullptr;
+  librbd::ImageOptions m_opts;
+  librados::Rados m_other_pool_rados;
+  librados::IoCtx m_other_pool_ioctx;
+};
+
+TEST_F(TestMigration, Empty)
+{
+  uint64_t features = m_ictx->features ^ RBD_FEATURE_LAYERING;
+  ASSERT_EQ(0, m_opts.set(RBD_IMAGE_OPTION_FEATURES, features));
+
+  migrate(m_ioctx, m_image_name);
+
+  ASSERT_EQ(features, m_ictx->features);
+}
+
+TEST_F(TestMigration, OtherName)
+{
+  std::string name = get_temp_image_name();
+
+  migrate(m_ioctx, name);
+
+  ASSERT_EQ(name, m_ictx->name);
+}
+
+TEST_F(TestMigration, OtherPool)
+{
+  require_other_pool();
+
+  migrate(m_other_pool_ioctx, m_image_name);
+
+  ASSERT_EQ(m_other_pool_ioctx.get_id(), m_ictx->md_ctx.get_id());
+}
+
+TEST_F(TestMigration, DataPool)
+{
+  require_other_pool();
+
+  ASSERT_EQ(0, m_opts.set(RBD_IMAGE_OPTION_DATA_POOL,
+                          m_other_pool_ioctx.get_pool_name().c_str()));
+
+  migrate(m_ioctx, m_image_name);
+
+  ASSERT_EQ(m_other_pool_ioctx.get_id(), m_ictx->data_ctx.get_id());
+}
+
+TEST_F(TestMigration, AbortAfterPrepare)
+{
+  migration_prepare(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+  migration_abort(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, AbortAfterFailedPrepare)
+{
+  ASSERT_EQ(0, m_opts.set(RBD_IMAGE_OPTION_DATA_POOL, "INVALID_POOL"));
+
+  migration_prepare(m_ioctx, m_image_name, -ENOENT);
+
+  // Migration is automatically aborted if prepare failed
+}
+
+TEST_F(TestMigration, AbortAfterExecute)
+{
+  migration_prepare(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+  migration_execute(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+  migration_abort(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, OtherPoolAbortAfterExecute)
+{
+  require_other_pool();
+
+  migration_prepare(m_other_pool_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+  migration_execute(m_other_pool_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+  migration_abort(m_other_pool_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, MirroringSamePool)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::mode_set(m_ioctx, RBD_MIRROR_MODE_IMAGE));
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_enable(m_ictx, false));
+  librbd::mirror_image_info_t info;
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_get_info(m_ictx, &info));
+  ASSERT_EQ(RBD_MIRROR_IMAGE_ENABLED, info.state);
+
+  migrate(m_ioctx, m_image_name);
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_get_info(m_ictx, &info));
+  ASSERT_EQ(RBD_MIRROR_IMAGE_ENABLED, info.state);
+}
+
+TEST_F(TestMigration, MirroringAbort)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::mode_set(m_ioctx, RBD_MIRROR_MODE_IMAGE));
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_enable(m_ictx, false));
+  librbd::mirror_image_info_t info;
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_get_info(m_ictx, &info));
+  ASSERT_EQ(RBD_MIRROR_IMAGE_ENABLED, info.state);
+
+  migration_prepare(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_get_info(m_ictx, &info));
+  ASSERT_EQ(RBD_MIRROR_IMAGE_DISABLED, info.state);
+
+  migration_abort(m_ioctx, m_image_name);
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_get_info(m_ictx, &info));
+  ASSERT_EQ(RBD_MIRROR_IMAGE_ENABLED, info.state);
+}
+
+TEST_F(TestMigration, MirroringOtherPoolDisabled)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  require_other_pool();
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::mode_set(m_ioctx, RBD_MIRROR_MODE_IMAGE));
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_enable(m_ictx, false));
+  librbd::mirror_image_info_t info;
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_get_info(m_ictx, &info));
+  ASSERT_EQ(RBD_MIRROR_IMAGE_ENABLED, info.state);
+
+  migrate(m_other_pool_ioctx, m_image_name);
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_get_info(m_ictx, &info));
+  ASSERT_EQ(RBD_MIRROR_IMAGE_DISABLED, info.state);
+}
+
+TEST_F(TestMigration, MirroringOtherPoolEnabled)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  require_other_pool();
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::mode_set(m_ioctx, RBD_MIRROR_MODE_IMAGE));
+  ASSERT_EQ(0, librbd::api::Mirror<>::mode_set(m_other_pool_ioctx,
+                                               RBD_MIRROR_MODE_IMAGE));
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_enable(m_ictx, false));
+  librbd::mirror_image_info_t info;
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_get_info(m_ictx, &info));
+  ASSERT_EQ(RBD_MIRROR_IMAGE_ENABLED, info.state);
+
+  migrate(m_other_pool_ioctx, m_image_name);
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_get_info(m_ictx, &info));
+  ASSERT_EQ(RBD_MIRROR_IMAGE_ENABLED, info.state);
+}
+
+TEST_F(TestMigration, MirroringPool)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  require_other_pool();
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::mode_set(m_other_pool_ioctx,
+                                               RBD_MIRROR_MODE_POOL));
+  librbd::mirror_image_info_t info;
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_get_info(m_ictx, &info));
+  ASSERT_EQ(RBD_MIRROR_IMAGE_DISABLED, info.state);
+
+  migrate(m_other_pool_ioctx, m_image_name);
+
+  ASSERT_EQ(0, librbd::api::Mirror<>::image_get_info(m_ictx, &info));
+  ASSERT_EQ(RBD_MIRROR_IMAGE_ENABLED, info.state);
+}
+
+TEST_F(TestMigration, Group)
+{
+  REQUIRE_FORMAT_V2();
+
+  ASSERT_EQ(0, librbd::api::Group<>::create(m_ioctx, "123"));
+  ASSERT_EQ(0, librbd::api::Group<>::image_add(m_ioctx, "123", m_ioctx,
+                                               m_image_name.c_str()));
+  librbd::group_info_t info;
+  ASSERT_EQ(0, librbd::api::Group<>::image_get_group(m_ictx, &info));
+
+  std::string name = get_temp_image_name();
+
+  migrate(m_ioctx, name);
+
+  ASSERT_EQ(0, librbd::api::Group<>::image_get_group(m_ictx, &info));
+  ASSERT_EQ(info.name, "123");
+
+  ASSERT_EQ(0, librbd::api::Group<>::image_remove(m_ioctx, "123", m_ioctx,
+                                                  name.c_str()));
+  ASSERT_EQ(0, librbd::api::Group<>::remove(m_ioctx, "123"));
+}
+
+TEST_F(TestMigration, GroupAbort)
+{
+  REQUIRE_FORMAT_V2();
+
+  ASSERT_EQ(0, librbd::api::Group<>::create(m_ioctx, "123"));
+  ASSERT_EQ(0, librbd::api::Group<>::image_add(m_ioctx, "123", m_ioctx,
+                                               m_image_name.c_str()));
+  librbd::group_info_t info;
+  ASSERT_EQ(0, librbd::api::Group<>::image_get_group(m_ictx, &info));
+
+  std::string name = get_temp_image_name();
+
+  migration_prepare(m_ioctx, name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+
+  ASSERT_EQ(0, librbd::api::Group<>::image_get_group(m_ictx, &info));
+  ASSERT_EQ(info.name, "123");
+
+  migration_abort(m_ioctx, m_image_name);
+
+  ASSERT_EQ(0, librbd::api::Group<>::image_get_group(m_ictx, &info));
+  ASSERT_EQ(info.name, "123");
+
+  ASSERT_EQ(0, librbd::api::Group<>::image_remove(m_ioctx, "123", m_ioctx,
+                                                  m_image_name.c_str()));
+  ASSERT_EQ(0, librbd::api::Group<>::remove(m_ioctx, "123"));
+}
+
+TEST_F(TestMigration, NoSnaps)
+{
+  test_no_snaps();
+  migrate(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, NoSnapsOtherPool)
+{
+  test_no_snaps();
+
+  require_other_pool();
+
+  test_no_snaps();
+  migrate(m_other_pool_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, NoSnapsDataPool)
+{
+  test_no_snaps();
+
+  require_other_pool();
+
+  ASSERT_EQ(0, m_opts.set(RBD_IMAGE_OPTION_DATA_POOL,
+                          m_other_pool_ioctx.get_pool_name().c_str()));
+  migrate(m_ioctx, m_image_name);
+
+  EXPECT_EQ(m_other_pool_ioctx.get_id(), m_ictx->data_ctx.get_id());
+}
+
+TEST_F(TestMigration, NoSnapsShrinkAfterPrepare)
+{
+  test_no_snaps();
+
+  migration_prepare(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+
+  resize(m_ictx->size >> 1);
+
+  migration_execute(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+  migration_commit(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, NoSnapsShrinkToZeroBeforePrepare)
+{
+  test_no_snaps();
+  resize(0);
+
+  migrate(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, NoSnapsShrinkToZeroAfterPrepare)
+{
+  test_no_snaps();
+
+  migration_prepare(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+
+  resize(0);
+
+  migration_execute(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+  migration_commit(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, NoSnapsExpandAfterPrepare)
+{
+  test_no_snaps();
+
+  migration_prepare(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+
+  resize(m_ictx->size << 1);
+
+  migration_execute(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+  migration_commit(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, NoSnapsSnapAfterPrepare)
+{
+  test_no_snaps();
+
+  migration_prepare(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+
+  snap_create("after_prepare_snap");
+  resize(m_ictx->size >> 1);
+  write(0, 1000, '*');
+
+  migration_execute(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+  migration_commit(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, Snaps)
+{
+  test_snaps();
+  migrate(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, SnapsOtherPool)
+{
+  test_snaps();
+
+  require_other_pool();
+
+  test_no_snaps();
+  migrate(m_other_pool_ioctx, m_image_name);
+
+  EXPECT_EQ(m_other_pool_ioctx.get_id(), m_ictx->md_ctx.get_id());
+}
+
+TEST_F(TestMigration, SnapsDataPool)
+{
+  test_snaps();
+
+  require_other_pool();
+
+  ASSERT_EQ(0, m_opts.set(RBD_IMAGE_OPTION_DATA_POOL,
+                          m_other_pool_ioctx.get_pool_name().c_str()));
+  migrate(m_ioctx, m_image_name);
+
+  EXPECT_EQ(m_other_pool_ioctx.get_id(), m_ictx->data_ctx.get_id());
+}
+
+TEST_F(TestMigration, SnapsShrinkAfterPrepare)
+{
+  test_snaps();
+
+  migration_prepare(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+
+  resize(m_ictx->size >> 1);
+
+  migration_execute(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+  migration_commit(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, SnapsShrinkToZeroBeforePrepare)
+{
+  test_snaps();
+  resize(0);
+
+  migrate(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, SnapsShrinkToZeroAfterPrepare)
+{
+  test_snaps();
+
+  migration_prepare(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+
+  resize(0);
+
+  migration_execute(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+  migration_commit(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, SnapsExpandAfterPrepare)
+{
+  test_snaps();
+
+  migration_prepare(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+
+  auto size = m_ictx->size;
+  resize(size << 1);
+  write(size, 1000, '*');
+
+  migration_execute(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+  migration_commit(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, SnapsExpandAfterPrepare2)
+{
+  auto size = m_ictx->size;
+
+  write(size >> 1, 10, 'X');
+  snap_create("snap1");
+  resize(size >> 1);
+
+  migration_prepare(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+
+  resize(size);
+  write(size >> 1, 5, 'Y');
+
+  compare("before execute");
+
+  migration_execute(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+  migration_commit(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, SnapsSnapAfterPrepare)
+{
+  test_snaps();
+
+  migration_prepare(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+
+  auto ictx = new librbd::ImageCtx(m_ictx->name.c_str(), "", "snap3", m_ioctx,
+                                   false);
+  ASSERT_EQ(0, ictx->state->open(0));
+  EXPECT_EQ(0, librbd::api::Image<>::snap_set(
+              m_ref_ictx, cls::rbd::UserSnapshotNamespace(), "snap3"));
+  compare_snaps("opened after prepare snap3", m_ref_ictx, ictx);
+  EXPECT_EQ(0, librbd::api::Image<>::snap_set(
+              m_ref_ictx, cls::rbd::UserSnapshotNamespace(), nullptr));
+  EXPECT_EQ(0, ictx->state->close());
+
+  snap_create("after_prepare_snap");
+  resize(m_ictx->size >> 1);
+  write(0, 1000, '*');
+
+  migration_execute(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+  migration_commit(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, SnapsSnapExpandAfterPrepare)
+{
+  test_snaps();
+
+  migration_prepare(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_PREPARED);
+
+  snap_create("after_prepare_snap");
+  auto size = m_ictx->size;
+  resize(size << 1);
+  write(size, 1000, '*');
+
+  migration_execute(m_ioctx, m_image_name);
+  migration_status(RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+  migration_commit(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, Clone)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
+
+  test_clone();
+  migrate(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, CloneUpdateAfterPrepare)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
+
+  write(0, 10, 'X');
+  snap_create("snap");
+  clone("snap");
+
+  migration_prepare(m_ioctx, m_image_name);
+
+  write(0, 1, 'Y');
+
+  migration_execute(m_ioctx, m_image_name);
+  migration_commit(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, TriggerAssertSnapcSeq)
+{
+  auto size = m_ictx->size;
+
+  write((size >> 1) + 0, 10, 'A');
+  snap_create("snap1");
+  write((size >> 1) + 1, 10, 'B');
+
+  migration_prepare(m_ioctx, m_image_name);
+
+  // copyup => deep copy (first time)
+  write((size >> 1) + 2, 10, 'C');
+
+  // preserve data before resizing
+  snap_create("snap2");
+
+  // decrease head overlap
+  resize(size >> 1);
+
+  // migrate object => deep copy (second time) => assert_snapc_seq => -ERANGE
+  migration_execute(m_ioctx, m_image_name);
+  migration_commit(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, SnapTrimBeforePrepare)
+{
+  auto size = m_ictx->size;
+
+  write(size >> 1, 10, 'A');
+  snap_create("snap1");
+  resize(size >> 1);
+
+  migration_prepare(m_ioctx, m_image_name);
+
+  resize(size);
+  snap_create("snap3");
+  write(size >> 1, 10, 'B');
+  snap_create("snap4");
+  resize(size >> 1);
+
+  migration_execute(m_ioctx, m_image_name);
+  migration_commit(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, StressNoMigrate)
+{
+  test_stress();
+
+  compare();
+}
+
+TEST_F(TestMigration, Stress)
+{
+  test_stress();
+
+  migrate(m_ioctx, m_image_name);
+}
+
+TEST_F(TestMigration, Stress2)
+{
+  test_stress2(false);
+}
+
+TEST_F(TestMigration, StressLive)
+{
+  test_stress2(true);
+}
index cde5e85096faf1badaa1cb2f238d0cf0d2241d5b..950e008b306a26f9dd8384e974ce6eaddcc20c8a 100644 (file)
@@ -6783,6 +6783,129 @@ TEST_F(TestLibRBD, NamespacesPP) {
   ASSERT_EQ("name3", names[0]);
 }
 
+TEST_F(TestLibRBD, Migration) {
+  bool old_format;
+  uint64_t features;
+  ASSERT_EQ(0, get_features(&old_format, &features));
+
+  rados_ioctx_t ioctx;
+  rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx);
+  BOOST_SCOPE_EXIT(&ioctx) {
+    rados_ioctx_destroy(ioctx);
+  } BOOST_SCOPE_EXIT_END;
+
+  int order = 0;
+  std::string name = get_temp_image_name();
+  uint64_t size = 2 << 20;
+  ASSERT_EQ(0, create_image(ioctx, name.c_str(), size, &order));
+
+  rbd_image_options_t image_options;
+  rbd_image_options_create(&image_options);
+  BOOST_SCOPE_EXIT(&image_options) {
+    rbd_image_options_destroy(image_options);
+  } BOOST_SCOPE_EXIT_END;
+
+  ASSERT_EQ(0, rbd_migration_prepare(ioctx, name.c_str(), ioctx, name.c_str(),
+                                     image_options));
+
+  rbd_image_migration_status_t status;
+  ASSERT_EQ(0, rbd_migration_status(ioctx, name.c_str(), &status,
+                                    sizeof(status)));
+  ASSERT_EQ(status.source_pool_id, rados_ioctx_get_id(ioctx));
+  ASSERT_EQ(status.source_image_name, name);
+  if (old_format) {
+    ASSERT_EQ(status.source_image_id, string());
+  } else {
+    ASSERT_NE(status.source_image_id, string());
+  }
+  ASSERT_EQ(status.dest_pool_id, rados_ioctx_get_id(ioctx));
+  ASSERT_EQ(status.dest_image_name, name);
+  ASSERT_NE(status.dest_image_id, string());
+  ASSERT_EQ(status.state, RBD_IMAGE_MIGRATION_STATE_PREPARED);
+  rbd_migration_status_cleanup(&status);
+
+  ASSERT_EQ(-EBUSY, rbd_remove(ioctx, name.c_str()));
+
+  ASSERT_EQ(0, rbd_migration_execute(ioctx, name.c_str()));
+
+  ASSERT_EQ(0, rbd_migration_status(ioctx, name.c_str(), &status,
+                                    sizeof(status)));
+  ASSERT_EQ(status.state, RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+  rbd_migration_status_cleanup(&status);
+
+  ASSERT_EQ(0, rbd_migration_commit(ioctx, name.c_str()));
+
+  std::string new_name = get_temp_image_name();
+
+  ASSERT_EQ(0, rbd_migration_prepare(ioctx, name.c_str(), ioctx,
+                                     new_name.c_str(), image_options));
+
+  ASSERT_EQ(-EBUSY, rbd_remove(ioctx, new_name.c_str()));
+
+  ASSERT_EQ(0, rbd_migration_abort(ioctx, name.c_str()));
+
+  rbd_image_t image;
+  ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL));
+  EXPECT_EQ(0, rbd_close(image));
+}
+
+TEST_F(TestLibRBD, MigrationPP) {
+  bool old_format;
+  uint64_t features;
+  ASSERT_EQ(0, get_features(&old_format, &features));
+
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx));
+
+  int order = 0;
+  std::string name = get_temp_image_name();
+  uint64_t size = 2 << 20;
+  librbd::RBD rbd;
+  ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order));
+
+  librbd::ImageOptions image_options;
+
+  ASSERT_EQ(0, rbd.migration_prepare(ioctx, name.c_str(), ioctx, name.c_str(),
+                                     image_options));
+
+  librbd::image_migration_status_t status;
+  ASSERT_EQ(0, rbd.migration_status(ioctx, name.c_str(), &status,
+                                    sizeof(status)));
+  ASSERT_EQ(status.source_pool_id, ioctx.get_id());
+  ASSERT_EQ(status.source_image_name, name);
+  if (old_format) {
+    ASSERT_EQ(status.source_image_id, "");
+  } else {
+    ASSERT_NE(status.source_image_id, "");
+  }
+  ASSERT_EQ(status.dest_pool_id, ioctx.get_id());
+  ASSERT_EQ(status.dest_image_name, name);
+  ASSERT_NE(status.dest_image_id, "");
+  ASSERT_EQ(status.state, RBD_IMAGE_MIGRATION_STATE_PREPARED);
+
+  ASSERT_EQ(-EBUSY, rbd.remove(ioctx, name.c_str()));
+
+  ASSERT_EQ(0, rbd.migration_execute(ioctx, name.c_str()));
+
+  ASSERT_EQ(0, rbd.migration_status(ioctx, name.c_str(), &status,
+                                    sizeof(status)));
+  ASSERT_EQ(status.state, RBD_IMAGE_MIGRATION_STATE_EXECUTED);
+
+  ASSERT_EQ(0, rbd.migration_commit(ioctx, name.c_str()));
+
+  std::string new_name = get_temp_image_name();
+
+  ASSERT_EQ(0, rbd.migration_prepare(ioctx, name.c_str(), ioctx,
+                                     new_name.c_str(), image_options));
+
+  ASSERT_EQ(-EBUSY, rbd.remove(ioctx, new_name.c_str()));
+
+  ASSERT_EQ(0, rbd.migration_abort(ioctx, name.c_str()));
+
+  librbd::Image image;
+  ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL));
+}
+
 // poorman's assert()
 namespace ceph {
   void __ceph_assert_fail(const char *assertion, const char *file, int line,
index 932163147c4fb5a8934cf1daebb7aa1e14375960..939f68b254a2620d86bd21c4b3548cc457b3781b 100644 (file)
@@ -16,6 +16,7 @@ extern void register_test_image_watcher();
 extern void register_test_internal();
 extern void register_test_journal_entries();
 extern void register_test_journal_replay();
+extern void register_test_migration();
 extern void register_test_mirroring();
 extern void register_test_mirroring_watcher();
 extern void register_test_object_map();
@@ -34,6 +35,7 @@ int main(int argc, char **argv)
   register_test_internal();
   register_test_journal_entries();
   register_test_journal_replay();
+  register_test_migration();
   register_test_mirroring();
   register_test_mirroring_watcher();
   register_test_object_map();
index 5a4b6b080936f4813a4686e96133b9cd25591916..e334e018cde74e2e36a83a0e76a7d2b36de757aa 100644 (file)
@@ -1097,6 +1097,114 @@ TRACEPOINT_EVENT(librbd, rename_exit,
     )
 )
 
+TRACEPOINT_EVENT(librbd, migration_prepare_enter,
+    TP_ARGS(
+        const char*, pool_name,
+        uint64_t, id,
+        const char*, image_name,
+        const char*, dest_pool_name,
+        uint64_t, dest_id,
+        const char*, dest_image_name,
+        void*, opts),
+    TP_FIELDS(
+        ctf_string(pool_name, pool_name)
+        ctf_integer(uint64_t, id, id)
+        ctf_string(image_name, image_name)
+        ctf_string(dest_pool_name, dest_pool_name)
+        ctf_integer(uint64_t, dest_id, dest_id)
+        ctf_string(dest_image_name, dest_image_name)
+        ctf_integer_hex(void*, opts, opts)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, migration_prepare_exit,
+    TP_ARGS(
+        int, retval),
+    TP_FIELDS(
+        ctf_integer(int, retval, retval)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, migration_execute_enter,
+    TP_ARGS(
+        const char*, pool_name,
+        int64_t, pool_id,
+        const char*, image_name),
+    TP_FIELDS(
+        ctf_string(pool_name, pool_name)
+        ctf_integer(int64_t, pool_id, pool_id)
+        ctf_string(image_name, image_name)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, migration_execute_exit,
+    TP_ARGS(
+        int, retval),
+    TP_FIELDS(
+        ctf_integer(int, retval, retval)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, migration_abort_enter,
+    TP_ARGS(
+        const char*, pool_name,
+        int64_t, pool_id,
+        const char*, image_name),
+    TP_FIELDS(
+        ctf_string(pool_name, pool_name)
+        ctf_integer(int64_t, pool_id, pool_id)
+        ctf_string(image_name, image_name)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, migration_abort_exit,
+    TP_ARGS(
+        int, retval),
+    TP_FIELDS(
+        ctf_integer(int, retval, retval)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, migration_commit_enter,
+    TP_ARGS(
+        const char*, pool_name,
+        int64_t, pool_id,
+        const char*, image_name),
+    TP_FIELDS(
+        ctf_string(pool_name, pool_name)
+        ctf_integer(int64_t, pool_id, pool_id)
+        ctf_string(image_name, image_name)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, migration_commit_exit,
+    TP_ARGS(
+        int, retval),
+    TP_FIELDS(
+        ctf_integer(int, retval, retval)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, migration_status_enter,
+    TP_ARGS(
+        const char*, pool_name,
+        int64_t, pool_id,
+        const char*, image_name),
+    TP_FIELDS(
+        ctf_string(pool_name, pool_name)
+        ctf_integer(int64_t, pool_id, pool_id)
+        ctf_string(image_name, image_name)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, migration_status_exit,
+    TP_ARGS(
+        int, retval),
+    TP_FIELDS(
+        ctf_integer(int, retval, retval)
+    )
+)
+
 TRACEPOINT_EVENT(librbd, discard_enter,
     TP_ARGS(
         void*, imagectx,