.. index:: Ceph Block Device; live-migration
-RBD images can be live-migrated between different pools within the same cluster;
-between different image formats and layouts; or from external data sources.
-When started, the source will be deep-copied to the destination image, pulling
-all snapshot history while preserving the sparse allocation of data where
-possible.
+RBD images can be live-migrated between different pools, image formats and/or
+layouts within the same Ceph cluster; from an image in another Ceph cluster; or
+from external data sources. When started, the source will be deep-copied to
+the destination image, pulling all snapshot history while preserving the sparse
+allocation of data where possible.
By default, when live-migrating RBD images within the same Ceph cluster, the
source image will be marked read-only and all clients will instead redirect
IOs to the new target image. In addition, the new target image will be linked
to the source image as its migration parent.
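For example (a minimal sketch; pool and image names here are illustrative), an
in-cluster migration follows a prepare/execute/commit sequence::

    $ rbd migration prepare mypool/src-image mypool/dst-image
    $ rbd migration execute mypool/dst-image
    $ rbd migration commit mypool/dst-image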
The live-migration process can also be used in an import-only mode where the
-source image remains unmodified and the target image can be linked to an
-external data source such as a backing file, HTTP(s) file, or S3 object.
+source image remains unmodified and the target image can be linked to an image
+in another Ceph cluster or to an external data source such as a backing file,
+HTTP(s) file, or S3 object.
The live-migration copy process can safely run in the background while the new
target image is in use. There is currently a requirement to temporarily stop
using the source image before preparing a migration when not operating in the
import-only mode.
{
"type": "native",
+ ["cluster_name": "<cluster-name>",] (specify if image in another cluster,
+ requires ``<cluster-name>.conf`` file)
+ ["client_name": "<client-name>",] (for connecting to another cluster,
+ default is ``client.admin``)
"pool_name": "<pool-name>",
["pool_id": <pool-id>,] (optional alternative to "pool_name")
["pool_namespace": "<pool-namespace",] (optional)
--- /dev/null
+../.qa/
\ No newline at end of file
--- /dev/null
+../.qa/
\ No newline at end of file
--- /dev/null
+meta:
+- desc: run two ceph clusters
+tasks:
+- install:
+- ceph:
+ cluster: cluster1
+- ceph:
+ cluster: cluster2
--- /dev/null
+../.qa/
\ No newline at end of file
--- /dev/null
+meta:
+- desc: 2 ceph clusters with 1 mon and 3 osds each
+roles:
+- - cluster1.mon.a
+ - cluster1.mgr.x
+ - cluster1.osd.0
+ - cluster1.osd.1
+ - cluster1.osd.2
+ - cluster1.client.0
+- - cluster2.mon.a
+ - cluster2.mgr.x
+ - cluster2.osd.0
+ - cluster2.osd.1
+ - cluster2.osd.2
+ - cluster2.client.0
--- /dev/null
+.qa/objectstore
\ No newline at end of file
--- /dev/null
+.qa/distros/supported-random-distro$
\ No newline at end of file
--- /dev/null
+../.qa/
\ No newline at end of file
--- /dev/null
+tasks:
+- exec:
+ cluster1.client.0:
+ - sudo ceph --cluster cluster1 osd erasure-code-profile set teuthologyprofile crush-failure-domain=osd m=1 k=2
+ - sudo ceph --cluster cluster1 osd pool create datapool 4 4 erasure teuthologyprofile
+ - sudo ceph --cluster cluster1 osd pool set datapool allow_ec_overwrites true
+ - rbd --cluster cluster1 pool init datapool
+ cluster2.client.0:
+ - sudo ceph --cluster cluster2 osd erasure-code-profile set teuthologyprofile crush-failure-domain=osd m=1 k=2
+ - sudo ceph --cluster cluster2 osd pool create datapool 4 4 erasure teuthologyprofile
+ - sudo ceph --cluster cluster2 osd pool set datapool allow_ec_overwrites true
+ - rbd --cluster cluster2 pool init datapool
+
+overrides:
+ thrashosds:
+ bdev_inject_crash: 2
+ bdev_inject_crash_probability: .5
+ ceph:
+ fs: xfs
+ conf:
+ client:
+ rbd default data pool: datapool
+ osd: # force bluestore since it's required for ec overwrites
+ osd objectstore: bluestore
+ bluestore block size: 96636764160
+ enable experimental unrecoverable data corrupting features: "*"
+ osd debug randomize hobject sort order: false
+# this doesn't work with failures because the log writes are not atomic across the two backends
+# bluestore bluefs env mirror: true
--- /dev/null
+tasks:
+- exec:
+ cluster1.client.0:
+ - sudo ceph --cluster cluster1 osd pool create datapool 4
+ - rbd --cluster cluster1 pool init datapool
+ cluster2.client.0:
+ - sudo ceph --cluster cluster2 osd pool create datapool 4
+ - rbd --cluster cluster2 pool init datapool
+
+overrides:
+ ceph:
+ conf:
+ client:
+ rbd default data pool: datapool
--- /dev/null
+../.qa/
\ No newline at end of file
--- /dev/null
+tasks:
+ - exec:
+ cluster2.client.0:
+ - echo '{"type":"qcow","stream":{"type":"http","url":"http://download.ceph.com/qa/ubuntu-12.04.qcow2"}}' | rbd --cluster cluster2 migration prepare --import-only --source-spec-path - client.0.0-src
+ - rbd --cluster cluster2 migration execute client.0.0-src
+ - rbd --cluster cluster2 migration commit client.0.0-src
+ - rbd --cluster cluster2 snap create client.0.0-src@snap
+ - rbd --cluster cluster2 snap protect client.0.0-src@snap
+ - rbd --cluster cluster2 clone client.0.0-src@snap client.0.0
+ - rbd --cluster cluster2 snap create client.0.0@snap
+ - rbd --cluster cluster2 create --size 1G client.0.1-src
+ - rbd --cluster cluster2 bench --io-type write --io-pattern rand --io-size 16K --io-threads 1 --io-total 1M client.0.1-src
+ - rbd --cluster cluster2 snap create client.0.1-src@snap
+ - rbd --cluster cluster2 snap protect client.0.1-src@snap
+ - rbd --cluster cluster2 clone client.0.1-src@snap client.0.1
+ - rbd --cluster cluster2 bench --io-type write --io-pattern rand --io-size 16K --io-threads 1 --io-total 1M client.0.1
+ - rbd --cluster cluster2 snap create client.0.1@snap
+ - rbd --cluster cluster2 create --size 1G client.0.2-src
+ - rbd --cluster cluster2 bench --io-type write --io-pattern rand --io-size 16K --io-threads 1 --io-total 1M client.0.2-src
+ - rbd --cluster cluster2 snap create client.0.2-src@snap
+ - rbd --cluster cluster2 snap protect client.0.2-src@snap
+ - rbd --cluster cluster2 clone client.0.2-src@snap client.0.2
+ - rbd --cluster cluster2 bench --io-type write --io-pattern rand --io-size 16K --io-threads 1 --io-total 2M client.0.2
+ - rbd --cluster cluster2 snap create client.0.2@snap
+ - exec:
+ cluster1.client.0:
+ - echo '{"type":"native","cluster_name":"cluster2","client_name":"client.admin","pool_name":"rbd","image_name":"client.0.0","snap_name":"snap"}' | rbd --cluster cluster1 migration prepare --import-only --source-spec-path - client.0.0
+ - echo '{"type":"native","cluster_name":"cluster2","client_name":"client.admin","pool_name":"rbd","image_name":"client.0.1","snap_name":"snap"}' | rbd --cluster cluster1 migration prepare --import-only --source-spec-path - client.0.1
+ - echo '{"type":"native","cluster_name":"cluster2","client_name":"client.admin","pool_name":"rbd","image_name":"client.0.2","snap_name":"snap"}' | rbd --cluster cluster1 migration prepare --import-only --source-spec-path - client.0.2
--- /dev/null
+tasks:
+ - exec:
+ cluster2.client.0:
+ - echo '{"type":"qcow","stream":{"type":"http","url":"http://download.ceph.com/qa/ubuntu-12.04.qcow2"}}' | rbd --cluster cluster2 migration prepare --import-only --source-spec-path - client.0.0
+ - rbd --cluster cluster2 migration execute client.0.0
+ - rbd --cluster cluster2 migration commit client.0.0
+ - rbd --cluster cluster2 snap create client.0.0@snap
+ - rbd --cluster cluster2 create --size 1G client.0.1
+ - rbd --cluster cluster2 bench --io-type write --io-pattern rand --io-size 16K --io-threads 1 --io-total 2M client.0.1
+ - rbd --cluster cluster2 snap create client.0.1@snap
+ - rbd --cluster cluster2 create --size 1G client.0.2
+ - rbd --cluster cluster2 bench --io-type write --io-pattern rand --io-size 16K --io-threads 1 --io-total 2M client.0.2
+ - rbd --cluster cluster2 snap create client.0.2@snap
+ - exec:
+ cluster1.client.0:
+ - echo '{"type":"native","cluster_name":"cluster2","client_name":"client.admin","pool_name":"rbd","image_name":"client.0.0","snap_name":"snap"}' | rbd --cluster cluster1 migration prepare --import-only --source-spec-path - client.0.0
+ - echo '{"type":"native","cluster_name":"cluster2","client_name":"client.admin","pool_name":"rbd","image_name":"client.0.1","snap_name":"snap"}' | rbd --cluster cluster1 migration prepare --import-only --source-spec-path - client.0.1
+ - echo '{"type":"native","cluster_name":"cluster2","client_name":"client.admin","pool_name":"rbd","image_name":"client.0.2","snap_name":"snap"}' | rbd --cluster cluster1 migration prepare --import-only --source-spec-path - client.0.2
--- /dev/null
+../.qa/
\ No newline at end of file
--- /dev/null
+io_workload:
+ sequential:
+ - qemu:
+ cluster1.client.0:
+ type: block
+ disks:
+ - action: none
+ image_name: client.0.0
+ - action: none
+ image_name: client.0.1
+ - action: none
+ image_name: client.0.2
+ test: qa/run_xfstests_qemu.sh
+exclude_arch: armv7l
--- /dev/null
+../.qa/
\ No newline at end of file
--- /dev/null
+tasks:
+ - parallel:
+ - io_workload
+ - migrate_workload
+migrate_workload:
+ sequential:
+ - exec:
+ cluster1.client.0:
+ - sleep $((RANDOM % 600))
+ - rbd --cluster cluster1 migration execute client.0.0
+ - sleep $((RANDOM % 600))
+ - rbd --cluster cluster1 migration commit client.0.0
+ - sleep $((RANDOM % 600))
+ - rbd --cluster cluster1 migration execute client.0.1
+ - sleep $((RANDOM % 600))
+ - rbd --cluster cluster1 migration commit client.0.1
+ - sleep $((RANDOM % 600))
+ - rbd --cluster cluster1 migration execute client.0.2
+ - sleep $((RANDOM % 600))
+ - rbd --cluster cluster1 migration commit client.0.2
--- /dev/null
+.qa/rbd/conf
\ No newline at end of file
ldout(cct, 10) << this << " " << __func__ << dendl;
ceph_assert(parent == nullptr);
+ ceph_assert(parent_rados == nullptr);
ceph_assert(config_watcher == nullptr);
ceph_assert(image_watcher == NULL);
ceph_assert(exclusive_lock == NULL);
// lock_tag
// lockers
// object_map
- // parent_md and parent
+ // parent_md, parent and parent_rados
// encryption_format
ceph::shared_mutex timestamp_lock; // protects (create/access/modify)_timestamp
std::string id; // only used for new-format images
ParentImageInfo parent_md;
ImageCtx *parent = nullptr;
+ librados::Rados *parent_rados = nullptr; // set iff image is being imported
+ // from another cluster
ImageCtx *child = nullptr;
MigrationInfo migration_info;
cls::rbd::GroupSpec group_spec;
<< dest_io_ctx.get_pool_name() << "/"
<< dest_image_name << ", opts=" << opts << dendl;
- I* src_image_ctx = nullptr;
+ I* src_image_ctx;
+ librados::Rados* src_rados;
C_SaferCond open_ctx;
auto req = migration::OpenSourceImageRequest<I>::create(
dest_io_ctx, nullptr, CEPH_NOSNAP,
- {-1, "", "", "", source_spec, {}, 0, false}, &src_image_ctx, &open_ctx);
+ {-1, "", "", "", source_spec, {}, 0, false}, &src_image_ctx, &src_rados,
+ &open_ctx);
req->send();
int r = open_ctx.wait();
return r;
}
- BOOST_SCOPE_EXIT_TPL(src_image_ctx) {
+ BOOST_SCOPE_EXIT_TPL(src_image_ctx, src_rados) {
src_image_ctx->state->close();
+ delete src_rados;
} BOOST_SCOPE_EXIT_END;
uint64_t image_format = 2;
ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
m_image_ctx->parent = nullptr;
+ if (m_image_ctx->parent_rados != nullptr) {
+ delete m_image_ctx->parent_rados;
+ m_image_ctx->parent_rados = nullptr;
+ }
+
save_result(r);
if (r < 0) {
lderr(cct) << "error closing parent image: " << cpp_strerror(r) << dendl;
void RefreshParentRequest<I>::apply() {
ceph_assert(ceph_mutex_is_wlocked(m_child_image_ctx.image_lock));
std::swap(m_child_image_ctx.parent, m_parent_image_ctx);
+ std::swap(m_child_image_ctx.parent_rados, m_parent_rados);
}
template <typename I>
if (m_parent_image_ctx != nullptr) {
send_close_parent();
} else {
+ ceph_assert(m_parent_rados == nullptr);
send_complete(0);
}
}
&RefreshParentRequest<I>::handle_open_parent, false>(this));
auto req = migration::OpenSourceImageRequest<I>::create(
m_child_image_ctx.md_ctx, &m_child_image_ctx, m_parent_md.spec.snap_id,
- m_migration_info, &m_parent_image_ctx, ctx);
+ m_migration_info, &m_parent_image_ctx, &m_parent_rados, ctx);
req->send();
return;
}
ldout(cct, 10) << this << " " << __func__ << " r=" << *result << dendl;
m_parent_image_ctx = nullptr;
+ if (m_parent_rados != nullptr) {
+ delete m_parent_rados;
+ m_parent_rados = nullptr;
+ }
if (*result < 0) {
lderr(cct) << "failed to close parent image: " << cpp_strerror(*result)
#define CEPH_LIBRBD_IMAGE_REFRESH_PARENT_REQUEST_H
#include "include/int_types.h"
+#include "include/rados/librados_fwd.hpp"
#include "librbd/Types.h"
class Context;
Context *m_on_finish;
ImageCtxT *m_parent_image_ctx = nullptr;
+ librados::Rados *m_parent_rados = nullptr;
int m_error_result;
// vim: ts=8 sw=2 smarttab
#include "librbd/migration/NativeFormat.h"
+#include "common/ceph_argparse.h"
+#include "common/common_init.h"
#include "common/dout.h"
#include "common/errno.h"
+#include "include/scope_guard.h"
#include "librbd/ImageCtx.h"
#include "librbd/ImageState.h"
#include "json_spirit/json_spirit.h"
namespace {
const std::string TYPE_KEY{"type"};
+const std::string CLUSTER_NAME_KEY{"cluster_name"};
+const std::string CLIENT_NAME_KEY{"client_name"};
const std::string POOL_ID_KEY{"pool_id"};
const std::string POOL_NAME_KEY{"pool_name"};
const std::string POOL_NAMESPACE_KEY{"pool_namespace"};
int NativeFormat<I>::create_image_ctx(
librados::IoCtx& dst_io_ctx,
const json_spirit::mObject& source_spec_object,
- bool import_only, uint64_t src_snap_id, I** src_image_ctx) {
+ bool import_only, uint64_t src_snap_id, I** src_image_ctx,
+ librados::Rados** src_rados) {
auto cct = reinterpret_cast<CephContext*>(dst_io_ctx.cct());
+ std::string cluster_name;
+ std::string client_name;
std::string pool_name;
int64_t pool_id = -1;
std::string pool_namespace;
uint64_t snap_id = CEPH_NOSNAP;
int r;
+ if (auto it = source_spec_object.find(CLUSTER_NAME_KEY);
+ it != source_spec_object.end()) {
+ if (it->second.type() == json_spirit::str_type) {
+ cluster_name = it->second.get_str();
+ } else {
+ lderr(cct) << "invalid cluster name" << dendl;
+ return -EINVAL;
+ }
+ }
+
+ if (auto it = source_spec_object.find(CLIENT_NAME_KEY);
+ it != source_spec_object.end()) {
+ if (cluster_name.empty()) {
+ lderr(cct) << "cannot specify client name without cluster name" << dendl;
+ return -EINVAL;
+ }
+ if (it->second.type() == json_spirit::str_type) {
+ client_name = it->second.get_str();
+ } else {
+ lderr(cct) << "invalid client name" << dendl;
+ return -EINVAL;
+ }
+ }
+
if (auto it = source_spec_object.find(POOL_NAME_KEY);
it != source_spec_object.end()) {
if (it->second.type() == json_spirit::str_type) {
snap_id = src_snap_id;
}
- // TODO add support for external clusters
+ std::unique_ptr<librados::Rados> rados_ptr;
+ if (!cluster_name.empty()) {
+ // manually bootstrap a CephContext, skipping reading environment
+ // variables for now -- since we don't have access to command line
+ // arguments here, the least confusing option is to limit initial
+ // remote cluster config to a file in the default location
+ // TODO: support specifying mon_host and key via source spec
+ // TODO: support merging in effective local cluster config to get
+ // overrides for log levels, etc
+ CephInitParameters iparams(CEPH_ENTITY_TYPE_CLIENT);
+ if (!client_name.empty() && !iparams.name.from_str(client_name)) {
+ lderr(cct) << "failed to set remote client name" << dendl;
+ return -EINVAL;
+ }
+
+ auto remote_cct = common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY, 0);
+ auto put_remote_cct = make_scope_guard([remote_cct] { remote_cct->put(); });
+
+ remote_cct->_conf->cluster = cluster_name;
+
+ // pass CEPH_CONF_FILE_DEFAULT instead of nullptr to prevent
+ // CEPH_CONF environment variable from being picked up
+ r = remote_cct->_conf.parse_config_files(CEPH_CONF_FILE_DEFAULT, nullptr,
+ 0);
+ if (r < 0) {
+ remote_cct->_conf.complain_about_parse_error(cct);
+ lderr(cct) << "failed to read ceph conf for remote cluster: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ remote_cct->_conf.apply_changes(nullptr);
+
+ rados_ptr.reset(new librados::Rados());
+ r = rados_ptr->init_with_context(remote_cct);
+ ceph_assert(r == 0);
+
+ r = rados_ptr->connect();
+ if (r < 0) {
+ lderr(cct) << "failed to connect to remote cluster: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ } else {
+ rados_ptr.reset(new librados::Rados(dst_io_ctx));
+ }
+
librados::IoCtx src_io_ctx;
if (!pool_name.empty()) {
r = rados_ptr->ioctx_create(pool_name.c_str(), src_io_ctx);
*src_image_ctx = I::create(image_name, image_id, snap_id, src_io_ctx,
true);
}
+
+ if (!cluster_name.empty()) {
+ *src_rados = rados_ptr.release();
+ } else {
+ *src_rados = nullptr;
+ }
+
return 0;
}
static int create_image_ctx(librados::IoCtx& dst_io_ctx,
const json_spirit::mObject& source_spec_object,
bool import_only, uint64_t src_snap_id,
- ImageCtxT** src_image_ctx);
+ ImageCtxT** src_image_ctx,
+ librados::Rados** src_rados);
};
} // namespace migration
#include "common/errno.h"
#include "librbd/ImageCtx.h"
#include "librbd/ImageState.h"
+#include "librbd/TaskFinisher.h"
#include "librbd/Utils.h"
#include "librbd/io/ImageDispatcher.h"
#include "librbd/migration/FormatInterface.h"
template <typename I>
OpenSourceImageRequest<I>::OpenSourceImageRequest(
librados::IoCtx& dst_io_ctx, I* dst_image_ctx, uint64_t src_snap_id,
- const MigrationInfo &migration_info, I** src_image_ctx, Context* on_finish)
+ const MigrationInfo &migration_info, I** src_image_ctx,
+ librados::Rados** src_rados, Context* on_finish)
: m_cct(reinterpret_cast<CephContext*>(dst_io_ctx.cct())),
m_dst_io_ctx(dst_io_ctx), m_dst_image_ctx(dst_image_ctx),
m_src_snap_id(src_snap_id), m_migration_info(migration_info),
- m_src_image_ctx(src_image_ctx), m_on_finish(on_finish) {
+ m_src_image_ctx(src_image_ctx), m_src_rados(src_rados),
+ m_on_finish(on_finish) {
ldout(m_cct, 10) << dendl;
}
int r = NativeFormat<I>::create_image_ctx(m_dst_io_ctx, source_spec_object,
import_only, m_src_snap_id,
- m_src_image_ctx);
+ m_src_image_ctx, m_src_rados);
if (r < 0) {
lderr(m_cct) << "failed to create native image context: "
<< cpp_strerror(r) << dendl;
if (r < 0) {
lderr(m_cct) << "failed to open native image: " << cpp_strerror(r)
<< dendl;
- finish(r);
+
+ // m_src_rados must be deleted outside the scope of its task
+ // finisher thread to avoid the finisher attempting to destroy
+ // itself and locking up
+ // since the local image (m_dst_image_ctx) may not be available,
+ // redirect to the local rados' task finisher
+ auto ctx = new LambdaContext([this](int r) {
+ delete *m_src_rados;
+ finish(r);
+ });
+ TaskFinisherSingleton::get_singleton(m_cct).queue(ctx, r);
return;
}
// note that all source image ctx properties are placeholders
*m_src_image_ctx = I::create("", "", CEPH_NOSNAP, m_dst_io_ctx, true);
+ *m_src_rados = nullptr;
+
auto src_image_ctx = *m_src_image_ctx;
src_image_ctx->child = m_dst_image_ctx;
if (r < 0) {
*m_src_image_ctx = nullptr;
+ *m_src_rados = nullptr;
} else {
register_image_dispatch();
}
ImageCtxT* destination_image_ctx,
uint64_t src_snap_id,
const MigrationInfo &migration_info,
- ImageCtxT** source_image_ctx,
+ ImageCtxT** src_image_ctx,
+ librados::Rados** src_rados,
Context* on_finish) {
return new OpenSourceImageRequest(dst_io_ctx, destination_image_ctx,
src_snap_id, migration_info,
- source_image_ctx, on_finish);
+ src_image_ctx, src_rados, on_finish);
}
OpenSourceImageRequest(librados::IoCtx& dst_io_ctx,
ImageCtxT* destination_image_ctx,
uint64_t src_snap_id,
const MigrationInfo &migration_info,
- ImageCtxT** source_image_ctx,
+ ImageCtxT** src_image_ctx,
+ librados::Rados** src_rados,
Context* on_finish);
void send();
uint64_t m_src_snap_id;
MigrationInfo m_migration_info;
ImageCtxT** m_src_image_ctx;
+ librados::Rados** m_src_rados;
Context* m_on_finish;
std::unique_ptr<FormatInterface> m_format;