From: Jason Dillaman Date: Mon, 28 Mar 2016 17:05:48 +0000 (-0400) Subject: rbd-mirror: initial support for primary/non-primary handling X-Git-Tag: v10.1.1~64^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4c2747fba1cd1f232f1cfcac9bc7329786779183;p=ceph.git rbd-mirror: initial support for primary/non-primary handling Signed-off-by: Jason Dillaman --- diff --git a/src/test/rbd_mirror/image_replay.cc b/src/test/rbd_mirror/image_replay.cc index bde46e2cc4f5..88c6ac70bbc0 100644 --- a/src/test/rbd_mirror/image_replay.cc +++ b/src/test/rbd_mirror/image_replay.cc @@ -185,7 +185,7 @@ int main(int argc, const char **argv) threads = new rbd::mirror::Threads(reinterpret_cast( local->cct())); replayer = new rbd::mirror::ImageReplayer<>(threads, local, remote, client_id, - local_pool_id, remote_pool_id, + "", local_pool_id, remote_pool_id, remote_image_id, "global image id"); diff --git a/src/test/rbd_mirror/test_mock_ImageReplayer.cc b/src/test/rbd_mirror/test_mock_ImageReplayer.cc index fc1f16b39d49..ac4e8bcf354d 100644 --- a/src/test/rbd_mirror/test_mock_ImageReplayer.cc +++ b/src/test/rbd_mirror/test_mock_ImageReplayer.cc @@ -61,7 +61,8 @@ struct BootstrapRequest { const std::string &global_image_id, ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock, - const std::string &mirror_uuid, + const std::string &local_mirror_uuid, + const std::string &remote_mirror_uuid, ::journal::MockJournalerProxy *journaler, librbd::journal::MirrorPeerClientMeta *client_meta, Context *on_finish) { diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index 9f9830b8c1bd..df297b080593 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -254,7 +254,8 @@ void ImageReplayer::bootstrap() { m_local_ioctx, m_remote_ioctx, &m_local_image_ctx, m_local_image_name, m_remote_image_id, m_global_image_id, m_threads->work_queue, m_threads->timer, &m_threads->timer_lock, - m_local_mirror_uuid, m_remote_journaler, &m_client_meta, ctx); + m_local_mirror_uuid, m_remote_mirror_uuid, m_remote_journaler, + &m_client_meta, ctx); request->send(); } @@ -262,7 +263,11 @@ template void ImageReplayer::handle_bootstrap(int r) { dout(20) << "r=" << r << dendl; - if (r < 0) { + if (r == -EREMOTEIO) { + dout(5) << "remote image is non-primary or local image is primary" << dendl; + on_start_fail_start(0); + return; + } else if (r < 0) { on_start_fail_start(r); return; } else if (on_start_interrupted()) { @@ -392,6 +397,9 @@ void ImageReplayer::on_start_fail_finish(int r) m_local_ioctx.close(); m_remote_ioctx.close(); + delete m_asok_hook; + m_asok_hook = nullptr; + Context *on_start_finish(nullptr); Context *on_stop_finish(nullptr); { @@ -540,6 +548,9 @@ void ImageReplayer::on_stop_local_image_close_finish(int r) m_remote_ioctx.close(); + delete m_asok_hook; + m_asok_hook = nullptr; + Context *on_finish(nullptr); { @@ -775,12 +786,16 @@ void ImageReplayer::allocate_local_tag() { if (mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID || mirror_uuid == m_local_mirror_uuid) { mirror_uuid = m_remote_mirror_uuid; + } else if (mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) { + dout(5) << "encountered image demotion: stopping" << dendl; + Mutex::Locker locker(m_lock); + m_stop_requested = true; } std::string predecessor_mirror_uuid = m_replay_tag_data.predecessor_mirror_uuid; if (predecessor_mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) { - mirror_uuid = m_remote_mirror_uuid; + predecessor_mirror_uuid = m_remote_mirror_uuid; } else if (predecessor_mirror_uuid == m_local_mirror_uuid) { predecessor_mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID; } diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc index 4aaf5a29fd1d..55795467b378 100644 --- a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc +++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc @@ -13,8 +13,9 @@ #include "librbd/ImageCtx.h" #include "librbd/ImageState.h" #include "librbd/internal.h" -#include "librbd/journal/Types.h" +#include "librbd/Journal.h" #include "librbd/Utils.h" +#include "librbd/journal/Types.h" #include "tools/rbd_mirror/ImageSync.h" #define dout_subsys ceph_subsys_rbd_mirror @@ -35,15 +36,18 @@ template struct C_CreateImage : public Context { librados::IoCtx &local_io_ctx; std::string global_image_id; + std::string remote_mirror_uuid; std::string local_image_name; I *remote_image_ctx; Context *on_finish; C_CreateImage(librados::IoCtx &local_io_ctx, const std::string &global_image_id, + const std::string &remote_mirror_uuid, const std::string &local_image_name, I *remote_image_ctx, Context *on_finish) : local_io_ctx(local_io_ctx), global_image_id(global_image_id), + remote_mirror_uuid(remote_mirror_uuid), local_image_name(local_image_name), remote_image_ctx(remote_image_ctx), on_finish(on_finish) { } @@ -67,7 +71,7 @@ struct C_CreateImage : public Context { remote_image_ctx->stripe_unit, remote_image_ctx->stripe_count, journal_order, journal_splay_width, journal_pool, - global_image_id); + global_image_id, remote_mirror_uuid); on_finish->complete(r); } }; @@ -83,7 +87,8 @@ BootstrapRequest::BootstrapRequest(librados::IoCtx &local_io_ctx, const std::string &global_image_id, ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock, - const std::string &mirror_uuid, + const std::string &local_mirror_uuid, + const std::string &remote_mirror_uuid, Journaler *journaler, MirrorPeerClientMeta *client_meta, Context *on_finish) @@ -91,7 +96,8 @@ BootstrapRequest::BootstrapRequest(librados::IoCtx &local_io_ctx, m_local_image_ctx(local_image_ctx), m_local_image_name(local_image_name), m_remote_image_id(remote_image_id), m_global_image_id(global_image_id), m_work_queue(work_queue), m_timer(timer), m_timer_lock(timer_lock), - m_mirror_uuid(mirror_uuid), m_journaler(journaler), + m_local_mirror_uuid(local_mirror_uuid), + m_remote_mirror_uuid(remote_mirror_uuid), m_journaler(journaler), m_client_meta(client_meta), m_on_finish(on_finish) { } @@ -139,6 +145,51 @@ void BootstrapRequest::handle_get_local_image_id(int r) { return; } + get_remote_tag_class(); +} + +template +void BootstrapRequest::get_remote_tag_class() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + BootstrapRequest, &BootstrapRequest::handle_get_remote_tag_class>( + this); + m_journaler->get_client(librbd::Journal<>::IMAGE_CLIENT_ID, &m_client, ctx); +} + +template +void BootstrapRequest::handle_get_remote_tag_class(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to retreive remote client: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + librbd::journal::ClientData client_data; + bufferlist::iterator it = m_client.data.begin(); + try { + ::decode(client_data, it); + } catch (const buffer::error &err) { + derr << ": failed to decode remote client meta data: " << err.what() + << dendl; + finish(-EBADMSG); + return; + } + + librbd::journal::ImageClientMeta *client_meta = + boost::get(&client_data.client_meta); + if (client_meta == nullptr) { + derr << ": unknown remote client registration" << dendl; + finish(-EINVAL); + return; + } + + m_remote_tag_class = client_meta->tag_class; + dout(10) << ": remote tag class=" << m_remote_tag_class << dendl; + get_client(); } @@ -149,7 +200,7 @@ void BootstrapRequest::get_client() { Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_get_client>( this); - m_journaler->get_client(m_mirror_uuid, &m_client, ctx); + m_journaler->get_client(m_local_mirror_uuid, &m_client, ctx); } template @@ -216,6 +267,9 @@ void BootstrapRequest::open_remote_image() { template void BootstrapRequest::handle_open_remote_image(int r) { + // deduce the class type for the journal to support unit tests + typedef typename std::decay::type Journal; + dout(20) << ": r=" << r << dendl; if (r < 0) { @@ -225,6 +279,25 @@ void BootstrapRequest::handle_open_remote_image(int r) { return; } + // TODO: make async + bool tag_owner; + r = Journal::is_tag_owner(m_remote_image_ctx, &tag_owner); + if (r < 0) { + derr << ": failed to query remote image primary status: " << cpp_strerror(r) + << dendl; + m_ret_val = r; + close_remote_image(); + return; + } + + if (!tag_owner) { + dout(5) << ": remote image is not primary -- skipping image replay" + << dendl; + m_ret_val = -EREMOTEIO; + close_remote_image(); + return; + } + // default local image name to the remote image name if not provided if (m_local_image_name.empty()) { m_local_image_name = m_remote_image_ctx->name; @@ -302,6 +375,7 @@ void BootstrapRequest::create_local_image() { BootstrapRequest, &BootstrapRequest::handle_create_local_image>( this); m_work_queue->queue(new C_CreateImage(m_local_io_ctx, m_global_image_id, + m_remote_mirror_uuid, m_local_image_name, m_remote_image_ctx, ctx), 0); } @@ -317,13 +391,15 @@ void BootstrapRequest::handle_create_local_image(int r) { return; } + m_created_local_image = true; open_local_image(); } template void BootstrapRequest::update_client() { - if (m_local_image_id == (*m_local_image_ctx)->id) { - image_sync(); + if (m_client_meta->image_id == (*m_local_image_ctx)->id) { + // already registered local image with remote journal + get_remote_tags(); return; } m_local_image_id = (*m_local_image_ctx)->id; @@ -355,11 +431,97 @@ void BootstrapRequest::handle_update_client(int r) { } m_client_meta->image_id = m_local_image_id; + get_remote_tags(); +} + +template +void BootstrapRequest::get_remote_tags() { + if (m_created_local_image) { + // optimization -- no need to compare remote tags if we just created + // the image locally + image_sync(); + return; + } + + dout(20) << dendl; + + Context *ctx = create_context_callback< + BootstrapRequest, &BootstrapRequest::handle_get_remote_tags>(this); + m_journaler->get_tags(m_remote_tag_class, &m_remote_tags, ctx); +} + +template +void BootstrapRequest::handle_get_remote_tags(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to retreive remote tags: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_local_image(); + return; + } + + // decode the remote tags + librbd::journal::TagData remote_tag_data; + for (auto &tag : m_remote_tags) { + try { + bufferlist::iterator it = tag.data.begin(); + ::decode(remote_tag_data, it); + } catch (const buffer::error &err) { + derr << ": failed to decode remote tag: " << err.what() << dendl; + m_ret_val = -EBADMSG; + close_local_image(); + return; + } + + dout(10) << ": decoded remote tag: " << remote_tag_data << dendl; + if (remote_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID && + remote_tag_data.predecessor_mirror_uuid == m_local_mirror_uuid) { + // remote tag is chained off a local tag demotion + break; + } + } + + // At this point, the local image was existing and non-primary and the remote + // image is primary. Attempt to link the local image's most recent tag + // to the remote image's tag chain. + I *local_image_ctx = (*m_local_image_ctx); + { + RWLock::RLocker snap_locker(local_image_ctx->snap_lock); + if (local_image_ctx->journal == nullptr) { + derr << "local image does not support journaling" << dendl; + m_ret_val = -EINVAL; + close_local_image(); + return; + } + + librbd::journal::TagData tag_data = + local_image_ctx->journal->get_tag_data(); + dout(20) << ": local tag data: " << tag_data << dendl; + + if (!((tag_data.mirror_uuid == librbd::Journal::ORPHAN_MIRROR_UUID && + remote_tag_data.mirror_uuid == librbd::Journal::ORPHAN_MIRROR_UUID && + remote_tag_data.predecessor_mirror_uuid == m_local_mirror_uuid) || + (tag_data.mirror_uuid == m_remote_mirror_uuid && + m_client_meta->state == librbd::journal::MIRROR_PEER_STATE_REPLAYING))) { + derr << ": split-brain detected -- skipping image replay" << dendl; + m_ret_val = -EEXIST; + close_local_image(); + return; + } + } + image_sync(); } template void BootstrapRequest::image_sync() { + if (m_client_meta->state == librbd::journal::MIRROR_PEER_STATE_REPLAYING) { + // clean replay state -- no image sync required + close_remote_image(); + return; + } + dout(20) << dendl; Context *ctx = create_context_callback< @@ -367,8 +529,9 @@ void BootstrapRequest::image_sync() { this); ImageSync *request = ImageSync::create(*m_local_image_ctx, m_remote_image_ctx, m_timer, - m_timer_lock, m_mirror_uuid, - m_journaler, m_client_meta, ctx); + m_timer_lock, + m_local_mirror_uuid, m_journaler, + m_client_meta, ctx); request->start(); } @@ -379,8 +542,6 @@ void BootstrapRequest::handle_image_sync(int r) { if (r < 0) { derr << ": failed to sync remote image: " << cpp_strerror(r) << dendl; m_ret_val = r; - close_local_image(); - return; } close_remote_image(); diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h index faa8694f96ef..bf9629ff5894 100644 --- a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h +++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h @@ -8,6 +8,7 @@ #include "include/rados/librados.hpp" #include "cls/journal/cls_journal_types.h" #include "librbd/journal/TypeTraits.h" +#include #include class Context; @@ -37,14 +38,16 @@ public: const std::string &global_image_id, ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock, - const std::string &mirror_uuid, + const std::string &local_mirror_uuid, + const std::string &remote_mirror_uuid, Journaler *journaler, MirrorPeerClientMeta *client_meta, Context *on_finish) { return new BootstrapRequest(local_io_ctx, remote_io_ctx, local_image_ctx, local_image_name, remote_image_id, global_image_id, work_queue, timer, timer_lock, - mirror_uuid, journaler, client_meta, on_finish); + local_mirror_uuid, remote_mirror_uuid, + journaler, client_meta, on_finish); } BootstrapRequest(librados::IoCtx &local_io_ctx, @@ -54,7 +57,8 @@ public: const std::string &remote_image_id, const std::string &global_image_id, ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock, - const std::string &mirror_uuid, Journaler *journaler, + const std::string &local_mirror_uuid, + const std::string &remote_mirror_uuid, Journaler *journaler, MirrorPeerClientMeta *client_meta, Context *on_finish); ~BootstrapRequest(); @@ -70,6 +74,9 @@ private: * GET_LOCAL_IMAGE_ID * * * * * * * * * * * * * | * * v * + * GET_REMOTE_TAG_CLASS * * * * * * * * * * * + * | * + * v * * GET_CLIENT * * * * * * * * * * * * * * * * * | * * v (skip if not needed) * (error) @@ -94,9 +101,12 @@ private: * | \-----------------/ * * | * * v (skip if not needed) * - * UPDATE_CLIENT * - * | * - * v (skip if not needed) * + * UPDATE_CLIENT * * * * * * * * * + * | * * + * v (skip if not needed) * * + * GET_REMOTE_TAGS * * * * * * * * + * | * * + * v (skip if not needed) v * * IMAGE_SYNC * * * > CLOSE_LOCAL_IMAGE * * | | * * | /-------------------/ * @@ -109,6 +119,8 @@ private: * * @endverbatim */ + typedef std::list Tags; + librados::IoCtx &m_local_io_ctx; librados::IoCtx &m_remote_io_ctx; ImageCtxT **m_local_image_ctx; @@ -119,13 +131,17 @@ private: ContextWQ *m_work_queue; SafeTimer *m_timer; Mutex *m_timer_lock; - std::string m_mirror_uuid; + std::string m_local_mirror_uuid; + std::string m_remote_mirror_uuid; Journaler *m_journaler; MirrorPeerClientMeta *m_client_meta; Context *m_on_finish; + Tags m_remote_tags; cls::journal::Client m_client; + uint64_t m_remote_tag_class = 0; ImageCtxT *m_remote_image_ctx = nullptr; + bool m_created_local_image = false; int m_ret_val = 0; bufferlist m_out_bl; @@ -133,6 +149,9 @@ private: void get_local_image_id(); void handle_get_local_image_id(int r); + void get_remote_tag_class(); + void handle_get_remote_tag_class(int r); + void get_client(); void handle_get_client(int r); @@ -154,6 +173,9 @@ private: void update_client(); void handle_update_client(int r); + void get_remote_tags(); + void handle_get_remote_tags(int r); + void image_sync(); void handle_image_sync(int r);