From: Jason Dillaman Date: Fri, 3 Feb 2017 02:31:54 +0000 (-0500) Subject: librbd: asynchronous journal demote state machine X-Git-Tag: v12.0.1~112^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=56b17f4165c854555e38a7398fd0162945b0f56d;p=ceph.git librbd: asynchronous journal demote state machine Signed-off-by: Jason Dillaman --- diff --git a/src/librbd/CMakeLists.txt b/src/librbd/CMakeLists.txt index 456c2d8f5829..dc5dda37014a 100644 --- a/src/librbd/CMakeLists.txt +++ b/src/librbd/CMakeLists.txt @@ -47,10 +47,11 @@ set(librbd_internal_srcs io/ImageRequestWQ.cc io/ObjectRequest.cc io/ReadResult.cc - journal/RemoveRequest.cc journal/CreateRequest.cc + journal/DemoteRequest.cc journal/OpenRequest.cc journal/PromoteRequest.cc + journal/RemoveRequest.cc journal/Replay.cc journal/StandardPolicy.cc journal/Utils.cc diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc index e75f3bfc75cc..476f3e00fb2a 100644 --- a/src/librbd/Journal.cc +++ b/src/librbd/Journal.cc @@ -2,25 +2,26 @@ // vim: ts=8 sw=2 smarttab #include "librbd/Journal.h" -#include "librbd/ExclusiveLock.h" -#include "librbd/ImageCtx.h" +#include "include/rados/librados.hpp" +#include "common/errno.h" +#include "common/Timer.h" +#include "common/WorkQueue.h" #include "cls/journal/cls_journal_types.h" #include "journal/Journaler.h" #include "journal/Policy.h" #include "journal/ReplayEntry.h" #include "journal/Settings.h" #include "journal/Utils.h" -#include "common/errno.h" -#include "common/Timer.h" -#include "common/WorkQueue.h" -#include "include/rados/librados.hpp" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" #include "librbd/io/ImageRequestWQ.h" #include "librbd/io/ObjectRequest.h" #include "librbd/journal/CreateRequest.h" +#include "librbd/journal/DemoteRequest.h" #include "librbd/journal/OpenRequest.h" -#include "librbd/journal/PromoteRequest.h" #include "librbd/journal/RemoveRequest.h" #include "librbd/journal/Replay.h" +#include "librbd/journal/PromoteRequest.h" #include #include @@ -557,6 +558,18 @@ int Journal::promote(I *image_ctx) { return ctx.wait(); } +template +int Journal::demote(I *image_ctx) { + CephContext *cct = image_ctx->cct; + ldout(cct, 20) << __func__ << dendl; + + C_SaferCond ctx; + auto req = journal::DemoteRequest::create(*image_ctx, &ctx); + req->send(); + + return ctx.wait(); +} + template bool Journal::is_journal_ready() const { Mutex::Locker locker(m_lock); @@ -672,87 +685,6 @@ journal::TagData Journal::get_tag_data() const { return m_tag_data; } -template -int Journal::demote() { - CephContext *cct = m_image_ctx.cct; - ldout(cct, 20) << __func__ << dendl; - - int r; - C_SaferCond ctx; - Future future; - C_SaferCond flush_ctx; - - { - Mutex::Locker locker(m_lock); - assert(m_journaler != nullptr && is_tag_owner(m_lock)); - - cls::journal::Client client; - r = m_journaler->get_cached_client(IMAGE_CLIENT_ID, &client); - if (r < 0) { - lderr(cct) << this << " " << __func__ << ": " - << "failed to retrieve client: " << cpp_strerror(r) << dendl; - return r; - } - - assert(m_tag_data.mirror_uuid == LOCAL_MIRROR_UUID); - journal::TagPredecessor predecessor; - predecessor.mirror_uuid = LOCAL_MIRROR_UUID; - if (!client.commit_position.object_positions.empty()) { - auto position = client.commit_position.object_positions.front(); - predecessor.commit_valid = true; - predecessor.tag_tid = position.tag_tid; - predecessor.entry_tid = position.entry_tid; - } - - cls::journal::Tag new_tag; - r = allocate_journaler_tag(cct, m_journaler, m_tag_class, predecessor, - ORPHAN_MIRROR_UUID, &new_tag); - if (r < 0) { - return r; - } - - bufferlist::iterator tag_data_bl_it = new_tag.data.begin(); - r = C_DecodeTag::decode(&tag_data_bl_it, &m_tag_data); - if (r < 0) { - lderr(cct) << this << " " << __func__ << ": " - << "failed to decode newly allocated tag" << dendl; - return r; - } - - journal::EventEntry event_entry{journal::DemoteEvent{}, ceph_clock_now()}; - bufferlist event_entry_bl; - ::encode(event_entry, event_entry_bl); - - m_tag_tid = new_tag.tid; - future = m_journaler->append(m_tag_tid, event_entry_bl); - future.flush(&ctx); - } - - r = ctx.wait(); - if (r < 0) { - lderr(cct) << this << " " << __func__ << ": " - << "failed to append demotion journal event: " << cpp_strerror(r) - << dendl; - return r; - } - - { - Mutex::Locker l(m_lock); - m_journaler->committed(future); - m_journaler->flush_commit_position(&flush_ctx); - } - - r = flush_ctx.wait(); - if (r < 0) { - lderr(cct) << this << " " << __func__ << ": " - << "failed to flush demotion commit position: " - << cpp_strerror(r) << dendl; - return r; - } - - return 0; -} - template void Journal::allocate_local_tag(Context *on_finish) { CephContext *cct = m_image_ctx.cct; diff --git a/src/librbd/Journal.h b/src/librbd/Journal.h index 022ba9c28326..6a60826dc971 100644 --- a/src/librbd/Journal.h +++ b/src/librbd/Journal.h @@ -115,6 +115,7 @@ public: std::string *mirror_uuid); static int request_resync(ImageCtxT *image_ctx); static int promote(ImageCtxT *image_ctx); + static int demote(ImageCtxT *image_ctx); bool is_journal_ready() const; bool is_journal_replaying() const; @@ -128,7 +129,6 @@ public: bool is_tag_owner() const; uint64_t get_tag_tid() const; journal::TagData get_tag_data() const; - int demote(); void allocate_local_tag(Context *on_finish); void allocate_tag(const std::string &mirror_uuid, diff --git a/src/librbd/api/Mirror.cc b/src/librbd/api/Mirror.cc index dfed5d7d38fa..81386af70d21 100644 --- a/src/librbd/api/Mirror.cc +++ b/src/librbd/api/Mirror.cc @@ -374,7 +374,7 @@ int Mirror::image_demote(I *ictx) { return -EINVAL; } - r = ictx->journal->demote(); + r = Journal::demote(ictx); if (r < 0) { lderr(cct) << "failed to demote image: " << cpp_strerror(r) << dendl; diff --git a/src/librbd/journal/DemoteRequest.cc b/src/librbd/journal/DemoteRequest.cc new file mode 100644 index 000000000000..c41961d73bab --- /dev/null +++ b/src/librbd/journal/DemoteRequest.cc @@ -0,0 +1,255 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/journal/DemoteRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "journal/Journaler.h" +#include "journal/Settings.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/journal/OpenRequest.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::journal::DemoteRequest: " << this \ + << " " << __func__ << ": " + +namespace librbd { +namespace journal { + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; + +template +DemoteRequest::DemoteRequest(I &image_ctx, Context *on_finish) + : m_image_ctx(image_ctx), m_on_finish(on_finish), + m_lock("DemoteRequest::m_lock") { +} + +template +DemoteRequest::~DemoteRequest() { + assert(m_journaler == nullptr); +} + +template +void DemoteRequest::send() { + open_journaler(); +} + +template +void DemoteRequest::open_journaler() { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << dendl; + + m_journaler = new Journaler(m_image_ctx.md_ctx, m_image_ctx.id, + Journal<>::IMAGE_CLIENT_ID, {}); + auto ctx = create_async_context_callback( + m_image_ctx, create_context_callback< + DemoteRequest, &DemoteRequest::handle_open_journaler>(this)); + auto req = OpenRequest::create(&m_image_ctx, m_journaler, &m_lock, + &m_client_meta, &m_tag_tid, &m_tag_data, + ctx); + req->send(); +} + +template +void DemoteRequest::handle_open_journaler(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "r=" << r << dendl; + + if (r < 0) { + m_ret_val = r; + lderr(cct) << "failed to open journal: " << cpp_strerror(r) << dendl; + shut_down_journaler(); + return; + } else if (m_tag_data.mirror_uuid != Journal<>::LOCAL_MIRROR_UUID) { + m_ret_val = -EINVAL; + lderr(cct) << "image is not currently the primary" << dendl; + shut_down_journaler(); + return; + } + + allocate_tag(); +} + +template +void DemoteRequest::allocate_tag() { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << dendl; + + cls::journal::Client client; + int r = m_journaler->get_cached_client(Journal<>::IMAGE_CLIENT_ID, &client); + if (r < 0) { + m_ret_val = r; + lderr(cct) << "failed to retrieve client: " << cpp_strerror(r) << dendl; + shut_down_journaler(); + return; + } + + TagPredecessor predecessor; + predecessor.mirror_uuid = Journal<>::LOCAL_MIRROR_UUID; + if (!client.commit_position.object_positions.empty()) { + auto position = client.commit_position.object_positions.front(); + predecessor.commit_valid = true; + predecessor.tag_tid = position.tag_tid; + predecessor.entry_tid = position.entry_tid; + } + + TagData tag_data; + tag_data.mirror_uuid = Journal<>::ORPHAN_MIRROR_UUID; + tag_data.predecessor = std::move(predecessor); + + bufferlist tag_bl; + ::encode(tag_data, tag_bl); + + auto ctx = create_context_callback< + DemoteRequest, &DemoteRequest::handle_allocate_tag>(this); + m_journaler->allocate_tag(m_client_meta.tag_class, tag_bl, &m_tag, ctx); +} + +template +void DemoteRequest::handle_allocate_tag(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "r=" << r << dendl; + + if (r < 0) { + m_ret_val = r; + lderr(cct) << "failed to allocate tag: " << cpp_strerror(r) << dendl; + shut_down_journaler(); + return; + } + + m_tag_tid = m_tag.tid; + append_event(); +} + +template +void DemoteRequest::append_event() { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << dendl; + + EventEntry event_entry{DemoteEvent{}, ceph_clock_now()}; + bufferlist event_entry_bl; + ::encode(event_entry, event_entry_bl); + + m_journaler->start_append(0, 0, 0); + m_future = m_journaler->append(m_tag_tid, event_entry_bl); + + auto ctx = create_context_callback< + DemoteRequest, &DemoteRequest::handle_append_event>(this); + m_future.flush(ctx); + +} + +template +void DemoteRequest::handle_append_event(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "r=" << r << dendl; + + if (r < 0) { + m_ret_val = r; + lderr(cct) << "failed to append demotion journal event: " << cpp_strerror(r) + << dendl; + stop_append(); + return; + } + + commit_event(); +} + +template +void DemoteRequest::commit_event() { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << dendl; + + m_journaler->committed(m_future); + + auto ctx = create_context_callback< + DemoteRequest, &DemoteRequest::handle_commit_event>(this); + m_journaler->flush_commit_position(ctx); +} + +template +void DemoteRequest::handle_commit_event(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "r=" << r << dendl; + + if (r < 0) { + m_ret_val = r; + lderr(cct) << "failed to flush demotion commit position: " + << cpp_strerror(r) << dendl; + } + + stop_append(); +} + +template +void DemoteRequest::stop_append() { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << dendl; + + auto ctx = create_context_callback< + DemoteRequest, &DemoteRequest::handle_stop_append>(this); + m_journaler->stop_append(ctx); +} + +template +void DemoteRequest::handle_stop_append(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "r=" << r << dendl; + + if (r < 0) { + if (m_ret_val == 0) { + m_ret_val = r; + } + lderr(cct) << "failed to stop journal append: " << cpp_strerror(r) << dendl; + } + + shut_down_journaler(); +} + +template +void DemoteRequest::shut_down_journaler() { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << dendl; + + Context *ctx = create_async_context_callback( + m_image_ctx, create_context_callback< + DemoteRequest, &DemoteRequest::handle_shut_down_journaler>(this)); + m_journaler->shut_down(ctx); +} + +template +void DemoteRequest::handle_shut_down_journaler(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to shut down journal: " << cpp_strerror(r) << dendl; + } + + delete m_journaler; + m_journaler = nullptr; + finish(r); +} + +template +void DemoteRequest::finish(int r) { + if (m_ret_val < 0) { + r = m_ret_val; + } + + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace journal +} // namespace librbd + +template class librbd::journal::DemoteRequest; diff --git a/src/librbd/journal/DemoteRequest.h b/src/librbd/journal/DemoteRequest.h new file mode 100644 index 000000000000..5fea7f47b301 --- /dev/null +++ b/src/librbd/journal/DemoteRequest.h @@ -0,0 +1,107 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_JOURNAL_DEMOTE_REQUEST_H +#define CEPH_LIBRBD_JOURNAL_DEMOTE_REQUEST_H + +#include "common/Mutex.h" +#include "cls/journal/cls_journal_types.h" +#include "journal/Future.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" + +struct Context; + +namespace librbd { + +struct ImageCtx; + +namespace journal { + +template +class DemoteRequest { +public: + static DemoteRequest *create(ImageCtxT &image_ctx, Context *on_finish) { + return new DemoteRequest(image_ctx, on_finish); + } + + DemoteRequest(ImageCtxT &image_ctx, Context *on_finish); + ~DemoteRequest(); + + void send(); + +private: + /** + * @verbatim + * + * + * | + * v + * OPEN_JOURNALER * * * * * + * | * + * v * + * ALLOCATE_TAG * * * * * * + * | * + * v * + * APPEND_EVENT * * * * + * | * * + * v * * + * COMMIT_EVENT * * + * | * * + * v * * + * STOP_APPEND <* * * * + * | * + * v * + * SHUT_DOWN_JOURNALER <* * + * | + * v + * + * + * @endverbatim + */ + + typedef typename TypeTraits::Journaler Journaler; + typedef typename TypeTraits::Future Future; + + ImageCtxT &m_image_ctx; + Context *m_on_finish; + + Journaler *m_journaler = nullptr; + int m_ret_val = 0; + + Mutex m_lock; + ImageClientMeta m_client_meta; + uint64_t m_tag_tid = 0; + TagData m_tag_data; + + cls::journal::Tag m_tag; + Future m_future; + + void open_journaler(); + void handle_open_journaler(int r); + + void allocate_tag(); + void handle_allocate_tag(int r); + + void append_event(); + void handle_append_event(int r); + + void commit_event(); + void handle_commit_event(int r); + + void stop_append(); + void handle_stop_append(int r); + + void shut_down_journaler(); + void handle_shut_down_journaler(int r); + + void finish(int r); + +}; + +} // namespace journal +} // namespace librbd + +extern template class librbd::journal::DemoteRequest; + +#endif // CEPH_LIBRBD_JOURNAL_DEMOTE_REQUEST_H