From eb119cf3e639c30ec355e9d4bbfa930621c1c0c9 Mon Sep 17 00:00:00 2001
From: Jason Dillaman
Date: Wed, 27 May 2020 16:23:35 -0400
Subject: [PATCH] librbd: create new write block image dispatch layer

The ability to block writes needs to occur after the refresh and
exclusive lock dispatch layers to prevent IO from getting stuck
when blocking writes.

Signed-off-by: Jason Dillaman
---
 src/librbd/CMakeLists.txt                |   1 +
 src/librbd/io/Types.h                    |   1 +
 src/librbd/io/WriteBlockImageDispatch.cc | 251 +++++++++++++++++++++++
 src/librbd/io/WriteBlockImageDispatch.h  | 111 ++++++++++
 4 files changed, 364 insertions(+)
 create mode 100644 src/librbd/io/WriteBlockImageDispatch.cc
 create mode 100644 src/librbd/io/WriteBlockImageDispatch.h

diff --git a/src/librbd/CMakeLists.txt b/src/librbd/CMakeLists.txt
index 49952dc25ce..8e802443029 100644
--- a/src/librbd/CMakeLists.txt
+++ b/src/librbd/CMakeLists.txt
@@ -90,6 +90,7 @@ set(librbd_internal_srcs
   io/RefreshImageDispatch.cc
   io/SimpleSchedulerObjectDispatch.cc
   io/Utils.cc
+  io/WriteBlockImageDispatch.cc
   journal/CreateRequest.cc
   journal/DemoteRequest.cc
   journal/ObjectDispatch.cc
diff --git a/src/librbd/io/Types.h b/src/librbd/io/Types.h
index 332167a657a..3d43bcc16ae 100644
--- a/src/librbd/io/Types.h
+++ b/src/librbd/io/Types.h
@@ -56,6 +56,7 @@ enum ImageDispatchLayer {
   IMAGE_DISPATCH_LAYER_REFRESH,
   IMAGE_DISPATCH_LAYER_INTERNAL_START = IMAGE_DISPATCH_LAYER_REFRESH,
   IMAGE_DISPATCH_LAYER_JOURNAL,
+  IMAGE_DISPATCH_LAYER_WRITE_BLOCK,
   IMAGE_DISPATCH_LAYER_WRITEBACK_CACHE,
   IMAGE_DISPATCH_LAYER_CORE,
   IMAGE_DISPATCH_LAYER_LAST
diff --git a/src/librbd/io/WriteBlockImageDispatch.cc b/src/librbd/io/WriteBlockImageDispatch.cc
new file mode 100644
index 00000000000..6cc86764278
--- /dev/null
+++ b/src/librbd/io/WriteBlockImageDispatch.cc
@@ -0,0 +1,251 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/io/WriteBlockImageDispatch.h"
+#include "common/dout.h"
+#include "common/Cond.h"
+#include "common/WorkQueue.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/ImageDispatchSpec.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::io::WriteBlockImageDispatch: " << this \
+                           << " " << __func__ << ": "
+
+namespace librbd {
+namespace io {
+
+template <typename I>
+WriteBlockImageDispatch<I>::WriteBlockImageDispatch(I* image_ctx)
+  : m_image_ctx(image_ctx),
+    m_lock(ceph::make_shared_mutex(
+      util::unique_lock_name("librbd::io::WriteBlockImageDispatch::m_lock",
+                             this))) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 5) << "ictx=" << image_ctx << dendl;
+}
+
+template <typename I>
+void WriteBlockImageDispatch<I>::shut_down(Context* on_finish) {
+  on_finish->complete(0);
+}
+
+template <typename I>
+int WriteBlockImageDispatch<I>::block_writes() {
+  C_SaferCond cond_ctx;
+  block_writes(&cond_ctx);
+  return cond_ctx.wait();
+}
+
+template <typename I>
+void WriteBlockImageDispatch<I>::block_writes(Context *on_blocked) {
+  ceph_assert(ceph_mutex_is_locked(m_image_ctx->owner_lock));
+  auto cct = m_image_ctx->cct;
+
+  // ensure owner lock is not held after block_writes completes
+  on_blocked = util::create_async_context_callback(
+    *m_image_ctx, on_blocked);
+
+  {
+    std::unique_lock locker{m_lock};
+    ++m_write_blockers;
+    ldout(cct, 5) << m_image_ctx << ", "
+                  << "num=" << m_write_blockers << dendl;
+    if (!m_write_blocker_contexts.empty() || !m_in_flight_write_tids.empty()) {
+      ldout(cct, 5) << "waiting for in-flight writes to complete: "
+                    << "write_tids=" << m_in_flight_write_tids << dendl;
+      m_write_blocker_contexts.push_back(on_blocked);
+      return;
+    }
+  }
+
+  flush_io(on_blocked);
+};
+
+template <typename I>
+void WriteBlockImageDispatch<I>::unblock_writes() {
+  auto cct = m_image_ctx->cct;
+
+  Contexts waiter_contexts;
+  Contexts dispatch_contexts;
+  {
+    std::unique_lock locker{m_lock};
+    ceph_assert(m_write_blockers > 0);
+    --m_write_blockers;
+
+    ldout(cct, 5) << m_image_ctx << ", "
+                  << "num=" << m_write_blockers << dendl;
+    if (m_write_blockers == 0) {
+      std::swap(waiter_contexts, m_unblocked_write_waiter_contexts);
+      std::swap(dispatch_contexts, m_on_dispatches);
+    }
+  }
+
+  for (auto ctx : waiter_contexts) {
+    ctx->complete(0);
+  }
+
+  for (auto ctx : dispatch_contexts) {
+    ctx->complete(0);
+  }
+}
+
+template <typename I>
+void WriteBlockImageDispatch<I>::wait_on_writes_unblocked(
+    Context *on_unblocked) {
+  ceph_assert(ceph_mutex_is_locked(m_image_ctx->owner_lock));
+  auto cct = m_image_ctx->cct;
+
+  {
+    std::unique_lock locker{m_lock};
+    ldout(cct, 20) << m_image_ctx << ", "
+                   << "write_blockers=" << m_write_blockers << dendl;
+    if (!m_unblocked_write_waiter_contexts.empty() || m_write_blockers > 0) {
+      m_unblocked_write_waiter_contexts.push_back(on_unblocked);
+      return;
+    }
+  }
+
+  on_unblocked->complete(0);
+}
+
+template <typename I>
+bool WriteBlockImageDispatch<I>::read(
+    AioCompletion* aio_comp, Extents &&image_extents, ReadResult &&read_result,
+    int op_flags, const ZTracer::Trace &parent_trace, uint64_t tid,
+    std::atomic<uint32_t>* image_dispatch_flags,
+    DispatchResult* dispatch_result, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "tid=" << tid << dendl;
+
+  return process_io(true, tid, dispatch_result, on_dispatched);
+}
+
+template <typename I>
+bool WriteBlockImageDispatch<I>::write(
+    AioCompletion* aio_comp, Extents &&image_extents, bufferlist &&bl,
+    int op_flags, const ZTracer::Trace &parent_trace, uint64_t tid,
+    std::atomic<uint32_t>* image_dispatch_flags,
+    DispatchResult* dispatch_result, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "tid=" << tid << dendl;
+
+  return process_io(false, tid, dispatch_result, on_dispatched);
+}
+
+template <typename I>
+bool WriteBlockImageDispatch<I>::discard(
+    AioCompletion* aio_comp, Extents &&image_extents,
+    uint32_t discard_granularity_bytes, const ZTracer::Trace &parent_trace,
+    uint64_t tid, std::atomic<uint32_t>* image_dispatch_flags,
+    DispatchResult* dispatch_result, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "tid=" << tid << dendl;
+
+  return process_io(false, tid, dispatch_result, on_dispatched);
+}
+
+template <typename I>
+bool WriteBlockImageDispatch<I>::write_same(
+    AioCompletion* aio_comp, Extents &&image_extents, bufferlist &&bl,
+    int op_flags, const ZTracer::Trace &parent_trace, uint64_t tid,
+    std::atomic<uint32_t>* image_dispatch_flags,
+    DispatchResult* dispatch_result, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "tid=" << tid << dendl;
+
+  return process_io(false, tid, dispatch_result, on_dispatched);
+}
+
+template <typename I>
+bool WriteBlockImageDispatch<I>::compare_and_write(
+    AioCompletion* aio_comp, Extents &&image_extents, bufferlist &&cmp_bl,
+    bufferlist &&bl, uint64_t *mismatch_offset, int op_flags,
+    const ZTracer::Trace &parent_trace, uint64_t tid,
+    std::atomic<uint32_t>* image_dispatch_flags,
+    DispatchResult* dispatch_result, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "tid=" << tid << dendl;
+
+  return process_io(false, tid, dispatch_result, on_dispatched);
+}
+
+template <typename I>
+bool WriteBlockImageDispatch<I>::flush(
+    AioCompletion* aio_comp, FlushSource flush_source,
+    const ZTracer::Trace &parent_trace, uint64_t tid,
+    std::atomic<uint32_t>* image_dispatch_flags,
+    DispatchResult* dispatch_result, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "tid=" << tid << dendl;
+
+  if (flush_source != FLUSH_SOURCE_USER) {
+    return false;
+  }
+
+  return process_io(false, tid, dispatch_result, on_dispatched);
+}
+
+template <typename I>
+void WriteBlockImageDispatch<I>::handle_finished(int r, uint64_t tid) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "r=" << r << ", tid=" << tid << dendl;
+
+  std::unique_lock locker{m_lock};
+  auto it = m_in_flight_write_tids.find(tid);
+  if (it == m_in_flight_write_tids.end()) {
+    // assumed to be a read op
+    return;
+  }
+  m_in_flight_write_tids.erase(it);
+
+  Contexts write_blocker_contexts;
+  if (m_in_flight_write_tids.empty()) {
+    std::swap(write_blocker_contexts, m_write_blocker_contexts);
+  }
+  locker.unlock();
+
+  for (auto ctx : write_blocker_contexts) {
+    ctx->complete(0);
+  }
+}
+
+template <typename I>
+bool WriteBlockImageDispatch<I>::process_io(
+    bool read_op, uint64_t tid, DispatchResult* dispatch_result,
+    Context* on_dispatched) {
+  std::unique_lock locker{m_lock};
+  if (!read_op) {
+    if (m_write_blockers > 0 || !m_on_dispatches.empty()) {
+      *dispatch_result = DISPATCH_RESULT_RESTART;
+      m_on_dispatches.push_back(on_dispatched);
+      return true;
+    }
+
+    m_in_flight_write_tids.insert(tid);
+  }
+
+  return false;
+}
+
+template <typename I>
+void WriteBlockImageDispatch<I>::flush_io(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  // ensure that all in-flight IO is flushed
+  auto aio_comp = AioCompletion::create_and_start(
+    on_finish, util::get_image_ctx(m_image_ctx), librbd::io::AIO_TYPE_FLUSH);
+  auto req = ImageDispatchSpec::create_flush(
+    *m_image_ctx, IMAGE_DISPATCH_LAYER_WRITE_BLOCK, aio_comp,
+    FLUSH_SOURCE_INTERNAL, {});
+  req->send();
+}
+
+} // namespace io
+} // namespace librbd
+
+template class librbd::io::WriteBlockImageDispatch<librbd::ImageCtx>;
diff --git a/src/librbd/io/WriteBlockImageDispatch.h b/src/librbd/io/WriteBlockImageDispatch.h
new file mode 100644
index 00000000000..2c8064ed4a9
--- /dev/null
+++ b/src/librbd/io/WriteBlockImageDispatch.h
@@ -0,0 +1,111 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_IO_WRITE_BLOCK_IMAGE_DISPATCH_H
+#define CEPH_LIBRBD_IO_WRITE_BLOCK_IMAGE_DISPATCH_H
+
+#include "librbd/io/ImageDispatchInterface.h"
+#include "include/int_types.h"
+#include "include/buffer.h"
+#include "common/ceph_mutex.h"
+#include "common/zipkin_trace.h"
+#include "common/Throttle.h"
+#include "librbd/io/ReadResult.h"
+#include "librbd/io/Types.h"
+#include <list>
+#include <set>
+
+struct Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace io {
+
+struct AioCompletion;
+
+template <typename ImageCtxT>
+class WriteBlockImageDispatch : public ImageDispatchInterface {
+public:
+  WriteBlockImageDispatch(ImageCtxT* image_ctx);
+
+  ImageDispatchLayer get_dispatch_layer() const override {
+    return IMAGE_DISPATCH_LAYER_WRITE_BLOCK;
+  }
+
+  void shut_down(Context* on_finish) override;
+
+  int block_writes();
+  void block_writes(Context *on_blocked);
+  void unblock_writes();
+
+  inline bool writes_blocked() const {
+    std::shared_lock locker{m_lock};
+    return (m_write_blockers > 0);
+  }
+
+  void wait_on_writes_unblocked(Context *on_unblocked);
+
+  bool read(
+      AioCompletion* aio_comp, Extents &&image_extents,
+      ReadResult &&read_result, int op_flags,
+      const ZTracer::Trace &parent_trace, uint64_t tid,
+      std::atomic<uint32_t>* image_dispatch_flags,
+      DispatchResult* dispatch_result, Context* on_dispatched) override;
+  bool write(
+      AioCompletion* aio_comp, Extents &&image_extents, bufferlist &&bl,
+      int op_flags, const ZTracer::Trace &parent_trace, uint64_t tid,
+      std::atomic<uint32_t>* image_dispatch_flags,
+      DispatchResult* dispatch_result, Context* on_dispatched) override;
+  bool discard(
+      AioCompletion* aio_comp, Extents &&image_extents,
+      uint32_t discard_granularity_bytes,
+      const ZTracer::Trace &parent_trace, uint64_t tid,
+      std::atomic<uint32_t>* image_dispatch_flags,
+      DispatchResult* dispatch_result, Context* on_dispatched) override;
+  bool write_same(
+      AioCompletion* aio_comp, Extents &&image_extents, bufferlist &&bl,
+      int op_flags, const ZTracer::Trace &parent_trace, uint64_t tid,
+      std::atomic<uint32_t>* image_dispatch_flags,
+      DispatchResult* dispatch_result, Context* on_dispatched) override;
+  bool compare_and_write(
+      AioCompletion* aio_comp, Extents &&image_extents, bufferlist &&cmp_bl,
+      bufferlist &&bl, uint64_t *mismatch_offset, int op_flags,
+      const ZTracer::Trace &parent_trace, uint64_t tid,
+      std::atomic<uint32_t>* image_dispatch_flags,
+      DispatchResult* dispatch_result, Context* on_dispatched) override;
+  bool flush(
+      AioCompletion* aio_comp, FlushSource flush_source,
+      const ZTracer::Trace &parent_trace, uint64_t tid,
+      std::atomic<uint32_t>* image_dispatch_flags,
+      DispatchResult* dispatch_result, Context* on_dispatched) override;
+
+  void handle_finished(int r, uint64_t tid) override;
+
+private:
+  typedef std::list<Context*> Contexts;
+  typedef std::set<uint64_t> Tids;
+
+  ImageCtxT* m_image_ctx;
+
+  mutable ceph::shared_mutex m_lock;
+  Contexts m_on_dispatches;
+  Tids m_in_flight_write_tids;
+
+  uint32_t m_write_blockers = 0;
+  Contexts m_write_blocker_contexts;
+  Contexts m_unblocked_write_waiter_contexts;
+
+  bool process_io(bool read_op, uint64_t tid, DispatchResult* dispatch_result,
+                  Context* on_dispatched);
+  void flush_io(Context* on_finish);
+
+};
+
+} // namespace io
+} // namespace librbd
+
+extern template class librbd::io::WriteBlockImageDispatch<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_IO_WRITE_BLOCK_IMAGE_DISPATCH_H
-- 
2.39.5