From: Jason Dillaman Date: Thu, 7 May 2020 20:25:50 +0000 (-0400) Subject: librbd: generic helper for tracking in-flight IO and managing flush requests X-Git-Tag: v17.0.0~2350^2~10 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=728057e492a697191d72049ce64b337fa4c80078;p=ceph.git librbd: generic helper for tracking in-flight IO and managing flush requests Layers that potentially queue IOs but not all IOs will need to track all in-flight IOs to properly ensure that a flush cannot complete while older IO is still stuck in a queue. Signed-off-by: Jason Dillaman --- diff --git a/src/librbd/CMakeLists.txt b/src/librbd/CMakeLists.txt index ff6a6990807e5..fe914202f3d6c 100644 --- a/src/librbd/CMakeLists.txt +++ b/src/librbd/CMakeLists.txt @@ -74,6 +74,7 @@ set(librbd_internal_srcs io/AioCompletion.cc io/AsyncOperation.cc io/CopyupRequest.cc + io/FlushTracker.cc io/ImageDispatch.cc io/ImageDispatchSpec.cc io/ImageDispatcher.cc diff --git a/src/librbd/io/FlushTracker.cc b/src/librbd/io/FlushTracker.cc new file mode 100644 index 0000000000000..0a0e1027e796e --- /dev/null +++ b/src/librbd/io/FlushTracker.cc @@ -0,0 +1,125 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/io/FlushTracker.h" +#include "common/dout.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::io::FlushTracker: " << this \ + << " " << __func__ << ": " + +namespace librbd { +namespace io { + +template +FlushTracker::FlushTracker(I* image_ctx) + : m_image_ctx(image_ctx), + m_lock(ceph::make_shared_mutex( + util::unique_lock_name("librbd::io::FlushTracker::m_lock", this))) { +} + +template +FlushTracker::~FlushTracker() { + std::unique_lock locker{m_lock}; + ceph_assert(m_flush_contexts.empty()); +} + +template +void FlushTracker::shut_down() { + auto cct = m_image_ctx->cct; + ldout(cct, 20) << dendl; + + std::unique_lock locker{m_lock}; + Contexts flush_ctxs; + for (auto& [flush_tid, ctxs] : m_flush_contexts) { + flush_ctxs.insert(flush_ctxs.end(), ctxs.begin(), ctxs.end()); + } + locker.unlock(); + + for (auto ctx : flush_ctxs) { + ctx->complete(0); + } +} + +template +uint64_t FlushTracker::start_io(uint64_t tid) { + auto cct = m_image_ctx->cct; + + std::unique_lock locker{m_lock}; + auto [it, inserted] = m_tid_to_flush_tid.insert({tid, ++m_next_flush_tid}); + auto flush_tid = it->second; + m_in_flight_flush_tids.insert(flush_tid); + locker.unlock(); + + ldout(cct, 20) << "tid=" << tid << ", flush_tid=" << flush_tid << dendl; + return flush_tid; +} + +template +void FlushTracker::finish_io(uint64_t tid) { + auto cct = m_image_ctx->cct; + + std::unique_lock locker{m_lock}; + auto tid_to_flush_tid_it = m_tid_to_flush_tid.find(tid); + if (tid_to_flush_tid_it == m_tid_to_flush_tid.end()) { + return; + } + + auto flush_tid = tid_to_flush_tid_it->second; + m_tid_to_flush_tid.erase(tid_to_flush_tid_it); + m_in_flight_flush_tids.erase(flush_tid); + + ldout(cct, 20) << "tid=" << tid << ", flush_tid=" << flush_tid << dendl; + auto oldest_flush_tid = std::numeric_limits::max(); + if (!m_in_flight_flush_tids.empty()) { + oldest_flush_tid = *m_in_flight_flush_tids.begin(); + } + + // all flushes tagged before the oldest tid should be completed + Contexts flush_ctxs; + auto flush_contexts_it = m_flush_contexts.begin(); + while (flush_contexts_it != m_flush_contexts.end()) { + if (flush_contexts_it->first >= oldest_flush_tid) { + ldout(cct, 20) << "pending IOs: [" << m_in_flight_flush_tids << "], " + << "pending flushes=" << m_flush_contexts << dendl; + break; + } + + auto& ctxs = flush_contexts_it->second; + flush_ctxs.insert(flush_ctxs.end(), ctxs.begin(), ctxs.end()); + flush_contexts_it = m_flush_contexts.erase(flush_contexts_it); + } + locker.unlock(); + + if (!flush_ctxs.empty()) { + ldout(cct, 20) << "completing flushes: " << flush_ctxs << dendl; + for (auto ctx : flush_ctxs) { + ctx->complete(0); + } + } +} + +template +void FlushTracker::flush(Context* on_finish) { + auto cct = m_image_ctx->cct; + + std::unique_lock locker{m_lock}; + if (m_in_flight_flush_tids.empty()) { + locker.unlock(); + on_finish->complete(0); + return; + } + + auto flush_tid = *m_in_flight_flush_tids.rbegin(); + m_flush_contexts[flush_tid].push_back(on_finish); + ldout(cct, 20) << "flush_tid=" << flush_tid << ", ctx=" << on_finish << ", " + << "flush_contexts=" << m_flush_contexts << dendl; +} + +} // namespace io +} // namespace librbd + +template class librbd::io::FlushTracker; diff --git a/src/librbd/io/FlushTracker.h b/src/librbd/io/FlushTracker.h new file mode 100644 index 0000000000000..cc7fcd9ae9a7f --- /dev/null +++ b/src/librbd/io/FlushTracker.h @@ -0,0 +1,61 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_IO_FLUSH_TRACKER_H +#define CEPH_LIBRBD_IO_FLUSH_TRACKER_H + +#include "include/int_types.h" +#include "common/ceph_mutex.h" +#include +#include +#include +#include +#include + +struct Context; + +namespace librbd { + +struct ImageCtx; + +namespace io { + +struct AioCompletion; + +template +class FlushTracker { +public: + FlushTracker(ImageCtxT* image_ctx); + ~FlushTracker(); + + void shut_down(); + + uint64_t start_io(uint64_t tid); + void finish_io(uint64_t tid); + + void flush(Context* on_finish); + +private: + typedef std::list Contexts; + typedef std::map FlushContexts; + typedef std::set Tids; + typedef std::unordered_map TidToFlushTid; + + ImageCtxT* m_image_ctx; + + std::atomic m_next_flush_tid{0}; + + mutable ceph::shared_mutex m_lock; + TidToFlushTid m_tid_to_flush_tid; + + Tids m_in_flight_flush_tids; + FlushContexts m_flush_contexts; + +}; + +} // namespace io +} // namespace librbd + +extern template class librbd::io::FlushTracker; + +#endif // CEPH_LIBRBD_IO_FLUSH_TRACKER_H