From: Jason Dillaman Date: Thu, 5 Feb 2015 06:20:00 +0000 (-0500) Subject: librbd: consolidate all async operation flush logic X-Git-Tag: v0.93~71 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b025fbfa22416ea05fe48a09d20cf22672fd3fc8;p=ceph.git librbd: consolidate all async operation flush logic librbd has three different methods for flushing asynchronous operations. These have all been consolidated down to a single, shared method to fix an existing issue and simplify maintenance going forward. Fixes: #10783 Signed-off-by: Jason Dillaman Reviewed-by: Josh Durgin --- diff --git a/src/librbd/AioCompletion.cc b/src/librbd/AioCompletion.cc index eedfced96e93..4ead667d6dbf 100644 --- a/src/librbd/AioCompletion.cc +++ b/src/librbd/AioCompletion.cc @@ -89,12 +89,7 @@ namespace librbd { break; } - { - Mutex::Locker l(ictx->aio_lock); - assert(ictx->pending_aio != 0); - --ictx->pending_aio; - ictx->pending_aio_cond.Signal(); - } + async_op.finish_op(); if (complete_cb) { complete_cb(rbd_comp, complete_arg); diff --git a/src/librbd/AioCompletion.h b/src/librbd/AioCompletion.h index e1924d6ca47d..6f6d74357aab 100644 --- a/src/librbd/AioCompletion.h +++ b/src/librbd/AioCompletion.h @@ -11,6 +11,7 @@ #include "include/utime.h" #include "include/rbd/librbd.hpp" +#include "librbd/AsyncOperation.h" #include "librbd/ImageCtx.h" #include "librbd/internal.h" @@ -61,6 +62,8 @@ namespace librbd { char *read_buf; size_t read_buf_len; + AsyncOperation async_op; + AioCompletion() : lock("AioCompletion::lock", true), done(false), rval(0), complete_cb(NULL), complete_arg(NULL), rbd_comp(NULL), @@ -91,8 +94,7 @@ namespace librbd { aio_type = t; start_time = ceph_clock_now(ictx->cct); - Mutex::Locker l(ictx->aio_lock); - ++ictx->pending_aio; + async_op.start_op(*ictx); } } diff --git a/src/librbd/AsyncOperation.cc b/src/librbd/AsyncOperation.cc new file mode 100644 index 000000000000..cdb131067956 --- /dev/null +++ b/src/librbd/AsyncOperation.cc @@ -0,0 +1,44 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#include "librbd/AsyncOperation.h" +#include "librbd/ImageCtx.h" +#include "common/dout.h" +#include "include/assert.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::AsyncOperation: " + +namespace librbd { + +void AsyncOperation::start_op(ImageCtx &image_ctx) { + assert(m_image_ctx == NULL); + m_image_ctx = &image_ctx; + + ldout(m_image_ctx->cct, 20) << this << " " << __func__ << dendl; + Mutex::Locker l(m_image_ctx->async_ops_lock); + m_image_ctx->async_ops.push_back(&m_xlist_item); +} + +void AsyncOperation::finish_op() { + ldout(m_image_ctx->cct, 20) << this << " " << __func__ << dendl; + { + Mutex::Locker l(m_image_ctx->async_ops_lock); + assert(m_xlist_item.remove_myself()); + } + + while (!m_flush_contexts.empty()) { + Context *flush_ctx = m_flush_contexts.front(); + m_flush_contexts.pop_front(); + + ldout(m_image_ctx->cct, 20) << "completed flush: " << flush_ctx << dendl; + flush_ctx->complete(0); + } +} + +void AsyncOperation::add_flush_context(Context *on_finish) { + assert(m_image_ctx->async_ops_lock.is_locked()); + m_flush_contexts.push_back(on_finish); +} + +} // namespace librbd diff --git a/src/librbd/AsyncOperation.h b/src/librbd/AsyncOperation.h new file mode 100644 index 000000000000..2ca2aec9e22c --- /dev/null +++ b/src/librbd/AsyncOperation.h @@ -0,0 +1,44 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef LIBRBD_ASYNC_OPERATION_H +#define LIBRBD_ASYNC_OPERATION_H + +#include "include/assert.h" +#include "include/xlist.h" +#include + +class Context; + +namespace librbd { + +class ImageCtx; + +class AsyncOperation { +public: + + AsyncOperation() + : m_image_ctx(NULL), m_xlist_item(this) + { + } + + ~AsyncOperation() + { + assert(!m_xlist_item.is_on_list()); + } + + void start_op(ImageCtx &image_ctx); + void finish_op(); + + void add_flush_context(Context *on_finish); + +private: + + ImageCtx *m_image_ctx; + xlist::item m_xlist_item; + std::list m_flush_contexts; + +}; + +} // namespace librbd + +#endif // LIBRBD_ASYNC_OPERATION_H diff --git a/src/librbd/CopyupRequest.cc b/src/librbd/CopyupRequest.cc index fa4fe934ba8b..49fd599cdb1a 100644 --- a/src/librbd/CopyupRequest.cc +++ b/src/librbd/CopyupRequest.cc @@ -27,10 +27,12 @@ namespace librbd { : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_image_extents(image_extents), m_state(STATE_READ_FROM_PARENT) { + m_async_op.start_op(*m_ictx); } CopyupRequest::~CopyupRequest() { assert(m_pending_requests.empty()); + m_async_op.finish_op(); } ceph::bufferlist& CopyupRequest::get_copyup_data() { @@ -164,10 +166,6 @@ namespace librbd { m_ictx->copyup_list.find(m_object_no); assert(it != m_ictx->copyup_list.end()); m_ictx->copyup_list.erase(it); - - if (m_ictx->copyup_list.empty()) { - m_ictx->copyup_list_cond.Signal(); - } } bool CopyupRequest::send_object_map() { diff --git a/src/librbd/CopyupRequest.h b/src/librbd/CopyupRequest.h index 6ec13071bbb9..92714c2bc9a4 100644 --- a/src/librbd/CopyupRequest.h +++ b/src/librbd/CopyupRequest.h @@ -3,6 +3,7 @@ #ifndef CEPH_LIBRBD_COPYUPREQUEST_H #define CEPH_LIBRBD_COPYUPREQUEST_H +#include "librbd/AsyncOperation.h" #include "include/int_types.h" #include "common/Mutex.h" @@ -54,6 +55,8 @@ namespace librbd { ceph::bufferlist m_copyup_data; vector m_pending_requests; + AsyncOperation m_async_op; + bool complete_requests(int r); void complete(int r); diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index 9882634108da..116cb92aa8a1 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -7,8 +7,8 @@ #include "common/errno.h" #include "common/perf_counters.h" +#include "librbd/AsyncOperation.h" #include "librbd/internal.h" - #include "librbd/ImageCtx.h" #include "librbd/ImageWatcher.h" #include "librbd/ObjectMap.h" @@ -48,9 +48,8 @@ namespace librbd { parent_lock("librbd::ImageCtx::parent_lock"), refresh_lock("librbd::ImageCtx::refresh_lock"), object_map_lock("librbd::ImageCtx::object_map_lock"), - aio_lock("librbd::ImageCtx::aio_lock"), + async_ops_lock("librbd::ImageCtx::async_ops_lock"), copyup_list_lock("librbd::ImageCtx::copyup_list_lock"), - copyup_list_cond(), extra_read_flags(0), old_format(true), order(0), size(0), features(0), @@ -60,7 +59,7 @@ namespace librbd { object_cacher(NULL), writeback_handler(NULL), object_set(NULL), readahead(), total_bytes_read(0), copyup_finisher(NULL), - pending_aio(0), object_map(NULL) + object_map(NULL) { md_ctx.dup(p); data_ctx.dup(p); @@ -612,6 +611,7 @@ namespace librbd { int ImageCtx::invalidate_cache() { if (!object_cacher) return 0; + flush_async_operations(); cache_lock.Lock(); object_cacher->release_set(object_set); cache_lock.Unlock(); @@ -623,7 +623,6 @@ namespace librbd { } else if (r) { lderr(cct) << "flush_cache returned " << r << dendl; } - wait_for_pending_aio(); cache_lock.Lock(); loff_t unclean = object_cacher->release_set(object_set); cache_lock.Unlock(); @@ -694,18 +693,21 @@ namespace librbd { return len; } - void ImageCtx::wait_for_pending_aio() { - Mutex::Locker l(aio_lock); - while (pending_aio > 0) { - pending_aio_cond.Wait(aio_lock); - } + void ImageCtx::flush_async_operations() { + C_SaferCond *ctx = new C_SaferCond(); + flush_async_operations(ctx); + ctx->wait(); } - void ImageCtx::wait_for_pending_copyup() { - Mutex::Locker l(copyup_list_lock); - while (!copyup_list.empty()) { - ldout(cct, 20) << __func__ << " waiting CopyupRequest to be completed" << dendl; - copyup_list_cond.Wait(copyup_list_lock); + void ImageCtx::flush_async_operations(Context *on_finish) { + Mutex::Locker l(async_ops_lock); + if (async_ops.empty()) { + on_finish->complete(0); + return; } + + ldout(cct, 20) << "flush async operations: " << on_finish << " " + << "count=" << async_ops.size() << dendl; + async_ops.back()->add_flush_context(on_finish); } } diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 13a5bfc2fffb..9b401163fc7e 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -20,6 +20,7 @@ #include "include/rbd/librbd.hpp" #include "include/rbd_types.h" #include "include/types.h" +#include "include/xlist.h" #include "osdc/ObjectCacher.h" #include "cls/rbd/cls_rbd_client.h" @@ -33,8 +34,9 @@ class PerfCounters; namespace librbd { - class ImageWatcher; + class AsyncOperation; class CopyupRequest; + class ImageWatcher; class ObjectMap; struct ImageCtx { @@ -67,7 +69,7 @@ namespace librbd { /** * Lock ordering: * owner_lock, md_lock, cache_lock, snap_lock, parent_lock, refresh_lock, - * object_map_lock, aio_lock + * object_map_lock, async_op_lock */ RWLock owner_lock; // protects exclusive lock leadership updates RWLock md_lock; // protects access to the mutable image metadata that @@ -78,11 +80,9 @@ namespace librbd { RWLock parent_lock; // protects parent_md and parent Mutex refresh_lock; // protects refresh_seq and last_refresh RWLock object_map_lock; // protects object map updates - Mutex aio_lock; // protects pending_aio and pending_aio_cond + Mutex async_ops_lock; // protects async_ops Mutex copyup_list_lock; // protects copyup_waiting_list - Cond copyup_list_cond; // protected by copyup_waiting_list_lock - unsigned extra_read_flags; bool old_format; @@ -110,8 +110,7 @@ namespace librbd { Finisher *copyup_finisher; std::map copyup_list; - Cond pending_aio_cond; - uint64_t pending_aio; + xlist async_ops; ObjectMap *object_map; @@ -182,8 +181,9 @@ namespace librbd { librados::snap_t in_snap_id); uint64_t prune_parent_extents(vector >& objectx, uint64_t overlap); - void wait_for_pending_aio(); - void wait_for_pending_copyup(); + + void flush_async_operations(); + void flush_async_operations(Context *on_finish); }; } diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index a777f92246a7..731f14dae408 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -72,7 +72,7 @@ ImageWatcher::ImageWatcher(ImageCtx &image_ctx) m_async_request_lock("librbd::ImageWatcher::m_async_request_lock"), m_async_request_id(0), m_aio_request_lock("librbd::ImageWatcher::m_aio_request_lock"), - m_retrying_aio_requests(false), m_retry_aio_context(NULL) + m_retry_aio_context(NULL) { m_finisher->start(); m_timer->init(); @@ -144,30 +144,6 @@ int ImageWatcher::unregister_watch() { return r; } -bool ImageWatcher::has_pending_aio_operations() { - Mutex::Locker l(m_aio_request_lock); - return !m_aio_requests.empty(); -} - -void ImageWatcher::flush_aio_operations() { - C_SaferCond *ctx = new C_SaferCond(); - flush_aio_operations(ctx); - ctx->wait(); -} - -void ImageWatcher::flush_aio_operations(Context *ctx) { - Mutex::Locker l(m_aio_request_lock); - if (!m_retrying_aio_requests && m_aio_requests.empty()) { - ctx->complete(0); - return; - } - - ldout(m_image_ctx.cct, 20) << "pending flush: " << ctx << " " - << "retrying=" << m_retrying_aio_requests << ", " - << "count=" << m_aio_requests.size() << dendl; - m_aio_flush_contexts.push_back(ctx); -} - int ImageWatcher::try_lock() { assert(m_image_ctx.owner_lock.is_wlocked()); assert(m_lock_owner_state == LOCK_OWNER_STATE_NOT_LOCKED); @@ -567,9 +543,7 @@ void ImageWatcher::retry_aio_requests() { std::vector lock_request_restarts; { Mutex::Locker l(m_aio_request_lock); - assert(!m_retrying_aio_requests); lock_request_restarts.swap(m_aio_requests); - m_retrying_aio_requests = true; } for (std::vector::iterator iter = lock_request_restarts.begin(); @@ -580,7 +554,6 @@ void ImageWatcher::retry_aio_requests() { } Mutex::Locker l(m_aio_request_lock); - m_retrying_aio_requests = false; while (!m_aio_flush_contexts.empty()) { Context *flush_ctx = m_aio_flush_contexts.front(); m_aio_flush_contexts.pop_front(); diff --git a/src/librbd/ImageWatcher.h b/src/librbd/ImageWatcher.h index 0261b1307701..343722aa883a 100644 --- a/src/librbd/ImageWatcher.h +++ b/src/librbd/ImageWatcher.h @@ -56,10 +56,6 @@ namespace librbd { int register_watch(); int unregister_watch(); - bool has_pending_aio_operations(); - void flush_aio_operations(); - void flush_aio_operations(Context *ctx); - int try_lock(); int request_lock(const boost::function& restart_op, AioCompletion* c); @@ -176,7 +172,6 @@ namespace librbd { Mutex m_aio_request_lock; std::list m_aio_flush_contexts; std::vector m_aio_requests; - bool m_retrying_aio_requests; Context *m_retry_aio_context; std::string encode_lock_cookie() const; diff --git a/src/librbd/Makefile.am b/src/librbd/Makefile.am index 2836c696da03..b94061edf167 100644 --- a/src/librbd/Makefile.am +++ b/src/librbd/Makefile.am @@ -3,6 +3,7 @@ librbd_internal_la_SOURCES = \ librbd/AioRequest.cc \ librbd/AsyncFlattenRequest.cc \ librbd/AsyncObjectThrottle.cc \ + librbd/AsyncOperation.cc \ librbd/AsyncRequest.cc \ librbd/AsyncResizeRequest.cc \ librbd/AsyncTrimRequest.cc \ @@ -44,6 +45,7 @@ noinst_HEADERS += \ librbd/AioRequest.h \ librbd/AsyncFlattenRequest.h \ librbd/AsyncObjectThrottle.h \ + librbd/AsyncOperation.h \ librbd/AsyncRequest.h \ librbd/AsyncResizeRequest.h \ librbd/AsyncTrimRequest.h \ diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index a6df0b9d94f5..2049553cbfa5 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -1665,7 +1665,7 @@ reprotect_and_return_err: RWLock::RLocker l(ictx->md_lock); original_size = ictx->size; if (size < ictx->size) { - ictx->wait_for_pending_copyup(); + ictx->flush_async_operations(); if (ictx->object_cacher) { // need to invalidate since we're deleting objects, and // ObjectCacher doesn't track non-existent objects @@ -2299,10 +2299,7 @@ reprotect_and_return_err: // ignore return value, since we may be set to a non-existent // snapshot and the user is trying to fix that ictx_check(ictx); - ictx->wait_for_pending_copyup(); - if (ictx->image_watcher != NULL) { - ictx->image_watcher->flush_aio_operations(); - } + ictx->flush_async_operations(); if (ictx->object_cacher) { // complete pending writes before we're set to a snapshot and // get -EROFS for writes @@ -2369,9 +2366,6 @@ reprotect_and_return_err: ldout(ictx->cct, 20) << "close_image " << ictx << dendl; ictx->readahead.wait_for_pending(); - if (ictx->image_watcher != NULL) { - ictx->image_watcher->flush_aio_operations(); - } if (ictx->object_cacher) { ictx->shutdown_cache(); // implicitly flushes } else { @@ -2382,7 +2376,6 @@ reprotect_and_return_err: ictx->copyup_finisher->wait_for_empty(); ictx->copyup_finisher->stop(); } - ictx->wait_for_pending_copyup(); if (ictx->parent) { close_image(ictx->parent); @@ -3099,20 +3092,15 @@ reprotect_and_return_err: return r; } - if (ictx->image_watcher != NULL) { - ictx->image_watcher->flush_aio_operations(); - } ictx->user_flushed(); c->get(); - c->init_time(ictx, AIO_TYPE_FLUSH); - if (ictx->image_watcher != NULL) { - C_AioWrite *flush_ctx = new C_AioWrite(cct, c); - c->add_request(); - ictx->image_watcher->flush_aio_operations(flush_ctx); - } + C_AioWrite *flush_ctx = new C_AioWrite(cct, c); + c->add_request(); + ictx->flush_async_operations(flush_ctx); + c->init_time(ictx, AIO_TYPE_FLUSH); C_AioWrite *req_comp = new C_AioWrite(cct, c); c->add_request(); if (ictx->object_cacher) { @@ -3148,10 +3136,6 @@ reprotect_and_return_err: int _flush(ImageCtx *ictx) { - if (ictx->image_watcher != NULL) { - ictx->image_watcher->flush_aio_operations(); - } - CephContext *cct = ictx->cct; int r; // flush any outstanding writes @@ -3159,7 +3143,7 @@ reprotect_and_return_err: r = ictx->flush_cache(); } else { r = ictx->data_ctx.aio_flush(); - ictx->wait_for_pending_aio(); + ictx->flush_async_operations(); } if (r) @@ -3178,9 +3162,7 @@ reprotect_and_return_err: return r; } - if (ictx->image_watcher != NULL) { - ictx->image_watcher->flush_aio_operations(); - } + ictx->flush_async_operations(); RWLock::WLocker l(ictx->md_lock); r = ictx->invalidate_cache(); diff --git a/src/test/librbd/test_internal.cc b/src/test/librbd/test_internal.cc index f74e83ec769e..2831db5f86aa 100644 --- a/src/test/librbd/test_internal.cc +++ b/src/test/librbd/test_internal.cc @@ -1,6 +1,7 @@ // -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab #include "test/librbd/test_fixture.h" +#include "librbd/AioCompletion.h" #include "librbd/ImageWatcher.h" #include "librbd/internal.h" #include @@ -260,8 +261,7 @@ TEST_F(TestInternal, AioWriteRequestsLock) { bool is_owner; ASSERT_EQ(0, librbd::is_exclusive_lock_owner(ictx, &is_owner)); ASSERT_FALSE(is_owner); - - ASSERT_TRUE(ictx->image_watcher->has_pending_aio_operations()); + ASSERT_FALSE(c->is_complete()); } TEST_F(TestInternal, AioDiscardRequestsLock) { @@ -279,6 +279,5 @@ TEST_F(TestInternal, AioDiscardRequestsLock) { bool is_owner; ASSERT_EQ(0, librbd::is_exclusive_lock_owner(ictx, &is_owner)); ASSERT_FALSE(is_owner); - - ASSERT_TRUE(ictx->image_watcher->has_pending_aio_operations()); + ASSERT_FALSE(c->is_complete()); }