From 762e57503f74e1393ad9f90133d5f7c5453b43c8 Mon Sep 17 00:00:00 2001 From: shangdehao1 Date: Fri, 14 Jun 2019 09:15:46 +0800 Subject: [PATCH] librbd: refine re-connect implement and init method - add new status to OpenImageRequest, including send_parent_cache and handle_parent_cache. - refine re-connect to remove race condition. - fixed read error bug Signed-off-by: Dehao Shang --- src/librbd/cache/ParentCacheObjectDispatch.cc | 48 ++++++++++--------- src/librbd/cache/ParentCacheObjectDispatch.h | 3 +- src/librbd/image/OpenRequest.cc | 47 +++++++++++++----- src/librbd/image/OpenRequest.h | 6 +++ 4 files changed, 68 insertions(+), 36 deletions(-) diff --git a/src/librbd/cache/ParentCacheObjectDispatch.cc b/src/librbd/cache/ParentCacheObjectDispatch.cc index a79168e2acf..f1a1b5edabc 100644 --- a/src/librbd/cache/ParentCacheObjectDispatch.cc +++ b/src/librbd/cache/ParentCacheObjectDispatch.cc @@ -28,8 +28,7 @@ namespace cache { template ParentCacheObjectDispatch::ParentCacheObjectDispatch( I* image_ctx) : m_image_ctx(image_ctx), m_cache_client(nullptr), - m_initialized(false), m_connecting(false), - m_lock("librbd::cache::ParentCacheObjectDispatch::m_lock") { + m_initialized(false), m_connecting(false) { std::string controller_path = ((CephContext*)(m_image_ctx->cct))->_conf.get_val("immutable_object_cache_sock"); m_cache_client = new CacheClient(controller_path.c_str(), m_image_ctx->cct); @@ -41,7 +40,7 @@ ParentCacheObjectDispatch::~ParentCacheObjectDispatch() { } template -void ParentCacheObjectDispatch::init() { +void ParentCacheObjectDispatch::init(Context* on_finish) { auto cct = m_image_ctx->cct; ldout(cct, 5) << dendl; @@ -50,13 +49,14 @@ void ParentCacheObjectDispatch::init() { return; } - ceph_assert(m_connecting.load() == false); - m_connecting.store(true); - Context* create_session_ctx = new FunctionContext([this](int ret) { - Mutex::Locker locker(m_lock); + Context* create_session_ctx = new FunctionContext([this, on_finish](int ret) { m_connecting.store(false); + if (on_finish != nullptr) { + on_finish->complete(ret); + } }); + m_connecting.store(true); create_cache_session(create_session_ctx, false); m_image_ctx->io_object_dispatcher->register_object_dispatch(this); @@ -80,27 +80,28 @@ bool ParentCacheObjectDispatch::read( /* if RO daemon still don't startup, or RO daemon crash, * or session occur any error, try to re-connect daemon.*/ if (!m_cache_client->is_session_work()) { - { - Mutex::Locker locker(m_lock); - if (m_connecting.load()) { - ldout(cct, 5) << "Parent cache is re-connecting RO daemon, " - << "dispatch current request to lower object layer " << dendl; - return false; + if (!m_connecting.exchange(true)) { + /* Since we don't have a giant lock protecting the full re-connect process, + * if thread A first passes the if (!m_cache_client->is_session_work()), + * thread B could have also passed it and reconnected + * before thread A resumes and executes if (!m_connecting.exchange(true)). + * This will result in thread A re-connecting a working session. + * So, we need to check if session is normal again. If session work, + * we need set m_connecting to false. */ + if (!m_cache_client->is_session_work()) { + Context* on_finish = new FunctionContext([this](int ret) { + m_connecting.store(false); + }); + create_cache_session(on_finish, true); + } else { + m_connecting.store(false); } - m_connecting.store(true); } - - ceph_assert(m_connecting.load()); - - Context* on_finish = new FunctionContext([this](int ret) { - m_connecting.store(false); - }); - create_cache_session(on_finish, true); - - ldout(cct, 5) << "Parent cache initiate re-connect to RO daemon. " + ldout(cct, 5) << "Parent cache try to re-connect to RO daemon. " << "dispatch current request to lower object layer" << dendl; return false; } + ceph_assert(m_cache_client->is_session_work()); CacheGenContextURef ctx = make_gen_lambda_context::handle_read_cache( // cache read error, fall back to read rados *dispatch_result = io::DISPATCH_RESULT_CONTINUE; on_dispatched->complete(0); + return; } *dispatch_result = io::DISPATCH_RESULT_COMPLETE; diff --git a/src/librbd/cache/ParentCacheObjectDispatch.h b/src/librbd/cache/ParentCacheObjectDispatch.h index c41edef6239..d576659a1a2 100644 --- a/src/librbd/cache/ParentCacheObjectDispatch.h +++ b/src/librbd/cache/ParentCacheObjectDispatch.h @@ -34,7 +34,7 @@ public: return io::OBJECT_DISPATCH_LAYER_PARENT_CACHE; } - void init(); + void init(Context* on_finish = nullptr); void shut_down(Context* on_finish) { m_image_ctx->op_work_queue->queue(on_finish, 0); } @@ -134,7 +134,6 @@ private: CacheClient *m_cache_client; bool m_initialized; std::atomic m_connecting; - Mutex m_lock; }; } // namespace cache diff --git a/src/librbd/image/OpenRequest.cc b/src/librbd/image/OpenRequest.cc index 81b9a03b5fa..fb1ada777de 100644 --- a/src/librbd/image/OpenRequest.cc +++ b/src/librbd/image/OpenRequest.cc @@ -513,27 +513,52 @@ Context *OpenRequest::handle_refresh(int *result) { return nullptr; } + return send_parent_cache(result); +} + +template +Context* OpenRequest::send_parent_cache(int *result) { + CephContext *cct = m_image_ctx->cct; + ldout(cct, 10) << __func__ << ": r=" << *result << dendl; + + bool parent_cache_enabled = m_image_ctx->config.template get_val( + "rbd_parent_cache_enabled"); + + if (m_image_ctx->child == nullptr || !parent_cache_enabled) { + return send_init_cache(result); + } + + auto parent_cache = cache::ParentCacheObjectDispatch::create(m_image_ctx); + using klass = OpenRequest; + Context *ctx = create_context_callback< + klass, &klass::handle_parent_cache>(this); + + parent_cache->init(ctx); + return nullptr; +} + +template +Context* OpenRequest::handle_parent_cache(int* result) { + CephContext *cct = m_image_ctx->cct; + ldout(cct, 10) << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to parent cache " << dendl; + send_close_image(*result); + return nullptr; + } + return send_init_cache(result); } template Context *OpenRequest::send_init_cache(int *result) { // cache is disabled or parent image context - CephContext *cct = m_image_ctx->cct; - if (!m_image_ctx->cache || m_image_ctx->child != nullptr) { - // enable Parent cache for parent image - bool parent_cache_enabled = m_image_ctx->config.template get_val( - "rbd_parent_cache_enabled"); - - if (m_image_ctx->child != nullptr && parent_cache_enabled ) { - ldout(cct, 10) << this << " " << "setting up parent cache"<< dendl; - auto parent_cache = cache::ParentCacheObjectDispatch::create(m_image_ctx); - parent_cache->init(); - } return send_register_watch(result); } + CephContext *cct = m_image_ctx->cct; ldout(cct, 10) << this << " " << __func__ << dendl; size_t max_dirty = m_image_ctx->config.template get_val( diff --git a/src/librbd/image/OpenRequest.h b/src/librbd/image/OpenRequest.h index 0b84fa00458..460839bb3d8 100644 --- a/src/librbd/image/OpenRequest.h +++ b/src/librbd/image/OpenRequest.h @@ -61,6 +61,9 @@ private: * V2_GET_DATA_POOL --------------> REFRESH * | * v + * INIT_PARENT_CACHE(skip if + * | disable) + * v * INIT_CACHE * | * v @@ -120,6 +123,9 @@ private: void send_refresh(); Context *handle_refresh(int *result); + Context* send_parent_cache(int *result); + Context* handle_parent_cache(int *result); + Context *send_init_cache(int *result); Context *send_register_watch(int *result); -- 2.39.5