]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
librbd: refine re-connect implementation and init method
authorshangdehao1 <dehao.shang@intel.com>
Fri, 14 Jun 2019 01:15:46 +0000 (09:15 +0800)
committerJason Dillaman <dillaman@redhat.com>
Mon, 24 Jun 2019 21:36:54 +0000 (17:36 -0400)
- add new states to OpenRequest, including
  send_parent_cache and handle_parent_cache.
- refine re-connect logic to remove a race condition.
- fix read error handling: return after dispatching a failed
  cache read instead of falling through and completing twice.

Signed-off-by: Dehao Shang <dehao.shang@intel.com>
src/librbd/cache/ParentCacheObjectDispatch.cc
src/librbd/cache/ParentCacheObjectDispatch.h
src/librbd/image/OpenRequest.cc
src/librbd/image/OpenRequest.h

index a79168e2acf0f381c4c5136427712da785e234fb..f1a1b5edabc8813d438c6788b0e92e768cc44c08 100644 (file)
@@ -28,8 +28,7 @@ namespace cache {
 template <typename I>
 ParentCacheObjectDispatch<I>::ParentCacheObjectDispatch(
     I* image_ctx) : m_image_ctx(image_ctx), m_cache_client(nullptr),
-    m_initialized(false), m_connecting(false),
-    m_lock("librbd::cache::ParentCacheObjectDispatch::m_lock") {
+    m_initialized(false), m_connecting(false) {
   std::string controller_path =
     ((CephContext*)(m_image_ctx->cct))->_conf.get_val<std::string>("immutable_object_cache_sock");
   m_cache_client = new CacheClient(controller_path.c_str(), m_image_ctx->cct);
@@ -41,7 +40,7 @@ ParentCacheObjectDispatch<I>::~ParentCacheObjectDispatch() {
 }
 
 template <typename I>
-void ParentCacheObjectDispatch<I>::init() {
+void ParentCacheObjectDispatch<I>::init(Context* on_finish) {
   auto cct = m_image_ctx->cct;
   ldout(cct, 5) << dendl;
 
@@ -50,13 +49,14 @@ void ParentCacheObjectDispatch<I>::init() {
     return;
   }
 
-  ceph_assert(m_connecting.load() == false);
-  m_connecting.store(true);
-  Context* create_session_ctx = new FunctionContext([this](int ret) {
-    Mutex::Locker locker(m_lock);
+  Context* create_session_ctx = new FunctionContext([this, on_finish](int ret) {
     m_connecting.store(false);
+    if (on_finish != nullptr) {
+      on_finish->complete(ret);
+    }
   });
 
+  m_connecting.store(true);
   create_cache_session(create_session_ctx, false);
 
   m_image_ctx->io_object_dispatcher->register_object_dispatch(this);
@@ -80,27 +80,28 @@ bool ParentCacheObjectDispatch<I>::read(
   /* if RO daemon still don't startup, or RO daemon crash,
    * or session occur any error, try to re-connect daemon.*/
   if (!m_cache_client->is_session_work()) {
-    {
-      Mutex::Locker locker(m_lock);
-      if (m_connecting.load()) {
-        ldout(cct, 5) << "Parent cache is re-connecting RO daemon, "
-                      << "dispatch current request to lower object layer " << dendl;
-        return false;
+    if (!m_connecting.exchange(true)) {
+      /* Since we don't have a giant lock protecting the full re-connect process,
+       * if thread A first passes the if (!m_cache_client->is_session_work()),
+       * thread B could have also passed it and reconnected
+       * before thread A resumes and executes if (!m_connecting.exchange(true)).
+       * This will result in thread A re-connecting a working session.
+       * So, we need to check if session is normal again. If session work,
+       * we need set m_connecting to false. */
+      if (!m_cache_client->is_session_work()) {
+        Context* on_finish = new FunctionContext([this](int ret) {
+          m_connecting.store(false);
+        });
+        create_cache_session(on_finish, true);
+      } else {
+        m_connecting.store(false);
       }
-      m_connecting.store(true);
     }
-
-    ceph_assert(m_connecting.load());
-
-    Context* on_finish = new FunctionContext([this](int ret) {
-      m_connecting.store(false);
-    });
-    create_cache_session(on_finish, true);
-
-    ldout(cct, 5) << "Parent cache initiate re-connect to RO daemon. "
+    ldout(cct, 5) << "Parent cache try to re-connect to RO daemon. "
                   << "dispatch current request to lower object layer" << dendl;
     return false;
   }
+
   ceph_assert(m_cache_client->is_session_work());
 
   CacheGenContextURef ctx = make_gen_lambda_context<ObjectCacheRequest*,
@@ -142,6 +143,7 @@ void ParentCacheObjectDispatch<I>::handle_read_cache(
     // cache read error, fall back to read rados
     *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
     on_dispatched->complete(0);
+    return;
   }
 
   *dispatch_result = io::DISPATCH_RESULT_COMPLETE;
index c41edef62394b8890cc8f6cab630479ec62e4d82..d576659a1a27277a022fcd09a7f0ffb46da8ce2a 100644 (file)
@@ -34,7 +34,7 @@ public:
     return io::OBJECT_DISPATCH_LAYER_PARENT_CACHE;
   }
 
-  void init();
+  void init(Context* on_finish = nullptr);
   void shut_down(Context* on_finish) {
     m_image_ctx->op_work_queue->queue(on_finish, 0);
   }
@@ -134,7 +134,6 @@ private:
   CacheClient *m_cache_client;
   bool m_initialized;
   std::atomic<bool> m_connecting;
-  Mutex m_lock;
 };
 
 } // namespace cache
index 81b9a03b5fa53e85ada25cf2293f45e561632a1b..fb1ada777def058598397bdb2777aaf2c94d9d12 100644 (file)
@@ -513,27 +513,52 @@ Context *OpenRequest<I>::handle_refresh(int *result) {
     return nullptr;
   }
 
+  return send_parent_cache(result);
+}
+
+template <typename I>
+Context* OpenRequest<I>::send_parent_cache(int *result) {
+  CephContext *cct = m_image_ctx->cct;
+  ldout(cct, 10) << __func__ << ": r=" << *result << dendl;
+
+  bool parent_cache_enabled = m_image_ctx->config.template get_val<bool>(
+    "rbd_parent_cache_enabled");
+
+  if (m_image_ctx->child == nullptr || !parent_cache_enabled) {
+    return send_init_cache(result);
+  }
+
+  auto parent_cache = cache::ParentCacheObjectDispatch<I>::create(m_image_ctx);
+  using klass = OpenRequest<I>;
+  Context *ctx = create_context_callback<
+    klass, &klass::handle_parent_cache>(this);
+
+  parent_cache->init(ctx);
+  return nullptr;
+}
+
+template <typename I>
+Context* OpenRequest<I>::handle_parent_cache(int* result) {
+  CephContext *cct = m_image_ctx->cct;
+  ldout(cct, 10) << __func__ << ": r=" << *result << dendl;
+
+  if (*result < 0) {
+    lderr(cct) << "failed to parent cache " << dendl;
+    send_close_image(*result);
+    return nullptr;
+  }
+
   return send_init_cache(result);
 }
 
 template <typename I>
 Context *OpenRequest<I>::send_init_cache(int *result) {
   // cache is disabled or parent image context
-  CephContext *cct = m_image_ctx->cct;
-
   if (!m_image_ctx->cache || m_image_ctx->child != nullptr) {
-    // enable Parent cache for parent image
-    bool parent_cache_enabled = m_image_ctx->config.template get_val<bool>(
-      "rbd_parent_cache_enabled");
-
-    if (m_image_ctx->child != nullptr && parent_cache_enabled ) {
-      ldout(cct, 10) << this << " " << "setting up parent cache"<< dendl;
-      auto parent_cache = cache::ParentCacheObjectDispatch<I>::create(m_image_ctx);
-      parent_cache->init();
-    }
     return send_register_watch(result);
   }
 
+  CephContext *cct = m_image_ctx->cct;
   ldout(cct, 10) << this << " " << __func__ << dendl;
 
   size_t max_dirty = m_image_ctx->config.template get_val<Option::size_t>(
index 0b84fa0045821cab7b8302c8be9a27c08859e889..460839bb3d8526ca62c533a483032278569d55aa 100644 (file)
@@ -61,6 +61,9 @@ private:
    *            V2_GET_DATA_POOL --------------> REFRESH
    *                                                |
    *                                                v
+   *                                             INIT_PARENT_CACHE(skip if
+   *                                                |               disable)
+   *                                                v
    *                                             INIT_CACHE
    *                                                |
    *                                                v
@@ -120,6 +123,9 @@ private:
   void send_refresh();
   Context *handle_refresh(int *result);
 
+  Context* send_parent_cache(int *result);
+  Context* handle_parent_cache(int *result);
+
   Context *send_init_cache(int *result);
 
   Context *send_register_watch(int *result);