From 74a139d2ceb068fd5c77884e2a7be0359a3300c9 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Mon, 13 Jul 2020 16:11:06 -0400 Subject: [PATCH] librbd: fix parent cache races and error handling If the plugin fails to connect to the daemon at start-up it will crash the process due to a resource deadlock exception being thrown as the client is destroyed. Additionally, librbd will support concurrent IO thread processing in the future so the client needs to be protected by a lock. Signed-off-by: Jason Dillaman --- src/librbd/cache/ParentCacheObjectDispatch.cc | 78 ++++++++----------- src/librbd/cache/ParentCacheObjectDispatch.h | 16 ++-- src/test/librbd/CMakeLists.txt | 2 +- ...=> test_mock_ParentCacheObjectDispatch.cc} | 21 ++--- 4 files changed, 47 insertions(+), 70 deletions(-) rename src/test/librbd/cache/{test_mock_ParentImageCache.cc => test_mock_ParentCacheObjectDispatch.cc} (93%) diff --git a/src/librbd/cache/ParentCacheObjectDispatch.cc b/src/librbd/cache/ParentCacheObjectDispatch.cc index 088384b1525..73d37274310 100644 --- a/src/librbd/cache/ParentCacheObjectDispatch.cc +++ b/src/librbd/cache/ParentCacheObjectDispatch.cc @@ -26,17 +26,20 @@ namespace cache { template ParentCacheObjectDispatch::ParentCacheObjectDispatch( - I* image_ctx) : m_image_ctx(image_ctx), m_cache_client(nullptr), - m_initialized(false), m_connecting(false) { + I* image_ctx) + : m_image_ctx(image_ctx), + m_lock(ceph::make_mutex( + "librbd::cache::ParentCacheObjectDispatch::lock", true, false)) { ceph_assert(m_image_ctx->data_ctx.is_valid()); - std::string controller_path = - ((CephContext*)(m_image_ctx->cct))->_conf.get_val("immutable_object_cache_sock"); + auto controller_path = image_ctx->cct->_conf.template get_val( + "immutable_object_cache_sock"); m_cache_client = new CacheClient(controller_path.c_str(), m_image_ctx->cct); } template ParentCacheObjectDispatch::~ParentCacheObjectDispatch() { - delete m_cache_client; + delete m_cache_client; + m_cache_client = nullptr; } template @@ -52,18 +55,10 @@ void ParentCacheObjectDispatch::init(Context* on_finish) { return; } - Context* create_session_ctx = new LambdaContext([this, on_finish](int ret) { - m_connecting.store(false); - if (on_finish != nullptr) { - on_finish->complete(ret); - } - }); - - m_connecting.store(true); - create_cache_session(create_session_ctx, false); - m_image_ctx->io_object_dispatcher->register_dispatch(this); - m_initialized = true; + + std::unique_lock locker{m_lock}; + create_cache_session(on_finish, false); } template @@ -77,36 +72,18 @@ bool ParentCacheObjectDispatch::read( auto cct = m_image_ctx->cct; ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~" << object_len << dendl; - ceph_assert(m_initialized); string oid = data_object_name(m_image_ctx, object_no); /* if RO daemon still don't startup, or RO daemon crash, * or session occur any error, try to re-connect daemon.*/ + std::unique_lock locker{m_lock}; if (!m_cache_client->is_session_work()) { - if (!m_connecting.exchange(true)) { - /* Since we don't have a giant lock protecting the full re-connect process, - * if thread A first passes the if (!m_cache_client->is_session_work()), - * thread B could have also passed it and reconnected - * before thread A resumes and executes if (!m_connecting.exchange(true)). - * This will result in thread A re-connecting a working session. - * So, we need to check if session is normal again. If session work, - * we need set m_connecting to false. */ - if (!m_cache_client->is_session_work()) { - Context* on_finish = new LambdaContext([this](int ret) { - m_connecting.store(false); - }); - create_cache_session(on_finish, true); - } else { - m_connecting.store(false); - } - } + create_cache_session(nullptr, true); ldout(cct, 5) << "Parent cache try to re-connect to RO daemon. " << "dispatch current request to lower object layer" << dendl; return false; } - ceph_assert(m_cache_client->is_session_work()); - CacheGenContextURef ctx = make_gen_lambda_context> ([this, read_data, dispatch_result, on_dispatched, @@ -165,18 +142,29 @@ int ParentCacheObjectDispatch::handle_register_client(bool reg) { } template -int ParentCacheObjectDispatch::create_cache_session(Context* on_finish, bool is_reconnect) { +void ParentCacheObjectDispatch::create_cache_session(Context* on_finish, + bool is_reconnect) { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + if (m_connecting) { + return; + } + m_connecting = true; + auto cct = m_image_ctx->cct; ldout(cct, 20) << dendl; Context* register_ctx = new LambdaContext([this, cct, on_finish](int ret) { if (ret < 0) { lderr(cct) << "Parent cache fail to register client." << dendl; - } else { - ceph_assert(m_cache_client->is_session_work()); } handle_register_client(ret < 0 ? false : true); - on_finish->complete(ret); + + ceph_assert(m_connecting); + m_connecting = false; + + if (on_finish != nullptr) { + on_finish->complete(0); + } }); Context* connect_ctx = new LambdaContext( @@ -197,21 +185,19 @@ int ParentCacheObjectDispatch::create_cache_session(Context* on_finish, bool delete m_cache_client; // create new CacheClient to connect RO daemon. - std::string controller_path = - ((CephContext*)(m_image_ctx->cct))->_conf.get_val("immutable_object_cache_sock"); + auto controller_path = cct->_conf.template get_val( + "immutable_object_cache_sock"); m_cache_client = new CacheClient(controller_path.c_str(), m_image_ctx->cct); } m_cache_client->run(); - m_cache_client->connect(connect_ctx); - return 0; } template int ParentCacheObjectDispatch::read_object( - std::string file_path, ceph::bufferlist* read_data, uint64_t offset, - uint64_t length, Context *on_finish) { + std::string file_path, ceph::bufferlist* read_data, uint64_t offset, + uint64_t length, Context *on_finish) { auto *cct = m_image_ctx->cct; ldout(cct, 20) << "file path: " << file_path << dendl; diff --git a/src/librbd/cache/ParentCacheObjectDispatch.h b/src/librbd/cache/ParentCacheObjectDispatch.h index 42222dd9d2b..2c7b5234141 100644 --- a/src/librbd/cache/ParentCacheObjectDispatch.h +++ b/src/librbd/cache/ParentCacheObjectDispatch.h @@ -5,8 +5,9 @@ #define CEPH_LIBRBD_CACHE_PARENT_CACHER_OBJECT_DISPATCH_H #include "librbd/io/ObjectDispatchInterface.h" -#include "tools/immutable_object_cache/CacheClient.h" +#include "common/ceph_mutex.h" #include "librbd/cache/TypeTraits.h" +#include "tools/immutable_object_cache/CacheClient.h" #include "tools/immutable_object_cache/Types.h" namespace librbd { @@ -104,10 +105,6 @@ public: uint64_t journal_tid, uint64_t new_journal_tid) { } - bool get_state() { - return m_initialized; - } - ImageCtxT* get_image_ctx() { return m_image_ctx; } @@ -127,12 +124,13 @@ private: io::DispatchResult* dispatch_result, Context* on_dispatched); int handle_register_client(bool reg); - int create_cache_session(Context* on_finish, bool is_reconnect); + void create_cache_session(Context* on_finish, bool is_reconnect); ImageCtxT* m_image_ctx; - CacheClient *m_cache_client; - bool m_initialized; - std::atomic m_connecting; + + ceph::mutex m_lock; + CacheClient *m_cache_client = nullptr; + bool m_connecting = false; }; } // namespace cache diff --git a/src/test/librbd/CMakeLists.txt b/src/test/librbd/CMakeLists.txt index 2cfd785935e..e57842ef982 100644 --- a/src/test/librbd/CMakeLists.txt +++ b/src/test/librbd/CMakeLists.txt @@ -51,7 +51,7 @@ set(unittest_librbd_srcs test_mock_TrashWatcher.cc test_mock_Watcher.cc cache/test_mock_WriteAroundObjectDispatch.cc - cache/test_mock_ParentImageCache.cc + cache/test_mock_ParentCacheObjectDispatch.cc deep_copy/test_mock_ImageCopyRequest.cc deep_copy/test_mock_MetadataCopyRequest.cc deep_copy/test_mock_ObjectCopyRequest.cc diff --git a/src/test/librbd/cache/test_mock_ParentImageCache.cc b/src/test/librbd/cache/test_mock_ParentCacheObjectDispatch.cc similarity index 93% rename from src/test/librbd/cache/test_mock_ParentImageCache.cc rename to src/test/librbd/cache/test_mock_ParentCacheObjectDispatch.cc index 64394ef3113..3b36e44f4eb 100644 --- a/src/test/librbd/cache/test_mock_ParentImageCache.cc +++ b/src/test/librbd/cache/test_mock_ParentCacheObjectDispatch.cc @@ -54,7 +54,7 @@ using ::testing::Return; using ::testing::WithArg; using ::testing::WithArgs; -class TestMockParentImageCache : public TestMockFixture { +class TestMockParentCacheObjectDispatch : public TestMockFixture { public : typedef cache::ParentCacheObjectDispatch MockParentImageCache; @@ -142,7 +142,7 @@ public : } }; -TEST_F(TestMockParentImageCache, test_initialization_success) { +TEST_F(TestMockParentCacheObjectDispatch, test_initialization_success) { librbd::ImageCtx* ictx; ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockParentImageCacheImageCtx mock_image_ctx(*ictx); @@ -161,7 +161,6 @@ TEST_F(TestMockParentImageCache, test_initialization_success) { ASSERT_EQ(reg, true); }); expect_cache_register(*mock_parent_image_cache, ctx, 0); - expect_cache_session_state(*mock_parent_image_cache, true); expect_io_object_dispatcher_register_state(*mock_parent_image_cache, 0); expect_cache_close(*mock_parent_image_cache, 0); expect_cache_stop(*mock_parent_image_cache, 0); @@ -171,7 +170,6 @@ TEST_F(TestMockParentImageCache, test_initialization_success) { ASSERT_EQ(mock_parent_image_cache->get_dispatch_layer(), io::OBJECT_DISPATCH_LAYER_PARENT_CACHE); - ASSERT_EQ(mock_parent_image_cache->get_state(), true); expect_cache_session_state(*mock_parent_image_cache, true); ASSERT_EQ(mock_parent_image_cache->get_cache_client()->is_session_work(), true); @@ -181,7 +179,7 @@ TEST_F(TestMockParentImageCache, test_initialization_success) { delete mock_parent_image_cache; } -TEST_F(TestMockParentImageCache, test_initialization_fail_at_connect) { +TEST_F(TestMockParentCacheObjectDispatch, test_initialization_fail_at_connect) { librbd::ImageCtx* ictx; ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockParentImageCacheImageCtx mock_image_ctx(*ictx); @@ -206,7 +204,6 @@ TEST_F(TestMockParentImageCache, test_initialization_fail_at_connect) { // initialization fails. ASSERT_EQ(mock_parent_image_cache->get_dispatch_layer(), io::OBJECT_DISPATCH_LAYER_PARENT_CACHE); - ASSERT_EQ(mock_parent_image_cache->get_state(), true); ASSERT_EQ(mock_parent_image_cache->get_cache_client()->is_session_work(), false); mock_parent_image_cache->get_cache_client()->close(); @@ -216,7 +213,7 @@ TEST_F(TestMockParentImageCache, test_initialization_fail_at_connect) { } -TEST_F(TestMockParentImageCache, test_initialization_fail_at_register) { +TEST_F(TestMockParentCacheObjectDispatch, test_initialization_fail_at_register) { librbd::ImageCtx* ictx; ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockParentImageCacheImageCtx mock_image_ctx(*ictx); @@ -235,7 +232,6 @@ TEST_F(TestMockParentImageCache, test_initialization_fail_at_register) { ASSERT_EQ(reg, false); }); expect_cache_register(*mock_parent_image_cache, ctx, -1); - expect_cache_session_state(*mock_parent_image_cache, true); expect_io_object_dispatcher_register_state(*mock_parent_image_cache, 0); expect_cache_close(*mock_parent_image_cache, 0); expect_cache_stop(*mock_parent_image_cache, 0); @@ -245,7 +241,6 @@ TEST_F(TestMockParentImageCache, test_initialization_fail_at_register) { ASSERT_EQ(mock_parent_image_cache->get_dispatch_layer(), io::OBJECT_DISPATCH_LAYER_PARENT_CACHE); - ASSERT_EQ(mock_parent_image_cache->get_state(), true); expect_cache_session_state(*mock_parent_image_cache, true); ASSERT_EQ(mock_parent_image_cache->get_cache_client()->is_session_work(), true); @@ -255,7 +250,7 @@ TEST_F(TestMockParentImageCache, test_initialization_fail_at_register) { delete mock_parent_image_cache; } -TEST_F(TestMockParentImageCache, test_disble_interface) { +TEST_F(TestMockParentCacheObjectDispatch, test_disble_interface) { librbd::ImageCtx* ictx; ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockParentImageCacheImageCtx mock_image_ctx(*ictx); @@ -295,7 +290,7 @@ TEST_F(TestMockParentImageCache, test_disble_interface) { } -TEST_F(TestMockParentImageCache, test_read) { +TEST_F(TestMockParentCacheObjectDispatch, test_read) { librbd::ImageCtx* ictx; ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockParentImageCacheImageCtx mock_image_ctx(*ictx); @@ -314,7 +309,6 @@ TEST_F(TestMockParentImageCache, test_read) { ASSERT_EQ(reg, true); }); expect_cache_register(*mock_parent_image_cache, ctx, 0); - expect_cache_session_state(*mock_parent_image_cache, true); expect_io_object_dispatcher_register_state(*mock_parent_image_cache, 0); expect_cache_close(*mock_parent_image_cache, 0); expect_cache_stop(*mock_parent_image_cache, 0); @@ -324,7 +318,6 @@ TEST_F(TestMockParentImageCache, test_read) { ASSERT_EQ(mock_parent_image_cache->get_dispatch_layer(), io::OBJECT_DISPATCH_LAYER_PARENT_CACHE); - ASSERT_EQ(mock_parent_image_cache->get_state(), true); expect_cache_session_state(*mock_parent_image_cache, true); ASSERT_EQ(mock_parent_image_cache->get_cache_client()->is_session_work(), true); @@ -332,7 +325,7 @@ TEST_F(TestMockParentImageCache, test_read) { Context* on_finish = &cond; auto& expect = EXPECT_CALL(*(mock_parent_image_cache->get_cache_client()), is_session_work()); - expect.WillOnce(Return(true)).WillOnce(Return(true)); + expect.WillOnce(Return(true)); expect_cache_lookup_object(*mock_parent_image_cache, on_finish); -- 2.39.5