From: Ilya Dryomov Date: Thu, 7 Apr 2022 16:49:46 +0000 (+0200) Subject: librbd/cache/pwl: avoid inconsistencies in ImageCacheState X-Git-Tag: v16.2.8~19^2~8 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8bea4b6279dda00b7f929d684ea9569488b0675e;p=ceph.git librbd/cache/pwl: avoid inconsistencies in ImageCacheState When empty and/or clean bools are updated in I/O handling code paths, ImageCacheState becomes inconistent for a short while: e.g. with clean transitioned to true, dirty_bytes counter could still be positive because the counters are updated only in periodic_stats(). Move to updating the counters in update_image_cache_state(Context*) to avoid this. update_image_cache_state(Context*) now requires m_lock -- most call sites already hold it anyway. The only problematic call site was AbstractWriteLog::shut_down() callback chain: perf_stop() needed to be moved to the very end since perf counters must be alive now for update_image_cache_state() to work. Don't override expect_op_work_queue() in unit tests: completing context in the same thread now results in a deadlock on m_lock in all test cases that call AbstractWriteLog::init(). Signed-off-by: Ilya Dryomov (cherry picked from commit 016882925a63f4f03a9c445d008b2325d479bc30) --- diff --git a/src/librbd/cache/pwl/AbstractWriteLog.cc b/src/librbd/cache/pwl/AbstractWriteLog.cc index b0dd0b636d03..5c243c198772 100644 --- a/src/librbd/cache/pwl/AbstractWriteLog.cc +++ b/src/librbd/cache/pwl/AbstractWriteLog.cc @@ -314,17 +314,6 @@ void AbstractWriteLog::log_perf() { template void AbstractWriteLog::periodic_stats() { std::lock_guard locker(m_lock); - m_cache_state->allocated_bytes = m_bytes_allocated; - m_cache_state->cached_bytes = m_bytes_cached; - m_cache_state->dirty_bytes = m_bytes_dirty; - m_cache_state->free_bytes = m_bytes_allocated_cap - m_bytes_allocated; - m_cache_state->hits_full = m_perfcounter->get(l_librbd_pwl_rd_hit_req); - m_cache_state->hits_partial = m_perfcounter->get(l_librbd_pwl_rd_part_hit_req); - m_cache_state->misses = m_perfcounter->get(l_librbd_pwl_rd_req) - - m_cache_state->hits_full - m_cache_state->hits_partial; - m_cache_state->hit_bytes = m_perfcounter->get(l_librbd_pwl_rd_hit_bytes); - m_cache_state->miss_bytes = m_perfcounter->get(l_librbd_pwl_rd_bytes) - - m_cache_state->hit_bytes; update_image_cache_state(); ldout(m_image_ctx.cct, 5) << "STATS: m_log_entries=" << m_log_entries.size() << ", m_dirty_log_entries=" << m_dirty_log_entries.size() @@ -589,6 +578,19 @@ void AbstractWriteLog::update_image_cache_state() { template void AbstractWriteLog::update_image_cache_state(Context *on_finish) { ldout(m_image_ctx.cct, 10) << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + m_cache_state->allocated_bytes = m_bytes_allocated; + m_cache_state->cached_bytes = m_bytes_cached; + m_cache_state->dirty_bytes = m_bytes_dirty; + m_cache_state->free_bytes = m_bytes_allocated_cap - m_bytes_allocated; + m_cache_state->hits_full = m_perfcounter->get(l_librbd_pwl_rd_hit_req); + m_cache_state->hits_partial = m_perfcounter->get(l_librbd_pwl_rd_part_hit_req); + m_cache_state->misses = m_perfcounter->get(l_librbd_pwl_rd_req) - + m_cache_state->hits_full - m_cache_state->hits_partial; + m_cache_state->hit_bytes = m_perfcounter->get(l_librbd_pwl_rd_hit_bytes); + m_cache_state->miss_bytes = m_perfcounter->get(l_librbd_pwl_rd_bytes) - + m_cache_state->hit_bytes; m_cache_state->write_image_cache_state(on_finish); } @@ -618,6 +620,7 @@ void AbstractWriteLog::init(Context *on_finish) { Context *ctx = new LambdaContext( [this, on_finish](int r) { if (r >= 0) { + std::lock_guard locker(m_lock); update_image_cache_state(on_finish); } else { on_finish->complete(r); @@ -637,6 +640,9 @@ void AbstractWriteLog::shut_down(Context *on_finish) { Context *ctx = new LambdaContext( [this, on_finish](int r) { + if (m_perfcounter) { + perf_stop(); + } ldout(m_image_ctx.cct, 6) << "shutdown complete" << dendl; m_image_ctx.op_work_queue->queue(on_finish, r); }); @@ -645,20 +651,14 @@ void AbstractWriteLog::shut_down(Context *on_finish) { ldout(m_image_ctx.cct, 6) << "image cache cleaned" << dendl; Context *next_ctx = override_ctx(r, ctx); periodic_stats(); - { - std::lock_guard locker(m_lock); - check_image_cache_state_clean(); - m_wake_up_enabled = false; - m_log_entries.clear(); - m_cache_state->clean = true; - m_cache_state->empty = true; - - remove_pool_file(); - if (m_perfcounter) { - perf_stop(); - } - } + std::lock_guard locker(m_lock); + check_image_cache_state_clean(); + m_wake_up_enabled = false; + m_log_entries.clear(); + m_cache_state->clean = true; + m_cache_state->empty = true; + remove_pool_file(); update_image_cache_state(next_ctx); }); ctx = new LambdaContext( diff --git a/src/test/librbd/cache/pwl/test_mock_ReplicatedWriteLog.cc b/src/test/librbd/cache/pwl/test_mock_ReplicatedWriteLog.cc index d4b1079b5c26..fc372b0d8c7a 100644 --- a/src/test/librbd/cache/pwl/test_mock_ReplicatedWriteLog.cc +++ b/src/test/librbd/cache/pwl/test_mock_ReplicatedWriteLog.cc @@ -84,13 +84,6 @@ struct TestMockCacheReplicatedWriteLog : public TestMockFixture { ASSERT_EQ(size, state.size); } - void expect_op_work_queue(MockImageCtx& mock_image_ctx) { - EXPECT_CALL(*mock_image_ctx.op_work_queue, queue(_, _)) - .WillRepeatedly(Invoke([](Context* ctx, int r) { - ctx->complete(r); - })); - } - void expect_context_complete(MockContextRWL& mock_context, int r) { EXPECT_CALL(mock_context, complete(r)) .WillRepeatedly(Invoke([&mock_context](int r) { diff --git a/src/test/librbd/cache/pwl/test_mock_SSDWriteLog.cc b/src/test/librbd/cache/pwl/test_mock_SSDWriteLog.cc index 81a248b8e8c5..b1fc4726461a 100644 --- a/src/test/librbd/cache/pwl/test_mock_SSDWriteLog.cc +++ b/src/test/librbd/cache/pwl/test_mock_SSDWriteLog.cc @@ -86,13 +86,6 @@ struct TestMockCacheSSDWriteLog : public TestMockFixture { ASSERT_EQ(size, state.size); } - void expect_op_work_queue(MockImageCtx& mock_image_ctx) { - EXPECT_CALL(*mock_image_ctx.op_work_queue, queue(_, _)) - .WillRepeatedly(Invoke([](Context* ctx, int r) { - ctx->complete(r); - })); - } - void expect_context_complete(MockContextSSD& mock_context, int r) { EXPECT_CALL(mock_context, complete(r)) .WillRepeatedly(Invoke([&mock_context](int r) {