From 7220189bb7dc846de78c4362dc5a3e1a0297c071 Mon Sep 17 00:00:00 2001 From: Yin Congmin Date: Wed, 19 Oct 2022 10:47:35 +0800 Subject: [PATCH] librbd/cache/pwl: fix clean vs bytes_dirty cache state inconsistency The space of the pwl is pre-allocated. In check_allocation(), the space is reserved, and then the op is completed after a period of processing. When the resource check succeeds and the resource is reserved, dirty_bytes has been increased. But the new entry will not be added to m_dirty_log_entries until the op is finished. This time gap leads to inconsistencies. Put update_image_cache_state() in check_allocation() to fix this issue. It is now considered that as long as space is reserved and the op is started, the cache is dirty. Fixes: https://tracker.ceph.com/issues/57872 Signed-off-by: Yin Congmin (cherry picked from commit e2b079137473d4c819d58ef6466450a5c4a8b8af) --- src/librbd/cache/pwl/AbstractWriteLog.cc | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/librbd/cache/pwl/AbstractWriteLog.cc b/src/librbd/cache/pwl/AbstractWriteLog.cc index 713fc3caf0837..a59a59d1ca03d 100644 --- a/src/librbd/cache/pwl/AbstractWriteLog.cc +++ b/src/librbd/cache/pwl/AbstractWriteLog.cc @@ -1307,7 +1307,6 @@ void AbstractWriteLog::complete_op_log_entries(GenericLogOperations &&ops, { GenericLogEntries dirty_entries; int published_reserves = 0; - bool need_update_state = false; ldout(m_image_ctx.cct, 20) << __func__ << ": completing" << dendl; for (auto &op : ops) { utime_t now = ceph_clock_now(); @@ -1327,11 +1326,6 @@ void AbstractWriteLog::complete_op_log_entries(GenericLogOperations &&ops, std::lock_guard locker(m_lock); m_unpublished_reserves -= published_reserves; m_dirty_log_entries.splice(m_dirty_log_entries.end(), dirty_entries); - if (m_cache_state->clean && !this->m_dirty_log_entries.empty()) { - m_cache_state->clean = false; - update_image_cache_state(); - need_update_state = true; - } } op->complete(result); 
m_perfcounter->tinc(l_librbd_pwl_log_op_dis_to_app_t, @@ -1346,10 +1340,6 @@ void AbstractWriteLog::complete_op_log_entries(GenericLogOperations &&ops, log_entry->ram_entry.write_bytes); m_perfcounter->tinc(l_librbd_pwl_log_op_app_to_cmp_t, now - op->log_append_start_time); } - if (need_update_state) { - std::unique_lock locker(m_lock); - write_image_cache_state(locker); - } // New entries may be flushable { std::lock_guard locker(m_lock); @@ -1539,7 +1529,7 @@ bool AbstractWriteLog::check_allocation( } if (alloc_succeeds) { - std::lock_guard locker(m_lock); + std::unique_lock locker(m_lock); /* We need one free log entry per extent (each is a separate entry), and * one free "lane" for remote replication. */ if ((m_free_lanes >= num_lanes) && @@ -1551,6 +1541,11 @@ bool AbstractWriteLog::check_allocation( m_bytes_allocated += bytes_allocated; m_bytes_cached += bytes_cached; m_bytes_dirty += bytes_dirtied; + if (m_cache_state->clean && bytes_dirtied > 0) { + m_cache_state->clean = false; + update_image_cache_state(); + write_image_cache_state(locker); + } } else { alloc_succeeds = false; } -- 2.39.5