From: Yin Congmin Date: Wed, 19 Oct 2022 02:47:35 +0000 (+0800) Subject: librbd/cache/pwl:fix clean vs bytes_dirty cache state inconsistency X-Git-Tag: v16.2.11~104^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F49054%2Fhead;p=ceph.git librbd/cache/pwl:fix clean vs bytes_dirty cache state inconsistency The space of the pwl is pre allocated. In check_allocation(), the space is reserved, and then the op is completed after a period of process. When checking resource is successful and the resource is reserved, dirty_bytes has been increased. But new entry will not be added to m_dirty_log_entries until the op is finished. This time gap leads to inconsistencies. Put update_image_cache_state() in check_allocation() to fix this issue. It is now considered that as long as space is reserved and the op is started, the cache is dirty. Fixes: https://tracker.ceph.com/issues/57872 Signed-off-by: Yin Congmin (cherry picked from commit e2b079137473d4c819d58ef6466450a5c4a8b8af) Conflicts: src/librbd/cache/pwl/AbstractWriteLog.cc [ commit 6290446b819b ("librbd/cache/pwl/: remove IO waited state") not in pacific ] --- diff --git a/src/librbd/cache/pwl/AbstractWriteLog.cc b/src/librbd/cache/pwl/AbstractWriteLog.cc index 3d686bc044935..6f017a9c33b3c 100644 --- a/src/librbd/cache/pwl/AbstractWriteLog.cc +++ b/src/librbd/cache/pwl/AbstractWriteLog.cc @@ -1308,7 +1308,6 @@ void AbstractWriteLog::complete_op_log_entries(GenericLogOperations &&ops, { GenericLogEntries dirty_entries; int published_reserves = 0; - bool need_update_state = false; ldout(m_image_ctx.cct, 20) << __func__ << ": completing" << dendl; for (auto &op : ops) { utime_t now = ceph_clock_now(); @@ -1328,11 +1327,6 @@ void AbstractWriteLog::complete_op_log_entries(GenericLogOperations &&ops, std::lock_guard locker(m_lock); m_unpublished_reserves -= published_reserves; m_dirty_log_entries.splice(m_dirty_log_entries.end(), dirty_entries); - if (m_cache_state->clean && !this->m_dirty_log_entries.empty()) { - m_cache_state->clean = false; - update_image_cache_state(); - need_update_state = true; - } } op->complete(result); m_perfcounter->tinc(l_librbd_pwl_log_op_dis_to_app_t, @@ -1347,10 +1341,6 @@ void AbstractWriteLog::complete_op_log_entries(GenericLogOperations &&ops, log_entry->ram_entry.write_bytes); m_perfcounter->tinc(l_librbd_pwl_log_op_app_to_cmp_t, now - op->log_append_start_time); } - if (need_update_state) { - std::unique_lock locker(m_lock); - write_image_cache_state(locker); - } // New entries may be flushable { std::lock_guard locker(m_lock); @@ -1545,7 +1535,7 @@ bool AbstractWriteLog::check_allocation( } if (alloc_succeeds) { - std::lock_guard locker(m_lock); + std::unique_lock locker(m_lock); /* We need one free log entry per extent (each is a separate entry), and * one free "lane" for remote replication. */ if ((m_free_lanes >= num_lanes) && @@ -1560,6 +1550,11 @@ bool AbstractWriteLog::check_allocation( if (req->has_io_waited_for_buffers()) { req->set_io_waited_for_buffers(false); } + if (m_cache_state->clean && bytes_dirtied > 0) { + m_cache_state->clean = false; + update_image_cache_state(); + write_image_cache_state(locker); + } } else { alloc_succeeds = false; }