From 7ba2bf1b2b8e735ca82b9e78708a8ff3c4aac841 Mon Sep 17 00:00:00 2001 From: Bill Scales Date: Wed, 16 Jul 2025 15:55:40 +0100 Subject: [PATCH] osd: Optimized EC invalid pwlc for shards doing backfill/async Shards performing backfill or async recovery receive log entries (but not transactions) for updates to missing or yet-to-be-backfilled objects. These log entries get applied and completed immediately because there is nothing that can be rolled back. This causes pwlc (partial_writes_last_complete) to advance too early and causes problems if other shards do not complete the update and end up rolling it backwards. This fix sets pwlc to be invalid when such a log entry is applied and completed and it then remains invalid until the next interval when peering runs again. Other shards will continue to update pwlc and any complete subset of shards in a future interval will include at least one shard that has continued to update pwlc. Signed-off-by: Bill Scales (cherry picked from commit 534fc76d40a86a49bfabab247d3a703cbb575e27) --- src/osd/PGBackend.cc | 17 +++++++++++++---- src/osd/PeeringState.cc | 10 ++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index d4ca5b83ca025..3eedd4d462d16 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -440,7 +440,12 @@ void PGBackend::partial_write( } auto &&[old_v, new_v] = pwlc_iter->second; if (old_v == new_v) { - if (old_v.version >= entry.version.version) { + if (old_v.version == eversion_t::max().version) { + // shard is backfilling or in async recovery, pwlc is + // invalid + ldpp_dout(dpp, 20) << __func__ << " pwlc invalid " << shard + << dendl; + } else if (old_v.version >= entry.version.version) { // Abnormal case - consider_adjusting_pwlc may advance pwlc // during peering because all shards have updates but these // have not been marked complete. 
At the end of peering @@ -463,9 +468,13 @@ void PGBackend::partial_write( } else if (pwlc_iter != info->partial_writes_last_complete.end()) { auto &&[old_v, new_v] = pwlc_iter->second; // Log updated or shard absent, partial write entry is a no-op - if (old_v.version >= entry.version.version) { - // Abnormal case - see above - ldpp_dout(dpp, 20) << __func__ << " pwlc is ahead of entry " << shard + if (old_v.version == eversion_t::max().version) { + // shard is backfilling or in async recovery, pwlc is invalid + ldpp_dout(dpp, 20) << __func__ << " pwlc invalid " << shard + << dendl; + } else if (old_v.version >= entry.version.version) { + // Abnormal case - see above + ldpp_dout(dpp, 20) << __func__ << " pwlc is ahead of entry " << shard << dendl; } else { old_v = new_v = entry.version; diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 0fad28eff615e..427ed1a620585 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -4627,6 +4627,16 @@ void PeeringState::append_log( * object is deleted before we can _merge_object_divergent_entries(). */ pg_log.skip_rollforward(&info, handler.get()); + /* Invalidate pwlc for this shard until the next interval when + * it will be updated with the pwlc from another shard + */ + for (auto & [shard, versionrange] : + info.partial_writes_last_complete) { + auto & [fromversion, toversion] = versionrange; + fromversion.epoch = 0; + fromversion.version = eversion_t::max().version; + toversion = fromversion; + } } for (auto p = logv.begin(); p != logv.end(); ++p) { -- 2.39.5