From a26af9165dd93c876ef63938ccc726c220b7c7da Mon Sep 17 00:00:00 2001 From: Bill Scales Date: Wed, 1 Oct 2025 15:52:23 +0100 Subject: [PATCH] osd: Optimized EC missing list not updated on recovering shard (OLD FIX) Shards that are recovering (last_complete != last_update) are using pwlc to advance last_update for writes that did not affect the shard. However, simply incrementing last_update means that the primary doesn't send the shard log entries that it missed and consequently it cannot update its missing list. If the shard is already missing object X at version V1 and there was a partial write at V2 that did not update the shard, it does not need to retain the log entry, but it does need to update the missing list to say it needs V2 rather than V1. This ensures all shards report a need for an object at the same version and avoids an assert in MissingLoc::add_active_missing when the primary is trying to combine the missing lists from all the shards to work out what has to be recovered. The fix is to avoid applying pwlc when last_complete != last_update; this forces the primary to send the log to the recovering shard, which can then update its missing list (and discard the log entries, as they are partial writes). Fixes: https://tracker.ceph.com/issues/73249 Signed-off-by: Bill Scales --- src/osd/PeeringState.cc | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index fef041f5eec..3bbc1300f85 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -333,21 +333,12 @@ void PeeringState::apply_pwlc(const std::pair pwlc, const auto & [fromversion, toversion] = pwlc; if (toversion > info.last_update) { if ((fromversion <= info.last_update) && - ((stage == AFTER_ACTIVATE) || - (info.last_complete == info.last_update))) { - if (info.last_complete == info.last_update) { - psdout(10) << "osd." 
<< shard << " has last_complete" - << "=last_update " << info.last_update - << " pwlc can advance both to " << toversion - << dendl; - info.last_complete = toversion; - } else { - psdout(10) << "osd." << shard << " has last_complete " - << info.last_complete << " and last_update " - << info.last_update - << " pwlc can advance last_update to " << toversion - << dendl; - } + (info.last_complete == info.last_update)) { + psdout(10) << "osd." << shard << " has last_complete" + << "=last_update " << info.last_update + << " pwlc can advance both to " << toversion + << dendl; + info.last_complete = toversion; info.last_update = toversion; if (log1 && toversion > log1->head) { log1->head = toversion; -- 2.39.5