From 54b265f811e545885916367d7d63c7f4d734fae0 Mon Sep 17 00:00:00 2001 From: Bill Scales Date: Wed, 14 May 2025 08:39:40 +0100 Subject: [PATCH] osd: EC Optimizations bug fix for flip/flop acting set EC optimizations pools have a set of non-primary shards which cannot become the primary because they do not have all the metadata updates. If one of these shards is chosen as the primary it will set the acting set to force another shard to be chosen. It is important that the selected acting set is the same acting set that will be chosen by the next primary (assuming nothing else changes) otherwise a PG can get into a state where the acting set flip/flops between two different states causing the PG to get stuck in peering and hanging I/O. A bug in update_peer_info meant that non-primary shards did not present the same info to choose_acting_set as primary shards because they were not updating their pg_info_t based on pwlc information from other shards. Signed-off-by: Bill Scales --- src/osd/PeeringState.cc | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index d1eded648418b..4021dc6e6a35d 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -364,7 +364,7 @@ void PeeringState::update_peer_info(const pg_shard_t &from, } // 3 cases: // We are the primary - from is the shard that sent the oinfo - // We are a replica - from is the primary, it will not have pwlc infomation + // We are a replica - from is the primary, it will not have pwlc information for itself // Merge - from is pg_whoami, oinfo is a source pg that is being merged if ((from != pg_whoami) && info.partial_writes_last_complete.contains(from.shard)) { @@ -391,6 +391,34 @@ void PeeringState::update_peer_info(const pg_shard_t &from, } } } + // Non-primary shards might need to apply pwlc to update info + if (info.partial_writes_last_complete.contains(pg_whoami.shard)) { + // Check if last_complete and 
last_update can be advanced based on + // knowledge of partial_writes + const auto & [fromversion, toversion] = + info.partial_writes_last_complete[pg_whoami.shard]; + if (toversion > info.last_complete) { + if (fromversion <= info.last_complete) { + psdout(10) << "osd." << pg_whoami << " has last_complete " + << info.last_complete + << " but pwlc says its at " << toversion + << dendl; + info.last_complete = toversion; + if (toversion > info.last_update) { + info.last_update = toversion; + } + if (toversion > pg_log.get_head()) { + pg_log.set_head(toversion); + } + } else { + psdout(10) << "osd." << pg_whoami << " has last_complete " + << info.last_complete + << " cannot apply pwlc from " << fromversion + << " to " << toversion + << dendl; + } + } + } } bool PeeringState::proc_replica_notify(const pg_shard_t &from, const pg_notify_t &notify) -- 2.39.5