]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: EC Optimizations bug fix for flip/flop acting set
author: Bill Scales <bill_scales@uk.ibm.com>
Wed, 14 May 2025 07:39:40 +0000 (08:39 +0100)
committer: Alex Ainscow <aainscow@uk.ibm.com>
Wed, 25 Jun 2025 22:36:40 +0000 (23:36 +0100)
EC optimization pools have a set of non-primary shards that
cannot become the primary because they do not have all the
metadata updates. If one of these shards is chosen as the
primary, it will set the acting set to force another shard to
be chosen.

It is important that the selected acting set is the same
acting set that will be chosen by the next primary (assuming
nothing else changes); otherwise a PG can get into a state where
the acting set flip-flops between two different states, causing
the PG to get stuck in peering and I/O to hang.

A bug in update_peer_info meant that non-primary shards did not
present the same info to choose_acting_set as primary shards
because they were not updating their pg_info_t based on pwlc
(partial_writes_last_complete) information from other shards.

Signed-off-by: Bill Scales <bill_scales@uk.ibm.com>
src/osd/PeeringState.cc

index d1eded648418bc89b3aebe14d608b6063fe8d085..4021dc6e6a35d757fa493d80887c2821f7f974b1 100644 (file)
@@ -364,7 +364,7 @@ void PeeringState::update_peer_info(const pg_shard_t &from,
   }
   // 3 cases:
   // We are the primary - from is the shard that sent the oinfo
-  // We are a replica - from is the primary, it will not have pwlc infomation
+  // We are a replica - from is the primary, it will not have pwlc infomation for itself
   // Merge - from is pg_whoami, oinfo is a source pg that is being merged
   if ((from != pg_whoami) &&
       info.partial_writes_last_complete.contains(from.shard)) {
@@ -391,6 +391,34 @@ void PeeringState::update_peer_info(const pg_shard_t &from,
       }
     }
   }
+  // Non-primary shards might need to apply pwlc to update info
+  if (info.partial_writes_last_complete.contains(pg_whoami.shard)) {
+    // Check if last_complete and last_update can be advanced based on
+    // knowledge of partial_writes
+    const auto & [fromversion, toversion] =
+      info.partial_writes_last_complete[pg_whoami.shard];
+    if (toversion > info.last_complete) {
+      if (fromversion <= info.last_complete) {
+       psdout(10) << "osd." << pg_whoami << " has last_complete "
+                  << info.last_complete
+                  << " but pwlc says its at " << toversion
+                  << dendl;
+       info.last_complete = toversion;
+       if (toversion > info.last_update) {
+         info.last_update = toversion;
+       }
+       if (toversion > pg_log.get_head()) {
+         pg_log.set_head(toversion);
+       }
+      } else {
+       psdout(10) << "osd." << pg_whoami << " has last_complete "
+                  << info.last_complete
+                  << " cannot apply pwlc from " << fromversion
+                  << " to " << toversion
+                  << dendl;
+      }
+    }
+  }
 }
 
 bool PeeringState::proc_replica_notify(const pg_shard_t &from, const pg_notify_t &notify)