]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: EC Optimizations: Get missing changes for partial logs
authorBill Scales <bill_scales@uk.ibm.com>
Wed, 26 Mar 2025 13:15:45 +0000 (13:15 +0000)
committerAlex Ainscow <aainscow@uk.ibm.com>
Tue, 22 Apr 2025 07:17:03 +0000 (08:17 +0100)
Changes to the get missing step of peering to handle partial
writes. Having established the authorative log the primary
works out what shards are missing objects. With partial
writes this code needs to differentiate between a shard that
missed an update (and hence has a missing object) versus a
shard that was not updated by a partial write. The divergent
log entries are examined to see if the updates were partial
writes that did not involve the shard.

Signed-off-by: Bill Scales <bill_scales@uk.ibm.com>
src/osd/PeeringState.cc

index 0f49351d69a1f747a3122582c145445238740246..b8bc669473fedfaf0fc51ab99d74078d8e0c70cd 100644 (file)
@@ -7523,7 +7523,7 @@ PeeringState::GetMissing::GetMissing(my_context ctx)
        i != ps->acting_recovery_backfill.end();
        ++i) {
     if (*i == ps->get_primary()) continue;
-    const pg_info_t& pi = ps->peer_info[*i];
+    pg_info_t& pi = ps->peer_info[*i];
     // reset this so to make sure the pg_missing_t is initialized and
     // has the correct semantics even if we don't need to get a
     // missing set from a shard. This way later additions due to
@@ -7544,6 +7544,67 @@ PeeringState::GetMissing::GetMissing(my_context ctx)
       continue;
     }
 
+    // If the peer log is only divergent because of partial writes then
+    // roll forward the peer to cover writes it was not involved in.
+    if (pi.last_update < ps->info.last_update) {
+      // Search backwards through log looking for a match with peer's head
+      // entry
+      mempool::osd_pglog::list<pg_log_entry_t>::const_iterator p =
+       ps->pg_log.get_log().log.end();
+      while (p != ps->pg_log.get_log().log.begin()) {
+       --p;
+       if (p->version.version <= pi.last_update.version) {
+         break;
+       }
+      }
+      if (pi.last_update == pi.last_complete &&
+         p->version == pi.last_update) {
+       // Matched peer's head entry - see if we can advance last_update
+       // because of partial written shards
+       eversion_t old_last_update = pi.last_update;
+       if (ps->info.partial_writes_last_complete.contains(i->shard) &&
+           ps->info.partial_writes_last_complete[i->shard].first <
+           old_last_update) {
+         old_last_update =
+           ps->info.partial_writes_last_complete[i->shard].first;
+       }
+       ++p;
+       bool advanced = false;
+       while (p != ps->pg_log.get_log().log.end()) {
+         if (p->is_written_shard(i->shard)) {
+           psdout(20) << "log entry " << p->version
+                      << " written_shards=" << p->written_shards
+                      << " is divergent" << dendl;
+           break;
+         }
+         pi.last_update = p->version;
+         pi.last_complete = p->version;
+         // Update partial_writes_last_complete
+         if (ps->info.partial_writes_last_complete.contains(i->shard)) {
+           // Existing pwlc entry - only update if p->version is newer
+           if (ps->info.partial_writes_last_complete[i->shard].second <
+               p->version) {
+             ps->info.partial_writes_last_complete[i->shard] =
+               std::pair(old_last_update, p->version);
+           }
+         } else {
+           // No existing pwlc entry - create one
+           ps->info.partial_writes_last_complete[i->shard] =
+             std::pair(old_last_update, p->version);
+         }
+         advanced = true;
+         ++p;
+       }
+       if (advanced) {
+         psdout(20) << "shard " << i->shard << " pwlc="
+                    << ps->info.partial_writes_last_complete.at(i->shard)
+                    << " last_complete=" << ps->info.last_complete
+                    << " last_update=" << pi.last_update
+                    << dendl;
+       }
+      }
+    }
+
     if (pi.last_update == pi.last_complete &&  // peer has no missing
        pi.last_update == ps->info.last_update) {  // peer is up to date
       // replica has no missing and identical log as us.  no need to