From: Bill Scales Date: Wed, 26 Mar 2025 13:15:45 +0000 (+0000) Subject: osd: EC Optimizations: Get missing changes for partial logs X-Git-Tag: v20.3.0~29^2~11 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=eb0bef777d96d103392b5e8279fb140cfafe67c8;p=ceph.git osd: EC Optimizations: Get missing changes for partial logs Changes to the get missing step of peering to handle partial writes. Having established the authorative log the primary works out what shards are missing objects. With partial writes this code needs to differentiate between a shard that missed an update (and hence has a missing object) versus a shard that was not updated by a partial write. The divergent log entries are examined to see if the updates were partial writes that did not involve the shard. Signed-off-by: Bill Scales --- diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 0f49351d69a1..b8bc669473fe 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -7523,7 +7523,7 @@ PeeringState::GetMissing::GetMissing(my_context ctx) i != ps->acting_recovery_backfill.end(); ++i) { if (*i == ps->get_primary()) continue; - const pg_info_t& pi = ps->peer_info[*i]; + pg_info_t& pi = ps->peer_info[*i]; // reset this so to make sure the pg_missing_t is initialized and // has the correct semantics even if we don't need to get a // missing set from a shard. This way later additions due to @@ -7544,6 +7544,67 @@ PeeringState::GetMissing::GetMissing(my_context ctx) continue; } + // If the peer log is only divergent because of partial writes then + // roll forward the peer to cover writes it was not involved in. + if (pi.last_update < ps->info.last_update) { + // Search backwards through log looking for a match with peer's head + // entry + mempool::osd_pglog::list::const_iterator p = + ps->pg_log.get_log().log.end(); + while (p != ps->pg_log.get_log().log.begin()) { + --p; + if (p->version.version <= pi.last_update.version) { + break; + } + } + if (pi.last_update == pi.last_complete && + p->version == pi.last_update) { + // Matched peer's head entry - see if we can advance last_update + // because of partial written shards + eversion_t old_last_update = pi.last_update; + if (ps->info.partial_writes_last_complete.contains(i->shard) && + ps->info.partial_writes_last_complete[i->shard].first < + old_last_update) { + old_last_update = + ps->info.partial_writes_last_complete[i->shard].first; + } + ++p; + bool advanced = false; + while (p != ps->pg_log.get_log().log.end()) { + if (p->is_written_shard(i->shard)) { + psdout(20) << "log entry " << p->version + << " written_shards=" << p->written_shards + << " is divergent" << dendl; + break; + } + pi.last_update = p->version; + pi.last_complete = p->version; + // Update partial_writes_last_complete + if (ps->info.partial_writes_last_complete.contains(i->shard)) { + // Existing pwlc entry - only update if p->version is newer + if (ps->info.partial_writes_last_complete[i->shard].second < + p->version) { + ps->info.partial_writes_last_complete[i->shard] = + std::pair(old_last_update, p->version); + } + } else { + // No existing pwlc entry - create one + ps->info.partial_writes_last_complete[i->shard] = + std::pair(old_last_update, p->version); + } + advanced = true; + ++p; + } + if (advanced) { + psdout(20) << "shard " << i->shard << " pwlc=" + << ps->info.partial_writes_last_complete.at(i->shard) + << " last_complete=" << ps->info.last_complete + << " last_update=" << pi.last_update + << dendl; + } + } + } + if (pi.last_update == pi.last_complete && // peer has no missing pi.last_update == ps->info.last_update) { // peer is up to date // replica has no missing and identical log as us. no need to