From: Bill Scales Date: Thu, 5 Jun 2025 10:17:06 +0000 (+0100) Subject: osd: EC optimizations correct pwlc after PG split X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6c04e4cf2b81c768eb313c3d0e6ac37d8e69b150;p=ceph.git osd: EC optimizations correct pwlc after PG split When a PG splits the log entries are divided between the two PGs, this can result in PWLC referring to log entries in the other PG. Rollback PWLC after the split so it is not further advanced than the most recently completed log entry. Non-primary shards can be missing log entries and may rollback PWLC too far because of this, however this does not matter because a split occurs at the start of a peering cycle and these shards will be updated with the correct PWLC from the primary shard later in the peering cycle when they are activated. Signed-off-by: Bill Scales --- diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h index 90f123993c3..c829f053489 100644 --- a/src/osd/PGLog.h +++ b/src/osd/PGLog.h @@ -335,6 +335,30 @@ public: unsigned split_bits, IndexedLog *target); + void split_pwlc(pg_info_t &info) { + eversion_t previous_version; + if (rollback_info_trimmed_to_riter == log.rend()) { + previous_version = tail; + } else { + previous_version = rollback_info_trimmed_to_riter->version; + } + // When a split occurs log entries are divided between the two PGs, + // this can leave pwlc referring to entries that are no longer in this + // PG log. Update pwlc so it is not beyond the last entry in the log. + // Non-primary shards which don't have a full log may rollback pwlc + // too far, but this will get corrected by the primary shard when + // activating shards later in peering. 
+ for (auto & [shard, versionrange] : info.partial_writes_last_complete) { + auto &&[old_v, new_v] = versionrange; + if (new_v > previous_version) { + new_v = previous_version; + if (old_v > new_v) { + old_v = new_v; + } + } + } + } + void zero() { // we must have already trimmed the old entries ceph_assert(rollback_info_trimmed_to == head); @@ -918,6 +942,10 @@ public: } } + void split_pwlc(pg_info_t &info) { + log.split_pwlc(info); + } + void merge_from( const std::vector& sources, eversion_t last_update) { diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 959f5cb023e..8cad271b8b7 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -3603,7 +3603,10 @@ void PeeringState::split_into( child->info.last_user_version = info.last_user_version; + // fix up pwlc - it may refer to log entries that are no longer in the log child->info.partial_writes_last_complete = info.partial_writes_last_complete; + pg_log.split_pwlc(info); + child->pg_log.split_pwlc(child->info); info.log_tail = pg_log.get_tail(); child->info.log_tail = child->pg_log.get_tail();