]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: EC optimizations correct pwlc after PG split
author Bill Scales <bill_scales@uk.ibm.com>
Thu, 5 Jun 2025 10:17:06 +0000 (11:17 +0100)
committer Alex Ainscow <aainscow@uk.ibm.com>
Tue, 1 Jul 2025 12:03:29 +0000 (13:03 +0100)
When a PG splits, the log entries are divided between the two PGs;
this can result in PWLC referring to log entries in the other PG.
Roll back PWLC after the split so it is not further advanced than
the most recently completed log entry.

Non-primary shards can be missing log entries and may roll back
PWLC too far because of this; however, this does not matter
because a split occurs at the start of a peering cycle and these
shards will be updated with the correct PWLC from the primary
shard later in the peering cycle when they are activated.

Signed-off-by: Bill Scales <bill_scales@uk.ibm.com>
src/osd/PGLog.h
src/osd/PeeringState.cc

index 90f123993c3f6af12929915bea88104881023126..c829f0534898312b04debb4e8274cbef40c581bd 100644 (file)
@@ -335,6 +335,30 @@ public:
       unsigned split_bits,
       IndexedLog *target);
 
+    void split_pwlc(pg_info_t &info) {  // Caps every range in info.partial_writes_last_complete at previous_version.
+      eversion_t previous_version;
+      if (rollback_info_trimmed_to_riter == log.rend()) {  // reverse iterator is at rend(): fall back to tail
+       previous_version = tail;
+      } else {
+       previous_version = rollback_info_trimmed_to_riter->version;  // version of the entry the iterator refers to
+      }
+      // When a split occurs, log entries are divided between the two PGs;
+      // this can leave pwlc referring to entries that are no longer in this
+      // PG log. Update pwlc so it is not beyond the last entry in the log.
+      // Non-primary shards which don't have a full log may roll back pwlc
+      // too far, but this will get corrected by the primary shard when
+      // activating shards later in peering.
+      for (auto & [shard, versionrange] : info.partial_writes_last_complete) {
+       auto &&[old_v,  new_v] = versionrange;  // bound by reference: assignments below modify the stored range
+       if (new_v > previous_version) {  // only ranges that extend past the cap are touched
+         new_v = previous_version;
+         if (old_v > new_v) {  // keep old_v from exceeding the lowered new_v
+           old_v = new_v;
+         }
+       }
+      }
+    }
+
     void zero() {
       // we must have already trimmed the old entries
       ceph_assert(rollback_info_trimmed_to == head);
@@ -918,6 +942,10 @@ public:
     }
   }
 
+  void split_pwlc(pg_info_t &info) {  // Thin wrapper: forwards info to the member log's split_pwlc().
+    log.split_pwlc(info);
+  }
+
   void merge_from(
     const std::vector<PGLog*>& sources,
     eversion_t last_update) {
index 959f5cb023e2133fe9f67de3dacb644ee5c2fec7..8cad271b8b7d1a873f49342fcabf3c777966c8e8 100644 (file)
@@ -3603,7 +3603,10 @@ void PeeringState::split_into(
 
   child->info.last_user_version = info.last_user_version;
 
+  // fix up pwlc - it may refer to log entries that are no longer in the log
   child->info.partial_writes_last_complete = info.partial_writes_last_complete;
+  pg_log.split_pwlc(info);
+  child->pg_log.split_pwlc(child->info);
 
   info.log_tail = pg_log.get_tail();
   child->info.log_tail = child->pg_log.get_tail();