]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: EC optimizations correct pwlc after PG split
authorBill Scales <bill_scales@uk.ibm.com>
Thu, 5 Jun 2025 10:17:06 +0000 (11:17 +0100)
committerLaura Flores <lflores@ibm.com>
Wed, 9 Jul 2025 15:47:24 +0000 (15:47 +0000)
When a PG splits, the log entries are divided between the two PGs;
this can result in PWLC referring to log entries in the other PG.
Roll back PWLC after the split so it is not advanced further than
the most recently completed log entry.

Non-primary shards can be missing log entries and may roll back
PWLC too far because of this; however, this does not matter
because a split occurs at the start of a peering cycle and these
shards will be updated with the correct PWLC from the primary
shard later in the peering cycle when they are activated.

Signed-off-by: Bill Scales <bill_scales@uk.ibm.com>
(cherry picked from commit 6c04e4cf2b81c768eb313c3d0e6ac37d8e69b150)

src/osd/PGLog.h
src/osd/PeeringState.cc

index 90f123993c3f6af12929915bea88104881023126..c829f0534898312b04debb4e8274cbef40c581bd 100644 (file)
@@ -335,6 +335,30 @@ public:
       unsigned split_bits,
       IndexedLog *target);
 
+    void split_pwlc(pg_info_t &info) {  // clamp info's pwlc ranges after a PG split
+      eversion_t previous_version;
+      if (rollback_info_trimmed_to_riter == log.rend()) {
+       previous_version = tail;  // iterator is at rend(): fall back to tail
+      } else {
+       previous_version = rollback_info_trimmed_to_riter->version;
+      }
+      // When a split occurs log entries are divided between the two PGs;
+      // this can leave pwlc referring to entries that are no longer in this
+      // PG log. Clamp each shard's pwlc range so it is not beyond
+      // previous_version. Non-primary shards which don't have a full log may
+      // roll back pwlc too far, but this will get corrected by the primary
+      // shard when activating shards later in peering.
+      for (auto & [shard, versionrange] : info.partial_writes_last_complete) {
+       auto &&[old_v,  new_v] = versionrange;
+       if (new_v > previous_version) {
+         new_v = previous_version;
+         if (old_v > new_v) {  // keep the range ordered: old_v must not exceed new_v
+           old_v = new_v;
+         }
+       }
+      }
+    }
+
     void zero() {
       // we must have already trimmed the old entries
       ceph_assert(rollback_info_trimmed_to == head);
@@ -918,6 +942,10 @@ public:
     }
   }
 
+  void split_pwlc(pg_info_t &info) {
+    log.split_pwlc(info);  // forward to the log member, which adjusts info in place
+  }
+
   void merge_from(
     const std::vector<PGLog*>& sources,
     eversion_t last_update) {
index 74368aed9a76dcfd0543fdf0e3c2a9ee2dcaca6e..01a93aa1e4f5c4503e789c83d8759460ddccf7ad 100644 (file)
@@ -3603,7 +3603,10 @@ void PeeringState::split_into(
 
   child->info.last_user_version = info.last_user_version;
 
+  // fix up pwlc - it may refer to log entries that are no longer in the log
   child->info.partial_writes_last_complete = info.partial_writes_last_complete;
+  pg_log.split_pwlc(info);
+  child->pg_log.split_pwlc(child->info);
 
   info.log_tail = pg_log.get_tail();
   child->info.log_tail = child->pg_log.get_tail();