]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: Optimized EC don't try to trim past crt
author: Bill Scales <bill_scales@uk.ibm.com>
Fri, 1 Aug 2025 09:22:47 +0000 (10:22 +0100)
committer: Alex Ainscow <aainscow@uk.ibm.com>
Wed, 17 Sep 2025 08:43:26 +0000 (09:43 +0100)
If an exceptionally long sequence of partial writes that did not
update a shard is followed by a full write, then it is possible
that the log trim point is ahead of the previous write to that
shard (and hence ahead of crt, the can_rollback_to version). We
cannot trim beyond crt. In this scenario it's fine to limit the
trim to crt because the shard doesn't have any of the log entries
for the partial writes, so there is nothing more to trim.

Signed-off-by: Bill Scales <bill_scales@uk.ibm.com>
(cherry picked from commit 645cdf9f61e79764eca019f58a4d9c6b51768c81)

src/osd/PeeringState.cc

index b3fc1599d13519e1bbd80f47d889b3cbabe5e582..eec4653a3504b53575760c9c9471ffc02272292e 100644 (file)
@@ -4549,8 +4549,16 @@ bool PeeringState::append_log_entries_update_missing(
 
   psdout(20) << "trim_to bool = " << bool(trim_to)
             << " trim_to = " << (trim_to ? *trim_to : eversion_t()) << dendl;
-  if (trim_to)
-    pg_log.trim(*trim_to, info);
+  if (trim_to) {
+    eversion_t trim = *trim_to;
+    if (pool.info.allows_ecoptimizations() &&
+       (trim > pg_log.get_can_rollback_to())) {
+      // A long run of partial writes that skipped this shard, then a full
+      // write, can put trim_to ahead of can_rollback_to (crt); clamp to crt
+      trim = pg_log.get_can_rollback_to();
+    }
+    pg_log.trim(trim, info);
+  }
   dirty_info = true;
   write_if_dirty(t);
   return invalidate_stats;
@@ -4722,6 +4730,12 @@ void PeeringState::append_log(
   if (!transaction_applied || async)
     psdout(10) << pg_whoami
               << " is async_recovery or backfill target" << dendl;
+  if (pool.info.allows_ecoptimizations() &&
+      (trim_to > pg_log.get_can_rollback_to())) {
+    // A long run of partial writes that skipped this shard, then a full
+    // write, can put trim_to ahead of can_rollback_to (crt); clamp to crt
+    trim_to = pg_log.get_can_rollback_to();
+  }
   pg_log.trim(trim_to, info, transaction_applied, async);
 
   // update the local pg, pg log
@@ -7004,6 +7018,13 @@ boost::statechart::result PeeringState::ReplicaActive::react(const MTrim& trim)
 {
   DECLARE_LOCALS;
   // primary is instructing us to trim
-  ps->pg_log.trim(trim.trim_to, ps->info);
+  eversion_t trim_to = trim.trim_to;
+  if (ps->pool.info.allows_ecoptimizations() &&
+      (trim_to > ps->pg_log.get_can_rollback_to())) {
+    // A long run of partial writes that skipped this shard, then a full
+    // write, can put trim_to ahead of can_rollback_to (crt); clamp to crt
+    trim_to = ps->pg_log.get_can_rollback_to();
+  }
+  ps->pg_log.trim(trim_to, ps->info);
   ps->dirty_info = true;
   return discard_event();