From d85ea954e9ebd8ea7135bc7eecaac41c66e4a7fc Mon Sep 17 00:00:00 2001 From: Bill Scales Date: Fri, 1 Aug 2025 10:22:47 +0100 Subject: [PATCH] osd: Optimized EC don't try to trim past crt If there is an exceptionally long sequence of partial writes that did not update a shard that is followed by a full write then it is possible that the log trim point is ahead of the previous write to the shard (and hence crt). We cannot trim beyond crt. In this scenario it's fine to limit the trim to crt because the shard doesn't have any of the log entries for the partial writes so there is nothing more to trim. Signed-off-by: Bill Scales (cherry picked from commit 645cdf9f61e79764eca019f58a4d9c6b51768c81) --- src/osd/PeeringState.cc | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index b3fc1599d1351..eec4653a3504b 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -4549,8 +4549,16 @@ bool PeeringState::append_log_entries_update_missing( psdout(20) << "trim_to bool = " << bool(trim_to) << " trim_to = " << (trim_to ? 
*trim_to : eversion_t()) << dendl; - if (trim_to) - pg_log.trim(*trim_to, info); + if (trim_to) { + eversion_t trim = *trim_to; + if (pool.info.allows_ecoptimizations() && + (trim > pg_log.get_can_rollback_to())) { + // An exceptionally long sequence of partial writes followed by a full + // write can result in trim_to being ahead of crt + trim = pg_log.get_can_rollback_to(); + } + pg_log.trim(trim, info); + } dirty_info = true; write_if_dirty(t); return invalidate_stats; @@ -4722,6 +4730,12 @@ void PeeringState::append_log( if (!transaction_applied || async) psdout(10) << pg_whoami << " is async_recovery or backfill target" << dendl; + if (pool.info.allows_ecoptimizations() && + (trim_to > pg_log.get_can_rollback_to())) { + // An exceptionally long sequence of partial writes followed by a full + // write can result in trim_to being ahead of crt + trim_to = pg_log.get_can_rollback_to(); + } pg_log.trim(trim_to, info, transaction_applied, async); // update the local pg, pg log @@ -7004,6 +7018,14 @@ boost::statechart::result PeeringState::ReplicaActive::react(const MTrim& trim) { DECLARE_LOCALS; // primary is instructing us to trim + eversion_t trim_to = trim.trim_to; + if (ps->pool.info.allows_ecoptimizations() && + (trim_to > ps->pg_log.get_can_rollback_to())) { + // An exceptionally long sequence of partial writes followed by a full + // write can result in trim_to being ahead of crt + trim_to = ps->pg_log.get_can_rollback_to(); + } - ps->pg_log.trim(trim.trim_to, ps->info); + // Trim to the clamped local, not the raw trim.trim_to from the message + ps->pg_log.trim(trim_to, ps->info); ps->dirty_info = true; return discard_event(); -- 2.39.5