From 7967f28c774f3f3830f10e02e88d6ef3e82b9f82 Mon Sep 17 00:00:00 2001 From: Alex Ainscow Date: Wed, 18 Mar 2026 09:22:26 +0000 Subject: [PATCH] osd: PGLog Attach correct version to missing list when ignoring log entries A previous fix for PR 66698 fixed an issue where log entries associated with partial writes were being processed incorrectly (see that PR and associated tracker for details). The fix was to ignore log entries that should not have been present on the non-primary shard. The problem with that approach is that in a more complex scenario, where the log contained a partial write, followed by a full write AND the shard is backfilling, then the missing list was being given the version prior to the full write, rather than prior to the clone. Our fix here corrects how the missing list version is calculated. See the associated tracker for instructions on how to recreate Fixes: https://tracker.ceph.com/issues/75211 Signed-off-by: Alex Ainscow --- src/osd/PGLog.cc | 2 +- src/osd/PGLog.h | 20 ++++++++++++++------ src/test/osd/TestECFailoverWithPeering.cc | 4 ++-- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc index 9c745dff964c..bf067dd69994 100644 --- a/src/osd/PGLog.cc +++ b/src/osd/PGLog.cc @@ -305,7 +305,7 @@ void PGLog::proc_replica_log( omissing, nullptr, ec_optimizations_enabled, - to.shard, + from.shard, this); if (lu < oinfo.last_update) { diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h index 3130f752ab90..fd20b24a50b1 100644 --- a/src/osd/PGLog.h +++ b/src/osd/PGLog.h @@ -1116,6 +1116,7 @@ protected: mempool::osd_pglog::list entries; eversion_t last; bool seen_non_error = false; + std::optional prior_version_opt; for (auto i = orig_entries.begin(); i != orig_entries.end(); ++i) { @@ -1148,14 +1149,20 @@ protected: } } if (i->is_error()) { - ldpp_dout(dpp, 20) << __func__ << ": ignoring " << *i << dendl; + ldpp_dout(dpp, 20) << __func__ << ": ignoring " << *i << dendl; } else if (!i->written_shards.empty() && !i->written_shards.contains(orig_shard)) { ldpp_dout(dpp, 20) << __func__ << ": ignoring partial write " << *i << dendl; - last = i->version; + last = i->version; + if (!prior_version_opt) { + prior_version_opt = i->prior_version; + } } else { - ldpp_dout(dpp, 20) << __func__ << ": keeping " << *i << dendl; - entries.push_back(*i); - last = i->version; + ldpp_dout(dpp, 20) << __func__ << ": keeping " << *i << dendl; + if (!prior_version_opt) { + prior_version_opt = i->prior_version; + } + entries.push_back(*i); + last = i->version; } } if (entries.empty()) { @@ -1163,7 +1170,8 @@ protected: return; } - const eversion_t prior_version = entries.begin()->prior_version; + ceph_assert(prior_version_opt); + const eversion_t prior_version = *prior_version_opt; const eversion_t first_divergent_update = entries.begin()->version; const eversion_t last_divergent_update = entries.rbegin()->version; const bool object_not_in_store = diff --git a/src/test/osd/TestECFailoverWithPeering.cc b/src/test/osd/TestECFailoverWithPeering.cc index 2ac088d3975a..7f252c82f295 100644 --- a/src/test/osd/TestECFailoverWithPeering.cc +++ b/src/test/osd/TestECFailoverWithPeering.cc @@ -672,7 +672,7 @@ TEST_P(TestECFailoverWithPeering, DISABLED_MultiObjectParallelRecoveryCrash) { */ TEST_P( TestECFailoverWithPeering, - DISABLED_RollbackAfterMixedBlockedWritesWithOSDFailure + RollbackAfterMixedBlockedWritesWithOSDFailure ) { if (m < 2) { GTEST_SKIP() << "RollbackAfterMixedBlockedWritesWithOSDFailure requires m >= 2"; @@ -744,7 +744,7 @@ TEST_P( */ TEST_P( TestECFailoverWithPeering, - DISABLED_RollbackAfterMixedBlockedWritesWithOSDFailure2 + RollbackAfterMixedBlockedWritesWithOSDFailure2 ) { if (m < 2) { GTEST_SKIP() << "RollbackAfterMixedBlockedWritesWithOSDFailure requires m >= 2"; -- 2.47.3