]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: PGLog Attach correct version to missing list when ignoring log entries 67870/head
authorAlex Ainscow <aainscow@uk.ibm.com>
Wed, 18 Mar 2026 09:22:26 +0000 (09:22 +0000)
committerAlex Ainscow <aainscow@uk.ibm.com>
Thu, 30 Apr 2026 15:27:48 +0000 (16:27 +0100)
A previous fix for PR 66698 fixed an issue where log entries associated with
partial writes were being processed incorrectly (see that PR and associated
tracker for details).  The fix was to ignore log entries that should not have
been present on the non-primary shard.

The problem with that approach is that in a more complex scenario, where the
log contained a partial write, followed by a full write AND the shard is
backfilling, then the missing list was being given the version prior to the
full write, rather than prior to the clone.

Our fix here corrects how the missing list version is calculated.

See the associated tracker for instructions on how to recreate

Fixes: https://tracker.ceph.com/issues/75211
Signed-off-by: Alex Ainscow <aainscow@uk.ibm.com>
src/osd/PGLog.cc
src/osd/PGLog.h
src/test/osd/TestECFailoverWithPeering.cc

index 9c745dff964c071c804fc7064c66701dff58f73f..bf067dd699945a8857506dc50e9abb0bc87fd364 100644 (file)
@@ -305,7 +305,7 @@ void PGLog::proc_replica_log(
     omissing,
     nullptr,
     ec_optimizations_enabled,
-    to.shard,
+    from.shard,
     this);
 
   if (lu < oinfo.last_update) {
index 3130f752ab90e851a1d1bb20ab757aa7a4ec045a..fd20b24a50b1b839e90b2bd97a3b976615b1e5c5 100644 (file)
@@ -1116,6 +1116,7 @@ protected:
     mempool::osd_pglog::list<pg_log_entry_t> entries;
     eversion_t last;
     bool seen_non_error = false;
+    std::optional<eversion_t> prior_version_opt;
     for (auto i = orig_entries.begin();
         i != orig_entries.end();
         ++i) {
@@ -1148,14 +1149,20 @@ protected:
        }
       }
       if (i->is_error()) {
- ldpp_dout(dpp, 20) << __func__ << ": ignoring " << *i << dendl;
       ldpp_dout(dpp, 20) << __func__ << ": ignoring " << *i << dendl;
       } else if (!i->written_shards.empty() && !i->written_shards.contains(orig_shard)) {
         ldpp_dout(dpp, 20) << __func__ << ": ignoring partial write " << *i << dendl;
- last = i->version;
+        last = i->version;
+        if (!prior_version_opt) {
+          prior_version_opt = i->prior_version;
+        }
       } else {
- ldpp_dout(dpp, 20) << __func__ << ": keeping " << *i << dendl;
- entries.push_back(*i);
- last = i->version;
+        ldpp_dout(dpp, 20) << __func__ << ": keeping " << *i << dendl;
+        if (!prior_version_opt) {
+          prior_version_opt = i->prior_version;
+        }
+        entries.push_back(*i);
+        last = i->version;
       }
     }
     if (entries.empty()) {
@@ -1163,7 +1170,8 @@ protected:
       return;
     }
 
-    const eversion_t prior_version = entries.begin()->prior_version;
+    ceph_assert(prior_version_opt);
+    const eversion_t prior_version = *prior_version_opt;
     const eversion_t first_divergent_update = entries.begin()->version;
     const eversion_t last_divergent_update = entries.rbegin()->version;
     const bool object_not_in_store =
index 2ac088d3975ad15133ca93cc29701a09ac881dab..7f252c82f295a795be5b09449b3125c7157815c3 100644 (file)
@@ -672,7 +672,7 @@ TEST_P(TestECFailoverWithPeering, DISABLED_MultiObjectParallelRecoveryCrash) {
  */
 TEST_P(
   TestECFailoverWithPeering,
-  DISABLED_RollbackAfterMixedBlockedWritesWithOSDFailure
+  RollbackAfterMixedBlockedWritesWithOSDFailure
 ) {
   if (m < 2) {
     GTEST_SKIP() << "RollbackAfterMixedBlockedWritesWithOSDFailure requires m >= 2";
@@ -744,7 +744,7 @@ TEST_P(
  */
 TEST_P(
   TestECFailoverWithPeering,
-  DISABLED_RollbackAfterMixedBlockedWritesWithOSDFailure2
+  RollbackAfterMixedBlockedWritesWithOSDFailure2
 ) {
   if (m < 2) {
     GTEST_SKIP() << "RollbackAfterMixedBlockedWritesWithOSDFailure requires m >= 2";