]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Divergent logs for pure partial writes should roll back mising list when merged.
authorAlex Ainscow <aainscow@uk.ibm.com>
Fri, 1 May 2026 20:36:25 +0000 (21:36 +0100)
committerAlex Ainscow <aainscow@uk.ibm.com>
Tue, 19 May 2026 12:31:10 +0000 (13:31 +0100)
When a log was applied for a partial write to an object that is async recovering, the
log will be rolled forward and the missing list updated.

If this log becomes divergent, then we must rollback the update to the missing list
in the merge.

Fixes: https://tracker.ceph.com/issues/75962
Signed-off-by: Alex Ainscow <aainscow@uk.ibm.com>
src/osd/PGLog.h
src/test/osd/ECPeeringTestFixture.cc
src/test/osd/TestECFailoverWithPeering.cc

index fd20b24a50b1b839e90b2bd97a3b976615b1e5c5..648dddc37a8aa25ea1a88d5db813a84bab3c04e1 100644 (file)
@@ -1165,18 +1165,26 @@ protected:
         last = i->version;
       }
     }
-    if (entries.empty()) {
+    if (!prior_version_opt) {
       ldpp_dout(dpp, 10) << __func__ << ": no non-ERROR entries" << dendl;
       return;
     }
 
+    bool object_not_in_store = false;
+
     ceph_assert(prior_version_opt);
-    const eversion_t prior_version = *prior_version_opt;
-    const eversion_t first_divergent_update = entries.begin()->version;
-    const eversion_t last_divergent_update = entries.rbegin()->version;
-    const bool object_not_in_store =
-      !missing.is_missing(hoid) &&
-      entries.rbegin()->is_delete();
+    eversion_t prior_version = *prior_version_opt;
+    eversion_t first_divergent_update;
+    eversion_t last_divergent_update;
+
+    if (!entries.empty()) {
+      first_divergent_update = entries.begin()->version;
+      last_divergent_update = entries.rbegin()->version;
+      object_not_in_store =
+        !missing.is_missing(hoid) &&
+        entries.rbegin()->is_delete();
+    }
+
     ldpp_dout(dpp, 10) << __func__ << ": hoid " << " object_not_in_store: "
                        << object_not_in_store << dendl;
     ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid
@@ -1186,7 +1194,7 @@ protected:
                       << dendl;
 
     auto objiter = log.objects.find(hoid);
-    if (objiter != log.objects.end() &&
+    if (objiter != log.objects.end() && !entries.empty() &&
        objiter->second->version >= first_divergent_update) {
       /// Case 1)
       ldpp_dout(dpp, 10) << __func__ << ": more recent entry found: "
@@ -1226,7 +1234,7 @@ protected:
 
     ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid
                       <<" has no more recent entries in log" << dendl;
-    if (prior_version == eversion_t() || entries.front().is_clone()) {
+    if (prior_version == eversion_t() || (!entries.empty() && entries.front().is_clone())) {
       /// Case 2)
       ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid
                         << " prior_version or op type indicates creation,"
index 172aa9a8dfb60fe924957771b1e578435f539d46..c3cc654daaa0eca2493245b1a08a0a563f4b6d0f 100644 (file)
@@ -674,8 +674,28 @@ void ECPeeringTestFixture::run_parallel_recovery_and_verify_callbacks(
 
       ASSERT_EQ(target_missing_item, missing_item) << "Missing on shard and primary should match";
 
-
-      std::cout << "  OSD " << target_osd << " is a peer and has object " << obj_names[i] << " in peer_missing" << std::endl;
+      // Read the OI directly from the primary's store to get the authoritative version
+      // This avoids relying on potentially stale cached data in the OBC
+      ObjectStore::CollectionHandle primary_ch = chs[primary_shard];
+      ASSERT_TRUE(primary_ch) << "Primary shard " << primary_shard << " must have a valid collection handle";
+      
+      ghobject_t primary_ghoid(hoid, ghobject_t::NO_GEN, shard_id_t(primary_shard));
+      ceph::buffer::ptr oi_ptr;
+      int r = store->getattr(primary_ch, primary_ghoid, OI_ATTR, oi_ptr);
+      ASSERT_GE(r, 0) << "Failed to read OI_ATTR from primary store for " << obj_names[i];
+      
+      bufferlist oi_bl;
+      oi_bl.append(oi_ptr);
+      object_info_t oi;
+      auto p = oi_bl.cbegin();
+      oi.decode(p);
+      
+      std::cout << "  OSD " << target_osd << " is a peer and has object " << obj_names[i]
+                << " in peer_missing (OI version from primary store: " << oi.version << ")" << std::endl;
+      
+      // Verify the missing item's need version matches what we read from the store
+      ASSERT_EQ(missing_item.need, oi.version)
+        << "Missing item need version should match OI version from primary store for " << obj_names[i];
     }
 
     missing_items.push_back(missing_item);
index 7f252c82f295a795be5b09449b3125c7157815c3..7f44b16033d52a93c0ed91325c106a71dbdc605d 100644 (file)
@@ -807,6 +807,42 @@ TEST_P(
   run_recovery_and_verify_callbacks(obj_name, recovery_target_shard, pattern_p1);
 }
 
+/**
+ * Test rollback after a sequence of blocked full-stripe and chunk writes.
+ * This is a similar scenario to the previous test, but we force the shard
+ * to do a sync, rather than async recovery at the end.
+ * Recreate for tracker https://tracker.ceph.com/issues/75962
+ */
+TEST_P(
+  TestECFailoverWithPeering,
+  RollbackAfterMixedBlockedWritesWithOSDFailure3
+) {
+  if (m < 2) {
+    GTEST_SKIP() << "RollbackAfterMixedBlockedWritesWithOSDFailure requires m >= 2";
+  }
+  set_config("osd_async_recovery_min_cost", "0");
+
+  const int blocked_shard = k + 1;
+  const int recovery_target_shard = 1;
+  const std::string obj_name = "test_mixed_blocked_writes";
+  const size_t full_stripe_size = stripe_unit * k;
+  const std::string pattern_p1(full_stripe_size, 'A');
+  mark_osd_down(recovery_target_shard);
+  create_and_write_verify(obj_name, pattern_p1);
+  mark_osd_up(recovery_target_shard);
+  create_and_write_verify("dummy", pattern_p1);
+  suspend_primary_to_osd(blocked_shard);
+  int result = write_attribute(obj_name, "test_attr", "value2", false);
+  ASSERT_EQ(-EINPROGRESS, result);
+  mark_osd_down(2);
+  unsuspend_primary_to_osd(blocked_shard);
+  event_loop->run_until_idle();
+
+  run_recovery_and_verify_callbacks(obj_name, recovery_target_shard, pattern_p1);
+
+  set_config("osd_async_recovery_min_cost", "100");
+}
+
 // ---------------------------------------------------------------------------
 // Instantiate TestECFailoverWithPeering with EC configurations
 // ---------------------------------------------------------------------------