When a log is applied for a partial write to an object that is async recovering, the
log is rolled forward and the missing list is updated.
If this log later becomes divergent, we must roll back the update to the missing list
during the merge.
Fixes: https://tracker.ceph.com/issues/75962
Signed-off-by: Alex Ainscow <aainscow@uk.ibm.com>
last = i->version;
}
}
- if (entries.empty()) {
+ if (!prior_version_opt) {
ldpp_dout(dpp, 10) << __func__ << ": no non-ERROR entries" << dendl;
return;
}
+ bool object_not_in_store = false;
+
ceph_assert(prior_version_opt);
- const eversion_t prior_version = *prior_version_opt;
- const eversion_t first_divergent_update = entries.begin()->version;
- const eversion_t last_divergent_update = entries.rbegin()->version;
- const bool object_not_in_store =
- !missing.is_missing(hoid) &&
- entries.rbegin()->is_delete();
+ eversion_t prior_version = *prior_version_opt;
+ eversion_t first_divergent_update;
+ eversion_t last_divergent_update;
+
+ if (!entries.empty()) {
+ first_divergent_update = entries.begin()->version;
+ last_divergent_update = entries.rbegin()->version;
+ object_not_in_store =
+ !missing.is_missing(hoid) &&
+ entries.rbegin()->is_delete();
+ }
+
ldpp_dout(dpp, 10) << __func__ << ": hoid " << " object_not_in_store: "
<< object_not_in_store << dendl;
ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid
<< dendl;
auto objiter = log.objects.find(hoid);
- if (objiter != log.objects.end() &&
+ if (objiter != log.objects.end() && !entries.empty() &&
objiter->second->version >= first_divergent_update) {
/// Case 1)
ldpp_dout(dpp, 10) << __func__ << ": more recent entry found: "
ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid
<<" has no more recent entries in log" << dendl;
- if (prior_version == eversion_t() || entries.front().is_clone()) {
+ if (prior_version == eversion_t() || (!entries.empty() && entries.front().is_clone())) {
/// Case 2)
ldpp_dout(dpp, 10) << __func__ << ": hoid " << hoid
<< " prior_version or op type indicates creation,"
ASSERT_EQ(target_missing_item, missing_item) << "Missing on shard and primary should match";
-
- std::cout << " OSD " << target_osd << " is a peer and has object " << obj_names[i] << " in peer_missing" << std::endl;
+ // Read the OI directly from the primary's store to get the authoritative version
+ // This avoids relying on potentially stale cached data in the OBC
+ ObjectStore::CollectionHandle primary_ch = chs[primary_shard];
+ ASSERT_TRUE(primary_ch) << "Primary shard " << primary_shard << " must have a valid collection handle";
+
+ ghobject_t primary_ghoid(hoid, ghobject_t::NO_GEN, shard_id_t(primary_shard));
+ ceph::buffer::ptr oi_ptr;
+ int r = store->getattr(primary_ch, primary_ghoid, OI_ATTR, oi_ptr);
+ ASSERT_GE(r, 0) << "Failed to read OI_ATTR from primary store for " << obj_names[i];
+
+ bufferlist oi_bl;
+ oi_bl.append(oi_ptr);
+ object_info_t oi;
+ auto p = oi_bl.cbegin();
+ oi.decode(p);
+
+ std::cout << " OSD " << target_osd << " is a peer and has object " << obj_names[i]
+ << " in peer_missing (OI version from primary store: " << oi.version << ")" << std::endl;
+
+ // Verify the missing item's need version matches what we read from the store
+ ASSERT_EQ(missing_item.need, oi.version)
+ << "Missing item need version should match OI version from primary store for " << obj_names[i];
}
missing_items.push_back(missing_item);
run_recovery_and_verify_callbacks(obj_name, recovery_target_shard, pattern_p1);
}
+/**
+ * Test rollback after a sequence of blocked full-stripe and chunk writes.
+ * This is a similar scenario to the previous test, but we force the shard
+ * to do a sync, rather than async recovery at the end.
+ * Recreate for tracker https://tracker.ceph.com/issues/75962
+ *
+ * Sequence exercised by the body:
+ *  1. Skip unless m >= 2.
+ *  2. Set osd_async_recovery_min_cost to "0" for the duration of the test.
+ *  3. Mark the recovery target (1) down, create/write obj_name with a
+ *     pattern of stripe_unit * k bytes of 'A', then mark the target up.
+ *  4. Create/write a second object, "dummy", with the same pattern.
+ *  5. Suspend primary -> blocked_shard (k + 1) and issue an attribute write
+ *     on obj_name; the write is expected to return -EINPROGRESS.
+ *  6. Mark OSD 2 down, unsuspend the blocked shard and run the event loop
+ *     until idle.
+ *  7. Run recovery for obj_name on the recovery target and verify against
+ *     the original pattern.
+ *  8. Set osd_async_recovery_min_cost to "100".
+ *
+ * NOTE(review): confirm that osd_async_recovery_min_cost = 0 selects the
+ * recovery mode described above (sync vs. async).
+ * NOTE(review): a failing ASSERT_* returns from the test body early, so the
+ * final set_config() call would not run in that case.
+ */
+TEST_P(
+ TestECFailoverWithPeering,
+ RollbackAfterMixedBlockedWritesWithOSDFailure3
+) {
+ if (m < 2) {
+ GTEST_SKIP() << "RollbackAfterMixedBlockedWritesWithOSDFailure requires m >= 2";
+ }
+ set_config("osd_async_recovery_min_cost", "0");  // changed back to "100" at the end of the test
+
+ const int blocked_shard = k + 1;  // shard whose traffic from the primary is suspended below
+ const int recovery_target_shard = 1;  // shard taken down for the first write and recovered at the end
+ const std::string obj_name = "test_mixed_blocked_writes";
+ const size_t full_stripe_size = stripe_unit * k;
+ const std::string pattern_p1(full_stripe_size, 'A');  // full_stripe_size bytes of 'A'
+ mark_osd_down(recovery_target_shard);  // target is down while obj_name is first written
+ create_and_write_verify(obj_name, pattern_p1);
+ mark_osd_up(recovery_target_shard);
+ create_and_write_verify("dummy", pattern_p1);  // second object written after the target is back up
+ suspend_primary_to_osd(blocked_shard);
+ int result = write_attribute(obj_name, "test_attr", "value2", false);
+ ASSERT_EQ(-EINPROGRESS, result);  // presumably still pending because blocked_shard is suspended -- confirm
+ mark_osd_down(2);  // OSD 2 goes down while the attribute write is outstanding
+ unsuspend_primary_to_osd(blocked_shard);
+ event_loop->run_until_idle();
+
+ run_recovery_and_verify_callbacks(obj_name, recovery_target_shard, pattern_p1);  // verified against the originally written pattern
+
+ set_config("osd_async_recovery_min_cost", "100");  // presumably the default value -- confirm
+}
+
// ---------------------------------------------------------------------------
// Instantiate TestECFailoverWithPeering with EC configurations
// ---------------------------------------------------------------------------