From: Xuehan Xu
Date: Tue, 24 Feb 2026 07:35:58 +0000 (+0800)
Subject: crimson/os/seastore/lba: TRIM/CLEANER trans to adjust deltas of
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c7af833de2f0d0eaa8c994d2539514abca005443;p=ceph-ci.git

crimson/os/seastore/lba: TRIM/CLEANER trans to adjust deltas of
LBALeafNodes when committing them.

This is to deal with the following scenario:

1. A client transaction modifies a value in the LBALeafNode, changing a
   field other than the pladdr;
2. A TRIM/CLEANER transaction concurrently modifies the pladdr for the
   same laddr_t.

In the old approach, the client transaction may overwrite the pladdr with
the outdated value after the TRIM/CLEANER transaction commits.

Signed-off-by: Xuehan Xu
---
Review notes (commentary only; not part of the commit message):

* delta_buffer_t::for_each() calls `f` once for every entry of `buffer`,
  passing the entry by mutable reference.  `f` is invoked as an lvalue:
  forwarding it inside the loop would forward an rvalue callable once per
  iteration.

* ExtentCommitter::_share_prior_data_to_mutations(): for LADDR_LEAF
  extents the committed buffer is no longer copied over each
  mutation-pending extent.  Instead merge_content_to() patches the
  pending node per key, and adjust_delta() rewrites `val.pladdr` of every
  UPDATE delta entry whose key appears in the returned map.  All other
  extent types keep the previous copy_out / on_data_commit /
  reapply_delta sequence.

* LBALeafNode::merge_content_to() walks `this` (from `iter`) and
  `pending_version` in lock step by key:
    - an entry on either side whose pladdr is an laddr or a zero paddr is
      skipped;
    - an entry of `pending_version` whose child pointer is valid and
      reports _is_pending_io() is skipped;
    - for equal keys with differing pladdr, the pladdr and checksum of
      `this` are written into `pending_version` and key -> pladdr is
      recorded in the returned map (a duplicate key trips ceph_assert).
  If `pending_version` is initial-pending and already has a
  last-committed crc, the crc is recalculated before returning.
  `iter` is taken by reference and is advanced by the call.  The
  Transaction parameter `t` is not used by the body shown here.

* NOTE(review): the template argument lists in this patch
  (`template <typename Func>`, the `static_cast<...>` targets and the
  `std::map<laddr_t, pladdr_t>` return type) were reconstructed from how
  the values are used.  The `static_cast<CachedExtent&>` targets in
  particular are an assumption -- confirm against commit c7af833de2f0
  before applying.

diff --git a/src/crimson/common/fixed_kv_node_layout.h b/src/crimson/common/fixed_kv_node_layout.h
index 7adce7158b3..a4aa8efd129 100644
--- a/src/crimson/common/fixed_kv_node_layout.h
+++ b/src/crimson/common/fixed_kv_node_layout.h
@@ -310,6 +310,12 @@ public:
     void clear() {
       buffer.clear();
     }
+    template <typename Func>
+    void for_each(Func &&f) {
+      for (auto &i : buffer) {
+        std::invoke(f, i);
+      }
+    }
   };
 
   void journal_insert(
diff --git a/src/crimson/os/seastore/cached_extent.cc b/src/crimson/os/seastore/cached_extent.cc
index d5abf2e0853..a0e599d65f1 100644
--- a/src/crimson/os/seastore/cached_extent.cc
+++ b/src/crimson/os/seastore/cached_extent.cc
@@ -434,12 +434,28 @@ void ExtentCommitter::_share_prior_data_to_mutations() {
   ceph_assert(is_lba_backref_node(extent.get_type()));
   auto &prior = *extent.prior_instance;
   for (auto &mext : prior.mutation_pending_extents) {
-    auto &mextent = static_cast<CachedExtent&>(mext);
-    TRACE("{} -> {}", extent, mextent);
-    extent.get_bptr().copy_out(
-      0, extent.get_length(), mextent.get_bptr().c_str());
-    mextent.on_data_commit();
-    mextent.reapply_delta();
+    if (extent.get_type() == extent_types_t::LADDR_LEAF) {
+      auto &mextent = static_cast<lba::LBALeafNode&>(mext);
+      auto &me = static_cast<lba::LBALeafNode&>(extent);
+      TRACE("{} -> {}", me, mextent);
+      auto iter = me.begin();
+      auto merged = me.merge_content_to(t, mextent, iter);
+      mextent.adjust_delta([&](auto &buf) {
+        if (buf.op == lba::LBALeafNode::delta_t::op_t::UPDATE) {
+          auto it = merged.find(buf.key);
+          if (it != merged.end()) {
+            buf.val.pladdr = pladdr_le_t(it->second);
+          }
+        }
+      });
+    } else {
+      auto &mextent = static_cast<CachedExtent&>(mext);
+      TRACE("{} -> {}", extent, mextent);
+      extent.get_bptr().copy_out(
+        0, extent.get_length(), mextent.get_bptr().c_str());
+      mextent.on_data_commit();
+      mextent.reapply_delta();
+    }
   }
 }
 
diff --git a/src/crimson/os/seastore/lba/lba_btree_node.h b/src/crimson/os/seastore/lba/lba_btree_node.h
index c0faa45b880..addbabd74ee 100644
--- a/src/crimson/os/seastore/lba/lba_btree_node.h
+++ b/src/crimson/os/seastore/lba/lba_btree_node.h
@@ -288,45 +288,63 @@ struct LBALeafNode
 
   std::ostream &print_detail(std::ostream &out) const final;
 
+  std::map<laddr_t, pladdr_t> merge_content_to(
+    Transaction &t,
+    LBALeafNode &pending_version,
+    iterator &iter)
+  {
+    std::map<laddr_t, pladdr_t> modified;
+    auto it = pending_version.begin();
+    while (it != pending_version.end() && iter != this->end()) {
+      const auto &v1 = iter->get_val();
+      if (v1.pladdr.is_laddr() ||
+          v1.pladdr.get_paddr().is_zero()) {
+        iter++;
+        continue;
+      }
+      const auto &v2 = it->get_val();
+      if (v2.pladdr.is_laddr() || v2.pladdr.get_paddr().is_zero()) {
+        it++;
+        continue;
+      }
+      if (auto child = pending_version.children[it->get_offset()];
+          is_valid_child_ptr(child) && child->_is_pending_io()) {
+        // skip the ones that the pending version is also modifying
+        it++;
+        continue;
+      }
+      if (it->get_key() == iter->get_key()) {
+        if (v2.pladdr != v1.pladdr) {
+          auto m_v2 = v2;
+          m_v2.pladdr = v1.pladdr;
+          m_v2.checksum = v1.checksum;
+          it->set_val(m_v2);
+          auto [_it, inserted] = modified.emplace(it->get_key(), v1.pladdr);
+          ceph_assert(inserted);
+        }
+        it++;
+        iter++;
+      } else if (it->get_key() > iter->get_key()) {
+        iter++;
+      } else {
+        it++;
+      }
+    }
+    if (pending_version.is_initial_pending() &&
+        pending_version.get_last_committed_crc()) {
+      // if pending_version has already calculated its crc,
+      // calculate it again.
+      pending_version.set_last_committed_crc(pending_version.calc_crc32c());
+    }
+    return modified;
+  }
+
+  template