From 1d67305a53187c9222d7e0d0acd00975639052f1 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Wed, 26 Nov 2025 16:39:37 +0800 Subject: [PATCH] crimson/os/seastore: disable linked tree node operations when committing rewriting transactions Signed-off-by: Xuehan Xu --- src/crimson/os/seastore/btree/fixed_kv_node.h | 40 +++++++++++-------- src/crimson/os/seastore/cache.cc | 8 ++-- src/crimson/os/seastore/cached_extent.h | 4 +- src/crimson/os/seastore/logical_child_node.h | 10 +++-- src/crimson/os/seastore/object_data_handler.h | 5 ++- .../omap_manager/btree/omap_btree_node_impl.h | 19 ++++++--- src/crimson/os/seastore/root_block.cc | 12 ++++-- src/crimson/os/seastore/root_block.h | 2 +- 8 files changed, 64 insertions(+), 36 deletions(-) diff --git a/src/crimson/os/seastore/btree/fixed_kv_node.h b/src/crimson/os/seastore/btree/fixed_kv_node.h index 926408f64a9..b11031a6ea1 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_node.h +++ b/src/crimson/os/seastore/btree/fixed_kv_node.h @@ -185,8 +185,10 @@ struct FixedKVInternalNode this->set_layout_buf(this->get_bptr().c_str()); } - void prepare_commit() final { - parent_node_t::prepare_commit(); + void prepare_commit(Transaction &t) final { + if (!is_rewrite_transaction(t.get_src())) { + parent_node_t::prepare_commit(); + } } virtual ~FixedKVInternalNode() { @@ -250,12 +252,14 @@ struct FixedKVInternalNode delta_buffer.clear(); } - void on_replace_prior() final { - this->parent_node_t::on_replace_prior(); - if (this->is_btree_root()) { - this->root_node_t::on_replace_prior(); - } else { - this->child_node_t::on_replace_prior(); + void on_replace_prior(Transaction &t) final { + if (!is_rewrite_transaction(t.get_src())) { + this->parent_node_t::on_replace_prior(); + if (this->is_btree_root()) { + this->root_node_t::on_replace_prior(); + } else { + this->child_node_t::on_replace_prior(); + } } } @@ -680,19 +684,23 @@ struct FixedKVLeafNode } virtual void do_prepare_commit() = 0; - void prepare_commit() final { - do_prepare_commit(); + void prepare_commit(Transaction &t) final { + if (!is_rewrite_transaction(t.get_src())) { + do_prepare_commit(); + } modifications = 0; } virtual void do_on_replace_prior() = 0; - void on_replace_prior() final { + void on_replace_prior(Transaction &t) final { ceph_assert(!this->is_rewrite()); - do_on_replace_prior(); - if (this->is_btree_root()) { - this->root_node_t::on_replace_prior(); - } else { - this->child_node_t::on_replace_prior(); + if (!is_rewrite_transaction(t.get_src())) { + do_on_replace_prior(); + if (this->is_btree_root()) { + this->root_node_t::on_replace_prior(); + } else { + this->child_node_t::on_replace_prior(); + } } modifications = 0; } diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index c4ee118fb3b..03915947e13 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1348,10 +1348,10 @@ record_t Cache::prepare_record( } i->prepare_write(); - i->prepare_commit(); + i->prepare_commit(t); if (i->is_mutation_pending()) { - i->on_replace_prior(); + i->on_replace_prior(t); } // else, is_exist_mutation_pending(): // - it doesn't have prior_instance to replace @@ -1414,12 +1414,12 @@ record_t Cache::prepare_record( i->trans_view_hook.unlink(); } - t.for_each_finalized_fresh_block([](auto &e) { + t.for_each_finalized_fresh_block([&t](auto &e) { // fresh blocks' `prepare_commit` must be invoked before // retiering extents, this is because logical linked tree // nodes needs to access their prior instances in this // phase if they are rewritten. - e->prepare_commit(); + e->prepare_commit(t); }); /* diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index d2c4e6eed85..a6fd3cb1077 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -404,7 +404,7 @@ public: * Called prior to committing the transaction in which this extent * is living. */ - virtual void prepare_commit() {} + virtual void prepare_commit(Transaction &) {} /** * on_initial_write @@ -455,7 +455,7 @@ public: * with the states of Cache and can't wait till transaction * completes. */ - virtual void on_replace_prior() {} + virtual void on_replace_prior(Transaction &) {} /** * on_invalidated diff --git a/src/crimson/os/seastore/logical_child_node.h b/src/crimson/os/seastore/logical_child_node.h index 40eaea75aae..092ee084a2e 100644 --- a/src/crimson/os/seastore/logical_child_node.h +++ b/src/crimson/os/seastore/logical_child_node.h @@ -128,12 +128,14 @@ public: return (get_laddr() + get_length()).checked_to_laddr(); } protected: - void on_replace_prior() final { + void on_replace_prior(Transaction &t) final { assert(is_seen_by_users()); - lba_child_node_t::on_replace_prior(); - do_on_replace_prior(); + if (!is_rewrite_transaction(t.get_src())) { + lba_child_node_t::on_replace_prior(); + } + do_on_replace_prior(t); } - virtual void do_on_replace_prior() {} + virtual void do_on_replace_prior(Transaction &t) {} }; using LogicalChildNodeRef = TCachedExtentRef; } // namespace crimson::os::seastore diff --git a/src/crimson/os/seastore/object_data_handler.h b/src/crimson/os/seastore/object_data_handler.h index ffb22246b4f..73c0953668a 100644 --- a/src/crimson/os/seastore/object_data_handler.h +++ b/src/crimson/os/seastore/object_data_handler.h @@ -294,7 +294,10 @@ struct ObjectDataBlock : crimson::os::seastore::LogicalChildNode { modified_region.clear(); } - void prepare_commit() final { + void prepare_commit(Transaction &t) final { + if (is_rewrite_transaction(t.get_src())) { + return; + } if (has_mutation()) { ceph_assert(!cached_overwrites.is_empty()); if (cached_overwrites.has_cached_bptr()) { diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h index 42f127b1a29..4d238ff185c 100644 --- a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h +++ b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h @@ -80,7 +80,10 @@ struct OMapInnerNode } } - void prepare_commit() final { + void prepare_commit(Transaction &t) final { + if (is_rewrite_transaction(t.get_src())) { + return; + } if (unlikely(!is_seen_by_users())) { ceph_assert(is_rewrite()); auto &prior = *get_prior_instance()->template cast(); @@ -111,7 +114,10 @@ struct OMapInnerNode } } - void do_on_replace_prior() final { + void do_on_replace_prior(Transaction &t) final { + if (is_rewrite_transaction(t.get_src())) { + return; + } this->parent_node_t::on_replace_prior(); if (!this->is_btree_root()) { auto &prior = *get_prior_instance()->template cast(); @@ -337,7 +343,10 @@ struct OMapLeafNode this->child_node_t::on_invalidated(); } - void prepare_commit() final { + void prepare_commit(Transaction &t) final { + if (is_rewrite_transaction(t.get_src())) { + return; + } if (unlikely(!is_seen_by_users())) { ceph_assert(is_rewrite()); auto &prior = *get_prior_instance()->template cast(); @@ -367,9 +376,9 @@ struct OMapLeafNode } } - void do_on_replace_prior() final { + void do_on_replace_prior(Transaction &t) final { ceph_assert(!this->is_rewrite()); - if (!this->is_btree_root()) { + if (!this->is_btree_root() && !is_rewrite_transaction(t.get_src())) { auto &prior = *get_prior_instance()->template cast(); assert(prior.base_child_t::has_parent_tracker()); this->child_node_t::on_replace_prior(); diff --git a/src/crimson/os/seastore/root_block.cc b/src/crimson/os/seastore/root_block.cc index 3f61b92fdaa..574b500a860 100644 --- a/src/crimson/os/seastore/root_block.cc +++ b/src/crimson/os/seastore/root_block.cc @@ -8,8 +8,11 @@ namespace crimson::os::seastore { -void RootBlock::on_replace_prior() { - if (!lba_root_node) { +void RootBlock::on_replace_prior(Transaction &t) { + if (!lba_root_node || + // for rewrite transactions, we keep the prior extents instead of + // the new ones. + is_rewrite_transaction(t.get_src())) { auto &prior = static_cast(*get_prior_instance()); if (prior.lba_root_node) { RootBlockRef this_ref = this; @@ -29,7 +32,10 @@ void RootBlock::on_replace_prior() { } } } - if (!backref_root_node) { + if (!backref_root_node || + // for rewrite transactions, we keep the prior extents instead of + // the new ones. + is_rewrite_transaction(t.get_src())) { auto &prior = static_cast(*get_prior_instance()); if (prior.backref_root_node) { RootBlockRef this_ref = this; diff --git a/src/crimson/os/seastore/root_block.h b/src/crimson/os/seastore/root_block.h index 3913be52415..ad0cc40a900 100644 --- a/src/crimson/os/seastore/root_block.h +++ b/src/crimson/os/seastore/root_block.h @@ -61,7 +61,7 @@ struct RootBlock : CachedExtent { return extent_types_t::ROOT; } - void on_replace_prior() final; + void on_replace_prior(Transaction &t) final; /// dumps root as delta ceph::bufferlist get_delta() final { -- 2.47.3