From a0ea17025774b9785021f5ddcb112a26f161bb87 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Sun, 29 Mar 2026 11:20:52 +0800 Subject: [PATCH] crimson/os/seastore: handle OP_TOUCH_TEMP Signed-off-by: Xuehan Xu --- src/crimson/os/cyanstore/cyan_store.cc | 1 + src/crimson/os/seastore/lba/lba_btree_node.h | 8 ++- src/crimson/os/seastore/lba_mapping.h | 6 +- src/crimson/os/seastore/onode.h | 24 +++++++- .../staged-fltree/fltree_onode_manager.cc | 3 +- .../staged-fltree/fltree_onode_manager.h | 16 ++++++ .../staged-fltree/stages/key_layout.h | 36 ++++++++++++ .../onode_manager/staged-fltree/value.cc | 14 +++++ .../onode_manager/staged-fltree/value.h | 5 ++ src/crimson/os/seastore/seastore.cc | 57 ++++++++++++++++--- src/crimson/os/seastore/seastore_types.cc | 46 +++++++++++++++ src/crimson/os/seastore/seastore_types.h | 13 +++++ src/os/Transaction.cc | 16 +++++- src/os/bluestore/BlueStore.cc | 6 ++ .../seastore/test_object_data_handler.cc | 26 +++++++-- 15 files changed, 258 insertions(+), 19 deletions(-) diff --git a/src/crimson/os/cyanstore/cyan_store.cc b/src/crimson/os/cyanstore/cyan_store.cc index be2649e52e70..0b530260e9db 100644 --- a/src/crimson/os/cyanstore/cyan_store.cc +++ b/src/crimson/os/cyanstore/cyan_store.cc @@ -583,6 +583,7 @@ seastar::future<> CyanStore::Shard::do_transaction_no_callbacks( } break; case Transaction::OP_TOUCH: + case Transaction::OP_TOUCH_TEMP: case Transaction::OP_CREATE: { coll_t cid = i.get_cid(op->cid); diff --git a/src/crimson/os/seastore/lba/lba_btree_node.h b/src/crimson/os/seastore/lba/lba_btree_node.h index 9aaf30bda84f..830b331d1f45 100644 --- a/src/crimson/os/seastore/lba/lba_btree_node.h +++ b/src/crimson/os/seastore/lba/lba_btree_node.h @@ -425,7 +425,13 @@ struct LBACursor : BtreeCursor { assert(is_viewable()); assert(is_indirect()); assert(!is_end()); - return iter.get_val().pladdr.build_laddr(key); + if (likely(!hobject_t::is_temp_pool(get_key().get_pool()))) { + return iter.get_val().pladdr.build_laddr(key); + } else { + auto k = key; + k.set_pool(hobject_t::POOL_TEMP_START - key.get_pool()); + return iter.get_val().pladdr.build_laddr(k); + } } checksum_t get_checksum() const { assert(is_viewable()); diff --git a/src/crimson/os/seastore/lba_mapping.h b/src/crimson/os/seastore/lba_mapping.h index 5ef5f8a62b8c..395e51e9534e 100644 --- a/src/crimson/os/seastore/lba_mapping.h +++ b/src/crimson/os/seastore/lba_mapping.h @@ -169,8 +169,10 @@ public: extent_len_t get_intermediate_offset() const { assert(is_indirect()); assert(get_intermediate_base() <= get_intermediate_key()); - assert(get_intermediate_key() + get_length() <= - get_intermediate_base() + get_intermediate_length()); + if (likely(!hobject_t::is_temp_pool(get_key().get_pool()))) { + assert(get_intermediate_key() + get_length() <= + get_intermediate_base() + get_intermediate_length()); + } return get_intermediate_base().get_byte_distance< extent_len_t>(get_intermediate_key()); } diff --git a/src/crimson/os/seastore/onode.h b/src/crimson/os/seastore/onode.h index 83392ba7dc6d..83cbe858dc5b 100644 --- a/src/crimson/os/seastore/onode.h +++ b/src/crimson/os/seastore/onode.h @@ -82,6 +82,10 @@ protected: local_object_id_t object_id, extent_len_t block_size, bool is_metadata) const = 0; + virtual laddr_hint_t generate_temp_hint( + local_object_id_t object_id, + extent_len_t block_size, + bool is_metadata) const = 0; laddr_hint_t get_hint(extent_len_t block_size, bool is_metadata) const { assert(block_size >= laddr_t::UNIT_SIZE); auto prefix = get_clone_prefix(); @@ -92,8 +96,13 @@ protected: return laddr_hint_t::create_object_data_hint(*prefix, block_size); } } else if (sibling_object_id) { - return generate_clone_hint( - *sibling_object_id, block_size, is_metadata); + if (get_hobj().is_temp()) { + return generate_temp_hint( + *sibling_object_id, block_size, is_metadata); + } else { + return generate_clone_hint( + *sibling_object_id, block_size, is_metadata); + } } else { return init_hint(block_size, is_metadata); } @@ -190,10 +199,21 @@ public: } return std::nullopt; } + bool maybe_set_sibling_object_id(local_object_id_t id) { + if (sibling_object_id) { + return false; + } + set_sibling_object_id(id); + return true; + } void set_sibling_object_id(local_object_id_t id) { assert(!sibling_object_id); sibling_object_id = id; } + // should only be used for unittest + void reset_sibling_object_id() { + sibling_object_id.reset(); + } friend std::ostream& operator<<(std::ostream &out, const Onode &rhs); }; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc index 0ab338fcffe2..073e04e53211 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc @@ -166,8 +166,7 @@ FLTreeOnodeManager::get_onode_ret FLTreeOnodeManager::get_onode( LOG_PREFIX(FLTreeOnodeManager::get_onode); return tree.find( trans, hoid - ).si_then([this, &hoid, &trans, FNAME](auto cursor) - -> get_onode_ret { + ).si_then([this, &hoid, &trans, FNAME](auto cursor) -> get_onode_ret { if (cursor == tree.end()) { DEBUGT("no entry for {}", trans, hoid); return crimson::ct_error::enoent::make(); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h index 59603dfd15a0..f85388afaf3a 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h @@ -27,6 +27,14 @@ struct FakeOnode final : Onode { return LADDR_HINT_NULL; } + laddr_hint_t generate_temp_hint( + local_object_id_t object_id, + extent_len_t block_size, + bool is_metadata) const final { + ceph_abort("impossible"); + return LADDR_HINT_NULL; + } + bool is_alive() const final { return true; } const onode_layout_t &get_layout() const final { return layout; @@ -372,6 +380,14 @@ struct FLTreeOnode final : Onode, Value { bool is_metadata) const final { return Value::generate_clone_hint(object_id, block_size, is_metadata); } + laddr_hint_t generate_temp_hint( + local_object_id_t object_id, + extent_len_t block_size, + bool is_metadata) const final { + return Value::generate_temp_hint( + object_id, block_size, is_metadata); + } + ~FLTreeOnode() final {} }; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h index f4a82b088dc3..ca8659d8c0f8 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h @@ -426,6 +426,9 @@ class key_hobj_t { pool_t pool() const { return ghobj.hobj.pool; } + pool_t logical_pool() const { + return ghobj.hobj.get_logical_pool(); + } crush_hash_t crush() const { // Note: this is the reversed version of the object hash return ghobj.hobj.get_bitwise_key_u32(); @@ -445,6 +448,18 @@ class key_hobj_t { return laddr_hint_t::create_fresh_object_md_hint( shard(), pool(), crush(), block_size); } + laddr_hint_t create_temp_object_data_hint( + local_object_id_t object_id, + extent_len_t block_size) const { + return laddr_hint_t::create_temp_object_data_hint( + shard(), pool(), crush(), object_id, block_size); + } + laddr_hint_t create_temp_object_md_hint( + local_object_id_t object_id, + extent_len_t block_size) const { + return laddr_hint_t::create_temp_object_md_hint( + shard(), pool(), crush(), object_id, block_size); + } laddr_hint_t create_clone_object_data_hint( local_object_id_t object_id, extent_len_t block_size) const { @@ -546,6 +561,7 @@ class key_view_t { */ inline shard_t shard() const; inline pool_t pool() const; + inline pool_t logical_pool() const; inline crush_hash_t crush() const; laddr_hint_t create_onode_hint( extent_len_t block_size = laddr_t::UNIT_SIZE) const { @@ -562,6 +578,18 @@ class key_view_t { return laddr_hint_t::create_fresh_object_md_hint( shard(), pool(), crush(), block_size); } + laddr_hint_t create_temp_object_data_hint( + local_object_id_t object_id, + extent_len_t block_size) const { + return laddr_hint_t::create_temp_object_data_hint( + shard(), pool(), crush(), object_id, block_size); + } + laddr_hint_t create_temp_object_md_hint( + local_object_id_t object_id, + extent_len_t block_size) const { + return laddr_hint_t::create_temp_object_md_hint( + shard(), pool(), crush(), object_id, block_size); + } laddr_hint_t create_clone_object_data_hint( local_object_id_t object_id, extent_len_t block_size) const { @@ -794,6 +822,14 @@ pool_t key_view_t::pool() const { return shard_pool_packed().pool(); } +pool_t key_view_t::logical_pool() const { + auto pool = shard_pool_packed().pool(); + if (unlikely(hobject_t::is_temp_pool(pool))) { + return hobject_t::get_temp_pool(pool); + } + return pool; +} + crush_hash_t key_view_t::crush() const { return crush_packed().crush; } diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc index d875037b5f7a..2b672683be35 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc @@ -95,6 +95,20 @@ laddr_hint_t Value::init_hint( } } +laddr_hint_t Value::generate_temp_hint( + local_object_id_t object_id, + extent_len_t block_size, + bool is_metadata) const +{ + if (is_metadata) { + return p_cursor->get_key_view(vb.get_header_magic()) + .create_temp_object_md_hint(object_id, block_size); + } else { + return p_cursor->get_key_view(vb.get_header_magic()) + .create_temp_object_data_hint(object_id, block_size); + } +} + laddr_hint_t Value::generate_clone_hint( local_object_id_t object_id, extent_len_t block_size, diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/value.h b/src/crimson/os/seastore/onode_manager/staged-fltree/value.h index 33ef0970c130..b03552b2518c 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/value.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/value.h @@ -208,6 +208,11 @@ class Value { local_object_id_t object_id, extent_len_t block_size, bool is_metadata) const; + laddr_hint_t generate_temp_hint( + local_object_id_t object_id, + extent_len_t block_size, + bool is_metadata) const; + bool operator==(const Value& v) const { return p_cursor == v.p_cursor; } bool operator!=(const Value& v) const { return !(*this == v); } diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index 41f52ce9948e..e7b2a742ce57 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -1798,6 +1798,7 @@ SeaStore::Shard::_do_transaction_step( auto fut = onode_iertr::make_ready_future(OnodeRef()); bool create = false; if (op->op == Transaction::OP_TOUCH || + op->op == Transaction::OP_TOUCH_TEMP || op->op == Transaction::OP_CREATE || op->op == Transaction::OP_WRITE || op->op == Transaction::OP_ZERO) { @@ -1815,7 +1816,8 @@ SeaStore::Shard::_do_transaction_step( fut = onode_manager->get_or_create_onode(*ctx.transaction, oid); } } - return fut.si_then([&, op, this, FNAME](auto get_onode) { + return fut.si_then([&, op, this, FNAME](auto get_onode) + -> OnodeManager::get_or_create_onode_iertr::future<> { OnodeRef& onode = onodes[op->oid]; if (!onode) { assert(get_onode); @@ -1838,6 +1840,14 @@ SeaStore::Shard::_do_transaction_step( d_onode = dest_onode; return seastar::now(); }); + } else if (op->op == Transaction::OP_TOUCH_TEMP && !d_onode) { + const ghobject_t& dest_oid = i.get_oid(op->dest_oid); + DEBUGT("op {}, get_onode dest oid={} ...", + *ctx.transaction, (uint32_t)op->op, dest_oid); + return onode_manager->get_or_create_onode(*ctx.transaction, dest_oid + ).si_then([&d_onode](auto target_onode) { + d_onode = target_onode; + }); } else { return OnodeManager::get_or_create_onode_iertr::now(); } @@ -1864,6 +1874,30 @@ SeaStore::Shard::_do_transaction_step( oid); return _touch(ctx, *onode); } + case Transaction::OP_TOUCH_TEMP: + { + const auto &dest_oid = i.get_oid(op->dest_oid); + DEBUGT("op {}, temp oid={}, oid={} ...", + *ctx.transaction, + "TOUCH_TEMP", + oid, + dest_oid); + OnodeRef& d_onode = onodes[op->dest_oid]; + assert(d_onode); + assert(d_onode->get_hobj() == dest_oid.hobj); + assert(!dest_oid.hobj.is_temp()); + assert(oid.hobj.is_temp()); + return _touch(ctx, *d_onode + ).si_then([&onode, this, &ctx, &d_onode] { + assert(d_onode); + auto prefix = d_onode->get_clone_prefix(); + assert(prefix); + prefix->set_pool(onode->get_hobj().pool); + auto object_id = prefix->get_local_object_id(); + onode->set_sibling_object_id(object_id); + return _touch(ctx, *onode); + }); + } case Transaction::OP_WRITE: { uint64_t off = op->off; @@ -2023,8 +2057,9 @@ SeaStore::Shard::_do_transaction_step( DEBUGT("op COLL_MOVE_RENAME, oid={}, dest oid={} ...", *ctx.transaction, oid, i.get_oid(op->dest_oid)); ceph_assert(op->cid == op->dest_cid); + auto &target_onode = onodes[op->dest_oid]; return _rename( - ctx, onode, onodes[op->dest_oid] + ctx, onode, target_onode ).si_then([&onode] { onode.reset(); }); @@ -2067,8 +2102,10 @@ void rename_onode_omap_metadata( Transaction &t, Onode &src, Onode &dst) { auto src_prefix = *src.get_clone_prefix(); - auto dst_prefix = *dst.get_clone_prefix(); - + auto dst_prefix = src_prefix; + if (auto prefix = dst.get_clone_prefix(); prefix) { + dst_prefix = *prefix; + } auto rename_root = [&src, &dst, src_prefix, dst_prefix](omap_type_t type) { auto root = src.get_root(type).get(dst.get_metadata_hint()); if (root.is_null()) { @@ -2095,11 +2132,17 @@ SeaStore::Shard::_rename( OnodeRef &onode, OnodeRef &d_onode) { - auto &objHandler = ObjectDataHandler(max_object_size); - co_await objHanlder.rename(ObjectDataHandler::context_t{ + auto prefix = onode->get_clone_prefix(); + assert(prefix); + prefix->set_pool(onode->get_hobj().get_logical_pool()); + auto object_id = prefix->get_local_object_id(); + std::ignore = d_onode->maybe_set_sibling_object_id(object_id); + auto olayout = onode->get_layout(); + ObjectDataHandler objHandler(max_object_size); + co_await objHandler.rename(ObjectDataHandler::context_t{ *transaction_manager, *ctx.transaction, *onode, d_onode.get() }); - auto olayout = onode->get_layout(); + uint32_t size = olayout.size; auto oi_bl = ceph::bufferlist::static_from_mem( &olayout.oi[0], diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index 69bede32c90e..d205d009f11b 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -294,6 +294,52 @@ laddr_hint_t laddr_hint_t::create_fresh_object_md_hint( return hint; } +laddr_hint_t laddr_hint_t::create_temp_object_data_hint( + laddr_shard_t shard, + laddr_pool_t pool, + laddr_crush_hash_t crush, + local_object_id_t id, + extent_len_t block_size) +{ + laddr_hint_t hint{ + L_ADDR_MIN, + laddr_conflict_condition_t::clone_prefix_at_clone_id, + laddr_conflict_policy_t::gen_random, + block_size + }; + hint.addr.set_shard(shard); + hint.addr.set_pool(pool); + hint.addr.set_reversed_hash(crush); + hint.addr.set_local_object_id(id); + + CHECK_OBJECT_INFO(hint.addr, shard, pool, crush); + assert(hint.addr.get_local_object_id() == id); + assert(!hint.addr.is_metadata()); + assert(hint.addr.get_offset_bytes() == 0); + return hint; +} + +laddr_hint_t laddr_hint_t::create_temp_object_md_hint( + laddr_shard_t shard, + laddr_pool_t pool, + laddr_crush_hash_t crush, + local_object_id_t id, + extent_len_t block_size) +{ + auto hint = create_temp_object_data_hint( + shard, pool, crush, id, block_size); + auto addr = hint.addr; + + hint.addr.set_metadata(true); + + CHECK_OBJECT_INFO(hint.addr, shard, pool, crush); + assert(hint.addr.get_clone_prefix() == addr.get_clone_prefix()); + assert(hint.addr.is_metadata()); + boost::ignore_unused(addr); + return hint; +} + + laddr_hint_t laddr_hint_t::create_clone_object_data_hint( laddr_shard_t shard, laddr_pool_t pool, diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index fb748367849b..878f6cf9ae2b 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -1726,6 +1726,19 @@ struct laddr_hint_t { laddr_t clone_prefix, extent_len_t block_size); + static laddr_hint_t create_temp_object_data_hint( + laddr_shard_t shard, + laddr_pool_t pool, + laddr_crush_hash_t crush, + local_object_id_t object_id, + extent_len_t block_size); + static laddr_hint_t create_temp_object_md_hint( + laddr_shard_t shard, + laddr_pool_t pool, + laddr_crush_hash_t crush, + local_object_id_t id, + extent_len_t block_size); + void find_next_random(); bool conflict_with(laddr_t other) const { diff --git a/src/os/Transaction.cc b/src/os/Transaction.cc index f432adf58d50..960dfd301910 100644 --- a/src/os/Transaction.cc +++ b/src/os/Transaction.cc @@ -86,7 +86,21 @@ void Transaction::dump(ceph::Formatter *f) f->dump_stream("oid") << oid; } break; - + +#ifdef WITH_CRIMSON + case Transaction::OP_TOUCH_TEMP: + { + coll_t cid = i.get_cid(op->cid); + const ghobject_t &oid = i.get_oid(op->oid); + const ghobject_t &dest_oid = i.get_oid(op->dest_oid); + f->dump_string("op_name", "touch_temp"); + f->dump_stream("collection") << cid; + f->dump_stream("temp oid") << oid; + f->dump_stream("oid") << dest_oid; + } + break; +#endif + case Transaction::OP_WRITE: { coll_t cid = i.get_cid(op->cid); diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index b5619a49ffc9..211f2f9f9039 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -16186,6 +16186,9 @@ void BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t) // these operations implicity create the object bool create = false; if (op->op == Transaction::OP_TOUCH || +#ifdef WITH_CRIMSON + op->op == Transaction::OP_TOUCH_TEMP || +#endif op->op == Transaction::OP_CREATE || op->op == Transaction::OP_WRITE || op->op == Transaction::OP_ZERO) { @@ -16209,6 +16212,9 @@ void BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t) switch (op->op) { case Transaction::OP_CREATE: case Transaction::OP_TOUCH: +#ifdef WITH_CRIMSON + case Transaction::OP_TOUCH_TEMP: +#endif r = _touch(txc, c, o); break; diff --git a/src/test/crimson/seastore/test_object_data_handler.cc b/src/test/crimson/seastore/test_object_data_handler.cc index 903a1314a537..fd370c1847aa 100644 --- a/src/test/crimson/seastore/test_object_data_handler.cc +++ b/src/test/crimson/seastore/test_object_data_handler.cc @@ -43,24 +43,35 @@ public: std::swap(layout.xattr_root, o_mlayout.xattr_root); }); } - laddr_hint_t get_hint() const { + laddr_hint_t init_hint( + extent_len_t block_size, + bool is_metadata) const final { laddr_hint_t hint; hint.addr = laddr_t::from_byte_offset(0); + hint.addr.set_pool(1); hint.condition = laddr_conflict_condition_t::all_at_object_content; hint.policy = laddr_conflict_policy_t::linear_search; hint.block_size = laddr_t::UNIT_SIZE; return hint; } - laddr_hint_t init_hint( + laddr_hint_t generate_temp_hint( + local_object_id_t object_id, extent_len_t block_size, bool is_metadata) const final { - return get_hint(); + ceph_abort("impossible for now"); + return laddr_hint_t{}; } laddr_hint_t generate_clone_hint( local_object_id_t object_id, extent_len_t block_size, bool is_metadata) const final { - return get_hint(); + laddr_hint_t hint; + hint.addr = laddr_t::from_byte_offset(0); + hint.addr.set_pool(1); + hint.condition = laddr_conflict_condition_t::clone_prefix_at_clone_id; + hint.policy = laddr_conflict_policy_t::gen_random; + hint.block_size = laddr_t::UNIT_SIZE; + return hint; } ~TestOnode() final = default; @@ -213,7 +224,14 @@ struct object_data_handler_test_t: ObjectDataHandler objhandler(MAX_OBJECT_SIZE); auto &target = get_object(target_snap); target.clone_from(head); + auto id = head.onode->get_layout() + .object_data + .get() + .get_reserved_data_base() + .get_local_object_id(); head.onode->swap_layout(t, *(target.onode)); + head.onode->reset_sibling_object_id(); + head.onode->set_sibling_object_id(id); co_await objhandler.clone( ObjectDataHandler::context_t{ *tm, t, *(target.onode), &*(head.onode) -- 2.47.3