From: Zhang Song Date: Tue, 26 Aug 2025 02:35:55 +0000 (+0800) Subject: crimson/os/seastore: make pladdr_t only store the local clone id instead of full... X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c9e62b62531a6c6a0fbdd7c6d523f39f1656e1bb;p=ceph.git crimson/os/seastore: make pladdr_t only store the local clone id instead of full laddr_t Signed-off-by: Zhang Song Signed-off-by: Xuehan Xu --- diff --git a/src/crimson/os/seastore/backref/backref_tree_node.h b/src/crimson/os/seastore/backref/backref_tree_node.h index f8e061290068..d8d5961a9ad2 100644 --- a/src/crimson/os/seastore/backref/backref_tree_node.h +++ b/src/crimson/os/seastore/backref/backref_tree_node.h @@ -33,14 +33,14 @@ constexpr size_t INTERNAL_NODE_CAPACITY = 254; * checksum : ceph_le32[1] 4B * size : ceph_le32[1] 4B * meta : backref_node_meta_le_t[1] 20B - * keys : paddr_le_t[CAPACITY] (193*8)B - * values : backref_map_val_le_t[CAPACITY] (193*13)B - * = 4081B + * keys : paddr_le_t[CAPACITY] (140*8)B + * values : backref_map_val_le_t[CAPACITY] (140*21)B + * = 4088B * * TODO: update FixedKVNodeLayout to handle the above calculation * TODO: the above alignment probably isn't portable without further work */ -constexpr size_t LEAF_NODE_CAPACITY = 193; +constexpr size_t LEAF_NODE_CAPACITY = 140; using BackrefNode = FixedKVNode; diff --git a/src/crimson/os/seastore/lba/btree_lba_manager.cc b/src/crimson/os/seastore/lba/btree_lba_manager.cc index c836b5d3b7f8..34a065da75b8 100644 --- a/src/crimson/os/seastore/lba/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba/btree_lba_manager.cc @@ -252,7 +252,7 @@ BtreeLBAManager::reserve_region( auto iter = btree.make_partial_iter(c, *cursor); lba_map_val_t val{ len, - P_ADDR_ZERO, + pladdr_t{P_ADDR_ZERO}, EXTENT_DEFAULT_REF_COUNT, 0, extent_types_t::NONE}; @@ -287,7 +287,7 @@ BtreeLBAManager::alloc_extents( ext->get_laddr(), lba_map_val_t{ ext->get_length(), - ext->get_paddr(), + pladdr_t{ext->get_paddr()}, 
EXTENT_DEFAULT_REF_COUNT, ext->get_last_committed_crc(), ext->get_type()}, @@ -333,13 +333,14 @@ BtreeLBAManager::clone_mapping( auto btree = co_await get_btree(cache, c); co_await pos->refresh(); assert(laddr + len <= pos->get_laddr()); + assert(inter_key.get_clone_prefix() != laddr.get_clone_prefix()); auto p = co_await btree.insert( c, btree.make_partial_iter(c, *pos), laddr, lba_map_val_t{ len, - inter_key, + pladdr_t{inter_key.get_local_clone_id()}, EXTENT_DEFAULT_REF_COUNT, 0, mapping->get_extent_type()}, @@ -924,7 +925,7 @@ BtreeLBAManager::scan_mapped_space( pos.get_val().type); ceph_assert(pos.get_val().len > 0 && pos.get_val().len % block_size == 0); - ceph_assert(pos.get_val().pladdr != L_ADDR_NULL); + ceph_assert(pos.get_val().pladdr != pladdr_t{LOCAL_CLONE_ID_NULL}); scan_visitor( pos.get_val().pladdr.get_paddr(), pos.get_val().len, @@ -1020,9 +1021,10 @@ BtreeLBAManager::remap_mappings( auto new_key = (orig_laddr + remap.offset).checked_to_laddr(); val.len = remap.len; if (val.pladdr.is_laddr()) { - auto laddr = val.pladdr.get_laddr(); - DEBUGT("{} + {:#x}", t, laddr, remap.offset); - val.pladdr = (laddr + remap.offset).checked_to_laddr(); + DEBUGT("{} + {:#x}", + t, + val.pladdr.get_local_clone_id(), + remap.offset); } else { auto paddr = val.pladdr.get_paddr(); val.pladdr = paddr + remap.offset; diff --git a/src/crimson/os/seastore/lba/btree_lba_manager.h b/src/crimson/os/seastore/lba/btree_lba_manager.h index 35c2de364798..ae0f135d74e1 100644 --- a/src/crimson/os/seastore/lba/btree_lba_manager.h +++ b/src/crimson/os/seastore/lba/btree_lba_manager.h @@ -290,7 +290,7 @@ private: laddr, { len, - pladdr_t(intermediate_key), + pladdr_t(intermediate_key.get_local_clone_id()), EXTENT_DEFAULT_REF_COUNT, 0, // crc will only be used and checked with LBA direct mappings // also see pin_to_extent(_by_type) diff --git a/src/crimson/os/seastore/lba/lba_btree_node.h b/src/crimson/os/seastore/lba/lba_btree_node.h index 0a5f56b62cab..e242061d3f45 100644 --- 
a/src/crimson/os/seastore/lba/lba_btree_node.h +++ b/src/crimson/os/seastore/lba/lba_btree_node.h @@ -46,15 +46,15 @@ using lba_node_meta_le_t = fixed_kv_node_meta_le_t; * Layout (4KiB): * checksum : ceph_le32[1] 4B * size : ceph_le32[1] 4B - * meta : lba_node_meta_le_t[1] 20B - * keys : laddr_le_t[CAPACITY] (254*8)B - * values : paddr_le_t[CAPACITY] (254*8)B - * = 4092B + * meta : lba_node_meta_le_t[1] 36B + * keys : laddr_le_t[CAPACITY] (168*16)B + * values : paddr_le_t[CAPACITY] (168*8)B + * = 4076B * TODO: make the above capacity calculation part of FixedKVNodeLayout * TODO: the above alignment probably isn't portable without further work */ -constexpr size_t INTERNAL_NODE_CAPACITY = 254; +constexpr size_t INTERNAL_NODE_CAPACITY = 168; struct LBAInternalNode : FixedKVInternalNode< INTERNAL_NODE_CAPACITY, @@ -89,15 +89,15 @@ using LBAInternalNodeRef = LBAInternalNode::Ref; * Layout (4KiB): * checksum : ceph_le32[1] 4B * size : ceph_le32[1] 4B - * meta : lba_node_meta_le_t[1] 20B - * keys : laddr_le_t[CAPACITY] (140*8)B - * values : lba_map_val_le_t[CAPACITY] (140*21)B - * = 4088B + * meta : lba_node_meta_le_t[1] 36B + * keys : laddr_le_t[CAPACITY] (109*16)B + * values : lba_map_val_le_t[CAPACITY] (109*21)B + * = 4077B * * TODO: update FixedKVNodeLayout to handle the above calculation * TODO: the above alignment probably isn't portable without further work */ -constexpr size_t LEAF_NODE_CAPACITY = 135; +constexpr size_t LEAF_NODE_CAPACITY = 109; struct LBALeafNode : FixedKVLeafNode< @@ -425,7 +425,7 @@ struct LBACursor : BtreeCursor { assert(is_viewable()); assert(is_indirect()); assert(!is_end()); - return iter.get_val().pladdr.get_laddr(); + return iter.get_val().pladdr.build_laddr(key); } checksum_t get_checksum() const { assert(is_viewable()); diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index 929d68d1526d..0d5994362654 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ 
b/src/crimson/os/seastore/seastore_types.cc @@ -156,11 +156,16 @@ std::ostream &operator<<(std::ostream &out, const laddr_offset_t &laddr_offset) std::ostream &operator<<(std::ostream &out, const pladdr_t &pladdr) { + out << "pladdr("; if (pladdr.is_laddr()) { - return out << pladdr.get_laddr(); + // pladdr(local_clone_id=0x...) + out << "local_clone_id=0x" << std::hex + << pladdr.get_local_clone_id() << std::dec; } else { - return out << pladdr.get_paddr(); + // pladdr(paddr<...>) + out << pladdr.get_paddr(); } + return out << ")"; } std::ostream &operator<<(std::ostream &out, const paddr_t &rhs) diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 56b316a145c4..9bbb35495c87 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -1647,13 +1647,20 @@ struct __attribute__((packed)) laddr_le_t { bool operator==(const laddr_le_t&) const = default; }; +/** + * pladdr_t + * + * The value of LBA tree leaf node entries, stores either the physical address + * of the logical extent, or the value of local_clone_id field of the intermediate + * key which points to the physical lba mapping. 
+ */ struct pladdr_t { - std::variant<laddr_t, paddr_t> pladdr; + std::variant<local_clone_id_t, paddr_t> pladdr; pladdr_t() = default; - pladdr_t(const pladdr_t &) noexcept = default; - explicit pladdr_t(laddr_t laddr) - : pladdr(laddr) {} + pladdr_t(const pladdr_t &) = default; + explicit pladdr_t(local_clone_id_t id) + : pladdr(id) {} constexpr explicit pladdr_t(paddr_t paddr) : pladdr(paddr) {} @@ -1670,8 +1677,8 @@ struct pladdr_t { return *this; } - pladdr_t& operator=(laddr_t laddr) { - pladdr = laddr; + pladdr_t& operator=(local_clone_id_t id) { + pladdr = id; return *this; } @@ -1682,11 +1689,16 @@ struct pladdr_t { return paddr_t(std::get<1>(pladdr)); } - laddr_t get_laddr() const { + local_clone_id_t get_local_clone_id() const { assert(pladdr.index() == 0); - return laddr_t(std::get<0>(pladdr)); + return std::get<0>(pladdr); } + // The corresponding lba key with stored local clone id is the real + // intermediate key. + laddr_t build_laddr(laddr_t key) const { + return key.with_local_clone_id(get_local_clone_id()); + } }; constexpr pladdr_t PL_ADDR_NULL = pladdr_t(P_ADDR_NULL); @@ -1699,29 +1711,26 @@ enum class addr_type_t : uint8_t { }; struct __attribute__((packed)) pladdr_le_t { - ceph_le64 pladdr = ceph_le64(0); - addr_type_t addr_type = addr_type_t::MAX; + ceph_le64 addr; + addr_type_t addr_type; - pladdr_le_t() = default; + pladdr_le_t() : pladdr_le_t(PL_ADDR_NULL) {} pladdr_le_t(const pladdr_le_t &) = default; explicit pladdr_le_t(const pladdr_t &addr) - : pladdr( - ceph_le64( - addr.is_laddr() ? - std::get<0>(addr.pladdr).value : - std::get<1>(addr.pladdr).internal_paddr)), - addr_type( - addr.is_laddr() ? - addr_type_t::LADDR : - addr_type_t::PADDR) + : addr(ceph_le64(addr.is_laddr() + ? addr.get_local_clone_id() + : addr.get_paddr().internal_paddr)), + addr_type(addr.is_laddr() + ? 
addr_type_t::LADDR + : addr_type_t::PADDR) {} operator pladdr_t() const { if (addr_type == addr_type_t::LADDR) { - return pladdr_t(laddr_t(pladdr)); + return pladdr_t(static_cast<local_clone_id_t>(addr)); } else { assert(addr_type == addr_type_t::PADDR); - return pladdr_t(paddr_t(pladdr)); + return pladdr_t(paddr_t(addr)); } } }; diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc index d792d412ed9f..1579ae110c7a 100644 --- a/src/test/crimson/seastore/test_transaction_manager.cc +++ b/src/test/crimson/seastore/test_transaction_manager.cc @@ -1860,10 +1860,10 @@ TEST_P(tm_random_block_device_test_t, scatter_allocation) laddr_t ADDR = get_laddr_hint(0xFF * 4096); epm->prefill_fragmented_devices(); auto t = create_transaction(); - for (int i = 0; i < 1958; i++) { + for (int i = 0; i < 1975; i++) { auto extents = alloc_extents(t, (ADDR + i * 16384).checked_to_laddr(), 16384, 'a'); } - alloc_extents_deemed_fail(t, (ADDR + 1975 * 16384).checked_to_laddr(), 16384, 'a'); + alloc_extents_deemed_fail(t, (ADDR + 1975 * 16384).checked_to_laddr(), 16384, 'a'); check_mappings(t); check(); submit_transaction(std::move(t));