From: Zhang Song Date: Wed, 14 May 2025 07:22:15 +0000 (+0800) Subject: crimson/os/seastore: extend the size of laddr_t from 64 bits to 128 bits X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a72adaa9444050ff93d4d5a1243a49530b7bc60f;p=ceph.git crimson/os/seastore: extend the size of laddr_t from 64 bits to 128 bits Signed-off-by: Zhang Song Signed-off-by: Xuehan Xu --- diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h index d007a135e55f..c01c820ad6f6 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h @@ -151,7 +151,7 @@ class DummyNodeExtentManager final: public NodeExtentManager { assert(len % ALIGNMENT == 0); auto r = ceph::buffer::create_aligned(len, ALIGNMENT); auto addr = laddr_t::from_byte_offset( - reinterpret_cast(r->get_data())); + reinterpret_cast(r->get_data())); auto bp = ceph::bufferptr(std::move(r)); auto extent = Ref(new DummyNodeExtent(std::move(bp))); extent->set_laddr(addr); diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index b8a93f7d0806..ccddc50fe62c 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -94,8 +94,41 @@ std::ostream& operator<<(std::ostream& out, segment_seq_printer_t seq) } } +template +concept is_convertible_to_stream = std::is_convertible_v; + +template +concept is_streamable = requires(std::ostream &os, const T &value) { + { os << value } -> is_convertible_to_stream; +}; + +template +struct laddr_formatter_t; + +template +requires fmt::is_formattable::value +struct laddr_formatter_t { + static std::ostream &format(std::ostream &out, const T &v) { + // fmt support format __int128 + fmt::format_to(std::ostreambuf_iterator(out), "L0x{:x}", v); + return out; + } +}; +template +requires is_streamable +struct laddr_formatter_t { + static std::ostream &format(std::ostream &out, const T &v) { + // boost uint128_t support stream operator but __int128 doesn't + return out << "L0x" << std::hex << v << std::dec; + } +}; + std::ostream &operator<<(std::ostream &out, const laddr_t &laddr) { - return out << "L0x" << std::hex << laddr.value << std::dec; + if (laddr == L_ADDR_NULL) { + return out << "L_ADDR_NULL"; + } else { + return laddr_formatter_t::format(out, laddr.value); + } } std::ostream &operator<<(std::ostream &out, const laddr_offset_t &laddr_offset) { diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 683ae1c88f9d..00c18be5438e 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -24,6 +24,14 @@ #include "crimson/common/errorator.h" +#ifndef SEASTORE_LADDR_USE_BOOST_U128 +#define SEASTORE_LADDR_USE_BOOST_U128 0 +#endif + +#if !defined (__SIZEOF_INT128__) || SEASTORE_LADDR_USE_BOOST_U128 +#include +#endif + namespace crimson::os::seastore { using base_ertr = crimson::errorator< @@ -1086,19 +1094,33 @@ inline extent_len_le_t init_extent_len_le(extent_len_t len) { // logical addr, see LBAManager, TransactionManager class laddr_t { public: - // the type of underlying integer - using Unsigned = uint64_t; +#if defined (__SIZEOF_INT128__) && !SEASTORE_LADDR_USE_BOOST_U128 + // The other components must be compatible with boost version laddr_t. + using Unsigned = unsigned __int128; +#else + using Unsigned = boost::multiprecision::uint128_t; +#endif + + static_assert( + std::numeric_limits::is_specialized, + "numeric_limits not specialized for the underlying int128 type, " + "set RAW_VALUE_MAX manually"); + static constexpr Unsigned RAW_VALUE_MAX = std::numeric_limits::max(); constexpr laddr_t() : laddr_t(RAW_VALUE_MAX) {} + laddr_t(const laddr_t &) noexcept = default; + laddr_t(laddr_t &&) noexcept = default; + laddr_t &operator=(const laddr_t &) noexcept = default; + laddr_t &operator=(laddr_t &&) noexcept = default; // laddr_t is block aligned, one logical address represents one 4KiB block in disk static constexpr unsigned UNIT_SHIFT = 12; static constexpr unsigned UNIT_SIZE = 1 << UNIT_SHIFT; // 4096 static constexpr unsigned UNIT_MASK = UNIT_SIZE - 1; - static laddr_t from_byte_offset(Unsigned value) { + static laddr_t from_byte_offset(loffset_t value) { assert((value & UNIT_MASK) == 0); return laddr_t(value >> UNIT_SHIFT); } @@ -1109,14 +1131,20 @@ public: /// laddr_t works like primitive integer type, encode/decode it manually void encode(::ceph::buffer::list::contiguous_appender& p) const { - p.append(reinterpret_cast(&value), sizeof(Unsigned)); + auto lo = get_low64(); + auto hi = get_high64(); + denc(lo, p); + denc(hi, p); } void bound_encode(size_t& p) const { - p += sizeof(Unsigned); + p += sizeof(uint64_t) * 2; } void decode(::ceph::buffer::ptr::const_iterator& p) { - assert(static_cast(p.get_end() - p.get_pos()) >= sizeof(Unsigned)); - memcpy((char *)&value, p.get_pos_add(sizeof(Unsigned)), sizeof(Unsigned)); + assert(static_cast(p.get_end() - p.get_pos()) >= sizeof(uint64_t) * 2); + uint64_t lo = 0, hi = 0; + denc(lo, p); + denc(hi, p); + value = Unsigned(lo) | (Unsigned(hi) << 64); } // laddr_offset_t contains one base laddr and one block not aligned @@ -1132,16 +1160,18 @@ public: laddr_t get_roundup_laddr(size_t alignment) const { ceph_assert(alignment % laddr_t::UNIT_SIZE == 0); + Unsigned align_shift = alignment >> laddr_t::UNIT_SHIFT; if (offset == 0) { - return laddr_t(p2roundup(base, alignment >> laddr_t::UNIT_SHIFT)); + return laddr_t(p2roundup(base, align_shift)); } else { assert(offset < laddr_t::UNIT_SIZE); - return laddr_t(p2roundup(base + 1, alignment >> laddr_t::UNIT_SHIFT)); + return laddr_t(p2roundup(base + 1, align_shift)); } } laddr_t get_aligned_laddr(size_t alignment) const { ceph_assert(alignment % laddr_t::UNIT_SIZE == 0); - return laddr_t(p2align(base, alignment >> laddr_t::UNIT_SHIFT)); + Unsigned align_shift = alignment >> laddr_t::UNIT_SHIFT; + return laddr_t(p2align(base, align_shift)); } laddr_t get_laddr() const { return laddr_t{base}; @@ -1192,7 +1222,19 @@ public: } friend bool operator==(const laddr_offset_t&, const laddr_offset_t&) = default; - friend auto operator<=>(const laddr_offset_t&, const laddr_offset_t&) = default; + friend std::strong_ordering operator<=>( + const laddr_offset_t& l, const laddr_offset_t& r) { + assert(l.offset < laddr_t::UNIT_SIZE); + assert(r.offset < laddr_t::UNIT_SIZE); + // boost uint128 doesn't support three way compare operator, + // we need to implement it manually. + if (l.base == r.base) { + return l.offset <=> r.offset; + } else { + // use laddr_t <=> laddr_t + return laddr_t(l.base) <=> laddr_t(r.base); + } + } friend std::ostream &operator<<(std::ostream&, const laddr_offset_t&); friend laddr_offset_t operator+(const laddr_offset_t &laddr_offset, const loffset_t &offset) { @@ -1258,7 +1300,17 @@ public: return laddr_offset.get_laddr() == laddr && laddr_offset.get_offset() == 0; } - friend auto operator<=>(const laddr_t&, const laddr_t&) = default; + friend std::strong_ordering operator<=>(const laddr_t& l, const laddr_t& r) { + // boost::multiprecision::uint128_t doesn't support three ways operator, + // so we need to implement it manually. + if (l.value < r.value) { + return std::strong_ordering::less; + } else if (l.value == r.value) { + return std::strong_ordering::equal; + } else { + return std::strong_ordering::greater; + } + } friend auto operator<=>(const laddr_t &laddr, const laddr_offset_t &laddr_offset) { return laddr_offset_t(laddr, 0) <=> laddr_offset; @@ -1294,10 +1346,21 @@ public: struct laddr_hash_t { std::size_t operator()(const laddr_t &laddr) const { - return static_cast(laddr.value); + auto h = laddr.get_high64(); + auto l = laddr.get_low64(); + auto seed = h ^ l; + boost::hash_combine(seed, h); + boost::hash_combine(seed, l); + return static_cast(seed); } }; private: + constexpr laddr_t(uint64_t low, uint64_t high) + : value((Unsigned(high) << 64) | Unsigned(low)) {} + + uint64_t get_high64() const { return static_cast(value >> 64); } + uint64_t get_low64() const { return static_cast(value); } + // Prevent direct construction of laddr_t with an integer, // always use laddr_t::from_raw_uint instead. constexpr explicit laddr_t(Unsigned value) : value(value) {} @@ -1310,38 +1373,36 @@ constexpr laddr_t L_ADDR_MIN = laddr_t::from_raw_uint(0); constexpr laddr_t L_ADDR_NULL = L_ADDR_MAX; struct __attribute__((packed)) laddr_le_t { - ceph_le64 laddr; + ceph_le64 low64; + ceph_le64 high64; using orig_type = laddr_t; laddr_le_t() : laddr_le_t(L_ADDR_NULL) {} laddr_le_t(const laddr_le_t &) = default; explicit laddr_le_t(const laddr_t &addr) - : laddr(addr.value) {} + : low64(addr.get_low64()), high64(addr.get_high64()) {} operator laddr_t() const { - return laddr_t(laddr); + return laddr_t(low64, high64); } laddr_le_t& operator=(laddr_t addr) { - ceph_le64 val; - val = addr.value; - laddr = val; + low64 = addr.get_low64(); + high64 = addr.get_high64(); return *this; } bool operator==(const laddr_le_t&) const = default; }; -constexpr uint64_t PL_ADDR_NULL = std::numeric_limits::max(); - struct pladdr_t { std::variant pladdr; pladdr_t() = default; - pladdr_t(const pladdr_t &) = default; - pladdr_t(laddr_t laddr) + pladdr_t(const pladdr_t &) noexcept = default; + explicit pladdr_t(laddr_t laddr) : pladdr(laddr) {} - pladdr_t(paddr_t paddr) + constexpr explicit pladdr_t(paddr_t paddr) : pladdr(paddr) {} bool is_laddr() const { @@ -1376,6 +1437,7 @@ struct pladdr_t { }; +constexpr pladdr_t PL_ADDR_NULL = pladdr_t(P_ADDR_NULL); std::ostream &operator<<(std::ostream &out, const pladdr_t &pladdr); enum class addr_type_t : uint8_t { diff --git a/src/crimson/os/seastore/segment_manager/ephemeral.h b/src/crimson/os/seastore/segment_manager/ephemeral.h index 47735688d453..01c2983b95c6 100644 --- a/src/crimson/os/seastore/segment_manager/ephemeral.h +++ b/src/crimson/os/seastore/segment_manager/ephemeral.h @@ -34,7 +34,7 @@ struct ephemeral_config_t { constexpr ephemeral_config_t DEFAULT_TEST_EPHEMERAL = { 1 << 30, 4 << 10, - 8 << 20 + 16 << 20 }; std::ostream &operator<<(std::ostream &, const ephemeral_config_t &); diff --git a/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc b/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc index 2c07de23b2d9..331467000362 100644 --- a/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc +++ b/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc @@ -1315,7 +1315,7 @@ TEST_F(c_dummy_test_t, 5_split_merge_internal_node) { logger().info("\n---------------------------------------------" "\nbefore internal node insert:\n"); - auto padding = std::string(250, '_'); + auto padding = std::string(226, '_'); auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true); keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 2, 2)); keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 3, 3)); @@ -1323,11 +1323,11 @@ TEST_F(c_dummy_test_t, 5_split_merge_internal_node) keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 2, 2)); keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 3, 3)); keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 4, 4)); - auto padding_s = std::string(257, '_'); + auto padding_s = std::string(231, '_'); keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 2, 2)); keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 3, 3)); keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 4, 4)); - auto padding_e = std::string(247, '_'); + auto padding_e = std::string(215, '_'); keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 2, 2)); keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 3, 3)); keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 4, 4)); @@ -1399,7 +1399,7 @@ TEST_F(c_dummy_test_t, 5_split_merge_internal_node) { logger().info("\n---------------------------------------------" "\nbefore internal node insert (1):\n"); - auto padding = std::string(244, '_'); + auto padding = std::string(217, '_'); auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true); keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 5, 5)); keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 6, 6)); @@ -1426,23 +1426,24 @@ TEST_F(c_dummy_test_t, 5_split_merge_internal_node) { logger().info("\n---------------------------------------------" "\nbefore internal node insert (2):\n"); - auto padding = std::string(243, '_'); + auto padding = std::string(216, '_'); auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true); - keys.insert(make_ghobj(4, 4, 4, "n", "o", 3, 3)); + auto padding_o = std::string(10, '_'); + keys.insert(make_ghobj(4, 4, 4, "n", "o" + padding_o, 3, 3)); keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 5, 5)); keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 6, 6)); pool.build_tree(keys).unsafe_get(); logger().info("\n---------------------------------------------" "\nsplit at stage 2; insert to left back at stage (0, 1, 2, 1,) 0\n"); - pool.split_merge(make_ghobj(4, 4, 4, "n", "o", 2, 2), {2, {0, {0}}}, + pool.split_merge(make_ghobj(4, 4, 4, "n", "o" + padding_o, 2, 2), {2, {0, {0}}}, {2u, 0u, true, InsertType::LAST}).get(); } { logger().info("\n---------------------------------------------" "\nbefore internal node insert (3):\n"); - auto padding = std::string(419, '_'); + auto padding = std::string(394, '_'); auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding, true); keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 2, 2)); keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 3, 3)); @@ -1453,7 +1454,7 @@ TEST_F(c_dummy_test_t, 5_split_merge_internal_node) "\nsplit at stage 1; insert to right front at stage 0, 1, 0\n"); pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2" + padding, 5, 5), {1, {1, {0}}}, {1u, 0u, false, InsertType::BEGIN}).get(); - pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3), {1, {1, {0}}}, + pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3" + std::string(361, '_'), 3, 3), {1, {1, {0}}}, {1u, 1u, false, InsertType::BEGIN}).get(); pool.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3" + padding, 1, 1), {1, {1, {0}}}, {1u, 0u, false, InsertType::BEGIN}).get(); @@ -1462,12 +1463,12 @@ TEST_F(c_dummy_test_t, 5_split_merge_internal_node) { logger().info("\n---------------------------------------------" "\nbefore internal node insert (4):\n"); - auto padding = std::string(361, '_'); + auto padding = std::string(333, '_'); auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding, true); keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 2, 2)); keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 3, 3)); keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 4, 4)); - auto padding_s = std::string(386, '_'); + auto padding_s = std::string(381, '_'); keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 2, 2)); keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 3, 3)); keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 4, 4)); @@ -1494,7 +1495,7 @@ TEST_F(c_dummy_test_t, 5_split_merge_internal_node) { logger().info("\n---------------------------------------------" "\nbefore internal node insert (5):\n"); - auto padding = std::string(412, '_'); + auto padding = std::string(385, '_'); auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding); keys.insert(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3)); keys.insert(make_ghobj(4, 4, 4, "ns3", "oid3" + padding, 5, 5)); @@ -1513,9 +1514,16 @@ TEST_F(c_dummy_test_t, 5_split_merge_internal_node) { logger().info("\n---------------------------------------------" "\nbefore internal node insert (6):\n"); - auto padding = std::string(328, '_'); + auto padding = std::string(301, '_'); auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding); - keys.insert(make_ghobj(5, 5, 5, "ns3", "oid3" + std::string(270, '_'), 3, 3)); + keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 2, 2)); + keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 3, 3)); + keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 4, 4)); + auto padding_s = std::string(332, '_'); + keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 2, 2)); + keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 3, 3)); + keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 4, 4)); + keys.insert(make_ghobj(5, 5, 5, "ns3", "oid3" + std::string(250, '_'), 3, 3)); keys.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9)); pool.build_tree(keys).unsafe_get(); @@ -1538,7 +1546,7 @@ TEST_F(c_dummy_test_t, 5_split_merge_internal_node) { logger().info("\n---------------------------------------------" "\nbefore internal node insert (7):\n"); - auto padding = std::string(323, '_'); + auto padding = std::string(300, '_'); auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding); keys.insert(make_ghobj(4, 4, 4, "ns5", "oid5" + padding, 3, 3)); keys.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9)); diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc index 490b1df6e4cb..d792d412ed9f 100644 --- a/src/test/crimson/seastore/test_transaction_manager.cc +++ b/src/test/crimson/seastore/test_transaction_manager.cc @@ -1860,10 +1860,10 @@ TEST_P(tm_random_block_device_test_t, scatter_allocation) laddr_t ADDR = get_laddr_hint(0xFF * 4096); epm->prefill_fragmented_devices(); auto t = create_transaction(); - for (int i = 0; i < 1989; i++) { + for (int i = 0; i < 1958; i++) { auto extents = alloc_extents(t, (ADDR + i * 16384).checked_to_laddr(), 16384, 'a'); } - alloc_extents_deemed_fail(t, (ADDR + 1991 * 16384).checked_to_laddr(), 16384, 'a'); + alloc_extents_deemed_fail(t, (ADDR + 1958 * 16384).checked_to_laddr(), 16384, 'a'); check_mappings(t); check(); submit_transaction(std::move(t));