From eff1156a2ef6357370f2dcbb365f548f87ed16b5 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Fri, 17 Mar 2023 18:15:20 +0800 Subject: [PATCH] crimson/os/seastore/lba_manager: lba map value may contain laddr Signed-off-by: Xuehan Xu (cherry picked from commit f11504cd5d3fbba01cf9dbf11345e2c15001f947) --- .../seastore/backref/btree_backref_manager.cc | 11 --- .../seastore/backref/btree_backref_manager.h | 2 - src/crimson/os/seastore/backref_manager.h | 3 - .../os/seastore/btree/btree_range_pin.h | 8 +- .../os/seastore/btree/fixed_kv_btree.h | 25 ++++-- .../lba_manager/btree/btree_lba_manager.cc | 14 ++-- .../lba_manager/btree/btree_lba_manager.h | 2 +- .../lba_manager/btree/lba_btree_node.cc | 11 +-- .../lba_manager/btree/lba_btree_node.h | 42 ++++++---- src/crimson/os/seastore/seastore_types.cc | 9 ++ src/crimson/os/seastore/seastore_types.h | 84 +++++++++++++++++++ .../seastore/test_btree_lba_manager.cc | 2 +- 12 files changed, 156 insertions(+), 57 deletions(-) diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.cc b/src/crimson/os/seastore/backref/btree_backref_manager.cc index 6b75a34c981..c3ac906f2f9 100644 --- a/src/crimson/os/seastore/backref/btree_backref_manager.cc +++ b/src/crimson/os/seastore/backref/btree_backref_manager.cc @@ -332,17 +332,6 @@ BtreeBackrefManager::merge_cached_backrefs( }); } -BtreeBackrefManager::check_child_trackers_ret -BtreeBackrefManager::check_child_trackers( - Transaction &t) { - auto c = get_context(t); - return with_btree( - cache, c, - [c](auto &btree) { - return btree.check_child_trackers(c); - }); -} - BtreeBackrefManager::scan_mapped_space_ret BtreeBackrefManager::scan_mapped_space( Transaction &t, diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.h b/src/crimson/os/seastore/backref/btree_backref_manager.h index e19d9ce7b06..48ef4d83191 100644 --- a/src/crimson/os/seastore/backref/btree_backref_manager.h +++ b/src/crimson/os/seastore/backref/btree_backref_manager.h @@ -75,8 +75,6 @@ public: Transaction &t, paddr_t offset) final; - check_child_trackers_ret check_child_trackers(Transaction &t) final; - scan_mapped_space_ret scan_mapped_space( Transaction &t, scan_mapped_space_func_t &&f) final; diff --git a/src/crimson/os/seastore/backref_manager.h b/src/crimson/os/seastore/backref_manager.h index 4a354bdca87..3feedb997b4 100644 --- a/src/crimson/os/seastore/backref_manager.h +++ b/src/crimson/os/seastore/backref_manager.h @@ -127,9 +127,6 @@ public: Transaction &t, paddr_t offset) = 0; - using check_child_trackers_ret = base_iertr::future<>; - virtual check_child_trackers_ret check_child_trackers(Transaction &t) = 0; - /** * scan all extents in both tree and cache, * including backref extents, logical extents and lba extents, diff --git a/src/crimson/os/seastore/btree/btree_range_pin.h b/src/crimson/os/seastore/btree/btree_range_pin.h index fef89197fd9..684d81ce991 100644 --- a/src/crimson/os/seastore/btree/btree_range_pin.h +++ b/src/crimson/os/seastore/btree/btree_range_pin.h @@ -127,11 +127,15 @@ class BtreeNodeMapping : public PhysicalNodeMapping { */ CachedExtentRef parent; - val_t value; + pladdr_t value; extent_len_t len; fixed_kv_node_meta_t range; uint16_t pos = std::numeric_limits::max(); + pladdr_t _get_val() const final { + return value; + } + public: using val_type = val_t; BtreeNodeMapping(op_context_t ctx) : ctx(ctx) {} @@ -140,7 +144,7 @@ public: op_context_t ctx, CachedExtentRef parent, uint16_t pos, - val_t &value, + pladdr_t value, extent_len_t len, fixed_kv_node_meta_t &&meta) : ctx(ctx), diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h index ab9febc9e2f..87a957e3d22 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_btree.h +++ b/src/crimson/os/seastore/btree/fixed_kv_btree.h @@ -197,7 +197,10 @@ public: if constexpr ( std::is_same_v) { - ret.paddr = ret.paddr.maybe_relative_to(leaf.node->get_paddr()); + if (ret.pladdr.is_paddr()) { + ret.pladdr = ret.pladdr.get_paddr().maybe_relative_to( + leaf.node->get_paddr()); + } } return ret; } @@ -487,11 +490,13 @@ public: return upper_bound(c, min_max_t::max); } - template + template ::type = 0> void check_node( op_context_t c, TCachedExtentRef node) { + assert(leaf_has_children); for (auto i : *node) { CachedExtentRef child_node; Transaction::get_extent_ret ret; @@ -501,11 +506,10 @@ public: i->get_val().maybe_relative_to(node->get_paddr()), &child_node); } else { - if constexpr (leaf_has_children) { - ret = c.trans.get_extent( - i->get_val().paddr.maybe_relative_to(node->get_paddr()), - &child_node); - } + assert(i->get_val().pladdr.is_paddr()); + ret = c.trans.get_extent( + i->get_val().pladdr.get_paddr().maybe_relative_to(node->get_paddr()), + &child_node); } if (ret == Transaction::get_extent_ret::PRESENT) { if (child_node->is_stable()) { @@ -577,7 +581,10 @@ public: assert(!c.cache.query_cache(i->get_val(), nullptr)); } else { if constexpr (leaf_has_children) { - assert(!c.cache.query_cache(i->get_val().paddr, nullptr)); + assert(i->get_val().pladdr.is_paddr() + ? (bool)!c.cache.query_cache( + i->get_val().pladdr.get_paddr(), nullptr) + : true); } } } @@ -588,6 +595,8 @@ public: } using check_child_trackers_ret = base_iertr::future<>; + template ::type = 0> check_child_trackers_ret check_child_trackers( op_context_t c) { mapped_space_visitor_t checker = [c, this]( diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc index f1add39ba5b..f109b8a9982 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc @@ -272,7 +272,7 @@ BtreeLBAManager::alloc_extent( c, *state.insert_iter, state.last_end, - lba_map_val_t{len, addr, 1, 0}, + lba_map_val_t{len, pladdr_t(addr), 1, 0} nextent ).si_then([&state, FNAME, c, addr, len, hint, nextent](auto &&p) { auto [iter, inserted] = std::move(p); @@ -311,7 +311,8 @@ _init_cached_extent( LOG_PREFIX(BtreeLBAManager::init_cached_extent); if (!iter.is_end() && iter.get_key() == logn->get_laddr() && - iter.get_val().paddr == logn->get_paddr()) { + iter.get_val().pladdr.is_paddr() && + iter.get_val().pladdr.get_paddr() == logn->get_paddr()) { assert(!iter.get_leaf_node()->is_pending()); iter.get_leaf_node()->link_child(logn.get(), iter.get_leaf_pos()); logn->set_laddr(iter.get_pin(c)->get_key()); @@ -387,7 +388,7 @@ BtreeLBAManager::scan_mappings( seastar::stop_iteration::yes); } ceph_assert((pos.get_key() + pos.get_val().len) > begin); - f(pos.get_key(), pos.get_val().paddr, pos.get_val().len); + f(pos.get_key(), pos.get_val().pladdr, pos.get_val().len); return typename LBABtree::iterate_repeat_ret_inner( interruptible::ready_future_marker{}, seastar::stop_iteration::no); @@ -439,8 +440,9 @@ BtreeLBAManager::update_mapping( const lba_map_val_t &in) { assert(!addr.is_null()); lba_map_val_t ret = in; - ceph_assert(in.paddr == prev_addr); - ret.paddr = addr; + ceph_assert(in.pladdr.is_paddr()); + ceph_assert(in.pladdr.get_paddr() == prev_addr); + ret.pladdr = addr; return ret; }, nextent @@ -528,7 +530,7 @@ BtreeLBAManager::update_refcount( DEBUGT("laddr={}, delta={} done -- {}", t, addr, delta, result); return ref_update_result_t{ result.refcount, - result.paddr, + result.pladdr, result.len }; }); diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h index b48abf9456b..7c5d42cec79 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h @@ -39,7 +39,7 @@ public: c, parent, pos, - val.paddr, + val.pladdr, val.len, std::forward(meta)) {} diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc index a33f75917c1..66dc94394a9 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc @@ -20,7 +20,7 @@ namespace crimson::os::seastore::lba_manager::btree { std::ostream& operator<<(std::ostream& out, const lba_map_val_t& v) { return out << "lba_map_val_t(" - << v.paddr + << v.pladdr << "~" << v.len << ", refcount=" << v.refcount << ", checksum=" << v.checksum @@ -42,10 +42,11 @@ void LBALeafNode::resolve_relative_addrs(paddr_t base) { LOG_PREFIX(LBALeafNode::resolve_relative_addrs); for (auto i: *this) { - if (i->get_val().paddr.is_relative()) { - auto val = i->get_val(); - val.paddr = base.add_relative(val.paddr); - TRACE("{} -> {}", i->get_val().paddr, val.paddr); + auto val = i->get_val(); + if (val.pladdr.is_paddr() && + val.pladdr.get_paddr().is_relative()) { + val.pladdr = base.add_relative(val.pladdr.get_paddr()); + TRACE("{} -> {}", i->get_val().pladdr, val.pladdr); i->set_val(val); } } diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h index 62ceae6cc46..ffce2c1b5e6 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h @@ -33,17 +33,18 @@ using LBANode = FixedKVNode; */ struct lba_map_val_t { extent_len_t len = 0; ///< length of mapping - paddr_t paddr; ///< physical addr of mapping + pladdr_t pladdr; ///< physical addr of mapping or + // laddr of a physical lba mapping(see btree_lba_manager.h) uint32_t refcount = 0; ///< refcount uint32_t checksum = 0; ///< checksum of original block written at paddr (TODO) lba_map_val_t() = default; lba_map_val_t( extent_len_t len, - paddr_t paddr, + pladdr_t pladdr, uint32_t refcount, uint32_t checksum) - : len(len), paddr(paddr), refcount(refcount), checksum(checksum) {} + : len(len), pladdr(pladdr), refcount(refcount), checksum(checksum) {} bool operator==(const lba_map_val_t&) const = default; }; @@ -103,14 +104,14 @@ using LBAInternalNodeRef = LBAInternalNode::Ref; * size : uint32_t[1] 4b * (padding) : 4b * meta : lba_node_meta_le_t[3] (1*24)b - * keys : laddr_t[170] (145*8)b - * values : lba_map_val_t[170] (145*20)b + * keys : laddr_t[170] (140*8)b + * values : lba_map_val_t[170] (140*21)b * = 4092 * * TODO: update FixedKVNodeLayout to handle the above calculation * TODO: the above alignment probably isn't portable without further work */ -constexpr size_t LEAF_NODE_CAPACITY = 145; +constexpr size_t LEAF_NODE_CAPACITY = 140; /** * lba_map_val_le_t @@ -119,7 +120,7 @@ constexpr size_t LEAF_NODE_CAPACITY = 145; */ struct lba_map_val_le_t { extent_len_le_t len = init_extent_len_le(0); - paddr_le_t paddr; + pladdr_le_t pladdr; ceph_le32 refcount{0}; ceph_le32 checksum{0}; @@ -127,12 +128,12 @@ struct lba_map_val_le_t { lba_map_val_le_t(const lba_map_val_le_t &) = default; explicit lba_map_val_le_t(const lba_map_val_t &val) : len(init_extent_len_le(val.len)), - paddr(paddr_le_t(val.paddr)), + pladdr(pladdr_le_t(val.pladdr)), refcount(val.refcount), checksum(val.checksum) {} operator lba_map_val_t() const { - return lba_map_val_t{ len, paddr, refcount, checksum }; + return lba_map_val_t{ len, pladdr, refcount, checksum }; } }; @@ -195,7 +196,9 @@ struct LBALeafNode // child-ptr may already be correct, see LBAManager::update_mappings() this->update_child_ptr(iter, nextent); } - val.paddr = this->maybe_generate_relative(val.paddr); + if (val.pladdr.is_paddr()) { + val.pladdr = maybe_generate_relative(val.pladdr.get_paddr()); + } return this->journal_update( iter, val, @@ -214,7 +217,9 @@ struct LBALeafNode addr, (void*)nextent); this->insert_child_ptr(iter, nextent); - val.paddr = this->maybe_generate_relative(val.paddr); + if (val.pladdr.is_paddr()) { + val.pladdr = maybe_generate_relative(val.pladdr.get_paddr()); + } this->journal_insert( iter, addr, @@ -245,9 +250,10 @@ struct LBALeafNode if (this->is_initial_pending()) { for (auto i = from; i != to; ++i) { auto val = i->get_val(); - if (val.paddr.is_relative()) { - assert(val.paddr.is_block_relative()); - val.paddr = this->get_paddr().add_relative(val.paddr); + if (val.pladdr.is_paddr() + && val.pladdr.get_paddr().is_relative()) { + assert(val.pladdr.get_paddr().is_block_relative()); + val.pladdr = this->get_paddr().add_relative(val.pladdr.get_paddr()); i->set_val(val); } } @@ -260,10 +266,10 @@ struct LBALeafNode if (this->is_initial_pending()) { for (auto i = from; i != to; ++i) { auto val = i->get_val(); - if (val.paddr.is_relative()) { - auto val = i->get_val(); - assert(val.paddr.is_record_relative()); - val.paddr = val.paddr.block_relative_to(this->get_paddr()); + if (val.pladdr.is_paddr() + && val.pladdr.get_paddr().is_relative()) { + assert(val.pladdr.get_paddr().is_record_relative()); + val.pladdr = val.pladdr.get_paddr().block_relative_to(this->get_paddr()); i->set_val(val); } } diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index 6a47dcea34b..0acfdb74ebb 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -89,6 +89,15 @@ std::ostream& operator<<(std::ostream& out, segment_seq_printer_t seq) } } +std::ostream &operator<<(std::ostream &out, const pladdr_t &pladdr) +{ + if (pladdr.is_laddr()) { + return out << pladdr.get_laddr(); + } else { + return out << pladdr.get_paddr(); + } +} + std::ostream &operator<<(std::ostream &out, const paddr_t &rhs) { auto id = rhs.get_device_id(); diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 373ab4377ae..0713609fc87 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -488,6 +488,7 @@ constexpr device_off_t decode_device_off(internal_paddr_t addr) { struct seg_paddr_t; struct blk_paddr_t; struct res_paddr_t; +struct pladdr_t; struct paddr_t { public: // P_ADDR_MAX == P_ADDR_NULL == paddr_t{} @@ -668,6 +669,8 @@ private: static_cast(offset)) {} friend struct paddr_le_t; + friend struct pladdr_le_t; + }; std::ostream &operator<<(std::ostream &out, const paddr_t &rhs); @@ -1032,6 +1035,86 @@ struct __attribute((packed)) laddr_le_t { } }; +constexpr uint64_t PL_ADDR_NULL = std::numeric_limits::max(); + +struct pladdr_t { + std::variant pladdr; + + pladdr_t() = default; + pladdr_t(const pladdr_t &) = default; + explicit pladdr_t(laddr_t laddr) + : pladdr(laddr) {} + explicit pladdr_t(paddr_t paddr) + : pladdr(paddr) {} + + bool is_laddr() const { + return pladdr.index() == 0; + } + + bool is_paddr() const { + return pladdr.index() == 1; + } + + pladdr_t& operator=(paddr_t paddr) { + pladdr = paddr; + return *this; + } + + pladdr_t& operator=(laddr_t laddr) { + pladdr = laddr; + return *this; + } + + bool operator==(const pladdr_t &) const = default; + + paddr_t get_paddr() const { + assert(pladdr.index() == 1); + return paddr_t(std::get<1>(pladdr)); + } + + laddr_t get_laddr() const { + assert(pladdr.index() == 0); + return laddr_t(std::get<0>(pladdr)); + } + +}; + +std::ostream &operator<<(std::ostream &out, const pladdr_t &pladdr); + +enum class addr_type_t : uint8_t { + PADDR=0, + LADDR=1, + MAX=2 // or NONE +}; + +struct __attribute((packed)) pladdr_le_t { + ceph_le64 pladdr = ceph_le64(PL_ADDR_NULL); + addr_type_t addr_type = addr_type_t::MAX; + + pladdr_le_t() = default; + pladdr_le_t(const pladdr_le_t &) = default; + explicit pladdr_le_t(const pladdr_t &addr) + : pladdr( + ceph_le64( + addr.is_laddr() ? + std::get<0>(addr.pladdr) : + std::get<1>(addr.pladdr).internal_paddr)), + addr_type( + addr.is_laddr() ? + addr_type_t::LADDR : + addr_type_t::PADDR) + {} + + operator pladdr_t() const { + if (addr_type == addr_type_t::LADDR) { + return pladdr_t(laddr_t(pladdr)); + } else { + assert(addr_type == addr_type_t::PADDR); + return pladdr_t(paddr_t(pladdr)); + } + } +}; + // logical offset, see LBAManager, TransactionManager using extent_len_t = uint32_t; constexpr extent_len_t EXTENT_LEN_MAX = @@ -2132,6 +2215,7 @@ template <> struct fmt::formatter : fmt::os template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; +template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc index 0635358463a..f55d0d6abd4 100644 --- a/src/test/crimson/seastore/test_btree_lba_manager.cc +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -249,7 +249,7 @@ struct lba_btree_test : btree_test_base { } static auto get_map_val(extent_len_t len) { - return lba_map_val_t{0, P_ADDR_NULL, len, 0}; + return lba_map_val_t{0, (pladdr_t)P_ADDR_NULL, len, 0}; } device_off_t next_off = 0; -- 2.39.5