From: Xuehan Xu Date: Tue, 11 Jun 2024 07:37:57 +0000 (+0800) Subject: crimson/os/seastore/transaction_manager: fix lba mappings before getting X-Git-Tag: v20.0.0~1647^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=063e429501ec0de3a0f356b88fe946eeadcc55ab;p=ceph.git crimson/os/seastore/transaction_manager: fix lba mappings before getting logical extents through them Signed-off-by: Xuehan Xu --- diff --git a/src/crimson/os/seastore/btree/btree_range_pin.cc b/src/crimson/os/seastore/btree/btree_range_pin.cc index ad6abdf4ee80f..12e078814ccc5 100644 --- a/src/crimson/os/seastore/btree/btree_range_pin.cc +++ b/src/crimson/os/seastore/btree/btree_range_pin.cc @@ -31,7 +31,10 @@ bool BtreeNodeMapping::is_stable() const assert(!this->parent_modified()); assert(pos != std::numeric_limits::max()); auto &p = (FixedKVNode&)*parent; - return p.is_child_stable(ctx, pos); + auto k = this->is_indirect() + ? this->get_intermediate_base() + : get_key(); + return p.is_child_stable(ctx, pos, k); } template @@ -40,7 +43,10 @@ bool BtreeNodeMapping::is_data_stable() const assert(!this->parent_modified()); assert(pos != std::numeric_limits::max()); auto &p = (FixedKVNode&)*parent; - return p.is_child_data_stable(ctx, pos); + auto k = this->is_indirect() + ? this->get_intermediate_base() + : get_key(); + return p.is_child_data_stable(ctx, pos, k); } template class BtreeNodeMapping; diff --git a/src/crimson/os/seastore/btree/fixed_kv_node.h b/src/crimson/os/seastore/btree/fixed_kv_node.h index ea61d6b9933d2..97822e7e467c0 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_node.h +++ b/src/crimson/os/seastore/btree/fixed_kv_node.h @@ -265,8 +265,14 @@ struct FixedKVNode : ChildableCachedExtent { set_child_ptracker(child); } - virtual bool is_child_stable(op_context_t, uint16_t pos) const = 0; - virtual bool is_child_data_stable(op_context_t, uint16_t pos) const = 0; + virtual bool is_child_stable( + op_context_t, + uint16_t pos, + node_key_t key) const = 0; + virtual bool is_child_data_stable( + op_context_t, + uint16_t pos, + node_key_t key) const = 0; template get_child_ret_t get_child( @@ -275,6 +281,7 @@ struct FixedKVNode : ChildableCachedExtent { node_key_t key) { assert(children.capacity()); + assert(key == get_key_from_idx(pos)); auto child = children[pos]; ceph_assert(!is_reserved_ptr(child)); if (is_valid_child_ptr(child)) { @@ -632,11 +639,17 @@ struct FixedKVInternalNode } } - bool is_child_stable(op_context_t, uint16_t pos) const final { + bool is_child_stable( + op_context_t, + uint16_t pos, + NODE_KEY key) const final { ceph_abort("impossible"); return false; } - bool is_child_data_stable(op_context_t, uint16_t pos) const final { + bool is_child_data_stable( + op_context_t, + uint16_t pos, + NODE_KEY key) const final { ceph_abort("impossible"); return false; } @@ -1040,14 +1053,25 @@ struct FixedKVLeafNode // 2. The child extent is stable // // For reserved mappings, the return values are undefined. - bool is_child_stable(op_context_t c, uint16_t pos) const final { - return _is_child_stable(c, pos); + bool is_child_stable( + op_context_t c, + uint16_t pos, + NODE_KEY key) const final { + return _is_child_stable(c, pos, key); } - bool is_child_data_stable(op_context_t c, uint16_t pos) const final { - return _is_child_stable(c, pos, true); + bool is_child_data_stable( + op_context_t c, + uint16_t pos, + NODE_KEY key) const final { + return _is_child_stable(c, pos, key, true); } - bool _is_child_stable(op_context_t c, uint16_t pos, bool data_only = false) const { + bool _is_child_stable( + op_context_t c, + uint16_t pos, + NODE_KEY key, + bool data_only = false) const { + assert(key == get_key_from_idx(pos)); auto child = this->children[pos]; if (is_reserved_ptr(child)) { return true; diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index d26d61f25a229..e78a0d95028b7 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -1157,6 +1157,10 @@ public: return false; }; + virtual void maybe_fix_pos() { + ceph_abort("impossible"); + } + virtual ~PhysicalNodeMapping() {} protected: std::optional child_pos = std::nullopt; diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h index e5ac044e16be5..ca25dc6a2a02b 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h @@ -25,6 +25,8 @@ namespace crimson::os::seastore::lba_manager::btree { +struct LBALeafNode; + class BtreeLBAMapping : public BtreeNodeMapping { // To support cloning, there are two kinds of lba mappings: // 1. physical lba mapping: the pladdr in the value of which is the paddr of @@ -166,6 +168,14 @@ public: return p.modified_since(parent_modifications); } + void maybe_fix_pos() final { + assert(is_parent_valid()); + if (!parent_modified()) { + return; + } + auto &p = static_cast(*parent); + p.maybe_fix_mapping_pos(*this); + } protected: std::unique_ptr> _duplicate( op_context_t ctx) const final { @@ -181,6 +191,10 @@ protected: return pin; } private: + void _new_pos(uint16_t pos) { + this->pos = pos; + } + laddr_t key = L_ADDR_NULL; bool indirect = false; laddr_t intermediate_key = L_ADDR_NULL; @@ -189,6 +203,7 @@ private: pladdr_t raw_val; lba_map_val_t map_val; uint64_t parent_modifications = 0; + friend struct LBALeafNode; }; using BtreeLBAMappingRef = std::unique_ptr; diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc index 730da546cb0e2..504c346ea946d 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc @@ -10,7 +10,7 @@ #include "include/buffer.h" #include "include/byteorder.h" -#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" +#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h" #include "crimson/os/seastore/logging.h" SET_SUBSYS(seastore_lba); @@ -53,4 +53,23 @@ void LBALeafNode::resolve_relative_addrs(paddr_t base) } } +void LBALeafNode::maybe_fix_mapping_pos(BtreeLBAMapping &mapping) +{ + assert(mapping.get_parent() == this); + auto key = mapping.is_indirect() + ? mapping.get_intermediate_base() + : mapping.get_key(); + if (key != iter_idx(mapping.get_pos()).get_key()) { + auto iter = lower_bound(key); + { + // a mapping that no longer exist or has its value + // modified is considered an outdated one, and + // shouldn't be used anymore + ceph_assert(iter != end()); + assert(iter.get_val() == mapping.get_map_val()); + } + mapping._new_pos(iter.get_offset()); + } +} + } diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h index ff06bf81039d4..add464e45e6f9 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h @@ -26,6 +26,8 @@ namespace crimson::os::seastore::lba_manager::btree { using base_iertr = LBAManager::base_iertr; using LBANode = FixedKVNode; +class BtreeLBAMapping; + /** * lba_map_val_t * @@ -290,6 +292,8 @@ struct LBALeafNode } std::ostream &_print_detail(std::ostream &out) const final; + + void maybe_fix_mapping_pos(BtreeLBAMapping &mapping); }; using LBALeafNodeRef = TCachedExtentRef; diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 886f96793200b..7181a32b9f196 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -181,14 +181,27 @@ public: Transaction &t, LBAMappingRef pin) { - // checking the lba child must be atomic with creating - // and linking the absent child - auto ret = get_extent_if_linked(t, std::move(pin)); - if (ret.index() == 1) { - return std::move(std::get<1>(ret)); + auto fut = base_iertr::make_ready_future(); + if (!pin->is_parent_valid()) { + fut = get_pin(t, pin->get_key() + ).handle_error_interruptible( + crimson::ct_error::enoent::assert_failure{"unexpected enoent"}, + crimson::ct_error::input_output_error::pass_further{} + ); } else { - return this->pin_to_extent(t, std::move(std::get<0>(ret))); + pin->maybe_fix_pos(); + fut = base_iertr::make_ready_future(std::move(pin)); } + return fut.si_then([&t, this](auto npin) mutable { + // checking the lba child must be atomic with creating + // and linking the absent child + auto ret = get_extent_if_linked(t, std::move(npin)); + if (ret.index() == 1) { + return std::move(std::get<1>(ret)); + } else { + return this->pin_to_extent(t, std::move(std::get<0>(ret))); + } + }); } template @@ -198,6 +211,8 @@ public: LBAMappingRef pin) { ceph_assert(pin->is_parent_valid()); + // checking the lba child must be atomic with creating + // and linking the absent child auto v = pin->get_logical_extent(t); if (v.has_child()) { return v.get_child_fut().safe_then([pin=std::move(pin)](auto extent) { @@ -459,16 +474,31 @@ public: // The according extent might be stable or pending. auto fut = base_iertr::now(); if (!pin->is_indirect()) { - auto fut2 = base_iertr::make_ready_future>(); - if (full_extent_integrity_check) { - fut2 = read_pin(t, pin->duplicate()); + if (!pin->is_parent_valid()) { + fut = get_pin(t, pin->get_key() + ).si_then([&pin](auto npin) { + assert(npin); + pin = std::move(npin); + return seastar::now(); + }).handle_error_interruptible( + crimson::ct_error::enoent::assert_failure{"unexpected enoent"}, + crimson::ct_error::input_output_error::pass_further{} + ); } else { - auto ret = get_extent_if_linked(t, pin->duplicate()); - if (ret.index() == 1) { - fut2 = std::move(std::get<1>(ret)); - } + pin->maybe_fix_pos(); } - fut = fut2.si_then([this, &t, &remaps, original_paddr, + + fut = fut.si_then([this, &t, &pin] { + if (full_extent_integrity_check) { + return read_pin(t, pin->duplicate()); + } else { + auto ret = get_extent_if_linked(t, pin->duplicate()); + if (ret.index() == 1) { + return std::move(std::get<1>(ret)); + } + } + return base_iertr::make_ready_future>(); + }).si_then([this, &t, &remaps, original_paddr, original_laddr, original_len, &extents, FNAME](auto ext) mutable { ceph_assert(full_extent_integrity_check