]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/transaction_manager: fix lba mappings before getting
authorXuehan Xu <xuxuehan@qianxin.com>
Tue, 11 Jun 2024 07:37:57 +0000 (15:37 +0800)
committerXuehan Xu <xuxuehan@qianxin.com>
Wed, 26 Jun 2024 02:59:20 +0000 (10:59 +0800)
logical extents through them

Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/os/seastore/btree/btree_range_pin.cc
src/crimson/os/seastore/btree/fixed_kv_node.h
src/crimson/os/seastore/cached_extent.h
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h
src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc
src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h
src/crimson/os/seastore/transaction_manager.h

index ad6abdf4ee80f6f8b10f33ea8fa8fe62f21e4d92..12e078814ccc5dc746d164b3b97a0d5df160a08e 100644 (file)
@@ -31,7 +31,10 @@ bool BtreeNodeMapping<key_t, val_t>::is_stable() const
   assert(!this->parent_modified());
   assert(pos != std::numeric_limits<uint16_t>::max());
   auto &p = (FixedKVNode<key_t>&)*parent;
-  return p.is_child_stable(ctx, pos);
+  auto k = this->is_indirect()
+    ? this->get_intermediate_base()
+    : get_key();
+  return p.is_child_stable(ctx, pos, k);
 }
 
 template <typename key_t, typename val_t>
@@ -40,7 +43,10 @@ bool BtreeNodeMapping<key_t, val_t>::is_data_stable() const
   assert(!this->parent_modified());
   assert(pos != std::numeric_limits<uint16_t>::max());
   auto &p = (FixedKVNode<key_t>&)*parent;
-  return p.is_child_data_stable(ctx, pos);
+  auto k = this->is_indirect()
+    ? this->get_intermediate_base()
+    : get_key();
+  return p.is_child_data_stable(ctx, pos, k);
 }
 
 template class BtreeNodeMapping<laddr_t, paddr_t>;
index ea61d6b9933d227d8a821cedf9d85f21a6244438..97822e7e467c0fea5298f706893720b15314e195 100644 (file)
@@ -265,8 +265,14 @@ struct FixedKVNode : ChildableCachedExtent {
     set_child_ptracker(child);
   }
 
-  virtual bool is_child_stable(op_context_t<node_key_t>, uint16_t pos) const = 0;
-  virtual bool is_child_data_stable(op_context_t<node_key_t>, uint16_t pos) const = 0;
+  virtual bool is_child_stable(
+    op_context_t<node_key_t>,
+    uint16_t pos,
+    node_key_t key) const = 0;
+  virtual bool is_child_data_stable(
+    op_context_t<node_key_t>,
+    uint16_t pos,
+    node_key_t key) const = 0;
 
   template <typename T>
   get_child_ret_t<T> get_child(
@@ -275,6 +281,7 @@ struct FixedKVNode : ChildableCachedExtent {
     node_key_t key)
   {
     assert(children.capacity());
+    assert(key == get_key_from_idx(pos));
     auto child = children[pos];
     ceph_assert(!is_reserved_ptr(child));
     if (is_valid_child_ptr(child)) {
@@ -632,11 +639,17 @@ struct FixedKVInternalNode
     }
   }
 
-  bool is_child_stable(op_context_t<NODE_KEY>, uint16_t pos) const final {
+  bool is_child_stable(
+    op_context_t<NODE_KEY>,
+    uint16_t pos,
+    NODE_KEY key) const final {
     ceph_abort("impossible");
     return false;
   }
-  bool is_child_data_stable(op_context_t<NODE_KEY>, uint16_t pos) const final {
+  bool is_child_data_stable(
+    op_context_t<NODE_KEY>,
+    uint16_t pos,
+    NODE_KEY key) const final {
     ceph_abort("impossible");
     return false;
   }
@@ -1040,14 +1053,25 @@ struct FixedKVLeafNode
   // 2. The child extent is stable
   //
   // For reserved mappings, the return values are undefined.
-  bool is_child_stable(op_context_t<NODE_KEY> c, uint16_t pos) const final {
-    return _is_child_stable(c, pos);
+  bool is_child_stable(
+    op_context_t<NODE_KEY> c,
+    uint16_t pos,
+    NODE_KEY key) const final {
+    return _is_child_stable(c, pos, key);
   }
-  bool is_child_data_stable(op_context_t<NODE_KEY> c, uint16_t pos) const final {
-    return _is_child_stable(c, pos, true);
+  bool is_child_data_stable(
+    op_context_t<NODE_KEY> c,
+    uint16_t pos,
+    NODE_KEY key) const final {
+    return _is_child_stable(c, pos, key, true);
   }
 
-  bool _is_child_stable(op_context_t<NODE_KEY> c, uint16_t pos, bool data_only = false) const {
+  bool _is_child_stable(
+    op_context_t<NODE_KEY> c,
+    uint16_t pos,
+    NODE_KEY key,
+    bool data_only = false) const {
+    assert(key == get_key_from_idx(pos));
     auto child = this->children[pos];
     if (is_reserved_ptr(child)) {
       return true;
index d26d61f25a2298385572e12f564339441e4dc1cf..e78a0d95028b71393affdfa4c7e0885fc95b1469 100644 (file)
@@ -1157,6 +1157,10 @@ public:
     return false;
   };
 
+  virtual void maybe_fix_pos() {
+    ceph_abort("impossible");
+  }
+
   virtual ~PhysicalNodeMapping() {}
 protected:
   std::optional<child_pos_t> child_pos = std::nullopt;
index e5ac044e16be549227b6d800fddbf9e6a8441a5f..ca25dc6a2a02ba03eef2e21b6bc9d831fd8af2e8 100644 (file)
@@ -25,6 +25,8 @@
 
 namespace crimson::os::seastore::lba_manager::btree {
 
+struct LBALeafNode;
+
 class BtreeLBAMapping : public BtreeNodeMapping<laddr_t, paddr_t> {
 // To support cloning, there are two kinds of lba mappings:
 //     1. physical lba mapping: the pladdr in the value of which is the paddr of
@@ -166,6 +168,14 @@ public:
     return p.modified_since(parent_modifications);
   }
 
+  void maybe_fix_pos() final {
+    assert(is_parent_valid());
+    if (!parent_modified()) {
+      return;
+    }
+    auto &p = static_cast<LBALeafNode&>(*parent);
+    p.maybe_fix_mapping_pos(*this);
+  }
 protected:
   std::unique_ptr<BtreeNodeMapping<laddr_t, paddr_t>> _duplicate(
     op_context_t<laddr_t> ctx) const final {
@@ -181,6 +191,10 @@ protected:
     return pin;
   }
 private:
+  void _new_pos(uint16_t pos) {
+    this->pos = pos;
+  }
+
   laddr_t key = L_ADDR_NULL;
   bool indirect = false;
   laddr_t intermediate_key = L_ADDR_NULL;
@@ -189,6 +203,7 @@ private:
   pladdr_t raw_val;
   lba_map_val_t map_val;
   uint64_t parent_modifications = 0;
+  friend struct LBALeafNode;
 };
 
 using BtreeLBAMappingRef = std::unique_ptr<BtreeLBAMapping>;
index 730da546cb0e23602d6404cbf0f0f7c7628d7ad5..504c346ea946d394971cb1e33c6cc3a90386c428 100644 (file)
@@ -10,7 +10,7 @@
 #include "include/buffer.h"
 #include "include/byteorder.h"
 
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
+#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
 #include "crimson/os/seastore/logging.h"
 
 SET_SUBSYS(seastore_lba);
@@ -53,4 +53,23 @@ void LBALeafNode::resolve_relative_addrs(paddr_t base)
   }
 }
 
+void LBALeafNode::maybe_fix_mapping_pos(BtreeLBAMapping &mapping)
+{
+  assert(mapping.get_parent() == this);
+  auto key = mapping.is_indirect()
+    ? mapping.get_intermediate_base()
+    : mapping.get_key();
+  if (key != iter_idx(mapping.get_pos()).get_key()) {
+    auto iter = lower_bound(key);
+    {
+      // a mapping that no longer exist or has its value
+      // modified is considered an outdated one, and
+      // shouldn't be used anymore
+      ceph_assert(iter != end());
+      assert(iter.get_val() == mapping.get_map_val());
+    }
+    mapping._new_pos(iter.get_offset());
+  }
+}
+
 }
index ff06bf81039d4a218abab5eebb36671b5aa980fb..add464e45e6f938b3ea53e5ed41d1deab1617751 100644 (file)
@@ -26,6 +26,8 @@ namespace crimson::os::seastore::lba_manager::btree {
 using base_iertr = LBAManager::base_iertr;
 using LBANode = FixedKVNode<laddr_t>;
 
+class BtreeLBAMapping;
+
 /**
  * lba_map_val_t
  *
@@ -290,6 +292,8 @@ struct LBALeafNode
   }
 
   std::ostream &_print_detail(std::ostream &out) const final;
+
+  void maybe_fix_mapping_pos(BtreeLBAMapping &mapping);
 };
 using LBALeafNodeRef = TCachedExtentRef<LBALeafNode>;
 
index 886f96793200b6046928d983b57d1441f0a9600b..7181a32b9f1969b89147afd16a956a8d1bde10d0 100644 (file)
@@ -181,14 +181,27 @@ public:
     Transaction &t,
     LBAMappingRef pin)
   {
-    // checking the lba child must be atomic with creating
-    // and linking the absent child
-    auto ret = get_extent_if_linked<T>(t, std::move(pin));
-    if (ret.index() == 1) {
-      return std::move(std::get<1>(ret));
+    auto fut = base_iertr::make_ready_future<LBAMappingRef>();
+    if (!pin->is_parent_valid()) {
+      fut = get_pin(t, pin->get_key()
+      ).handle_error_interruptible(
+       crimson::ct_error::enoent::assert_failure{"unexpected enoent"},
+       crimson::ct_error::input_output_error::pass_further{}
+      );
     } else {
-      return this->pin_to_extent<T>(t, std::move(std::get<0>(ret)));
+      pin->maybe_fix_pos();
+      fut = base_iertr::make_ready_future<LBAMappingRef>(std::move(pin));
     }
+    return fut.si_then([&t, this](auto npin) mutable {
+      // checking the lba child must be atomic with creating
+      // and linking the absent child
+      auto ret = get_extent_if_linked<T>(t, std::move(npin));
+      if (ret.index() == 1) {
+       return std::move(std::get<1>(ret));
+      } else {
+       return this->pin_to_extent<T>(t, std::move(std::get<0>(ret)));
+      }
+    });
   }
 
   template <typename T>
@@ -198,6 +211,8 @@ public:
     LBAMappingRef pin)
   {
     ceph_assert(pin->is_parent_valid());
+    // checking the lba child must be atomic with creating
+    // and linking the absent child
     auto v = pin->get_logical_extent(t);
     if (v.has_child()) {
       return v.get_child_fut().safe_then([pin=std::move(pin)](auto extent) {
@@ -459,16 +474,31 @@ public:
       // The according extent might be stable or pending.
       auto fut = base_iertr::now();
       if (!pin->is_indirect()) {
-       auto fut2 = base_iertr::make_ready_future<TCachedExtentRef<T>>();
-       if (full_extent_integrity_check) {
-         fut2 = read_pin<T>(t, pin->duplicate());
+       if (!pin->is_parent_valid()) {
+         fut = get_pin(t, pin->get_key()
+         ).si_then([&pin](auto npin) {
+           assert(npin);
+           pin = std::move(npin);
+           return seastar::now();
+         }).handle_error_interruptible(
+           crimson::ct_error::enoent::assert_failure{"unexpected enoent"},
+           crimson::ct_error::input_output_error::pass_further{}
+         );
        } else {
-         auto ret = get_extent_if_linked<T>(t, pin->duplicate());
-         if (ret.index() == 1) {
-           fut2 = std::move(std::get<1>(ret));
-         }
+         pin->maybe_fix_pos();
        }
-       fut = fut2.si_then([this, &t, &remaps, original_paddr,
+
+       fut = fut.si_then([this, &t, &pin] {
+         if (full_extent_integrity_check) {
+           return read_pin<T>(t, pin->duplicate());
+         } else {
+           auto ret = get_extent_if_linked<T>(t, pin->duplicate());
+           if (ret.index() == 1) {
+             return std::move(std::get<1>(ret));
+           }
+         }
+         return base_iertr::make_ready_future<TCachedExtentRef<T>>();
+       }).si_then([this, &t, &remaps, original_paddr,
                            original_laddr, original_len,
                            &extents, FNAME](auto ext) mutable {
          ceph_assert(full_extent_integrity_check