]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/btree: make updates of lba leaf nodes ptrs
authorXuehan Xu <xuxuehan@qianxin.com>
Sun, 1 Mar 2026 04:42:49 +0000 (12:42 +0800)
committerXuehan Xu <xuxuehan@qianxin.com>
Mon, 9 Mar 2026 09:56:41 +0000 (17:56 +0800)
synchronous with contents updates

Since we need merge content of lba leaf nodes when committing
trim/cleaner transactions, and we rely on the child ptrs to determine
whether to modify mappings of pending leaf nodes. We must make sure
the ptr updates and node content updates are synchronous.

See LBALeafNode::merge_content_to() for detail

Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/os/seastore/backref/btree_backref_manager.cc
src/crimson/os/seastore/btree/fixed_kv_btree.h
src/crimson/os/seastore/lba/btree_lba_manager.cc
src/test/crimson/seastore/test_btree_lba_manager.cc

index f50b56b1ffe9f53c377ecf1a4348f01c8fe137f3..9645fe1773ed217f403959757f4f5e660b256cdc 100644 (file)
@@ -226,7 +226,8 @@ BtreeBackrefManager::new_mapping(
            c,
            *state.insert_iter,
            state.last_end,
-           val
+           val,
+            nullptr
          ).si_then([&state, c, addr, len, key](auto &&p) {
            LOG_PREFIX(BtreeBackrefManager::new_mapping);
            auto [iter, inserted] = std::move(p);
index d6446563bae3b555c8a0e51d9dada5aadb282b26..5085eafb702e929f036daa7b1d40574f669e69bf 100644 (file)
@@ -877,7 +877,8 @@ public:
     op_context_t c,
     iterator iter,
     node_key_t laddr,
-    node_val_t val
+    node_val_t val,
+    BaseChildNode<leaf_node_t, node_key_t> *child
   ) {
     LOG_PREFIX(FixedKVBtree::insert);
     SUBTRACET(
@@ -888,10 +889,10 @@ public:
       iter.is_end() ? min_max_t<node_key_t>::max : iter.get_key());
     return seastar::do_with(
       iter,
-      [this, c, laddr, val](auto &ret) {
+      [this, c, laddr, val, child](auto &ret) {
         return find_insertion(
           c, laddr, ret
-        ).si_then([this, c, laddr, val, &ret] {
+        ).si_then([this, c, laddr, val, &ret, child] {
           if (!ret.at_boundary() && ret.get_key() == laddr) {
             return insert_ret(
               interruptible::ready_future_marker{},
@@ -900,7 +901,7 @@ public:
             ++(get_tree_stats<self_type>(c.trans).num_inserts);
             return handle_split(
               c, ret
-            ).si_then([c, laddr, val, &ret] {
+            ).si_then([c, laddr, val, &ret, child] {
               if (!ret.leaf.node->is_mutable()) {
                 CachedExtentRef mut = c.cache.duplicate_for_write(
                   c.trans, ret.leaf.node
@@ -914,6 +915,12 @@ public:
               assert(laddr >= ret.leaf.node->get_meta().begin &&
                      laddr < ret.leaf.node->get_meta().end);
               ret.leaf.node->insert(iter, laddr, val);
+              if constexpr (std::is_base_of_v<
+                  ParentNode<leaf_node_t, node_key_t>, leaf_node_t>) {
+                ret.leaf.node->insert_child_ptr(
+                  ret.leaf.pos, child, ret.leaf.node->get_size() - 1);
+              }
+              (void)child;
               return insert_ret(
                 interruptible::ready_future_marker{},
                 std::make_pair(ret, true));
@@ -926,11 +933,12 @@ public:
   insert_ret insert(
     op_context_t c,
     node_key_t laddr,
-    node_val_t val) {
+    node_val_t val,
+    BaseChildNode<leaf_node_t, node_key_t> *child) {
     return lower_bound(
       c, laddr
-    ).si_then([this, c, laddr, val](auto iter) {
-      return this->insert(c, iter, laddr, val);
+    ).si_then([child, this, c, laddr, val](auto iter) {
+      return this->insert(c, iter, laddr, val, child);
     });
   }
 
@@ -949,7 +957,8 @@ public:
   update_ret update(
     op_context_t c,
     iterator iter,
-    node_val_t val)
+    node_val_t val,
+    BaseChildNode<leaf_node_t, node_key_t> *child)
   {
     LOG_PREFIX(FixedKVBtree::update);
     SUBTRACET(
@@ -967,6 +976,12 @@ public:
     iter.leaf.node->update(
       iter.leaf.node->iter_idx(iter.leaf.pos),
       val);
+    if constexpr (std::is_base_of_v<
+        ParentNode<leaf_node_t, node_key_t>, leaf_node_t>) {
+      if (child) {
+        iter.leaf.node->update_child_ptr(iter.leaf.pos, child);
+      }
+    }
     return update_ret(
       interruptible::ready_future_marker{},
       iter);
index b6eacba07424fc95f30fd2bfea4d44557196adcd..ef125af18d43a6e5dc7fb49bada7be138264c2e9 100644 (file)
@@ -342,15 +342,11 @@ BtreeLBAManager::reserve_region(
       EXTENT_DEFAULT_REF_COUNT,
       0,
       extent_types_t::NONE};
-    return btree.insert(c, iter, addr, val
+    return btree.insert(
+      c, iter, addr, val, get_reserved_ptr<LBALeafNode, laddr_t>()
     ).si_then([c](auto p) {
       auto &[iter, inserted] = p;
       ceph_assert(inserted);
-      auto &leaf_node = *iter.get_leaf_node();
-      leaf_node.insert_child_ptr(
-       iter.get_leaf_pos(),
-       get_reserved_ptr<LBALeafNode, laddr_t>(),
-       leaf_node.get_size() - 1 /*the size before the insert*/);
       return LBAMapping::create_direct(iter.get_cursor(c));
     });
   });
@@ -397,15 +393,11 @@ BtreeLBAManager::alloc_extents(
              ext->get_paddr(),
              EXTENT_DEFAULT_REF_COUNT,
              ext->get_last_committed_crc(),
-              ext->get_type()}
+              ext->get_type()},
+            ext.get()
          ).si_then([ext, c, FNAME, &iter, &ret](auto p) {
            auto &[it, inserted] = p;
            ceph_assert(inserted);
-           auto &leaf_node = *it.get_leaf_node();
-           leaf_node.insert_child_ptr(
-             it.get_leaf_pos(),
-             ext.get(),
-             leaf_node.get_size() - 1 /*the size before the insert*/);
            TRACET("inserted {}", c.trans, *ext);
            ret.emplace(ret.begin(), LBAMapping::create_direct(it.get_cursor(c)));
            iter = it;
@@ -501,14 +493,10 @@ BtreeLBAManager::clone_mapping(
               inter_key,
               EXTENT_DEFAULT_REF_COUNT,
               0,
-              extent_types_t::NONE});
+              extent_types_t::NONE},
+            get_reserved_ptr<LBALeafNode, laddr_t>());
        }).si_then([c, &state](auto p) {
          auto &[iter, inserted] = p;
-         auto &leaf_node = *iter.get_leaf_node();
-         leaf_node.insert_child_ptr(
-           iter.get_leaf_pos(),
-           get_reserved_ptr<LBALeafNode, laddr_t>(),
-           leaf_node.get_size() - 1 /*the size before the insert*/);
          auto cursor = iter.get_cursor(c);
          return state.mapping.refresh(
          ).si_then([cursor=std::move(cursor)](auto mapping) mutable {
@@ -687,20 +675,16 @@ BtreeLBAManager::insert_mappings(
       [c, &btree, &iter](auto &info)
     {
       assert(info.key != L_ADDR_NULL);
+      bool need_reserved_ptr =
+        info.is_indirect_mapping() || info.is_zero_mapping();
       return btree.insert(
-       c, iter, info.key, info.value
+       c, iter, info.key, info.value,
+        need_reserved_ptr
+          ? get_reserved_ptr<LBALeafNode, laddr_t>()
+          : static_cast<BaseChildNode<LBALeafNode, laddr_t>*>(info.extent)
       ).si_then([c, &iter, &info](auto p) {
        ceph_assert(p.second);
        iter = std::move(p.first);
-       auto &leaf_node = *iter.get_leaf_node();
-       bool need_reserved_ptr =
-         info.is_indirect_mapping() || info.is_zero_mapping();
-       leaf_node.insert_child_ptr(
-         iter.get_leaf_pos(),
-         need_reserved_ptr
-           ? get_reserved_ptr<LBALeafNode, laddr_t>()
-           : static_cast<BaseChildNode<LBALeafNode, laddr_t>*>(info.extent),
-         leaf_node.get_size() - 1 /*the size before the insert*/);
        if (is_valid_child_ptr(info.extent)) {
          ceph_assert(info.value.pladdr.is_paddr());
          assert(info.value.pladdr == iter.get_val().pladdr);
@@ -1114,6 +1098,7 @@ BtreeLBAManager::_update_mapping(
   update_func_t &&f,
   LogicalChildNode* nextent)
 {
+  assert(!is_reserved_ptr(nextent));
   assert(cursor.is_viewable());
   auto c = get_context(t);
   return with_btree<LBABtree>(
@@ -1135,14 +1120,12 @@ BtreeLBAManager::_update_mapping(
       return btree.update(
        c,
        iter,
-       ret
-      ).si_then([c, nextent](auto iter) {
+       ret,
        // child-ptr may already be correct,
        // see LBAManager::update_mappings()
-       if (nextent && !nextent->has_parent_tracker()) {
-         iter.get_leaf_node()->update_child_ptr(
-           iter.get_leaf_pos(), nextent);
-       }
+        nextent && !nextent->has_parent_tracker()
+          ? nextent : nullptr
+      ).si_then([c, nextent](auto iter) {
        assert(!nextent ||
          (nextent->has_parent_tracker()
            && nextent->peek_parent_node().get() == iter.get_leaf_node().get()));
@@ -1161,6 +1144,7 @@ BtreeLBAManager::_update_mapping(
   update_func_t &&f,
   LogicalChildNode* nextent)
 {
+  assert(!is_reserved_ptr(nextent));
   auto c = get_context(t);
   return with_btree<LBABtree>(
     cache,
@@ -1186,18 +1170,15 @@ BtreeLBAManager::_update_mapping(
            return update_mapping_ret_bare_t(addr, ret, iter.get_cursor(c));
          });
        } else {
+          // nextent is provided iff unlinked,
+          // also see TM::rewrite_logical_extent()
+          assert(!nextent || !nextent->has_parent_tracker());
          return btree.update(
            c,
            iter,
-           ret
+           ret,
+            nextent ? nextent : nullptr
          ).si_then([c, nextent](auto iter) {
-           if (nextent) {
-             // nextent is provided iff unlinked,
-              // also see TM::rewrite_logical_extent()
-             assert(!nextent->has_parent_tracker());
-             iter.get_leaf_node()->update_child_ptr(
-               iter.get_leaf_pos(), nextent);
-           }
            assert(!nextent || 
                   (nextent->has_parent_tracker() &&
                    nextent->peek_parent_node().get() == iter.get_leaf_node().get()));
@@ -1362,23 +1343,18 @@ BtreeLBAManager::remap_mappings(
          val.refcount = EXTENT_DEFAULT_REF_COUNT;
          // Checksum will be updated when the committing the transaction
          val.checksum = CRC_NULL;
-         return btree.insert(c, iter, new_key, std::move(val)
-         ).si_then([c, &remap, old_indirect, &ret, &iter](auto p) {
+         return btree.insert(
+            c, iter, new_key, std::move(val),
+            old_indirect
+              ? get_reserved_ptr<LBALeafNode, laddr_t>()
+              : remap.extent
+         ).si_then([c, old_indirect, &ret, &iter](auto p) {
            auto &[it, inserted] = p;
            ceph_assert(inserted);
-           auto &leaf_node = *it.get_leaf_node();
            if (old_indirect) {
-             leaf_node.insert_child_ptr(
-               it.get_leaf_pos(),
-               get_reserved_ptr<LBALeafNode, laddr_t>(),
-               leaf_node.get_size() - 1 /*the size before the insert*/);
              ret.push_back(
                LBAMapping::create_indirect(nullptr, it.get_cursor(c)));
            } else {
-             leaf_node.insert_child_ptr(
-               it.get_leaf_pos(),
-               remap.extent,
-               leaf_node.get_size() - 1 /*the size before the insert*/);
              ret.push_back(
                LBAMapping::create_direct(it.get_cursor(c)));
            }
index d263b72f489adf68c6e5522dbb5f3ebfbe66aab8..35c47cc43337f25cae18aae0a6b3861c80e8eaa8 100644 (file)
@@ -339,13 +339,10 @@ struct lba_btree_test : btree_test_base {
          extents,
          [this, addr, len, &t, &btree](auto &extent) {
          return btree.insert(
-           get_op_context(t), addr, get_map_val(len, extent->get_type())
+           get_op_context(t), addr,
+            get_map_val(len, extent->get_type()), extent.get()
          ).si_then([addr, extent](auto p){
            auto& [iter, inserted] = p;
-           iter.get_leaf_node()->insert_child_ptr(
-             iter.get_leaf_pos(),
-             extent.get(),
-             iter.get_leaf_node()->get_size() - 1);
            assert(inserted);
            extent->set_laddr(addr);
          });