git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/lba_manager: add LBACursor related interfaces
author Xuehan Xu <xuxuehan@qianxin.com>
Wed, 11 Jun 2025 03:49:51 +0000 (11:49 +0800)
committer Xuehan Xu <xuxuehan@qianxin.com>
Tue, 5 Aug 2025 06:33:59 +0000 (14:33 +0800)
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/os/seastore/btree/btree_types.h
src/crimson/os/seastore/btree/fixed_kv_btree.h
src/crimson/os/seastore/lba/btree_lba_manager.cc
src/crimson/os/seastore/lba/btree_lba_manager.h
src/crimson/os/seastore/lba_manager.h
src/crimson/os/seastore/lba_mapping.h
src/crimson/os/seastore/object_data_handler.cc
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h
src/test/crimson/seastore/test_btree_lba_manager.cc
src/test/crimson/seastore/test_transaction_manager.cc

index 9266a3ad04bfc97a7f53f09858a5d09629e6375a..87f4e72bb1640178a838814793034c4431dd178f 100644 (file)
@@ -274,6 +274,9 @@ struct LBACursor : BtreeCursor<laddr_t, lba::lba_map_val_t> {
     assert(!is_indirect());
     return val->checksum;
   }
+  bool contains(laddr_t laddr) const {  // true iff laddr is in [get_laddr(), get_laddr() + get_length())
+    return get_laddr() <= laddr && get_laddr() + get_length() > laddr;
+  }
   extent_ref_count_t get_refcount() const {
     assert(!is_end());
     assert(!is_indirect());
index ea7c6ee61483348ce516b47162bed3fb49dca307..f61c93e3ee80055fff89dd4c6b2d617edcbdd014 100644 (file)
@@ -490,32 +490,39 @@ public:
   }
 
   iterator make_partial_iter(  // overload: build an iterator from an existing cursor
+    op_context_t c,
+    cursor_t &cursor)
+  {
+    return make_partial_iter(  // forwards to the (c, leaf, key, pos) overload
+      c,
+      cursor.parent->template cast<leaf_node_t>(),  // the cursor's parent, cast to the leaf node type
+      cursor.key,
+      cursor.pos);
+  }
+
+  boost::intrusive_ptr<cursor_t> get_cursor(  // cursor for `key` in `leaf`; entry located via lower_bound
+    op_context_t c,
+    TCachedExtentRef<leaf_node_t> leaf,
+    node_key_t key)
+  {
+    auto it = leaf->lower_bound(key);
+    assert(it != leaf->end());  // debug-only: the lookup must land on an entry of this leaf
+    return new cursor_t(  // raw pointer converts implicitly to intrusive_ptr (unique_ptr's ctor is explicit)
+      c, leaf, leaf->modifications,
+      key, it.get_val(), it.get_offset());
+  }
+
+  boost::intrusive_ptr<cursor_t> get_cursor(
     op_context_t c,
     TCachedExtentRef<leaf_node_t> leaf,
     node_key_t key,
     uint16_t pos)
   {
-    assert(leaf->is_valid());
-    assert(leaf->is_viewable_by_trans(c.trans).first);
-
-    auto depth = get_root().get_depth();
-#ifndef NDEBUG
-    auto ret = iterator(
-      depth,
-      depth == 1
-        ? iterator::state_t::FULL
-        : iterator::state_t::PARTIAL);
-#else
-    auto ret = iterator(depth);
-#endif
-    ret.leaf.node = leaf;
-    ret.leaf.pos = pos;
-    if (ret.is_end()) {
-      ceph_assert(key == min_max_t<node_key_t>::max);
-    } else {
-      ceph_assert(key == ret.get_key());
-    }
-    return ret;
+    assert(leaf->get_size() != pos);
+    auto it = leaf->iter_idx(pos);
+    assert(it.get_key() == key);
+    return std::make_unique<cursor_t>(
+      c, leaf, leaf->modifications, key, it.get_val(), pos);
   }
 
   /**
@@ -1358,6 +1365,35 @@ public:
 private:
   RootBlockRef root_block;
 
+  iterator make_partial_iter(  // build an iterator with only its leaf node/position filled in
+    op_context_t c,
+    TCachedExtentRef<leaf_node_t> leaf,
+    node_key_t key,
+    uint16_t pos)
+  {
+    assert(leaf->is_valid());
+    assert(leaf->is_viewable_by_trans(c.trans).first);
+
+    auto depth = get_root().get_depth();
+#ifndef NDEBUG
+    auto ret = iterator(  // debug builds also record the state: FULL when depth == 1, else PARTIAL
+      depth,
+      depth == 1
+        ? iterator::state_t::FULL
+        : iterator::state_t::PARTIAL);
+#else
+    auto ret = iterator(depth);
+#endif
+    ret.leaf.node = leaf;
+    ret.leaf.pos = pos;
+    if (ret.is_end()) {
+      ceph_assert(key == min_max_t<node_key_t>::max);  // an end iterator must be paired with the max key
+    } else {
+      ceph_assert(key == ret.get_key());  // otherwise `key` must equal the key stored at `pos`
+    }
+    return ret;
+  }
+
   template <typename T>
   using node_position_t = typename iterator::template node_position_t<T>;
 
index 56b497a57fb1ab2318b1d90a919dbe261c106a21..81da404d633f7324a498a29ec098d7aea9b6030d 100644 (file)
@@ -154,7 +154,7 @@ BtreeLBAManager::get_mappings(
           }
          assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT);
          assert(cursor->val->checksum == 0);
-          return resolve_indirect_cursor(c, btree, *cursor
+          return this->resolve_indirect_cursor(c, btree, *cursor
           ).si_then([FNAME, c, &ret, &cursor, laddr, length](auto direct) {
             ret.emplace_back(LBAMapping::create_indirect(
                std::move(direct), std::move(cursor)));
@@ -233,17 +233,24 @@ BtreeLBAManager::resolve_indirect_cursor(
 BtreeLBAManager::get_mapping_ret
 BtreeLBAManager::get_mapping(
   Transaction &t,
-  laddr_t laddr)
+  laddr_t laddr,
+  bool search_containing)
 {
   LOG_PREFIX(BtreeLBAManager::get_mapping);
-  TRACET("{} ...", t, laddr);
+  TRACET("{} ... search_containing={}", t, laddr, search_containing);
   auto c = get_context(t);
   return with_btree<LBABtree>(
     cache, c,
-    [FNAME, this, c, laddr](auto& btree)
+    [FNAME, this, c, laddr, search_containing](auto& btree)
   {
-    return get_cursor(c, btree, laddr
-    ).si_then([FNAME, this, c, laddr, &btree](LBACursorRef cursor) {
+    auto fut = get_mapping_iertr::make_ready_future<LBACursorRef>();
+    if (search_containing) {
+      fut = get_containing_cursor(c, btree, laddr);
+    } else {
+      fut = get_cursor(c, btree, laddr);
+    }
+    return fut.si_then([FNAME, laddr, &btree, c, this,
+                       search_containing](LBACursorRef cursor) {
       if (!cursor->is_indirect()) {
         TRACET("{} got direct cursor {}",
                c.trans, laddr, *cursor);
@@ -251,7 +258,11 @@ BtreeLBAManager::get_mapping(
         return get_mapping_iertr::make_ready_future<
          LBAMapping>(std::move(mapping));
       }
-      assert(laddr == cursor->get_laddr());
+      if (search_containing) {
+       assert(cursor->contains(laddr));
+      } else {
+       assert(laddr == cursor->get_laddr());
+      }
       assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT);
       assert(cursor->val->checksum == 0);
       return resolve_indirect_cursor(c, btree, *cursor
@@ -268,6 +279,138 @@ BtreeLBAManager::get_mapping(
   });
 }
 
+BtreeLBAManager::get_mapping_ret
+BtreeLBAManager::get_mapping(  // overload: build a direct mapping for `extent` from its parent leaf
+  Transaction &t,
+  LogicalChildNode &extent)
+{
+  LOG_PREFIX(BtreeLBAManager::get_mapping);
+  TRACET("{}", t, extent);
+  assert(extent.peek_parent_node()->is_valid());
+  auto c = get_context(t);
+  return with_btree<LBABtree>(
+    cache,
+    c,
+    [c, &extent, FNAME](auto &btree) {  // `extent` is captured by reference: it must outlive the future
+    return extent.get_parent_node(c.trans, c.cache
+    ).si_then([&btree, c, &extent, FNAME](auto leaf) {
+      if (leaf->is_pending()) {
+       TRACET("find pending extent {} for {}",
+              c.trans, (void*)leaf.get(), extent);
+      }
+#ifndef NDEBUG
+      auto it = leaf->lower_bound(extent.get_laddr());  // debug-only: leaf must hold an entry keyed by the laddr
+      assert(it != leaf->end() && it.get_key() == extent.get_laddr());
+#endif
+      return get_mapping_iertr::make_ready_future<
+       LBAMapping>(LBAMapping::create_direct(
+         btree.get_cursor(c, leaf, extent.get_laddr())));  // cursor taken from the leaf directly
+    });
+  });
+}
+
+BtreeLBAManager::alloc_extent_ret
+BtreeLBAManager::reserve_region(  // overload: insert a reserved (zero-paddr) mapping at addr~len, positioned by `pos`
+  Transaction &t,
+  LBAMapping pos,
+  laddr_t addr,
+  extent_len_t len)
+{
+  LOG_PREFIX(BtreeLBAManager::reserve_region);
+  DEBUGT("{} {}~{}", t, pos, addr, len);
+  assert(pos.is_viewable());
+  auto c = get_context(t);
+  return with_btree<LBABtree>(
+    cache,
+    c,
+    [pos=std::move(pos), c, addr, len](auto &btree) mutable {
+    auto &cursor = pos.get_effective_cursor();
+    auto iter = btree.make_partial_iter(c, cursor);  // insertion position derived from pos's cursor
+    lba_map_val_t val{len, P_ADDR_ZERO, EXTENT_DEFAULT_REF_COUNT, 0};
+    return btree.insert(c, iter, addr, val
+    ).si_then([c](auto p) {
+      auto &[iter, inserted] = p;
+      ceph_assert(inserted);  // the key must not have existed before
+      auto &leaf_node = *iter.get_leaf_node();
+      leaf_node.insert_child_ptr(  // the new entry gets the reserved child pointer
+       iter.get_leaf_pos(),
+       get_reserved_ptr<LBALeafNode, laddr_t>(),
+       leaf_node.get_size() - 1 /*the size before the insert*/);
+      return LBAMapping::create_direct(iter.get_cursor(c));
+    });
+  });
+}
+
+BtreeLBAManager::alloc_extents_ret
+BtreeLBAManager::alloc_extents(  // overload: insert mappings for `extents`, positioned by `pos`
+  Transaction &t,
+  LBAMapping pos,
+  std::vector<LogicalChildNodeRef> extents)
+{
+  LOG_PREFIX(BtreeLBAManager::alloc_extents);
+  DEBUGT("{}", t, pos);
+  assert(pos.is_viewable());
+  auto c = get_context(t);
+  return with_btree<LBABtree>(
+    cache,
+    c,
+    [c, FNAME, pos=std::move(pos), this,
+    extents=std::move(extents)](auto &btree) mutable {
+    auto &cursor = pos.get_effective_cursor();
+    return cursor.refresh(  // refresh the cursor before building an iterator from it
+    ).si_then(
+      [&cursor, &btree, extents=std::move(extents),
+      pos=std::move(pos), c, FNAME, this] {  // `pos` is moved into this lambda alongside the `cursor` reference
+      return seastar::do_with(
+       std::move(extents),
+       btree.make_partial_iter(c, cursor),
+       std::vector<LBAMapping>(),
+       [c, &btree, FNAME, this]
+       (auto &extents, auto &iter, auto &ret) mutable {
+       return trans_intr::do_for_each(
+         extents.rbegin(),  // extents are inserted in reverse order
+         extents.rend(),
+         [&btree, FNAME, &iter, c, &ret, this](auto ext) {
+         assert(ext->has_laddr());
+         stats.num_alloc_extents += ext->get_length();
+         return btree.insert(
+           c,
+           iter,
+           ext->get_laddr(),
+           lba_map_val_t{
+             ext->get_length(),
+             ext->get_paddr(),
+             EXTENT_DEFAULT_REF_COUNT,
+             ext->get_last_committed_crc()}
+         ).si_then([ext, c, FNAME, &iter, &ret](auto p) {
+           auto &[it, inserted] = p;
+           ceph_assert(inserted);
+           auto &leaf_node = *it.get_leaf_node();
+           leaf_node.insert_child_ptr(
+             it.get_leaf_pos(),
+             ext.get(),
+             leaf_node.get_size() - 1 /*the size before the insert*/);
+           TRACET("inserted {}", c.trans, *ext);
+           ret.emplace(ret.begin(), LBAMapping::create_direct(it.get_cursor(c)));  // prepend: undoes the reverse iteration
+           iter = it;  // the next insert is positioned at the entry just inserted
+         });
+#ifndef NDEBUG
+       }).si_then([&iter, c] {  // debug-only: the entry before the last insert must end at or before its key
+         if (iter.is_begin()) {
+           return base_iertr::now();
+         }
+         auto key = iter.get_key();
+         return iter.prev(c).si_then([key](auto it) {
+           assert(key >= it.get_key() + it.get_val().len);
+           return base_iertr::now();
+         });
+#endif
+       }).si_then([&ret] { return std::move(ret); });  // `ret` is a do_with reference, so the move is explicit
+      });
+    });
+  });
+}
+
 BtreeLBAManager::_get_cursor_ret
 BtreeLBAManager::get_cursor(
   op_context_t c,
@@ -611,12 +754,13 @@ BtreeLBAManager::rewrite_extent(
 BtreeLBAManager::update_mapping_ret
 BtreeLBAManager::update_mapping(
   Transaction& t,
-  laddr_t laddr,
+  LBAMapping mapping,
   extent_len_t prev_len,
   paddr_t prev_addr,
   LogicalChildNode& nextent)
 {
   LOG_PREFIX(BtreeLBAManager::update_mapping);
+  auto laddr = mapping.get_key();
   auto addr = nextent.get_paddr();
   auto len = nextent.get_length();
   auto checksum = nextent.get_last_committed_crc();
@@ -624,80 +768,111 @@ BtreeLBAManager::update_mapping(
          t, laddr, prev_addr, prev_len, addr, len, checksum);
   assert(laddr == nextent.get_laddr());
   assert(!addr.is_null());
-  return _update_mapping(
-    t,
-    laddr,
-    [prev_addr, addr, prev_len, len, checksum]
-    (const lba_map_val_t &in) {
-      lba_map_val_t ret = in;
-      ceph_assert(in.pladdr.is_paddr());
-      ceph_assert(in.pladdr.get_paddr() == prev_addr);
-      ceph_assert(in.len == prev_len);
-      ret.pladdr = addr;
-      ret.len = len;
-      ret.checksum = checksum;
-      return ret;
-    },
-    &nextent
-  ).si_then([&t, laddr, prev_addr, prev_len, addr, len, checksum, FNAME](auto res) {
-      assert(res.is_alive_mapping());
-      DEBUGT("laddr={}, paddr {}~0x{:x} => {}~0x{:x}, crc=0x{:x} done -- {}",
-             t, laddr, prev_addr, prev_len, addr, len, checksum, res.get_cursor());
-      return update_mapping_iertr::make_ready_future<
-       extent_ref_count_t>(res.get_cursor().get_refcount());
-    },
-    update_mapping_iertr::pass_further{},
-    /* ENOENT in particular should be impossible */
-    crimson::ct_error::assert_all{
-      "Invalid error in BtreeLBAManager::update_mapping"
-    }
-  );
-}
-
-BtreeLBAManager::update_mappings_ret
-BtreeLBAManager::update_mappings(
-  Transaction& t,
-  const std::list<LogicalChildNodeRef>& extents)
-{
-  return trans_intr::do_for_each(extents, [this, &t](auto &extent) {
-    LOG_PREFIX(BtreeLBAManager::update_mappings);
-    auto laddr = extent->get_laddr();
-    auto prev_addr = extent->get_prior_paddr_and_reset();
-    auto len = extent->get_length();
-    auto addr = extent->get_paddr();
-    auto checksum = extent->get_last_committed_crc();
-    TRACET("laddr={}, paddr {}~0x{:x} => {}, crc=0x{:x}",
-           t, laddr, prev_addr, len, addr, checksum);
-    assert(!addr.is_null());
+  assert(mapping.is_viewable());
+  assert(!mapping.is_indirect());
+  return seastar::do_with(
+    std::move(mapping),
+    [&t, this, prev_len, prev_addr, len, FNAME,
+    laddr, addr, checksum, &nextent](auto &mapping) {
+    auto &cursor = mapping.get_effective_cursor();
     return _update_mapping(
       t,
-      laddr,
-      [prev_addr, addr, len, checksum](
-        const lba_map_val_t &in) {
-        lba_map_val_t ret = in;
-        ceph_assert(in.pladdr.is_paddr());
-        ceph_assert(in.pladdr.get_paddr() == prev_addr);
-        ceph_assert(in.len == len);
-        ret.pladdr = addr;
-        ret.checksum = checksum;
-        return ret;
+      cursor,
+      [prev_addr, addr, prev_len, len, checksum](
+       const lba_map_val_t &in) {
+       assert(!addr.is_null());
+       lba_map_val_t ret = in;
+       ceph_assert(in.pladdr.is_paddr());
+       ceph_assert(in.pladdr.get_paddr() == prev_addr);
+       ceph_assert(in.len == prev_len);
+       ret.pladdr = addr;
+       ret.len = len;
+       ret.checksum = checksum;
+       return ret;
       },
-      nullptr   // all the extents should have already been
-                // added to the fixed_kv_btree
-    ).si_then([&t, laddr, prev_addr, len, addr, checksum, FNAME](auto res) {
-        DEBUGT("laddr={}, paddr {}~0x{:x} => {}, crc=0x{:x} done -- {}",
-               t, laddr, prev_addr, len, addr, checksum, res.get_cursor());
-        return update_mapping_iertr::make_ready_future();
+      &nextent
+    ).si_then([&t, laddr, prev_addr, prev_len, addr, len, checksum, FNAME](auto res) {
+       assert(res.is_alive_mapping());
+       DEBUGT("laddr={}, paddr {}~0x{:x} => {}~0x{:x}, crc=0x{:x} done -- {}",
+              t, laddr, prev_addr, prev_len, addr, len, checksum, res.get_cursor());
+       return update_mapping_iertr::make_ready_future<
+         extent_ref_count_t>(res.get_cursor().get_refcount());
       },
       update_mapping_iertr::pass_further{},
       /* ENOENT in particular should be impossible */
       crimson::ct_error::assert_all{
-        "Invalid error in BtreeLBAManager::update_mappings"
+       "Invalid error in BtreeLBAManager::update_mapping"
       }
     );
   });
 }
 
+BtreeLBAManager::update_mappings_ret
+BtreeLBAManager::update_mappings(  // rewrite paddr/checksum of each extent's LBA entry, found via its parent leaf
+  Transaction& t,
+  const std::list<LogicalChildNodeRef>& extents)
+{
+  LOG_PREFIX(BtreeLBAManager::update_mappings);
+  auto c = get_context(t);
+  return with_btree<LBABtree>(
+    cache,
+    c,
+    [c, &extents, FNAME, this](auto &btree) {  // `extents` is captured by reference: caller keeps it alive
+    return trans_intr::do_for_each(
+      extents,
+      [this, FNAME, c, &btree](auto &extent) {
+      return extent->get_parent_node(c.trans, c.cache
+      ).si_then([c, &extent, FNAME, &btree, this](auto leaf) {
+       if (leaf->is_pending()) {
+         TRACET("find pending extent {} for {}",
+                c.trans, (void*)leaf.get(), *extent);
+       }
+       return seastar::do_with(
+         btree.get_cursor(c, leaf, extent->get_laddr()),  // cursor is kept alive by do_with for the update
+         [this, c, &extent, FNAME](auto &cursor) {
+         assert(!cursor->is_end() &&
+           cursor->get_laddr() == extent->get_laddr());
+         auto prev_addr = extent->get_prior_paddr_and_reset();  // presumably also clears the prior paddr -- confirm
+         auto len = extent->get_length();
+         auto addr = extent->get_paddr();
+         auto checksum = extent->get_last_committed_crc();
+         TRACET("cursor={}, paddr {}~0x{:x} => {}, crc=0x{:x}",
+                c.trans, *cursor, prev_addr, len, addr, checksum);
+         assert(!addr.is_null());
+         return this->_update_mapping(
+           c.trans,
+           *cursor,
+           [prev_addr, addr, len, checksum](
+             const lba_map_val_t &in) {
+             lba_map_val_t ret = in;
+             ceph_assert(in.pladdr.is_paddr());
+             ceph_assert(in.pladdr.get_paddr() == prev_addr);  // stored value must still be the prior paddr
+             ceph_assert(in.len == len);  // the length is not changed by this path
+             ret.pladdr = addr;
+             ret.checksum = checksum;
+             return ret;
+           },
+           nullptr   // all the extents should have already been
+                     // added to the fixed_kv_btree
+         ).si_then([c, &cursor, prev_addr, len, addr,
+                   checksum, FNAME](auto res) {
+             DEBUGT("cursor={}, paddr {}~0x{:x} => {}, crc=0x{:x} done -- {}",
+                    c.trans, *cursor, prev_addr, len,
+                    addr, checksum, res.get_cursor());
+             return update_mapping_iertr::make_ready_future();
+           },
+           update_mapping_iertr::pass_further{},
+           /* ENOENT in particular should be impossible */
+           crimson::ct_error::assert_all{
+             "Invalid error in BtreeLBAManager::update_mappings"
+           }
+         );
+       });
+      });
+    });
+  });
+}
+
 BtreeLBAManager::get_physical_extent_if_live_ret
 BtreeLBAManager::get_physical_extent_if_live(
   Transaction &t,
@@ -725,6 +900,30 @@ BtreeLBAManager::get_physical_extent_if_live(
     });
 }
 
+BtreeLBAManager::complete_lba_mapping_ret
+BtreeLBAManager::complete_indirect_lba_mapping(  // fill in direct_cursor of an indirect mapping if it is not complete yet
+  Transaction &t,
+  LBAMapping mapping)
+{
+  assert(mapping.is_viewable());
+  assert(mapping.is_indirect());
+  if (mapping.is_complete_indirect()) {  // already complete: hand the mapping back unchanged
+    return complete_lba_mapping_iertr::make_ready_future<
+      LBAMapping>(std::move(mapping));
+  }
+  auto c = get_context(t);
+  return with_btree_state<LBABtree, LBAMapping>(
+    cache,
+    c,
+    std::move(mapping),  // the mapping becomes the state object that is passed to the lambda
+    [this, c](auto &btree, auto &mapping) {
+    return resolve_indirect_cursor(c, btree, *mapping.indirect_cursor
+    ).si_then([&mapping](auto cursor) {
+      mapping.direct_cursor = std::move(cursor);
+    });
+  });
+}
+
 void BtreeLBAManager::register_metrics()
 {
   LOG_PREFIX(BtreeLBAManager::register_metrics);
@@ -777,9 +976,9 @@ BtreeLBAManager::_decref_intermediate(
 
       if (val.refcount == 0) {
        return btree.remove(c, iter
-       ).si_then([key, val](auto) {
+       ).si_then([key, val, c](auto iter) {
          return ref_iertr::make_ready_future<
-           update_mapping_ret_bare_t>(key, val);
+           update_mapping_ret_bare_t>(key, val, iter.get_cursor(c));
        });
       } else {
        return btree.update(c, iter, val
@@ -792,138 +991,54 @@ BtreeLBAManager::_decref_intermediate(
   });
 }
 
-BtreeLBAManager::remap_ret
-BtreeLBAManager::remap_mappings(
-  Transaction &t,
-  LBAMapping orig_mapping,
-  std::vector<remap_entry_t> remaps,
-  std::vector<LogicalChildNodeRef> extents)
-{
-  LOG_PREFIX(BtreeLBAManager::remap_mappings);
-  struct state_t {
-    LBAMapping orig_mapping;
-    std::vector<remap_entry_t> remaps;
-    std::vector<LogicalChildNodeRef> extents;
-    std::vector<alloc_mapping_info_t> alloc_infos;
-    std::vector<LBAMapping> ret;
-  };
-  return seastar::do_with(
-    state_t(std::move(orig_mapping), std::move(remaps), std::move(extents), {}, {}),
-    [this, &t, FNAME](state_t &state)
-  {
-    return update_refcount(
-      t, state.orig_mapping.get_key(), -1, false
-    ).si_then([this, &t, &state, FNAME](auto ret) {
-      // Remapping the shared direct mapping is prohibited,
-      // the refcount of indirect mapping should always be 1.
-      ceph_assert(ret.is_removed_mapping());
-
-      auto orig_laddr = state.orig_mapping.get_key();
-      if (!state.orig_mapping.is_indirect()) {
-       auto &addr = ret.get_removed_mapping().map_value.pladdr;
-       ceph_assert(addr.is_paddr() && !addr.get_paddr().is_zero());
-       return alloc_extents(
-         t,
-         (state.remaps.front().offset + orig_laddr).checked_to_laddr(),
-         std::move(state.extents),
-         EXTENT_DEFAULT_REF_COUNT
-       ).si_then([&state](auto ret) {
-         state.ret = std::move(ret);
-         return remap_iertr::make_ready_future();
-       });
-      }
-
-      extent_len_t orig_len = state.orig_mapping.get_length();
-      auto intermediate_key = state.orig_mapping.get_intermediate_key();
-      ceph_assert(intermediate_key != L_ADDR_NULL);
-      DEBUGT("remap indirect mapping {}", t, state.orig_mapping);
-      for (auto &remap : state.remaps) {
-       DEBUGT("remap 0x{:x}~0x{:x}", t, remap.offset, remap.len);
-       ceph_assert(remap.len != 0);
-       ceph_assert(remap.offset + remap.len <= orig_len);
-       auto remapped_laddr = (orig_laddr + remap.offset)
-           .checked_to_laddr();
-       auto remapped_intermediate_key = (intermediate_key + remap.offset)
-           .checked_to_laddr();
-       state.alloc_infos.emplace_back(
-         alloc_mapping_info_t::create_indirect(
-           remapped_laddr, remap.len, remapped_intermediate_key));
-      }
-
-      return alloc_sparse_mappings(
-       t, state.alloc_infos.front().key, state.alloc_infos,
-       alloc_policy_t::deterministic
-      ).si_then([&t, &state, this](std::list<LBACursorRef> cursors) {
-       return seastar::futurize_invoke([&t, &state, this] {
-         if (state.remaps.size() > 1) {
-           auto base = state.orig_mapping.get_intermediate_base();
-           return update_refcount(
-             t, base, state.remaps.size() - 1, false
-           ).si_then([](update_mapping_ret_bare_t ret) {
-             return ret.take_cursor();
-           });
-         } else {
-           return remap_iertr::make_ready_future<
-             LBACursorRef>(state.orig_mapping.direct_cursor->duplicate());
-         }
-       }).si_then([&state, cursors=std::move(cursors)](auto direct) mutable {
-         for (auto &cursor : cursors) {
-           state.ret.emplace_back(LBAMapping::create_indirect(
-             direct->duplicate(), std::move(cursor)));
-         }
-         return remap_iertr::make_ready_future();
-       });
-      });
-    }).si_then([&state] {
-      assert(state.ret.size() == state.remaps.size());
-#ifndef NDEBUG
-      auto mapping_it = state.ret.begin();
-      auto remap_it = state.remaps.begin();
-      for (;mapping_it != state.ret.end(); mapping_it++, remap_it++) {
-       auto &mapping = *mapping_it;
-       auto &remap = *remap_it;
-       assert(mapping.get_key() == state.orig_mapping.get_key() + remap.offset);
-       assert(mapping.get_length() == remap.len);
-      }
-#endif
-      return remap_iertr::make_ready_future<
-       std::vector<LBAMapping>>(std::move(state.ret));
-    });
-  });
-}
-
 BtreeLBAManager::update_refcount_ret
 BtreeLBAManager::update_refcount(
   Transaction &t,
-  laddr_t addr,
+  std::variant<laddr_t, LBACursor*> addr_or_cursor,
   int delta,
   bool cascade_remove)
 {
+  auto addr = addr_or_cursor.index() == 0
+    ? std::get<0>(addr_or_cursor)
+    : std::get<1>(addr_or_cursor)->key;
   LOG_PREFIX(BtreeLBAManager::update_refcount);
   TRACET("laddr={}, delta={}", t, addr, delta);
-  return _update_mapping(
-    t,
-    addr,
+  auto fut = _update_mapping_iertr::make_ready_future<
+    update_mapping_ret_bare_t>();
+  auto update_func =
     [delta](const lba_map_val_t &in) {
       lba_map_val_t out = in;
       ceph_assert((int)out.refcount + delta >= 0);
       out.refcount += delta;
       return out;
-    },
-    nullptr
-  ).si_then([&t, addr, delta, FNAME, this, cascade_remove](auto res) {
+    };
+  if (addr_or_cursor.index() == 0) {
+    fut = _update_mapping(t, addr, std::move(update_func), nullptr);
+  } else {
+    auto &cursor = std::get<1>(addr_or_cursor);
+    fut = _update_mapping(t, *cursor, std::move(update_func), nullptr);
+  }
+  return fut.si_then([delta, &t, addr, FNAME, this, cascade_remove](auto res) {
     DEBUGT("laddr={}, delta={} done -- {}",
           t, addr, delta,
           res.is_alive_mapping()
             ? res.get_cursor().val
             : res.get_removed_mapping().map_value);
+
     if (res.is_removed_mapping() && cascade_remove &&
        res.get_removed_mapping().map_value.pladdr.is_laddr()) {
       auto &val = res.get_removed_mapping().map_value;
       TRACET("decref intermediate {} -> {}",
             t, addr, val.pladdr.get_laddr());
       return _decref_intermediate(t, val.pladdr.get_laddr(), val.len
-      ).handle_error_interruptible(
+      ).si_then([indirect_res=std::move(res), this](auto res) mutable {
+       return indirect_res.get_removed_mapping().next->refresh(
+       ).si_then([this, res=std::move(res),
+                 ires=std::move(indirect_res)]() mutable {
+         return update_mapping_iertr::make_ready_future<
+           ref_update_result_t>(get_ref_update_result(ires, std::move(res)));
+       });
+      }).handle_error_interruptible(
        update_mapping_iertr::pass_further{},
        crimson::ct_error::assert_all{
          "unexpect ENOENT"
@@ -931,7 +1046,58 @@ BtreeLBAManager::update_refcount(
       );
     }
     return update_mapping_iertr::make_ready_future<
-      update_mapping_ret_bare_t>(std::move(res));
+      ref_update_result_t>(get_ref_update_result(res, std::nullopt));
+  });
+}
+
+BtreeLBAManager::_update_mapping_ret
+BtreeLBAManager::_update_mapping(  // overload: apply `f` to the entry at `cursor`; remove it if refcount becomes 0
+  Transaction &t,
+  LBACursor &cursor,
+  update_func_t &&f,
+  LogicalChildNode* nextent)
+{
+  assert(cursor.is_viewable());
+  auto c = get_context(t);
+  return with_btree<LBABtree>(
+    cache,
+    c,
+    [c, f=std::move(f), &cursor, nextent](auto &btree) {  // `cursor` is captured by reference
+    auto iter = btree.make_partial_iter(c, cursor);
+    auto ret = f(iter.get_val());  // new value computed from the current one
+    if (ret.refcount == 0) {
+      return btree.remove(
+       c,
+       iter
+      ).si_then([ret, c, laddr=cursor.key](auto iter) {
+       if (iter.is_end()) {  // nothing follows the removed entry: null key and null cursor
+         return update_mapping_ret_bare_t{
+           L_ADDR_NULL, std::move(ret), nullptr};
+       } else {  // otherwise report the removed key plus a cursor from the iterator remove() returned
+         return update_mapping_ret_bare_t{
+           laddr, std::move(ret), iter.get_cursor(c)};
+       }
+      });
+    } else {
+      return btree.update(
+       c,
+       iter,
+       ret
+      ).si_then([c, nextent](auto iter) {
+       // child-ptr may already be correct,
+       // see LBAManager::update_mappings()
+       if (nextent && !nextent->has_parent_tracker()) {
+         iter.get_leaf_node()->update_child_ptr(
+           iter.get_leaf_pos(), nextent);
+       }
+       assert(!nextent ||
+         (nextent->has_parent_tracker()
+           && nextent->peek_parent_node().get() == iter.get_leaf_node().get()));
+       LBACursorRef cursor = iter.get_cursor(c);
+       assert(cursor->val);
+       return update_mapping_ret_bare_t{std::move(cursor)};
+      });
+    }
   });
 }
 
@@ -963,8 +1129,8 @@ BtreeLBAManager::_update_mapping(
          return btree.remove(
            c,
            iter
-         ).si_then([addr, ret](auto) {
-           return update_mapping_ret_bare_t(addr, ret);
+         ).si_then([addr, ret, c](auto iter) {
+           return update_mapping_ret_bare_t(addr, ret, iter.get_cursor(c));
          });
        } else {
          return btree.update(
@@ -989,6 +1155,141 @@ BtreeLBAManager::_update_mapping(
     });
 }
 
+BtreeLBAManager::_get_cursor_ret
+BtreeLBAManager::get_containing_cursor(  // cursor of the mapping whose [key, key + len) range contains laddr
+  op_context_t c,
+  LBABtree &btree,
+  laddr_t laddr)
+{
+  LOG_PREFIX(BtreeLBAManager::get_containing_cursor);
+  TRACET("{}", c.trans, laddr);
+  return btree.upper_bound_right(c, laddr
+  ).si_then([c, laddr, FNAME](LBABtree::iterator iter)
+           -> _get_cursor_ret {
+    if (iter.is_end() ||  // enoent when no entry was found or laddr is outside [key, key + len)
+       iter.get_key() > laddr ||
+       iter.get_key() + iter.get_val().len <=laddr) {
+      ERRORT("laddr={} doesn't exist", c.trans, laddr);
+      return crimson::ct_error::enoent::make();
+    }
+    TRACET("{} got {}, {}",
+          c.trans, laddr, iter.get_key(), iter.get_val());
+    return get_mapping_iertr::make_ready_future<
+      LBACursorRef>(iter.get_cursor(c));
+  });
+}
+
+BtreeLBAManager::remap_ret
+BtreeLBAManager::remap_mappings(  // replace `mapping` with one new mapping per entry of `remaps`
+  Transaction &t,
+  LBAMapping mapping,
+  std::vector<remap_entry_t> remaps)
+{
+  LOG_PREFIX(BtreeLBAManager::remap_mappings);
+  DEBUGT("{}", t, mapping);
+  assert(mapping.is_viewable());
+  assert(mapping.is_indirect() == mapping.is_complete_indirect());  // an indirect mapping must be complete
+  auto c = get_context(t);
+  return with_btree<LBABtree>(
+    cache,
+    c,
+    [mapping=std::move(mapping), c, this,
+    remaps=std::move(remaps)](auto &btree) mutable {
+    auto &cursor = mapping.get_effective_cursor();  // reference taken before `mapping` is moved into do_with
+    return seastar::do_with(
+      std::move(remaps),
+      std::move(mapping),
+      btree.make_partial_iter(c, cursor),
+      std::vector<LBAMapping>(),
+      [c, &btree, this, &cursor](auto &remaps, auto &mapping, auto &iter, auto &ret) {
+      auto val = iter.get_val();
+      assert(val.refcount == EXTENT_DEFAULT_REF_COUNT);
+      assert(mapping.is_indirect() ||
+       (val.pladdr.is_paddr() &&
+        val.pladdr.get_paddr().is_absolute()));
+      return update_refcount(c.trans, &cursor, -1, false  // drop the original entry (no cascade removal)
+      ).si_then([&mapping, &btree, &iter, c, &ret,
+               &remaps, pladdr=val.pladdr](auto r) {
+       assert(r.result.refcount == 0);
+       auto &cursor = r.result.mapping.get_effective_cursor();
+       iter = btree.make_partial_iter(c, cursor);  // re-position the iterator from the update result
+       return trans_intr::do_for_each(
+         remaps,
+         [&mapping, &btree, &iter, c, &ret, pladdr](auto &remap) {
+         assert(remap.offset + remap.len <= mapping.get_length());
+         assert((bool)remap.extent == !mapping.is_indirect());  // extents accompany direct remaps only
+         lba_map_val_t val;
+         auto old_key = mapping.get_key();
+         auto new_key = (old_key + remap.offset).checked_to_laddr();
+         val.len = remap.len;
+         if (pladdr.is_laddr()) {  // the original target is shifted by remap.offset in both cases
+           auto laddr = pladdr.get_laddr();
+           val.pladdr = (laddr + remap.offset).checked_to_laddr();
+         } else {
+           auto paddr = pladdr.get_paddr();
+           val.pladdr = paddr + remap.offset;
+         }
+         val.refcount = EXTENT_DEFAULT_REF_COUNT;
+         val.checksum = 0; // the checksum should be updated later when
+                           // committing the transaction
+         return btree.insert(c, iter, new_key, std::move(val)
+         ).si_then([c, &remap, &mapping, &ret, &iter](auto p) {
+           auto &[it, inserted] = p;
+           ceph_assert(inserted);
+           auto &leaf_node = *it.get_leaf_node();
+           if (mapping.is_indirect()) {
+             leaf_node.insert_child_ptr(
+               it.get_leaf_pos(),
+               get_reserved_ptr<LBALeafNode, laddr_t>(),
+               leaf_node.get_size() - 1 /*the size before the insert*/);
+             ret.push_back(
+               LBAMapping::create_indirect(nullptr, it.get_cursor(c)));  // direct cursor is filled in below
+           } else {
+             leaf_node.insert_child_ptr(
+               it.get_leaf_pos(),
+               remap.extent,
+               leaf_node.get_size() - 1 /*the size before the insert*/);
+             ret.push_back(
+               LBAMapping::create_direct(it.get_cursor(c)));
+           }
+           return it.next(c).si_then([&iter](auto it) {
+             iter = std::move(it);  // the next remap is inserted after the one just added
+           });
+         });
+       });
+      }).si_then([&mapping, &ret] {
+       if (mapping.is_indirect()) {
+         auto &cursor = mapping.direct_cursor;
+         return cursor->refresh(
+         ).si_then([&ret, &mapping] {
+           for (auto &m : ret) {
+             m.direct_cursor = mapping.direct_cursor->duplicate();  // every new indirect mapping gets its own copy
+           }
+         });
+       }
+       return base_iertr::now();
+      }).si_then([this, c, &mapping, &remaps] {
+       if (remaps.size() > 1 && mapping.is_indirect()) {
+         auto &cursor = mapping.direct_cursor;
+         assert(cursor->is_viewable());
+         return update_refcount(  // NOTE(review): delta is 1 for any size > 1; the removed code used remaps.size() - 1 -- confirm
+           c.trans, cursor.get(), 1, false).discard_result();
+       }
+       return update_refcount_iertr::now();
+      }).si_then([&ret] {
+       return trans_intr::parallel_for_each(
+         ret,
+         [](auto &remapped_mapping) {
+         return remapped_mapping.refresh(  // each result is replaced by its refreshed version
+         ).si_then([&remapped_mapping](auto mapping) {
+           remapped_mapping = std::move(mapping);
+         });
+       });
+      }).si_then([&ret] {
+       return std::move(ret);
+      });
+    });
+  });
 }
 
 }
index 1e4848f683edbd05281fe643cc986a29e6991bce..d0680b0477bfd555f768247881385ba2e120967b 100644 (file)
@@ -68,7 +68,18 @@ public:
 
   get_mapping_ret get_mapping(
     Transaction &t,
-    laddr_t offset) final;
+    laddr_t offset,
+    bool search_containing = false) final;
+
+  get_mapping_ret get_mapping(
+    Transaction &t,
+    LogicalChildNode &extent) final;
+
+  alloc_extent_ret reserve_region(
+    Transaction &t,
+    LBAMapping pos,
+    laddr_t laddr,
+    extent_len_t len) final;
 
   alloc_extent_ret reserve_region(
     Transaction &t,
@@ -122,6 +133,11 @@ public:
        crimson::ct_error::assert_all{"unexpect enoent"});
   }
 
+  alloc_extents_ret alloc_extents(
+    Transaction &t,
+    LBAMapping pos,
+    std::vector<LogicalChildNodeRef> ext) final;
+
   alloc_extent_ret alloc_extent(
     Transaction &t,
     laddr_t hint,
@@ -209,17 +225,43 @@ public:
   ref_ret remove_mapping(
     Transaction &t,
     laddr_t addr) final {
-    return update_refcount(t, addr, -1, true
-    ).si_then([](auto res) {
-      return ref_update_result_t(res);
+    return update_refcount(t, addr, -1, true);
+  }
+
+  // Drops one reference through "mapping"'s effective cursor, calling
+  // update_refcount with delta -1 and cascade_remove=true. "mapping" must
+  // be viewable; it is parked in do_with so the cursor reference handed
+  // to update_refcount stays valid until the returned future resolves.
+  ref_ret remove_mapping(Transaction &t, LBAMapping mapping) final {
+    assert(mapping.is_viewable());
+    return seastar::do_with(
+      std::move(mapping),
+      [this, &t](auto &held) {
+        return update_refcount(t, &held.get_effective_cursor(), -1, true);
+      });
+  }
+
+  // Adds one reference to the mapping at laddr "addr"
+  // (update_refcount with delta 1, cascade_remove=false).
+  ref_ret incref_extent(Transaction &t, laddr_t addr) final {
+    constexpr int delta = 1;
+    constexpr bool cascade_remove = false;
+    return update_refcount(t, addr, delta, cascade_remove);
+  }
+
+  // Adds one reference through "mapping"'s effective cursor
+  // (update_refcount with delta 1, cascade_remove=false). "mapping" must
+  // be viewable; do_with keeps it alive while the update is in flight.
+  ref_ret incref_extent(Transaction &t, LBAMapping mapping) final {
+    assert(mapping.is_viewable());
+    return seastar::do_with(
+      std::move(mapping),
+      [this, &t](auto &held) {
+      return update_refcount(t, &held.get_effective_cursor(), 1, false);
     });
   }
 
   remap_ret remap_mappings(
     Transaction &t,
-    LBAMapping orig_mapping,
-    std::vector<remap_entry_t> remaps,
-    std::vector<LogicalChildNodeRef> extents) final;
+    LBAMapping mapping,
+    std::vector<remap_entry_t> remaps) final;
 
   /**
    * init_cached_extent
@@ -249,7 +291,7 @@ public:
 
   update_mapping_ret update_mapping(
     Transaction& t,
-    laddr_t laddr,
+    LBAMapping mapping,
     extent_len_t prev_len,
     paddr_t prev_addr,
     LogicalChildNode&) final;
@@ -265,6 +307,10 @@ public:
     laddr_t laddr,
     extent_len_t len) final;
 
+  complete_lba_mapping_ret complete_indirect_lba_mapping(
+    Transaction &t,
+    LBAMapping mapping) final;
+
 private:
   Cache &cache;
 
@@ -336,12 +382,14 @@ private:
     update_mapping_ret_bare_t(LBACursorRef cursor)
        : ret(std::move(cursor)) {}
 
-    update_mapping_ret_bare_t(laddr_t laddr, lba_map_val_t value)
-       : ret(removed_mapping_t{laddr, value}) {}
+    update_mapping_ret_bare_t(
+      laddr_t laddr, lba_map_val_t value, LBACursorRef &&cursor)
+       : ret(removed_mapping_t{laddr, value, std::move(cursor)}) {}
 
     struct removed_mapping_t {
       laddr_t laddr;
       lba_map_val_t map_value;
+      LBACursorRef next;
     };
     std::variant<removed_mapping_t, LBACursorRef> ret;
 
@@ -358,6 +406,11 @@ private:
       }
     }
 
+    // Mutable access to the removed-mapping alternative of `ret`;
+    // callers must have checked is_removed_mapping().
+    removed_mapping_t &get_removed_mapping() {
+      assert(is_removed_mapping());
+      return std::get<removed_mapping_t>(ret);
+    }
+
     const removed_mapping_t& get_removed_mapping() const {
       assert(is_removed_mapping());
       return std::get<0>(ret);
@@ -372,29 +425,52 @@ private:
       assert(is_alive_mapping());
       return std::move(std::get<1>(ret));
     }
+  };
 
-    explicit operator ref_update_result_t() const {
-      if (is_removed_mapping()) {
-       auto v = get_removed_mapping();
-       auto &val = v.map_value;
-       ceph_assert(val.pladdr.is_paddr());
-       return {v.laddr, val.refcount, val.pladdr, val.len};
-      } else {
-       assert(is_alive_mapping());
-       auto &c = get_cursor();
-       assert(c.val);
-       ceph_assert(!c.is_indirect());
-       return {c.get_laddr(), c.val->refcount, c.val->pladdr, c.val->len};
-      }
+  // Converts the internal update result into the public
+  // mapping_update_result_t. In both branches the cursor held by
+  // "result" is moved out into the returned LBAMapping, so "result"
+  // must not be converted a second time.
+  mapping_update_result_t get_mapping_update_result(
+    update_mapping_ret_bare_t &result) {
+    if (result.is_removed_mapping()) {
+      // removed entry: report its key/value, and wrap the `next` cursor
+      // as the returned mapping
+      auto &v = result.get_removed_mapping();
+      auto &val = v.map_value;
+      return {v.laddr,
+             val.refcount,
+             val.pladdr,
+             val.len,
+             // a non-end, indirect `next` becomes an indirect mapping with
+             // a null first cursor; anything else is wrapped as direct
+             (!v.next->is_end() && v.next->is_indirect())
+               ? LBAMapping::create_indirect(nullptr, std::move(v.next))
+               : LBAMapping::create_direct(std::move(v.next))};
+    } else {
+      assert(result.is_alive_mapping());
+      auto &c = result.get_cursor();
+      assert(c.val);
+      ceph_assert(!c.is_indirect());
+      // braced-list initializers are evaluated left to right, so every
+      // read through `c` happens before take_cursor() moves the cursor
+      // ref out of "result"
+      return {c.get_laddr(), c.val->refcount, 
+       c.val->pladdr, c.val->len,
+       LBAMapping::create_direct(result.take_cursor())};
     }
-  };
+  }
+
+  // Assembles the ref_update_result_t from the raw update result(s).
+  // "direct_result", when present, is converted the same way and stored
+  // as ref_update_result_t::direct_result; otherwise that field is empty.
+  ref_update_result_t get_ref_update_result(
+    update_mapping_ret_bare_t &result,
+    std::optional<update_mapping_ret_bare_t> direct_result) {
+    auto primary = get_mapping_update_result(result);
+    if (!direct_result.has_value()) {
+      return ref_update_result_t{std::move(primary), std::nullopt};
+    }
+    // only removing indirect mapping can have direct_result
+    assert(result.is_removed_mapping());
+    assert(result.get_removed_mapping().map_value.pladdr.is_laddr());
+    return ref_update_result_t{
+      std::move(primary),
+      get_mapping_update_result(*direct_result)};
+  }
 
   using update_refcount_iertr = ref_iertr;
   using update_refcount_ret = update_refcount_iertr::future<
-    update_mapping_ret_bare_t>;
+    ref_update_result_t>;
   update_refcount_ret update_refcount(
     Transaction &t,
-    laddr_t addr,
+    std::variant<laddr_t, LBACursor*> addr_or_cursor,
     int delta,
     bool cascade_remove);
 
@@ -414,6 +490,11 @@ private:
     laddr_t addr,
     update_func_t &&f,
     LogicalChildNode*);
+  _update_mapping_ret _update_mapping(
+    Transaction &t,
+    LBACursor &cursor,
+    update_func_t &&f,
+    LogicalChildNode*);
 
   struct insert_position_t {
     laddr_t laddr;
@@ -487,10 +568,7 @@ private:
     laddr_t addr,
     int delta) {
     ceph_assert(delta > 0);
-    return update_refcount(t, addr, delta, false
-    ).si_then([](auto res) {
-      return ref_update_result_t(res);
-    });
+    return update_refcount(t, addr, delta, false);
   }
 
   using _get_cursor_ret = get_mapping_iertr::future<LBACursorRef>;
@@ -499,6 +577,11 @@ private:
     LBABtree& btree,
     laddr_t offset);
 
+  _get_cursor_ret get_containing_cursor(
+    op_context_t c,
+    LBABtree &btree,
+    laddr_t laddr);
+
   using _get_cursors_ret = get_mappings_iertr::future<std::list<LBACursorRef>>;
   _get_cursors_ret get_cursors(
     op_context_t c,
@@ -512,6 +595,18 @@ private:
     LBABtree& btree,
     const LBACursor& indirect_cursor);
 
+  // Convenience overload: obtains the LBA btree through with_btree and
+  // forwards to the btree-taking resolve_indirect_cursor overload.
+  // "indirect_cursor" must be indirect and is captured by reference.
+  resolve_indirect_cursor_ret resolve_indirect_cursor(
+    op_context_t c,
+    const LBACursor& indirect_cursor) {
+    assert(indirect_cursor.is_indirect());
+    auto resolve = [c, &indirect_cursor, this](auto &tree) {
+      return resolve_indirect_cursor(c, tree, indirect_cursor);
+    };
+    return with_btree<LBABtree>(cache, c, std::move(resolve));
+  }
+
   using _decref_intermediate_ret = ref_iertr::future<
     update_mapping_ret_bare_t>;
   _decref_intermediate_ret _decref_intermediate(
index b01e89633d1ba7b2c88f179e238bb1194110ae9b..7b75d5790e1daf33faa5dfa505bc3cd5b7915745 100644 (file)
@@ -62,7 +62,17 @@ public:
   using get_mapping_ret = get_mapping_iertr::future<LBAMapping>;
   virtual get_mapping_ret get_mapping(
     Transaction &t,
-    laddr_t offset) = 0;
+    laddr_t offset,
+    bool search_containing = false) = 0;
+
+  /*
+   * Fetches the mapping corresponding to the "extent"
+   *
+   */
+  virtual get_mapping_ret get_mapping(
+    Transaction &t,
+    LogicalChildNode &extent) = 0;
+
 
   /**
    * Allocates a new mapping referenced by LBARef
@@ -86,6 +96,15 @@ public:
     laddr_t hint,
     std::vector<LogicalChildNodeRef> extents,
     extent_ref_count_t refcount) = 0;
+  /*
+   * Allocate extents at "pos"
+   *
+   * Returns the inserted lba mappings
+   */
+  virtual alloc_extents_ret alloc_extents(
+    Transaction &t,
+    LBAMapping pos,
+    std::vector<LogicalChildNodeRef> ext) = 0;
 
   virtual alloc_extent_ret clone_mapping(
     Transaction &t,
@@ -99,11 +118,32 @@ public:
     laddr_t hint,
     extent_len_t len) = 0;
 
-  struct ref_update_result_t {
-    laddr_t direct_key;
+  /*
+   * Inserts a zero mapping at the position "pos" with
+   * the key "hint" and length "len"
+   */
+  virtual alloc_extent_ret reserve_region(
+    Transaction &t,
+    LBAMapping pos,
+    laddr_t hint,
+    extent_len_t len) = 0;
+
+  // Describes a single lba entry after its refcount was updated
+  // (or the entry was removed).
+  struct mapping_update_result_t {
+    laddr_t key;
     extent_ref_count_t refcount = 0;
     pladdr_t addr;
     extent_len_t length = 0;
+    LBAMapping mapping; // the mapping pointing to the updated lba entry if
+                       // refcount is non-zero; the next lba entry otherwise;
+                       // null mapping if the mapping is the last one and
+                       // is removed
+    // True when refcount is zero and "addr" is a non-zero paddr
+    bool need_to_remove_extent() const {
+      return refcount == 0 && addr.is_paddr() && !addr.get_paddr().is_zero();
+    }
+  };
+  // Result of a refcount update / removal of one mapping.
+  struct ref_update_result_t {
+    // outcome for the mapping the operation was issued on
+    mapping_update_result_t result;
+    // outcome for the corresponding direct mapping; only set when the
+    // mapping operated on was indirect and got removed
+    std::optional<mapping_update_result_t> direct_result;
   };
   using ref_iertr = base_iertr::extend<
     crimson::ct_error::enoent>;
@@ -112,19 +152,47 @@ public:
   /**
    * Removes a mapping and deal with indirection
    *
-   * @return returns resulting refcount
+   * @return returns the information about the removed
+   * mappings including the corresponding direct mapping
+   * if the mapping of laddr is indirect.
+   */
+  virtual ref_ret remove_mapping(
+    Transaction &t,
+    laddr_t addr) = 0;
+
+  /*
+   * Removes the mapping and deals with indirection
+   *
+   * @return returns the information about the removed
+   * mappings, including the corresponding direct mapping
+   * if "mapping" is indirect.
    */
   virtual ref_ret remove_mapping(
+    Transaction &t,
+    LBAMapping mapping) = 0;
+
+  /**
+   * Increments ref count on extent
+   *
+   * @return returns the update result; its result.refcount is the resulting refcount
+   */
+  virtual ref_ret incref_extent(
     Transaction &t,
     laddr_t addr) = 0;
+  virtual ref_ret incref_extent(
+    Transaction &t,
+    LBAMapping mapping) = 0;
 
   struct remap_entry_t {
     extent_len_t offset;
     extent_len_t len;
-    remap_entry_t(extent_len_t _offset, extent_len_t _len) {
-      offset = _offset;
-      len = _len;
-    }
+    // Non-owning pointer to the extent for this remapped piece; stays
+    // null unless the caller supplies one.
+    // NOTE(review): appears to be consumed only when the remapped mapping
+    // is direct -- confirm against the remap_mappings implementation.
+    LogicalChildNode* extent = nullptr;
+    remap_entry_t(
+      extent_len_t _offset,
+      extent_len_t _len,
+      LogicalChildNode *extent = nullptr)
+      : offset(_offset), len(_len), extent(extent)
+    {}
   };
   using remap_iertr = ref_iertr;
   using remap_ret = remap_iertr::future<std::vector<LBAMapping>>;
@@ -138,9 +206,7 @@ public:
   virtual remap_ret remap_mappings(
     Transaction &t,
     LBAMapping orig_mapping,
-    std::vector<remap_entry_t> remaps,
-    std::vector<LogicalChildNodeRef> extents  // Required if and only
-                                                // if pin isn't indirect
+    std::vector<remap_entry_t> remaps
     ) = 0;
 
   /**
@@ -195,7 +261,7 @@ public:
   using update_mapping_ret = base_iertr::future<extent_ref_count_t>;
   virtual update_mapping_ret update_mapping(
     Transaction& t,
-    laddr_t laddr,
+    LBAMapping mapping,
     extent_len_t prev_len,
     paddr_t prev_addr,
     LogicalChildNode& nextent) = 0;
@@ -230,6 +296,18 @@ public:
     laddr_t laddr,
     extent_len_t len) = 0;
 
+  using complete_lba_mapping_iertr = get_mappings_iertr;
+  using complete_lba_mapping_ret =
+    complete_lba_mapping_iertr::future<LBAMapping>;
+  /*
+   * Completes an incomplete indirect mapping
+   *
+   * No effect if the indirect mapping is already complete
+   */
+  virtual complete_lba_mapping_ret complete_indirect_lba_mapping(
+    Transaction &t,
+    LBAMapping mapping) = 0;
+
   virtual ~LBAManager() {}
 };
 using LBAManagerRef = std::unique_ptr<LBAManager>;
index f0dfb06ff78fb892c0d507d1ee065792cbbf0376..b57cf9ae13804642bd3707b8b8099ffbdb32badb 100644 (file)
@@ -30,9 +30,11 @@ public:
   }
 
   static LBAMapping create_direct(LBACursorRef direct) {
+    assert(!direct->is_indirect());
     return LBAMapping(std::move(direct), nullptr);
   }
 
+  LBAMapping() = delete;
   LBAMapping(const LBAMapping &) = delete;
   LBAMapping(LBAMapping &&) = default;
   LBAMapping &operator=(const LBAMapping &) = delete;
@@ -150,7 +152,7 @@ private:
   }
 
   bool is_null() const {
-    return !indirect_cursor && !direct_cursor;
+    return !direct_cursor && !indirect_cursor;
   }
 
   bool is_complete_indirect() const {
index bdfff46991889e4d1f3f833d7f3b603e2757941f..782e0b764a1535de540c6ad165431bf985bd5050 100644 (file)
@@ -1737,7 +1737,7 @@ ObjectDataHandler::clone_ret ObjectDataHandler::clone_extents(
            if (pin.get_val().is_zero()) {
              return ctx.tm.reserve_region(ctx.t, addr, pin.get_length());
            } else {
-             return ctx.tm.clone_pin(ctx.t, addr, pin);
+             return ctx.tm.clone_pin(ctx.t, addr, pin.duplicate());
            }
          }).si_then(
            [&pin, &last_pos, offset](auto) {
index 53acf6c937ab647116faa153d0422e2f9f33aeb9..0ffa865a0fcfdb8f29c2f64c728382d7d26b6ba6 100644 (file)
@@ -207,12 +207,15 @@ TransactionManager::ref_ret TransactionManager::remove(
   DEBUGT("{} ...", t, *ref);
   return lba_manager->remove_mapping(t, ref->get_laddr()
   ).si_then([this, FNAME, &t, ref](auto result) {
-    if (result.refcount == 0) {
+    assert(!result.direct_result);
+    auto &primary_result = result.result;
+    if (primary_result.refcount == 0) {
       cache->retire_extent(t, ref);
     }
     DEBUGT("removed {}~0x{:x} refcount={} -- {}",
-           t, result.addr, result.length, result.refcount, *ref);
-    return result.refcount;
+           t, primary_result.addr, primary_result.length,
+           primary_result.refcount, *ref);
+    return primary_result.refcount;
   });
 }
 
@@ -225,17 +228,72 @@ TransactionManager::ref_ret TransactionManager::remove(
   return lba_manager->remove_mapping(t, offset
   ).si_then([this, FNAME, offset, &t](auto result) -> ref_ret {
     auto fut = ref_iertr::now();
-    if (result.refcount == 0) {
-      if (result.addr.is_paddr() &&
-          !result.addr.get_paddr().is_zero()) {
-        fut = cache->retire_extent_addr(
-          t, result.addr.get_paddr(), result.length);
-      }
+    auto &primary_result = result.result;
+    assert(primary_result.refcount == 0);
+    if (primary_result.need_to_remove_extent()) {
+      ceph_assert(!result.direct_result);
+      fut = cache->retire_extent_addr(
+        t, primary_result.addr.get_paddr(), primary_result.length);
+    } else if (auto &direct_result = result.direct_result;
+               direct_result.has_value() &&
+               direct_result->need_to_remove_extent()) {
+      fut = cache->retire_extent_addr(
+        t, direct_result->addr.get_paddr(), direct_result->length);
     }
     return fut.si_then([result=std::move(result), offset, &t, FNAME] {
       DEBUGT("removed {}~0x{:x} refcount={} -- offset={}",
-             t, result.addr, result.length, result.refcount, offset);
-      return result.refcount;
+             t, result.result.addr, result.result.length,
+             result.result.refcount, offset);
+      return result.result.refcount;
+    });
+  });
+}
+
+// Removes the lba entry referred to by "mapping" and retires the physical
+// extent that needs removing as a result: the one behind the removed
+// entry itself or, when the LBA manager also reports a direct_result,
+// the one behind that direct mapping.
+// Resolves to the `mapping` field of the primary removal result.
+TransactionManager::ref_iertr::future<LBAMapping> TransactionManager::remove(
+  Transaction &t,
+  LBAMapping mapping)
+{
+  LOG_PREFIX(TransactionManager::remove);
+  return mapping.refresh().si_then([&t, this, FNAME](auto mapping) {
+    // For a direct mapping with a real location, look for the linked
+    // extent first: alternative 1 of the returned variant is a future
+    // for it. In every other case the default-constructed ref is used.
+    auto fut = base_iertr::make_ready_future<LogicalChildNodeRef>();
+    if (!mapping.is_indirect() && mapping.get_val().is_real_location()) {
+      auto ret = get_extent_if_linked<LogicalChildNode>(t, mapping.duplicate());
+      if (ret.index() == 1) {
+        fut = std::move(std::get<1>(ret));
+      }
+    }
+    return fut.si_then([mapping=std::move(mapping),
+                        FNAME, this, &t](auto extent) mutable {
+      // read the key (used only for logging) before "mapping" is moved
+      // into remove_mapping
+      auto offset = mapping.get_key();
+      return lba_manager->remove_mapping(t, std::move(mapping)
+      ).si_then([FNAME, this, extent, &t, offset](auto result) {
+        auto fut = ref_iertr::now();
+        auto &primary_result = result.result;
+        assert(primary_result.refcount == 0);
+        if (primary_result.need_to_remove_extent()) {
+          // the removed entry itself referenced a physical extent: retire
+          // it through the extent ref when we have one, by address otherwise
+          ceph_assert(!result.direct_result);
+          if (extent) {
+            cache->retire_extent(t, extent);
+          } else {
+            fut = cache->retire_extent_addr(
+              t, primary_result.addr.get_paddr(), primary_result.length);
+          }
+        } else if (auto &direct_result = result.direct_result;
+                   direct_result.has_value() &&
+                   direct_result->need_to_remove_extent()) {
+          // the direct mapping reported alongside dropped to refcount 0:
+          // retire its extent by address
+          ceph_assert(!extent);
+          fut = cache->retire_extent_addr(
+            t, direct_result->addr.get_paddr(), direct_result->length);
+        } else {
+          // nothing to retire
+          ceph_assert(!extent);
+        }
+        return fut.si_then([result=std::move(result), &t, FNAME, offset]() mutable {
+          DEBUGT("removed {}~0x{:x} refcount={} -- offset={}",
+                 t, result.result.addr, result.result.length,
+                 result.result.refcount, offset);
+          return std::move(result.result.mapping);
+        });
+      });
     });
   });
 }
@@ -522,13 +580,19 @@ TransactionManager::rewrite_logical_extent(
      * extents since we're going to do it again once we either do the ool write
      * or allocate a relative inline addr.  TODO: refactor AsyncCleaner to
      * avoid this complication. */
-    return lba_manager->update_mapping(
-      t,
-      extent->get_laddr(),
-      extent->get_length(),
-      extent->get_paddr(),
-      *nextent
-    ).discard_result();
+    return lba_manager->get_mapping(t, *extent
+    ).si_then([this, &t, extent, nextent](auto mapping) {
+      return lba_manager->update_mapping(
+        t,
+        std::move(mapping),
+        extent->get_length(),
+        extent->get_paddr(),
+        *nextent
+      ).discard_result();
+    }).handle_error_interruptible(
+      rewrite_extent_iertr::pass_further{},
+      crimson::ct_error::assert_all{"unexpected enoent"}
+    );
   } else {
     assert(get_extent_category(extent->get_type()) == data_category_t::DATA);
     auto length = extent->get_length();
@@ -569,15 +633,22 @@ TransactionManager::rewrite_logical_extent(
           auto fut = base_iertr::now();
           if (first_extent) {
             assert(off == 0);
-            fut = lba_manager->update_mapping(
-              t,
-              extent->get_laddr(),
-              extent->get_length(),
-              extent->get_paddr(),
-              *nextent
-            ).si_then([&refcount](auto c) {
-              refcount = c;
-            });
+            fut = lba_manager->get_mapping(t, *extent
+            ).si_then([this, &t, extent, nextent,
+                      &refcount](auto mapping) {
+              return lba_manager->update_mapping(
+                t,
+                std::move(mapping),
+                extent->get_length(),
+                extent->get_paddr(),
+                *nextent
+              ).si_then([&refcount](auto c) {
+                refcount = c;
+              });
+            }).handle_error_interruptible(
+              rewrite_extent_iertr::pass_further{},
+              crimson::ct_error::assert_all{"unexpected enoent"}
+            );
           } else {
             ceph_assert(refcount != 0);
             fut = lba_manager->alloc_extent(
index 6a273e7c49c2bb8e18ce4e514e5ea7e484ac13ff..00dadef002382800ca12aaf7adf17f4cb3c4ec83 100644 (file)
@@ -108,7 +108,24 @@ public:
     laddr_t offset) {
     LOG_PREFIX(TransactionManager::get_pin);
     SUBDEBUGT(seastore_tm, "{} ...", t, offset);
-    return lba_manager->get_mapping(t, offset
+    return lba_manager->get_mapping(t, offset, false
+    ).si_then([FNAME, &t](LBAMapping pin) {
+      SUBDEBUGT(seastore_tm, "got {}", t, pin);
+      return pin;
+    });
+  }
+
+  /**
+   * get_containing_pin
+   *
+   * Get the logical pin containing laddr
+   */
+  get_pin_ret get_containing_pin(
+    Transaction &t,
+    laddr_t laddr) {
+    LOG_PREFIX(TransactionManager::get_containing_pin);
+    SUBDEBUGT(seastore_tm, "{} ...", t, laddr);
+    return lba_manager->get_mapping(t, laddr, true
     ).si_then([FNAME, &t](LBAMapping pin) {
       SUBDEBUGT(seastore_tm, "got {}", t, pin);
       return pin;
@@ -349,6 +366,10 @@ public:
     Transaction &t,
     laddr_t offset);
 
+  ref_iertr::future<LBAMapping> remove(
+    Transaction &t,
+    LBAMapping mapping);
+
   /// remove refcount for list of offset
   using refs_ret = ref_iertr::future<std::vector<unsigned>>;
   refs_ret remove(
@@ -410,33 +431,59 @@ public:
     Transaction &t,
     laddr_t laddr_hint,
     extent_len_t len,
+    std::optional<LBAMapping> pos = std::nullopt,
     placement_hint_t placement_hint = placement_hint_t::HOT) {
     static_assert(is_data_type(T::TYPE));
     LOG_PREFIX(TransactionManager::alloc_data_extents);
     SUBDEBUGT(seastore_tm, "{} hint {}~0x{:x} phint={} ...",
               t, T::TYPE, laddr_hint, len, placement_hint);
-    auto exts = cache->alloc_new_data_extents<T>(
-      t,
-      len,
-      placement_hint,
-      INIT_GENERATION);
-    // user must initialize the logical extent themselves
-    assert(is_user_transaction(t.get_src()));
-    for (auto& ext : exts) {
-      ext->set_seen_by_users();
-    }
-    return lba_manager->alloc_extents(
-      t,
-      laddr_hint,
-      std::vector<LogicalChildNodeRef>(
-       exts.begin(), exts.end()),
-      EXTENT_DEFAULT_REF_COUNT
-    ).si_then([exts=std::move(exts), &t, FNAME](auto &&) mutable {
-      for (auto &ext : exts) {
-       SUBDEBUGT(seastore_tm, "allocated {}", t, *ext);
+    return seastar::do_with(
+      cache->alloc_new_data_extents<T>(
+       t,
+       len,
+       placement_hint,
+       INIT_GENERATION),
+      [pos=std::move(pos), this, &t,
+      FNAME, laddr_hint](auto &exts) mutable {
+      // user must initialize the logical extent themselves
+      assert(is_user_transaction(t.get_src()));
+      for (auto& ext : exts) {
+       ext->set_seen_by_users();
+      }
+      if (pos) {
+       // laddr_hint is determined
+       auto off = laddr_hint;
+       for (auto &extent : exts) {
+         extent->set_laddr(off);
+         off = (off + extent->get_length()).checked_to_laddr();
+       }
+      }
+      auto fut = alloc_extents_iertr::make_ready_future<
+       std::vector<LBAMapping>>();
+      if (pos) {
+       fut = pos->refresh(
+       ).si_then([&t, &exts, this](auto pos) {
+         return lba_manager->alloc_extents(
+           t,
+           std::move(pos),
+           std::vector<LogicalChildNodeRef>(
+             exts.begin(), exts.end()));
+       });
+      } else {
+       fut = lba_manager->alloc_extents(
+         t,
+         laddr_hint,
+         std::vector<LogicalChildNodeRef>(
+           exts.begin(), exts.end()),
+         EXTENT_DEFAULT_REF_COUNT);
       }
-      return alloc_extent_iertr::make_ready_future<
-       std::vector<TCachedExtentRef<T>>>(std::move(exts));
+      return fut.si_then([&exts, &t, FNAME](auto &&) mutable {
+       for (auto &ext : exts) {
+         SUBDEBUGT(seastore_tm, "allocated {}", t, *ext);
+       }
+       return alloc_extent_iertr::make_ready_future<
+         std::vector<TCachedExtentRef<T>>>(std::move(exts));
+      });
     });
   }
 
@@ -485,6 +532,7 @@ public:
     // must be user-oriented required by (the potential) maybe_init
     assert(is_user_transaction(t.get_src()));
 
+    LOG_PREFIX(TransactionManager::remap_pin);
 #ifndef NDEBUG
     std::sort(remaps.begin(), remaps.end(),
       [](remap_entry_t x, remap_entry_t y) {
@@ -512,23 +560,31 @@ public:
 #endif
 
     return seastar::do_with(
-      std::vector<LogicalChildNodeRef>(),
       std::move(pin),
       std::move(remaps),
-      [&t, this](auto &extents, auto &pin, auto &remaps) {
-      laddr_t original_laddr = pin.get_key();
-      extent_len_t original_len = pin.get_length();
-      paddr_t original_paddr = pin.get_val();
-      LOG_PREFIX(TransactionManager::remap_pin);
-      SUBDEBUGT(seastore_tm, "{}~0x{:x} {} into {} remaps ... {}",
-                t, original_laddr, original_len, original_paddr, remaps.size(), pin);
+      [FNAME, &t, this](auto &pin, auto &remaps) {
       // The according extent might be stable or pending.
       auto fut = base_iertr::now();
-      if (!pin.is_indirect()) {
+      if (pin.is_indirect()) {
+       SUBDEBUGT(seastore_tm, "{} into {} remaps ...",
+         t, pin, remaps.size());
+       fut = lba_manager->refresh_lba_mapping(t, std::move(pin)
+       ).si_then([this, &pin, &t](auto mapping) {
+         return lba_manager->complete_indirect_lba_mapping(
+           t, std::move(mapping)
+         ).si_then([&pin](auto mapping) {
+           pin = std::move(mapping);
+         });
+       });
+      } else {
+       laddr_t original_laddr = pin.get_key();
+       extent_len_t original_len = pin.get_length();
+       paddr_t original_paddr = pin.get_val();
+       SUBDEBUGT(seastore_tm, "{}~0x{:x} {} into {} remaps ... {}",
+         t, original_laddr, original_len, original_paddr, remaps.size(), pin);
         ceph_assert(!pin.is_clone());
-       fut = fut.si_then([this, &t, &pin]() mutable {
-         return lba_manager->refresh_lba_mapping(t, std::move(pin));
-       }).si_then([this, &t, &pin, original_paddr, original_len](auto newpin) {
+       fut = lba_manager->refresh_lba_mapping(t, std::move(pin)
+       ).si_then([this, &t, &pin, original_paddr, original_len](auto newpin) {
          pin = std::move(newpin);
          if (full_extent_integrity_check) {
            return read_pin<T>(t, pin.duplicate()
@@ -555,8 +611,7 @@ public:
            }
          }
        }).si_then([this, &t, &remaps, original_paddr,
-                           original_laddr, original_len,
-                           &extents, FNAME](auto ext) mutable {
+                   original_laddr, original_len, FNAME](auto ext) mutable {
          ceph_assert(full_extent_integrity_check
              ? (ext && ext->is_fully_loaded())
              : true);
@@ -593,16 +648,15 @@ public:
              original_bptr);
            // user must initialize the logical extent themselves.
            extent->set_seen_by_users();
-           extents.emplace_back(std::move(extent));
+           remap.extent = extent.get();
          }
        });
       }
-      return fut.si_then([this, &t, &pin, &remaps, &extents, FNAME] {
+      return fut.si_then([this, &t, &pin, &remaps, FNAME] {
        return lba_manager->remap_mappings(
          t,
          std::move(pin),
-         std::vector<remap_entry_t>(remaps.begin(), remaps.end()),
-         std::move(extents)
+         std::vector<remap_entry_t>(remaps.begin(), remaps.end())
        ).si_then([FNAME, &t](auto ret) {
          SUBDEBUGT(seastore_tm, "remapped {} pins", t, ret.size());
          return Cache::retire_extent_iertr::make_ready_future<
@@ -635,6 +689,27 @@ public:
     });
   }
 
+  /**
+   * reserve_region
+   *
+   * Refreshes "pos", then asks the LBA manager to reserve
+   * hint~len at that position; resolves to the resulting pin.
+   */
+  reserve_extent_ret reserve_region(
+    Transaction &t,
+    LBAMapping pos,
+    laddr_t hint,
+    extent_len_t len) {
+    LOG_PREFIX(TransactionManager::reserve_region);
+    SUBDEBUGT(seastore_tm, "hint {}~0x{:x} ...", t, hint, len);
+    auto reserve_at = [FNAME, this, &t, hint, len](auto refreshed) {
+      return lba_manager->reserve_region(
+       t, std::move(refreshed), hint, len
+      ).si_then([FNAME, &t](auto reserved) {
+       SUBDEBUGT(seastore_tm, "reserved {}", t, reserved);
+       return reserved;
+      });
+    };
+    return pos.refresh().si_then(std::move(reserve_at));
+  }
+
   /*
    * clone_mapping
    *
@@ -648,27 +723,30 @@ public:
   clone_extent_ret clone_pin(
     Transaction &t,
     laddr_t hint,
-    const LBAMapping &mapping) {
-    auto intermediate_key =
-      mapping.is_indirect()
-       ? mapping.get_intermediate_key()
-       : mapping.get_key();
-    auto intermediate_base =
-      mapping.is_indirect()
-        ? mapping.get_intermediate_base()
-        : mapping.get_key();
-
+    LBAMapping mapping) {
     LOG_PREFIX(TransactionManager::clone_pin);
     SUBDEBUGT(seastore_tm, "{} clone to hint {} ...", t, mapping, hint);
-    return lba_manager->clone_mapping(
-      t,
-      hint,
-      mapping.get_length(),
-      intermediate_key,
-      intermediate_base
-    ).si_then([FNAME, &t](auto pin) {
-      SUBDEBUGT(seastore_tm, "cloned as {}", t, pin);
-      return pin;
+    return lba_manager->refresh_lba_mapping(t, std::move(mapping)
+    ).si_then([FNAME, this, &t, hint](auto mapping) {
+      auto intermediate_key =
+       mapping.is_indirect()
+         ? mapping.get_intermediate_key()
+         : mapping.get_key();
+      auto intermediate_base =
+       mapping.is_indirect()
+         ? mapping.get_intermediate_base()
+         : mapping.get_key();
+
+      return lba_manager->clone_mapping(
+       t,
+       hint,
+       mapping.get_length(),
+       intermediate_key,
+       intermediate_base
+      ).si_then([FNAME, &t](auto pin) {
+       SUBDEBUGT(seastore_tm, "cloned as {}", t, pin);
+       return pin;
+      });
     });
   }
 
index b70f24f79c977cd5ab62383d7bf0908beb4e8552..63f1d6465d536ef7ebaa21d33c76f41d0181d5dc 100644 (file)
@@ -602,10 +602,10 @@ struct btree_lba_manager_test : btree_test_base {
          t,
          target->first
        ).si_then([this, &t, target](auto result) {
-         EXPECT_EQ(result.refcount, target->second.refcount);
-         if (result.refcount == 0) {
+         EXPECT_EQ(result.result.refcount, target->second.refcount);
+         if (result.result.refcount == 0) {
            return cache->retire_extent_addr(
-             t, result.addr.get_paddr(), result.length);
+             t, result.result.addr.get_paddr(), result.result.length);
          }
          return Cache::retire_extent_iertr::now();
        });
index e6120c4745fc2c4436e9a77ac02610a7b7bb6ff2..536d3b1452969afd3914a5deb637a1b450c78769 100644 (file)
@@ -704,9 +704,11 @@ struct transaction_manager_test_t :
   LBAMapping clone_pin(
     test_transaction_t &t,
     laddr_t offset,
-    const LBAMapping &mapping) {
-    auto pin = with_trans_intr(*(t.t), [&](auto &trans) {
-      return tm->clone_pin(trans, offset, mapping);
+    LBAMapping mapping) {
+    auto pin = with_trans_intr(
+      *(t.t),
+      [this, offset, mapping=std::move(mapping)](auto &trans) mutable {
+      return tm->clone_pin(trans, offset, std::move(mapping));
     }).unsafe_get();
     EXPECT_EQ(offset, pin.get_key());
     EXPECT_EQ(mapping.get_key(), pin.get_intermediate_key());
@@ -1461,9 +1463,10 @@ struct transaction_manager_test_t :
        auto t = create_transaction();
         auto lpin = get_pin(t, l_offset);
         auto rpin = get_pin(t, r_offset);
-       auto l_clone_pin = clone_pin(t, l_clone_offset, lpin);
-       auto r_clone_pin = clone_pin(t, r_clone_offset, rpin);
+       auto l_clone_pin = clone_pin(t, l_clone_offset, std::move(lpin));
+       auto r_clone_pin = clone_pin(t, r_clone_offset, std::move(rpin));
         //split left
+       l_clone_pin = refresh_lba_mapping(t, std::move(l_clone_pin));
         auto pin1 = remap_pin(t, std::move(l_clone_pin), 0, 16 << 10);
         ASSERT_TRUE(pin1);
         auto pin2 = remap_pin(t, std::move(*pin1), 0, 8 << 10);
@@ -1474,6 +1477,7 @@ struct transaction_manager_test_t :
         EXPECT_EQ('l', lext->get_bptr().c_str()[0]);
 
         //split right
+       r_clone_pin = refresh_lba_mapping(t, std::move(r_clone_pin));
         auto pin4 = remap_pin(t, std::move(r_clone_pin), 16 << 10, 16 << 10);
         ASSERT_TRUE(pin4);
         auto pin5 = remap_pin(t, std::move(*pin4), 8 << 10, 8 << 10);