git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore: implement promote extent
author: Zhang Song <zhangsong02@qianxin.com>
Wed, 3 Sep 2025 08:02:01 +0000 (16:02 +0800)
committer: Xuehan Xu <xuxuehan@qianxin.com>
Sat, 23 May 2026 09:01:33 +0000 (17:01 +0800)
Signed-off-by: Zhang Song <zhangsong02@qianxin.com>
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/os/seastore/lba/btree_lba_manager.cc
src/crimson/os/seastore/lba/btree_lba_manager.h
src/crimson/os/seastore/lba_manager.h
src/crimson/os/seastore/transaction_manager.cc

index e593ce7a7bf4cc8f80d1a0e8a7f195eaa1059ddd..dbaeb6c1698dd485cba3ddd5e02dfd70a3a78769 100644 (file)
@@ -7,6 +7,7 @@
 #include <seastar/core/metrics.hh>
 
 #include "include/buffer.h"
+#include "crimson/common/coroutine.h"
 #include "crimson/os/seastore/lba/btree_lba_manager.h"
 #include "crimson/os/seastore/lba/lba_btree_node.h"
 #include "crimson/os/seastore/logging.h"
@@ -249,6 +250,58 @@ BtreeLBAManager::lower_bound(
   co_return iter.get_cursor(c);
 }
 
+BtreeLBAManager::promote_extent_ret  // Repoints the LBA entry of `mapping` at the promoted `extents`,
+BtreeLBAManager::promote_extent(     // keeping the previous physical address in shadow_paddr.
+  Transaction &t,
+  LBAMapping mapping,  // must be a direct mapping whose key equals the first extent's laddr
+  std::vector<LogicalChildNodeRef> extents)  // must be non-empty; one LBA entry is written per extent
+{
+  LOG_PREFIX(BtreeLBAManager::promote_extent);
+  auto laddr = mapping.get_key();
+  ceph_assert(!extents.empty());  // preconditions, checked in all builds
+  ceph_assert(!mapping.is_indirect());
+  ceph_assert(laddr == extents.front()->get_laddr());
+  DEBUGT("promote mapping {} with {} extents",
+        t, mapping, extents.size());
+  auto c = get_context(t);
+  auto btree = co_await get_btree<LBABtree>(c);
+  auto iter = btree.make_partial_iter(c, mapping.get_effective_cursor());  // iterator built from the mapping's cursor
+  auto orig_val = iter.get_val();  // value currently stored for the mapping; basis for every new value
+  if (extents.size() == 1) {  // single extent: rewrite the existing entry in place
+    auto new_val = orig_val;
+    ceph_assert(new_val.pladdr.is_paddr());
+    new_val.shadow_paddr = new_val.pladdr.get_paddr();  // old location is remembered as the shadow
+    auto extent = extents.front().get();
+    auto paddr = extent->get_paddr();
+    new_val.pladdr = pladdr_t(paddr);  // entry now points at the promoted extent
+    TRACET("promote {} from {} to {}",
+          t, iter.get_key(), new_val.shadow_paddr, paddr);
+    assert(extent->is_pending());
+    assert(!extent->has_parent_tracker());
+    iter = co_await btree.update(c, iter, new_val, extent);
+    assert(extent->has_parent_tracker());  // the update is expected to have attached the extent to its parent
+  } else {  // several extents: drop the original entry and insert one entry per extent
+    auto insert_iter = co_await btree.remove(c, std::move(iter));
+    for (auto &extent : extents) {
+      auto offset = extent->get_laddr().get_byte_distance<extent_len_t>(laddr);  // presumably the byte offset of this extent from the mapping key
+      auto new_val = orig_val;
+      new_val.shadow_paddr = orig_val.pladdr.get_paddr().add_offset(offset);  // matching slice of the old location
+      new_val.pladdr = pladdr_t(extent->get_paddr());
+      new_val.len = extent->get_length();
+      new_val.checksum = extent->get_last_committed_crc();  // per-extent crc replaces the original entry's checksum
+      TRACET("insert promoted mapping {} {}",
+            c.trans, extent->get_laddr(), new_val);
+      assert(!extent->has_parent_tracker());
+      auto [iter, inserted] = co_await btree.insert(  // this `iter` shadows the moved-from outer one
+       c, std::move(insert_iter), extent->get_laddr(), new_val, extent.get());
+      ceph_assert(inserted);  // the insert must have created a new entry
+      assert(extent->has_parent_tracker());
+      insert_iter = co_await iter.next(c);  // step past the new entry to get the next insert position
+    }
+  }
+  co_return;
+}
+
 BtreeLBAManager::alloc_extent_ret
 BtreeLBAManager::reserve_region(
   Transaction &t,
index 0db84a027d82ace7420aa471e2a7b94fa894812a..26c2e6d4bd246e64c9f69a80490fcab8fd5e1c29 100644 (file)
@@ -79,6 +79,11 @@ public:
     Transaction &t,
     laddr_t laddr) final;
 
+  promote_extent_ret promote_extent(  // `final` override of the virtual promote_extent; defined in btree_lba_manager.cc
+    Transaction &t,
+    LBAMapping mapping,
+    std::vector<LogicalChildNodeRef> extents) final;
+
   alloc_extent_ret reserve_region(
     Transaction &t,
     LBACursorRef pos,
index a537a35141850b9cb9a6b28fa263d3ff4e85c0cf..4fab5a7afe58860c0d95d5150ba5b40505665e71 100644 (file)
@@ -167,6 +167,13 @@ public:
     LBACursorRef dest,
     LogicalChildNode &extent) = 0;
 
+  using promote_extent_iertr = base_iertr;  // promote_extent reuses the base error set unchanged
+  using promote_extent_ret = promote_extent_iertr::future<>;  // future that carries no value
+  virtual promote_extent_ret promote_extent(  // pure virtual: implementations supply the behaviour
+    Transaction &t,
+    LBAMapping mapping,
+    std::vector<LogicalChildNodeRef> extents) = 0;
+
   virtual alloc_extent_ret reserve_region(
     Transaction &t,
     laddr_hint_t hint,
index c20c3e0fac5e29b3b07075253e111893d45dd30c..39888d067ad9da41271c913806b894f7df150eff 100644 (file)
@@ -1024,8 +1024,102 @@ TransactionManager::promote_extent(
   Transaction &t,
   CachedExtentRef extent)
 {
-  // TODO
-  return rewrite_extent_iertr::make_ready_future();
+  LOG_PREFIX(TransactionManager::promote_extent);
+  assert(epm->is_cold_device(extent->get_paddr().get_device_id()));
+  DEBUGT("promote extent: {}", t, *extent);
+  ceph_assert(extent->is_logical());
+
+  std::vector<LogicalChildNodeRef> promoted_extents;
+  auto orig_ext = extent->cast<LogicalChildNode>();
+  // fill extent if it's not fully loaded
+  if (!extent->is_fully_loaded()) {
+    ceph_assert(extent->get_type() == extent_types_t::OBJECT_DATA_BLOCK);
+    extent = co_await cache->read_extent_maybe_partial(
+      t, extent->cast<ObjectDataBlock>(), 0, extent->get_length());
+  }
+
+  cache->retire_extent(t, extent);
+
+  if (get_extent_category(extent->get_type()) == data_category_t::DATA) {
+    auto promoted_raw_extents = cache->alloc_new_data_extents_by_type(
+      t,
+      orig_ext->get_type(),
+      orig_ext->get_length(),
+      placement_hint_t::HOT,
+      INIT_GENERATION);
+
+    promoted_extents.reserve(promoted_raw_extents.size());
+
+    extent_len_t offset = 0;
+    auto orig_laddr = orig_ext->get_laddr();
+    auto orig_paddr = orig_ext->get_paddr();
+    auto orig_length = orig_ext->get_length();
+    for (auto &extent : promoted_raw_extents) {
+      auto slice_laddr = (orig_laddr + offset).checked_to_laddr();
+      auto slice_length = extent->get_length();
+      extent->rewrite(t, *orig_ext, offset);
+
+      auto lext = extent->cast<LogicalChildNode>();
+      lext->set_laddr(slice_laddr);
+      //TODO: this memory copy should be saved
+      orig_ext->get_bptr().copy_out(
+        offset, slice_length, lext->get_bptr().c_str());
+      lext->set_last_committed_crc(lext->calc_crc32c());
+
+      promoted_extents.push_back(lext);
+
+      auto remapped_cold_extent = cache->alloc_remapped_extent_by_type(
+        t,
+        orig_ext->get_type(),
+        slice_laddr,
+        orig_paddr.add_offset(offset),
+        offset,
+        slice_length,
+        std::nullopt);
+      remapped_cold_extent->set_shadow_extent(true);
+
+      offset += slice_length;
+    }
+    ceph_assert(offset == orig_length);
+  } else {
+    auto promoted_extent = cache->alloc_new_non_data_extent_by_type(
+      t,
+      orig_ext->get_type(),
+      orig_ext->get_length(),
+      placement_hint_t::HOT,
+      INIT_GENERATION);
+    auto lext = promoted_extent->cast<LogicalChildNode>();
+    lext->set_laddr(orig_ext->get_laddr());
+    lext->rewrite(t, *orig_ext, 0);
+    //TODO: this memory copy should be saved
+    orig_ext->get_bptr().copy_out(
+      0,
+      orig_ext->get_length(),
+      lext->get_bptr().c_str());
+    promoted_extents.push_back(lext);
+    auto remapped_cold_extent = cache->alloc_remapped_extent_by_type(
+      t,
+      orig_ext->get_type(),
+      orig_ext->get_laddr(),
+      orig_ext->get_paddr(),
+      0,
+      orig_ext->get_length(),
+      std::nullopt);
+    boost::ignore_unused(remapped_cold_extent);
+
+    remapped_cold_extent->set_shadow_extent(true);
+  }
+
+  auto cursor = co_await lba_manager->get_cursor(
+    t, *orig_ext
+  ).handle_error_interruptible(
+    promote_extent_iertr::pass_further(),
+    crimson::ct_error::assert_all("invalid error"));
+  auto mapping = co_await resolve_cursor_to_mapping(t, std::move(cursor));
+  co_return co_await lba_manager->promote_extent(
+    t,
+    orig_ext->get_laddr(),
+    std::move(promoted_extents));
 }
 
 TransactionManager::demote_region_ret