git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore: implement demote region
author Zhang Song <zhangsong02@qianxin.com>
Wed, 3 Sep 2025 08:02:29 +0000 (16:02 +0800)
committer Xuehan Xu <xuxuehan@qianxin.com>
Sat, 23 May 2026 09:11:06 +0000 (17:11 +0800)
Signed-off-by: Zhang Song <zhangsong02@qianxin.com>
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/lba/btree_lba_manager.cc
src/crimson/os/seastore/lba/btree_lba_manager.h
src/crimson/os/seastore/lba_manager.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h

index b7ad1b06a9e5cf7edb0dcef7b7209141cfdd4f19..8badda70ee83c4a3d5e0deaced473f4110c25016 100644 (file)
@@ -179,7 +179,9 @@ public:
     if (!extent->is_stable_dirty()) {
       return false;
     }
-    assert(t.get_src() == transaction_type_t::TRIM_DIRTY);
+    assert((t.get_src() == transaction_type_t::TRIM_DIRTY) ||
+           (t.get_src() == transaction_type_t::DEMOTE) ||
+           (t.get_src() == transaction_type_t::PROMOTE));
     ceph_assert_always(is_root_type(extent->get_type()) ||
        extent->get_paddr().is_absolute());
     return crimson::os::seastore::can_inplace_rewrite(extent->get_type());
@@ -616,6 +618,10 @@ public:
     return !devices_by_id[addr.get_device_id()]->is_end_to_end_data_protection();
   }
 
+  rewrite_gen_t get_max_hot_gen() const {
+    return hot_tier_generations - 1;
+  }
+
 private:
   rewrite_gen_t adjust_generation(
       data_category_t category,
index dbaeb6c1698dd485cba3ddd5e02dfd70a3a78769..2f933a4ac13b58cfc6e181cdafd48b56c1d1eb1b 100644 (file)
@@ -250,6 +250,26 @@ BtreeLBAManager::lower_bound(
   co_return iter.get_cursor(c);
 }
 
+BtreeLBAManager::upper_bound_right_ret
+BtreeLBAManager::upper_bound_right(
+  Transaction &t,
+  laddr_t laddr)
+{
+  auto c = get_context(t);
+  auto btree = co_await get_btree<LBABtree>(c);
+  auto iter = co_await btree.upper_bound_right(c, laddr);
+  if (iter.is_end()) {
+    co_await upper_bound_right_iertr::future<LBAMapping>(
+      crimson::ct_error::enoent::make());
+  }
+  assert(iter.get_key() >= laddr);
+  if (iter.get_val().pladdr.is_paddr()) {
+    co_return LBAMapping::create_direct(iter.get_cursor(c));
+  } else {
+    co_return LBAMapping::create_indirect(nullptr, iter.get_cursor(c));
+  }
+}
+
 BtreeLBAManager::promote_extent_ret
 BtreeLBAManager::promote_extent(
   Transaction &t,
@@ -301,6 +321,34 @@ BtreeLBAManager::promote_extent(
   }
   co_return;
 }
+BtreeLBAManager::demote_extent_ret
+BtreeLBAManager::demote_extent(
+  Transaction &t,
+  LBAMapping mapping,
+  LogicalChildNode &extent)
+{
+  assert(mapping.is_viewable());
+  assert(!mapping.is_end());
+  assert(!mapping.is_indirect());
+  assert(mapping.has_shadow_val());
+  auto c = get_context(t);
+  auto btree = co_await get_btree<LBABtree>(c);
+  auto ret = co_await _update_mapping(
+    t,
+    mapping.get_effective_cursor(),
+    [&extent](lba_map_val_t val) {
+      assert(val.pladdr.is_paddr());
+      assert(val.shadow_paddr == extent.get_paddr());
+      val.pladdr = pladdr_t(val.shadow_paddr);
+      val.shadow_paddr = P_ADDR_NULL;
+      return val;
+    },
+    &extent
+  ).handle_error_interruptible(
+    demote_extent_iertr::pass_further{},
+    crimson::ct_error::assert_all("unexpected enoent"));
+  co_return LBAMapping::create_direct(std::move(ret));
+}
 
 BtreeLBAManager::alloc_extent_ret
 BtreeLBAManager::reserve_region(
index 26c2e6d4bd246e64c9f69a80490fcab8fd5e1c29..2ed2596ab191d3850c56956c15475418c304a503 100644 (file)
@@ -79,11 +79,20 @@ public:
     Transaction &t,
     laddr_t laddr) final;
 
+  upper_bound_right_ret upper_bound_right(
+    Transaction &t,
+    laddr_t laddr) final;
+
   promote_extent_ret promote_extent(
     Transaction &t,
     LBAMapping mapping,
     std::vector<LogicalChildNodeRef> extents) final;
 
+  demote_extent_ret demote_extent(
+    Transaction &t,
+    LBAMapping mapping,
+    LogicalChildNode &extent) final;
+
   alloc_extent_ret reserve_region(
     Transaction &t,
     LBACursorRef pos,
index 4fab5a7afe58860c0d95d5150ba5b40505665e71..c30507dd94faaa9c23bb736f148e294409d8ea9d 100644 (file)
@@ -60,6 +60,13 @@ public:
     Transaction &t,
     laddr_t laddr) = 0;
 
+  using upper_bound_right_iertr = base_iertr::extend<
+    crimson::ct_error::enoent>;
+  using upper_bound_right_ret = upper_bound_right_iertr::future<LBAMapping>;
+  virtual upper_bound_right_ret upper_bound_right(
+    Transaction &t,
+    laddr_t laddr) = 0;
+
 #ifdef UNIT_TESTS_BUILT
   using get_end_mapping_iertr = base_iertr;
   using get_end_mapping_ret = get_end_mapping_iertr::future<LBACursorRef>;
@@ -174,6 +181,13 @@ public:
     LBAMapping mapping,
     std::vector<LogicalChildNodeRef> extents) = 0;
 
+  using demote_extent_iertr = base_iertr;
+  using demote_extent_ret = demote_extent_iertr::future<LBAMapping>;
+  virtual demote_extent_ret demote_extent(
+    Transaction &t,
+    LBAMapping mapping,
+    LogicalChildNode &extent) = 0;
+
   virtual alloc_extent_ret reserve_region(
     Transaction &t,
     laddr_hint_t hint,
index 39888d067ad9da41271c913806b894f7df150eff..f14376bf9c638d4028ab12385cd6c839605bf6fc 100644 (file)
@@ -479,6 +479,56 @@ TransactionManager::relocate_logical_extent(
   }
 }
 
+base_iertr::future<LogicalChildNodeRef>
+TransactionManager::relocate_shadow_extent(
+  Transaction &t, LBAMapping mapping)
+{
+  LOG_PREFIX(TransactionManager::relocate_shadow_extent);
+  SUBDEBUGT(seastore_tm, "relocate {}", t, mapping);
+  assert(mapping.has_shadow_val());
+  assert(!mapping.is_zero_reserved());
+  assert(mapping.is_viewable());
+  auto v = mapping.get_logical_extent(t);
+  CachedExtentRef extent;
+  auto laddr = mapping.get_key();
+  if (!v.has_child()) {
+    auto &child_pos = v.get_child_pos();
+    extent = cache->retire_absent_extent_addr_by_type(
+      t,
+      laddr,
+      mapping.get_val(),
+      mapping.get_length(),
+      mapping.get_extent_type(),
+      [laddr, &child_pos](auto &extent) {
+        auto lextent = extent.template cast<LogicalChildNode>();
+        assert(extent.is_logical());
+        assert(!lextent->has_laddr());
+        assert(!extent.has_been_invalidated());
+        child_pos.link_child(lextent.get());
+        lextent->set_laddr(laddr);
+      }
+    );
+  } else {
+    auto extent = co_await std::move(v.get_child_fut());
+    cache->retire_extent(t, extent);
+  }
+  auto shadow_paddr = mapping.get_shadow_val();
+  std::ignore = cache->retire_absent_extent_addr_by_type(
+    t, laddr, shadow_paddr, mapping.get_length(), mapping.get_extent_type(),
+    [laddr](auto &ext) {
+      auto lextent = ext.template cast<LogicalChildNode>();
+      assert(ext.is_logical());
+      assert(!lextent->has_laddr());
+      assert(!ext.has_been_invalidated());
+      lextent->set_laddr(laddr);
+    }
+  );
+  co_return cache->alloc_remapped_extent_by_type(
+    t, mapping.get_extent_type(), laddr,
+    mapping.get_shadow_val(), 0, mapping.get_length(), std::nullopt
+  )->cast<LogicalChildNode>();
+}
+
 TransactionManager::submit_transaction_iertr::future<>
 TransactionManager::submit_transaction(
   Transaction &t)
@@ -1117,9 +1167,7 @@ TransactionManager::promote_extent(
     crimson::ct_error::assert_all("invalid error"));
   auto mapping = co_await resolve_cursor_to_mapping(t, std::move(cursor));
   co_return co_await lba_manager->promote_extent(
-    t,
-    orig_ext->get_laddr(),
-    std::move(promoted_extents));
+    t, mapping, std::move(promoted_extents));
 }
 
 TransactionManager::demote_region_ret
@@ -1128,9 +1176,42 @@ TransactionManager::demote_region(
   laddr_t start,
   loffset_t max_proceed_size)
 {
-  // TODO
-  return demote_region_iertr::make_ready_future<demote_region_res_t>(
-    demote_region_res_t{0, false});
+  LOG_PREFIX(TransactionManager::demote_region);
+  auto prefix = start.get_object_prefix();
+  DEBUGT("start demote {}", t, prefix);
+  auto it = co_await lba_manager->upper_bound_right(
+    t, start
+  ).handle_error_interruptible(
+    demote_region_iertr::pass_further{},
+    crimson::ct_error::assert_all("unexpected enoent"));
+  demote_region_res_t ret{0, 0, false};
+  while ((ret.demoted_size + ret.evicted_size) < max_proceed_size) {
+    if (it.is_end() || it.get_key().get_object_prefix() != prefix) {
+      ret.complete = true;
+      break;
+    }
+    if (it.has_shadow_val()) {
+      DEBUGT("demote shadow {}", t, it);
+      auto extent = co_await relocate_shadow_extent(t, it);
+      ret.demoted_size += extent->get_length();
+      LBAMapping nit = co_await lba_manager->demote_extent(t, it, *extent);
+      it = co_await nit.next();
+    } else if (!it.is_indirect() && !it.is_zero_reserved() &&
+      !epm->is_cold_device(it.get_val().get_device_id())) {
+      DEBUGT("demote hot {}", t, it);
+      auto extent = co_await read_cursor_by_type(
+        t, it.direct_cursor, it.get_extent_type());
+      ret.evicted_size += extent->get_length();
+      extent->set_target_rewrite_generation(epm->get_max_hot_gen() + 1);
+      co_await rewrite_logical_extent(t, extent);
+      it = co_await it.next();
+    } else {
+      DEBUGT("skip {}", t, it);
+      it = co_await it.next();
+    }
+  }
+
+  co_return ret;
 }
 
 TransactionManager::get_extents_if_live_ret
index 17afb57364311142a6b6bb70dc355a85317cbff5..9fe71c37dd36004d73fa6295e4b49d5a8267e7a5 100644 (file)
@@ -114,6 +114,10 @@ public:
     Transaction &t,
     LBAMapping mapping);
 
+  base_iertr::future<LogicalChildNodeRef> relocate_shadow_extent(
+    Transaction &t,
+    LBAMapping mapping);
+
   /**
    * get_pin
    *