git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore/random_block_manager: try to allocate consecutive rbm space when...
author: Xuehan Xu <xuxuehan@qianxin.com>
Wed, 3 Sep 2025 08:10:28 +0000 (16:10 +0800)
committer: Xuehan Xu <xuxuehan@qianxin.com>
Sat, 23 May 2026 09:12:01 +0000 (17:12 +0800)
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
15 files changed:
src/crimson/os/seastore/async_cleaner.cc
src/crimson/os/seastore/async_cleaner.h
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/random_block_manager.h
src/crimson/os/seastore/random_block_manager/avlallocator.cc
src/crimson/os/seastore/random_block_manager/avlallocator.h
src/crimson/os/seastore/random_block_manager/block_rb_manager.cc
src/crimson/os/seastore/random_block_manager/block_rb_manager.h
src/crimson/os/seastore/random_block_manager/extent_allocator.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h
src/test/crimson/seastore/test_btree_lba_manager.cc
src/test/crimson/seastore/test_extent_allocator.cc

diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc
index e6f134c5f50da931b51ed117670cdfa1a63c44df..593b6ddb46ceafa313269c9282b0f618692adecf 100644 (file)
@@ -1301,15 +1301,11 @@ do_reclaim_space_ret do_reclaim_space(
                    &reclaimed, &t, modify_time, target_generation] {
           DEBUGT("reclaim {} extents", t, extents.size());
           // rewrite live extents
-          return trans_intr::do_for_each(
-            extents,
-            [&extent_callback, modify_time, &t,
-           &reclaimed, target_generation](auto ext)
-          {
-            reclaimed += ext->get_length();
-            return extent_callback.rewrite_extent(
-                t, ext, target_generation, modify_time);
-          });
+         for (auto &ext : extents) {
+           reclaimed += ext->get_length();
+         }
+         return extent_callback.rewrite_extents(
+           t, extents, target_generation, modify_time);
         });
       }).si_then([&extent_callback, &t] {
         return extent_callback.submit_transaction_direct(t);
diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h
index 5dd9785ebda23882a6d76306135ffbfe2e02d806..0ecf9ad707909332a767a1570b268ad6ea7db823 100644 (file)
@@ -351,6 +351,14 @@ public:
     rewrite_gen_t target_generation,
     sea_time_point modify_time) = 0;
 
+  using rewrite_extents_iertr = base_iertr;
+  using rewrite_extents_ret = rewrite_extents_iertr::future<>;
+  virtual rewrite_extents_ret rewrite_extents(
+    Transaction &t,
+    std::vector<CachedExtentRef> &extents,
+    rewrite_gen_t target_generation,
+    sea_time_point modify_time) = 0;
+
   /**
    * promote_extent
    *
@@ -1868,10 +1876,11 @@ public:
     return paddr;
   }
 
-  std::list<alloc_paddr_result> alloc_paddrs(extent_len_t length) {
+  std::list<alloc_paddr_result> alloc_paddrs(
+    extent_len_t length, paddr_t hint) {
     // TODO: implement allocation strategy (dirty metadata and multiple devices)
     auto rbs = rb_group->get_rb_managers();
-    auto ret = rbs[0]->alloc_extents(length);
+    auto ret = rbs[0]->alloc_extents(length, hint);
     if (!ret.empty()) {
       stats.used_bytes += length;
     }
diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc
index 60403e3b9f99c7cc4a81cdc85080bad2c1191087..c42111ced13056cec18da2a1dec5fe561aff1de3 100644 (file)
@@ -1214,6 +1214,7 @@ CachedExtentRef Cache::alloc_new_non_data_extent_by_type(
   extent_len_t length,   ///< [in] length
   placement_hint_t hint, ///< [in] user hint
   rewrite_gen_t gen,     ///< [in] rewrite generation
+  paddr_t paddr_hint,
   bool is_tracked
 )
 {
@@ -1221,7 +1222,7 @@ CachedExtentRef Cache::alloc_new_non_data_extent_by_type(
   SUBDEBUGT(seastore_cache, "allocate {} 0x{:x}B, hint={}, gen={}",
             t, type, length, hint, rewrite_gen_printer_t{gen});
   ceph_assert(get_extent_category(type) == data_category_t::METADATA);
-  auto opt = alloc_option_t{hint, gen, is_tracked};
+  auto opt = alloc_option_t{hint, gen, is_tracked, paddr_hint};
   switch (type) {
   case extent_types_t::ROOT:
     ceph_assert(0 == "ROOT is never directly alloc'd");
@@ -1267,6 +1268,7 @@ std::vector<CachedExtentRef> Cache::alloc_new_data_extents_by_type(
   extent_len_t length,   ///< [in] length
   placement_hint_t hint, ///< [in] user hint
   rewrite_gen_t gen,      ///< [in] rewrite generation
+  paddr_t paddr_hint,
   bool is_tracked
 )
 {
@@ -1279,7 +1281,7 @@ std::vector<CachedExtentRef> Cache::alloc_new_data_extents_by_type(
   case extent_types_t::OBJECT_DATA_BLOCK:
     {
       auto extents = alloc_new_data_extents<
-       ObjectDataBlock>(t, length, {hint, gen, is_tracked,
+       ObjectDataBlock>(t, length, {hint, gen, is_tracked, paddr_hint,
            epm.get_write_policy(type, length)});
       res.insert(res.begin(), extents.begin(), extents.end());
     }
@@ -1287,7 +1289,7 @@ std::vector<CachedExtentRef> Cache::alloc_new_data_extents_by_type(
   case extent_types_t::TEST_BLOCK:
     {
       auto extents = alloc_new_data_extents<
-       TestBlock>(t, length, {hint, gen, is_tracked,
+       TestBlock>(t, length, {hint, gen, is_tracked, paddr_hint,
          epm.get_write_policy(type, length)});
       res.insert(res.begin(), extents.begin(), extents.end());
     }
diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h
index 37696d8e7f250705a7c52c87657ae1b1d6e7a10c..1b41f50f886411cd948cbba601ce35d02d826188 100644 (file)
@@ -1242,6 +1242,7 @@ public:
     extent_len_t length,   ///< [in] length
     placement_hint_t hint, ///< [in] user hint
     rewrite_gen_t gen,     ///< [in] rewrite generation
+    paddr_t paddr_hint,
     bool is_tracked
     );
 
@@ -1256,6 +1257,7 @@ public:
     extent_len_t length,   ///< [in] length
     placement_hint_t hint, ///< [in] user hint
     rewrite_gen_t gen,     ///< [in] rewrite generation
+    paddr_t paddr_hint,
     bool is_tracked
     );
 
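diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h
index 3027f645a0fa0c63df57f72c410c9db305bd542d..1672794e5c25bea6293b42c2c2f1f920645ac17d 100644 (file)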
@@ -107,7 +107,9 @@ public:
 
   virtual paddr_t alloc_paddr(extent_len_t length) = 0;
 
-  virtual std::list<alloc_paddr_result> alloc_paddrs(extent_len_t length) = 0;
+  virtual std::list<alloc_paddr_result> alloc_paddrs(
+    extent_len_t length,
+    paddr_t hint) = 0;
 
   using alloc_write_ertr = base_ertr;
   using alloc_write_iertr = trans_iertr<alloc_write_ertr>;
@@ -170,7 +172,7 @@ public:
     return make_delayed_temp_paddr(0);
   }
 
-  std::list<alloc_paddr_result> alloc_paddrs(extent_len_t length) final {
+  std::list<alloc_paddr_result> alloc_paddrs(extent_len_t length, paddr_t) final {
     return {alloc_paddr_result{make_delayed_temp_paddr(0), length}};
   }
 
@@ -237,9 +239,10 @@ public:
     return rb_cleaner->alloc_paddr(length);
   }
 
-  std::list<alloc_paddr_result> alloc_paddrs(extent_len_t length) final {
+  std::list<alloc_paddr_result> alloc_paddrs(
+    extent_len_t length, paddr_t hint) final {
     assert(rb_cleaner);
-    return rb_cleaner->alloc_paddrs(length);
+    return rb_cleaner->alloc_paddrs(length, hint);
   }
 
   bool can_inplace_rewrite(Transaction& t,
@@ -431,6 +434,7 @@ public:
     placement_hint_t hint;
     rewrite_gen_t gen;
     bool is_tracked;
+    paddr_t paddr_hint = P_ADDR_NULL;
     write_policy_t write_policy = write_policy_t::WRITE_BACK;
 #ifdef UNIT_TESTS_BUILT
     std::optional<paddr_t> external_paddr = std::nullopt;
@@ -509,7 +513,8 @@ public:
 #endif
     {
       assert(category == data_category_t::DATA);
-      auto addrs = get_writer(opt.hint, category, opt.gen)->alloc_paddrs(length);
+      auto addrs = get_writer(opt.hint, category, opt.gen)->alloc_paddrs(
+        length, opt.paddr_hint);
       for (auto &ext : addrs) {
         auto left = ext.len;
         while (left > 0) {
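diff --git a/src/crimson/os/seastore/random_block_manager.h b/src/crimson/os/seastore/random_block_manager.h
index f776483edf270b1bf90ecb72466fe9ff51a5e302..8ccdbf1639f2ea13bb26c62b5495142da7cc12af 100644 (file)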
@@ -77,7 +77,7 @@ public:
 
   using allocate_ret_bare = std::list<alloc_paddr_result>;
   using allo_extents_ret = allocate_ertr::future<allocate_ret_bare>;
-  virtual allocate_ret_bare alloc_extents(size_t size) = 0;
+  virtual allocate_ret_bare alloc_extents(size_t size, paddr_t hint) = 0;
 
   virtual void mark_space_used(paddr_t paddr, size_t len) = 0;
   virtual void mark_space_free(paddr_t paddr, size_t len) = 0;
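diff --git a/src/crimson/os/seastore/random_block_manager/avlallocator.cc b/src/crimson/os/seastore/random_block_manager/avlallocator.cc
index 3f8df06f9d68d6ebac3ddb6241188e6c862956a7..cc9f2d1f6a7900d504d2620cee13eba82318ac5d 100644 (file)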
@@ -97,6 +97,15 @@ extent_len_t AvlAllocator::find_block(
   if (p != extent_size_tree.rend()) {
     max_size = p->end - p->start;
   }
+  const auto compare = extent_tree.key_comp();
+  auto rs = extent_tree.lower_bound(extent_range_t{start, size}, compare);
+  if (rs != extent_tree.end()) {
+    uint64_t offset = rs->start;
+    if (offset + size <= rs->end) {
+      start = offset;
+      return size;
+    }
+  }
 
   assert(max_size);
   if (max_size <= size) {
@@ -203,7 +212,7 @@ std::optional<interval_set<rbm_abs_addr>> AvlAllocator::alloc_extent(
 }
 
 std::optional<interval_set<rbm_abs_addr>> AvlAllocator::alloc_extents(
-  size_t size)
+  size_t size, rbm_abs_addr hint)
 {
   LOG_PREFIX(AvlAllocator::alloc_extents);
   if (available_size < size) {
@@ -217,10 +226,10 @@ std::optional<interval_set<rbm_abs_addr>> AvlAllocator::alloc_extents(
 
   interval_set<rbm_abs_addr> result;
 
-  auto try_to_alloc_block = [this, &result, FNAME] (uint64_t alloc_size)
+  auto try_to_alloc_block = [this, hint, &result, FNAME] (uint64_t alloc_size)
   {
+    rbm_abs_addr start = hint;
     while (alloc_size) {
-      rbm_abs_addr start = 0;
       extent_len_t len = find_block(std::min(max_alloc_size, alloc_size), start);
       ceph_assert(len);
       _remove_from_tree(start, len);
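diff --git a/src/crimson/os/seastore/random_block_manager/avlallocator.h b/src/crimson/os/seastore/random_block_manager/avlallocator.h
index 8f4eedc4f7a6ced243511cbb412e6613ed73ba44..dab0e36be5b1c4179fda2824834c5905ea22515c 100644 (file)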
@@ -65,7 +65,7 @@ public:
   std::optional<interval_set<rbm_abs_addr>> alloc_extent(
     size_t size) final;
   std::optional<interval_set<rbm_abs_addr>> alloc_extents(
-    size_t size) final;
+    size_t size, rbm_abs_addr hint) final;
 
   void free_extent(rbm_abs_addr addr, size_t size) final;
   void mark_extent_used(rbm_abs_addr addr, size_t size) final;
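diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc
index 74bcbddb91780415443098e7f527293fc2e56f13..c1604a40f73eb7e93c5cc1319ca05b92f4d1b939 100644 (file)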
@@ -66,11 +66,13 @@ paddr_t BlockRBManager::alloc_extent(size_t size)
 }
 
 BlockRBManager::allocate_ret_bare
-BlockRBManager::alloc_extents(size_t size)
+BlockRBManager::alloc_extents(size_t size, paddr_t hint)
 {
   LOG_PREFIX(BlockRBManager::alloc_extents);
   assert(allocator);
-  auto alloc = allocator->alloc_extents(size);
+  rbm_abs_addr rbm_hint =
+    (hint == P_ADDR_NULL ? 0 : convert_paddr_to_abs_addr(hint));
+  auto alloc = allocator->alloc_extents(size, rbm_hint);
   if (!alloc) {
     return {};
   }
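diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.h b/src/crimson/os/seastore/random_block_manager/block_rb_manager.h
index 03b2285d8ceb431b6f8d040d0f1a5cabb1f9df97..09ad50f75bd31ddac40f8b28f624d55f9c07e83e 100644 (file)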
@@ -58,7 +58,8 @@ public:
    */
   paddr_t alloc_extent(size_t size) override; // allocator, return blocks
 
-  allocate_ret_bare alloc_extents(size_t size) override; // allocator, return blocks
+  allocate_ret_bare alloc_extents(
+    size_t size, paddr_t hint) override; // allocator, return blocks
 
   void complete_allocation(paddr_t addr, size_t size) override;
 
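diff --git a/src/crimson/os/seastore/random_block_manager/extent_allocator.h b/src/crimson/os/seastore/random_block_manager/extent_allocator.h
index 2797b8822fd53de8f01fb336549606251cf336f0..d0a06fd4cf05bb0eec83c4770e87989764568863 100644 (file)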
@@ -35,7 +35,7 @@ public:
    *
    */
   virtual std::optional<interval_set<rbm_abs_addr>> alloc_extents(
-    size_t size) = 0;
+    size_t size, rbm_abs_addr hint) = 0;
 
   /**
    * free_extent
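diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc
index 1633df14fb3913ea4eb19eaa053281474bca9ef7..73f52fb1189c206d407aebd9ff2c523f1ec975fc 100644 (file)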
@@ -765,10 +765,12 @@ TransactionManager::get_next_dirty_extents(
   return cache->get_next_dirty_extents(t, seq, max_bytes);
 }
 
-TransactionManager::rewrite_extent_ret
+TransactionManager::rewrite_extent_iertr::future<
+  std::vector<CachedExtentRef>>
 TransactionManager::rewrite_logical_extent(
   Transaction& t,
-  LogicalChildNodeRef extent)
+  LogicalChildNodeRef extent,
+  paddr_t paddr_hint)
 {
   LOG_PREFIX(TransactionManager::rewrite_logical_extent);
   if (extent->has_been_invalidated()) {
@@ -794,6 +796,7 @@ TransactionManager::rewrite_logical_extent(
       extent->get_user_hint(),
       // get target rewrite generation
       extent->get_rewrite_generation(),
+      paddr_hint,
       is_tracked)->cast<LogicalChildNode>();
     nextent->rewrite(t, *extent, 0);
 
@@ -824,6 +827,7 @@ TransactionManager::rewrite_logical_extent(
       extent->get_paddr(),
       *nextent
     );
+    co_return std::vector<CachedExtentRef>{nextent};
   } else {
     assert(get_extent_category(extent->get_type()) == data_category_t::DATA);
 
@@ -839,6 +843,7 @@ TransactionManager::rewrite_logical_extent(
       extent->get_user_hint(),
       // get target rewrite generation
       extent->get_rewrite_generation(),
+      paddr_hint,
       is_tracked);
     extent_len_t off = 0;
     auto left = extent->get_length();
@@ -887,6 +892,7 @@ TransactionManager::rewrite_logical_extent(
       off += nextent->get_length();
       left -= nextent->get_length();
     }
+    co_return std::move(extents);
   }
 }
 
@@ -953,7 +959,9 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent(
   auto fut = rewrite_extent_iertr::now();
   if (extent->is_logical()) {
     assert(is_logical_type(extent->get_type()));
-    fut = rewrite_logical_extent(t, extent->cast<LogicalChildNode>());
+    fut = rewrite_logical_extent(
+      t, extent->cast<LogicalChildNode>(), P_ADDR_NULL
+    ).discard_result();
   } else if (is_backref_node(extent->get_type())) {
     fut = backref_manager->rewrite_extent(t, extent);
   } else {
@@ -1126,6 +1134,7 @@ TransactionManager::promote_extent(
       orig_ext->get_length(),
       placement_hint_t::HOT,
       INIT_GENERATION,
+      P_ADDR_NULL,
       true);
     t.touch_laddr_prefix(orig_ext->get_laddr().get_object_prefix());
 
@@ -1169,6 +1178,7 @@ TransactionManager::promote_extent(
       orig_ext->get_length(),
       placement_hint_t::HOT,
       INIT_GENERATION,
+      P_ADDR_NULL,
       true);
     auto lext = promoted_extent->cast<LogicalChildNode>();
     lext->set_laddr(orig_ext->get_laddr());
@@ -1203,6 +1213,83 @@ TransactionManager::promote_extent(
     t, mapping, std::move(promoted_extents));
 }
 
+TransactionManager::rewrite_extents_ret TransactionManager::rewrite_extents(
+  Transaction &t,
+  std::vector<CachedExtentRef> &extents,
+  rewrite_gen_t target_generation,
+  sea_time_point modify_time)
+{
+  LOG_PREFIX(TransactionManager::rewrite_extents);
+  return seastar::do_with(
+    P_ADDR_NULL,
+    L_ADDR_NULL,
+    [this, &t, target_generation, modify_time, &extents, FNAME]
+    (auto &paddr_hint, auto &next_laddr) {
+    return trans_intr::do_for_each(
+      extents,
+      [this, &t, target_generation, modify_time, FNAME,
+      &paddr_hint, &next_laddr](auto &extent) {
+      {
+        auto updated = cache->update_extent_from_transaction(t, extent);
+        if (!updated) {
+          DEBUGT("extent is already retired, skipping -- {}", t, *extent);
+          return rewrite_extent_iertr::now();
+        }
+        extent = updated;
+        ceph_assert(!extent->is_pending_io());
+      }
+
+      assert(extent->is_valid() && !extent->is_initial_pending());
+      if (extent->is_stable_dirty()) {
+        if (epm->can_inplace_rewrite(t, extent)) {
+          DEBUGT("delta overwriting extent -- {}", t, *extent);
+          t.add_inplace_rewrite_extent(extent);
+          extent->set_inplace_rewrite_generation();
+          return rewrite_extent_iertr::now();
+        }
+       if (extent->get_version() == 1 && extent->has_mutation()) {
+         t.get_rewrite_stats().account_n_dirty();
+       } else {
+         // extent->get_version() > 1 or DIRTY
+         t.get_rewrite_stats().account_dirty(extent->get_version());
+       }
+        extent->set_target_rewrite_generation(INIT_GENERATION);
+      } else {
+        extent->set_target_rewrite_generation(target_generation);
+        ceph_assert(modify_time != NULL_TIME);
+        extent->set_modify_time(modify_time);
+      }
+
+      if (is_backref_node(extent->get_type())) {
+        DEBUGT("rewriting backref extent -- {}", t, *extent);
+        return backref_manager->rewrite_extent(t, extent);
+      }
+
+      if (extent->get_type() == extent_types_t::ROOT) {
+        DEBUGT("rewriting root extent -- {}", t, *extent);
+        cache->duplicate_for_write(t, extent);
+        return rewrite_extent_iertr::now();
+      }
+
+      if (extent->is_logical()) {
+        auto ext = extent->template cast<LogicalChildNode>();
+        if (next_laddr != ext->get_laddr()) {
+          paddr_hint = P_ADDR_NULL;
+        }
+        next_laddr = (ext->get_laddr() + ext->get_length()).checked_to_laddr();
+        return rewrite_logical_extent(t, ext, paddr_hint
+        ).si_then([&paddr_hint](auto nlextents) {
+          for (auto &nlextent : nlextents) {
+            paddr_hint = nlextent->get_paddr() + nlextent->get_length();
+          }
+        });
+      } else {
+        DEBUGT("rewriting physical extent -- {}", t, *extent);
+        return lba_manager->rewrite_extent(t, extent);
+      }
+    });
+  });
+}
 TransactionManager::demote_region_ret
 TransactionManager::demote_region(
   Transaction &t,
@@ -1218,6 +1305,7 @@ TransactionManager::demote_region(
     demote_region_iertr::pass_further{},
     crimson::ct_error::assert_all("unexpected enoent"));
   demote_region_res_t ret{0, 0, false};
+  std::vector<CachedExtentRef> extents;
   while ((ret.demoted_size + ret.evicted_size) < max_proceed_size) {
     if (it.is_end() || it.get_key().get_object_prefix() != prefix) {
       ret.complete = true;
@@ -1235,8 +1323,7 @@ TransactionManager::demote_region(
       auto extent = co_await read_cursor_by_type(
         t, it.direct_cursor, it.get_extent_type());
       ret.evicted_size += extent->get_length();
-      extent->set_target_rewrite_generation(epm->get_max_hot_gen() + 1);
-      co_await rewrite_logical_extent(t, extent);
+      extents.push_back(extent);
       it = co_await it.next();
     } else {
       DEBUGT("skip {}", t, it);
@@ -1244,6 +1331,10 @@ TransactionManager::demote_region(
     }
   }
 
+  co_await rewrite_extents(
+    t, extents, epm->get_max_hot_gen() + 1,
+    seastar::lowres_system_clock::now());
+
   co_return ret;
 }
 
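diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h
index 533c49e77ffa2123b1b6de8c6dd095ac5d17a3f7..4327c572f1a814340c4097a4c53b2a515d79a3b4 100644 (file)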
@@ -615,7 +615,7 @@ public:
     auto exts = cache->alloc_new_data_extents<T>(
       t, len,
       {
-        placement_hint, INIT_GENERATION, false,
+        placement_hint, INIT_GENERATION, false, P_ADDR_NULL,
         epm->get_write_policy(T::TYPE, len)
       });
     // user must initialize the logical extent themselves
@@ -948,6 +948,13 @@ public:
     rewrite_gen_t target_generation,
     sea_time_point modify_time) final;
 
+  using ExtentCallbackInterface::rewrite_extents_ret;
+  rewrite_extents_ret rewrite_extents(
+    Transaction &t,
+    std::vector<CachedExtentRef> &extents,
+    rewrite_gen_t target_generation,
+    sea_time_point modify_time) final;
+
   using ExtentCallbackInterface::promote_extent_ret;
   promote_extent_ret promote_extent(
     Transaction &t,
@@ -1658,9 +1665,11 @@ private:
     Transaction &t,
     LBAMapping mapping);
 
-  rewrite_extent_ret rewrite_logical_extent(
+  rewrite_extent_iertr::future<std::vector<CachedExtentRef>>
+  rewrite_logical_extent(
     Transaction& t,
-    LogicalChildNodeRef extent);
+    LogicalChildNodeRef extent,
+    paddr_t hint);
 
   submit_transaction_direct_ret do_submit_transaction(
     Transaction &t,
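diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc
index 291c845a18edaa40982bd3d464b03652b7ce9f8d..178b28375a878022bc19b065e395618749a6f28a 100644 (file)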
@@ -328,7 +328,7 @@ struct lba_btree_test : btree_test_base {
     lba_btree_update([=, this](auto &btree, auto &t) {
       auto extents = cache->alloc_new_data_extents<TestBlock>(
          t, TestBlock::SIZE,
-         {placement_hint_t::HOT, 0, false,
+         {placement_hint_t::HOT, 0, false, P_ADDR_NULL,
           write_policy_t::WRITE_BACK, get_paddr()});
       return seastar::do_with(
        std::move(extents),
@@ -547,7 +547,7 @@ struct btree_lba_manager_test : btree_test_base {
       [=, this](auto &t) {
        auto extents = cache->alloc_new_data_extents<TestBlock>(
            t, TestBlock::SIZE,
-           {placement_hint_t::HOT, 0, false,
+           {placement_hint_t::HOT, 0, false, P_ADDR_NULL,
             write_policy_t::WRITE_BACK, get_paddr()});
        return seastar::do_with(
          std::vector<LogicalChildNodeRef>(
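diff --git a/src/test/crimson/seastore/test_extent_allocator.cc b/src/test/crimson/seastore/test_extent_allocator.cc
index 17eb105e7b2102af26c995d639335ff4bdb0d43f..63c86e660c1c28e56cc1c67d95c314f8532951a4 100644 (file)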
@@ -58,7 +58,7 @@ struct allocator_test_t :
     return allocator->alloc_extent(size);
   }
   auto allocates(size_t size) {
-    return allocator->alloc_extents(size);
+    return allocator->alloc_extents(size, 0);
   }
   void free(uint64_t start, uint64_t length) {
     allocator->free_extent(start, length);