From: Xuehan Xu Date: Wed, 3 Sep 2025 08:10:28 +0000 (+0800) Subject: crimson/os/seastore/random_block_manager: try to allocate consecutive rbm space when... X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d470d41740b25418e26cead78777e086adae8ad1;p=ceph-ci.git crimson/os/seastore/random_block_manager: try to allocate consecutive rbm space when rewriting extents Signed-off-by: Xuehan Xu --- diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc index e6f134c5f50..593b6ddb46c 100644 --- a/src/crimson/os/seastore/async_cleaner.cc +++ b/src/crimson/os/seastore/async_cleaner.cc @@ -1301,15 +1301,11 @@ do_reclaim_space_ret do_reclaim_space( &reclaimed, &t, modify_time, target_generation] { DEBUGT("reclaim {} extents", t, extents.size()); // rewrite live extents - return trans_intr::do_for_each( - extents, - [&extent_callback, modify_time, &t, - &reclaimed, target_generation](auto ext) - { - reclaimed += ext->get_length(); - return extent_callback.rewrite_extent( - t, ext, target_generation, modify_time); - }); + for (auto &ext : extents) { + reclaimed += ext->get_length(); + } + return extent_callback.rewrite_extents( + t, extents, target_generation, modify_time); }); }).si_then([&extent_callback, &t] { return extent_callback.submit_transaction_direct(t); diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h index 5dd9785ebda..0ecf9ad7079 100644 --- a/src/crimson/os/seastore/async_cleaner.h +++ b/src/crimson/os/seastore/async_cleaner.h @@ -351,6 +351,14 @@ public: rewrite_gen_t target_generation, sea_time_point modify_time) = 0; + using rewrite_extents_iertr = base_iertr; + using rewrite_extents_ret = rewrite_extents_iertr::future<>; + virtual rewrite_extents_ret rewrite_extents( + Transaction &t, + std::vector &extents, + rewrite_gen_t target_generation, + sea_time_point modify_time) = 0; + /** * promote_extent * @@ -1868,10 +1876,11 @@ public: return paddr; } - std::list alloc_paddrs(extent_len_t length) { + std::list alloc_paddrs( + extent_len_t length, paddr_t hint) { // TODO: implement allocation strategy (dirty metadata and multiple devices) auto rbs = rb_group->get_rb_managers(); - auto ret = rbs[0]->alloc_extents(length); + auto ret = rbs[0]->alloc_extents(length, hint); if (!ret.empty()) { stats.used_bytes += length; } diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 60403e3b9f9..c42111ced13 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1214,6 +1214,7 @@ CachedExtentRef Cache::alloc_new_non_data_extent_by_type( extent_len_t length, ///< [in] length placement_hint_t hint, ///< [in] user hint rewrite_gen_t gen, ///< [in] rewrite generation + paddr_t paddr_hint, bool is_tracked ) { @@ -1221,7 +1222,7 @@ CachedExtentRef Cache::alloc_new_non_data_extent_by_type( SUBDEBUGT(seastore_cache, "allocate {} 0x{:x}B, hint={}, gen={}", t, type, length, hint, rewrite_gen_printer_t{gen}); ceph_assert(get_extent_category(type) == data_category_t::METADATA); - auto opt = alloc_option_t{hint, gen, is_tracked}; + auto opt = alloc_option_t{hint, gen, is_tracked, paddr_hint}; switch (type) { case extent_types_t::ROOT: ceph_assert(0 == "ROOT is never directly alloc'd"); @@ -1267,6 +1268,7 @@ std::vector Cache::alloc_new_data_extents_by_type( extent_len_t length, ///< [in] length placement_hint_t hint, ///< [in] user hint rewrite_gen_t gen, ///< [in] rewrite generation + paddr_t paddr_hint, bool is_tracked ) { @@ -1279,7 +1281,7 @@ std::vector Cache::alloc_new_data_extents_by_type( case extent_types_t::OBJECT_DATA_BLOCK: { auto extents = alloc_new_data_extents< - ObjectDataBlock>(t, length, {hint, gen, is_tracked, + ObjectDataBlock>(t, length, {hint, gen, is_tracked, paddr_hint, epm.get_write_policy(type, length)}); res.insert(res.begin(), extents.begin(), extents.end()); } @@ -1287,7 +1289,7 @@ std::vector Cache::alloc_new_data_extents_by_type( case extent_types_t::TEST_BLOCK: { auto extents = alloc_new_data_extents< - TestBlock>(t, length, {hint, gen, is_tracked, + TestBlock>(t, length, {hint, gen, is_tracked, paddr_hint, epm.get_write_policy(type, length)}); res.insert(res.begin(), extents.begin(), extents.end()); } diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index 37696d8e7f2..1b41f50f886 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -1242,6 +1242,7 @@ public: extent_len_t length, ///< [in] length placement_hint_t hint, ///< [in] user hint rewrite_gen_t gen, ///< [in] rewrite generation + paddr_t paddr_hint, bool is_tracked ); @@ -1256,6 +1257,7 @@ public: extent_len_t length, ///< [in] length placement_hint_t hint, ///< [in] user hint rewrite_gen_t gen, ///< [in] rewrite generation + paddr_t paddr_hint, bool is_tracked ); diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index 3027f645a0f..1672794e5c2 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -107,7 +107,9 @@ public: virtual paddr_t alloc_paddr(extent_len_t length) = 0; - virtual std::list alloc_paddrs(extent_len_t length) = 0; + virtual std::list alloc_paddrs( + extent_len_t length, + paddr_t hint) = 0; using alloc_write_ertr = base_ertr; using alloc_write_iertr = trans_iertr; @@ -170,7 +172,7 @@ public: return make_delayed_temp_paddr(0); } - std::list alloc_paddrs(extent_len_t length) final { + std::list alloc_paddrs(extent_len_t length, paddr_t) final { return {alloc_paddr_result{make_delayed_temp_paddr(0), length}}; } @@ -237,9 +239,10 @@ public: return rb_cleaner->alloc_paddr(length); } - std::list alloc_paddrs(extent_len_t length) final { + std::list alloc_paddrs( + extent_len_t length, paddr_t hint) final { assert(rb_cleaner); - return rb_cleaner->alloc_paddrs(length); + return rb_cleaner->alloc_paddrs(length, hint); } bool can_inplace_rewrite(Transaction& t, @@ -431,6 +434,7 @@ public: placement_hint_t hint; rewrite_gen_t gen; bool is_tracked; + paddr_t paddr_hint = P_ADDR_NULL; write_policy_t write_policy = write_policy_t::WRITE_BACK; #ifdef UNIT_TESTS_BUILT std::optional external_paddr = std::nullopt; @@ -509,7 +513,8 @@ public: #endif { assert(category == data_category_t::DATA); - auto addrs = get_writer(opt.hint, category, opt.gen)->alloc_paddrs(length); + auto addrs = get_writer(opt.hint, category, opt.gen)->alloc_paddrs( + length, opt.paddr_hint); for (auto &ext : addrs) { auto left = ext.len; while (left > 0) { diff --git a/src/crimson/os/seastore/random_block_manager.h b/src/crimson/os/seastore/random_block_manager.h index f776483edf2..8ccdbf1639f 100644 --- a/src/crimson/os/seastore/random_block_manager.h +++ b/src/crimson/os/seastore/random_block_manager.h @@ -77,7 +77,7 @@ public: using allocate_ret_bare = std::list; using allo_extents_ret = allocate_ertr::future; - virtual allocate_ret_bare alloc_extents(size_t size) = 0; + virtual allocate_ret_bare alloc_extents(size_t size, paddr_t hint) = 0; virtual void mark_space_used(paddr_t paddr, size_t len) = 0; virtual void mark_space_free(paddr_t paddr, size_t len) = 0; diff --git a/src/crimson/os/seastore/random_block_manager/avlallocator.cc b/src/crimson/os/seastore/random_block_manager/avlallocator.cc index 3f8df06f9d6..cc9f2d1f6a7 100644 --- a/src/crimson/os/seastore/random_block_manager/avlallocator.cc +++ b/src/crimson/os/seastore/random_block_manager/avlallocator.cc @@ -97,6 +97,15 @@ extent_len_t AvlAllocator::find_block( if (p != extent_size_tree.rend()) { max_size = p->end - p->start; } + const auto compare = extent_tree.key_comp(); + auto rs = extent_tree.lower_bound(extent_range_t{start, size}, compare); + if (rs != extent_tree.end()) { + uint64_t offset = rs->start; + if (offset + size <= rs->end) { + start = offset; + return size; + } + } assert(max_size); if (max_size <= size) { @@ -203,7 +212,7 @@ std::optional> AvlAllocator::alloc_extent( } std::optional> AvlAllocator::alloc_extents( - size_t size) + size_t size, rbm_abs_addr hint) { LOG_PREFIX(AvlAllocator::alloc_extents); if (available_size < size) { @@ -217,10 +226,10 @@ std::optional> AvlAllocator::alloc_extents( interval_set result; - auto try_to_alloc_block = [this, &result, FNAME] (uint64_t alloc_size) + auto try_to_alloc_block = [this, hint, &result, FNAME] (uint64_t alloc_size) { + rbm_abs_addr start = hint; while (alloc_size) { - rbm_abs_addr start = 0; extent_len_t len = find_block(std::min(max_alloc_size, alloc_size), start); ceph_assert(len); _remove_from_tree(start, len); diff --git a/src/crimson/os/seastore/random_block_manager/avlallocator.h b/src/crimson/os/seastore/random_block_manager/avlallocator.h index 8f4eedc4f7a..dab0e36be5b 100644 --- a/src/crimson/os/seastore/random_block_manager/avlallocator.h +++ b/src/crimson/os/seastore/random_block_manager/avlallocator.h @@ -65,7 +65,7 @@ public: std::optional> alloc_extent( size_t size) final; std::optional> alloc_extents( - size_t size) final; + size_t size, rbm_abs_addr hint) final; void free_extent(rbm_abs_addr addr, size_t size) final; void mark_extent_used(rbm_abs_addr addr, size_t size) final; diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc index 74bcbddb917..c1604a40f73 100644 --- a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc +++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc @@ -66,11 +66,13 @@ paddr_t BlockRBManager::alloc_extent(size_t size) } BlockRBManager::allocate_ret_bare -BlockRBManager::alloc_extents(size_t size) +BlockRBManager::alloc_extents(size_t size, paddr_t hint) { LOG_PREFIX(BlockRBManager::alloc_extents); assert(allocator); - auto alloc = allocator->alloc_extents(size); + rbm_abs_addr rbm_hint = + (hint == P_ADDR_NULL ? 0 : convert_paddr_to_abs_addr(hint)); + auto alloc = allocator->alloc_extents(size, rbm_hint); if (!alloc) { return {}; } diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.h b/src/crimson/os/seastore/random_block_manager/block_rb_manager.h index 03b2285d8ce..09ad50f75bd 100644 --- a/src/crimson/os/seastore/random_block_manager/block_rb_manager.h +++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.h @@ -58,7 +58,8 @@ public: */ paddr_t alloc_extent(size_t size) override; // allocator, return blocks - allocate_ret_bare alloc_extents(size_t size) override; // allocator, return blocks + allocate_ret_bare alloc_extents( + size_t size, paddr_t hint) override; // allocator, return blocks void complete_allocation(paddr_t addr, size_t size) override; diff --git a/src/crimson/os/seastore/random_block_manager/extent_allocator.h b/src/crimson/os/seastore/random_block_manager/extent_allocator.h index 2797b8822fd..d0a06fd4cf0 100644 --- a/src/crimson/os/seastore/random_block_manager/extent_allocator.h +++ b/src/crimson/os/seastore/random_block_manager/extent_allocator.h @@ -35,7 +35,7 @@ public: * */ virtual std::optional> alloc_extents( - size_t size) = 0; + size_t size, rbm_abs_addr hint) = 0; /** * free_extent diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 1633df14fb3..73f52fb1189 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -765,10 +765,12 @@ TransactionManager::get_next_dirty_extents( return cache->get_next_dirty_extents(t, seq, max_bytes); } -TransactionManager::rewrite_extent_ret +TransactionManager::rewrite_extent_iertr::future< + std::vector> TransactionManager::rewrite_logical_extent( Transaction& t, - LogicalChildNodeRef extent) + LogicalChildNodeRef extent, + paddr_t paddr_hint) { LOG_PREFIX(TransactionManager::rewrite_logical_extent); if (extent->has_been_invalidated()) { @@ -794,6 +796,7 @@ TransactionManager::rewrite_logical_extent( extent->get_user_hint(), // get target rewrite generation extent->get_rewrite_generation(), + paddr_hint, is_tracked)->cast(); nextent->rewrite(t, *extent, 0); @@ -824,6 +827,7 @@ TransactionManager::rewrite_logical_extent( extent->get_paddr(), *nextent ); + co_return std::vector{nextent}; } else { assert(get_extent_category(extent->get_type()) == data_category_t::DATA); @@ -839,6 +843,7 @@ TransactionManager::rewrite_logical_extent( extent->get_user_hint(), // get target rewrite generation extent->get_rewrite_generation(), + paddr_hint, is_tracked); extent_len_t off = 0; auto left = extent->get_length(); @@ -887,6 +892,7 @@ TransactionManager::rewrite_logical_extent( off += nextent->get_length(); left -= nextent->get_length(); } + co_return std::move(extents); } } @@ -953,7 +959,9 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent( auto fut = rewrite_extent_iertr::now(); if (extent->is_logical()) { assert(is_logical_type(extent->get_type())); - fut = rewrite_logical_extent(t, extent->cast()); + fut = rewrite_logical_extent( + t, extent->cast(), P_ADDR_NULL + ).discard_result(); } else if (is_backref_node(extent->get_type())) { fut = backref_manager->rewrite_extent(t, extent); } else { @@ -1126,6 +1134,7 @@ TransactionManager::promote_extent( orig_ext->get_length(), placement_hint_t::HOT, INIT_GENERATION, + P_ADDR_NULL, true); t.touch_laddr_prefix(orig_ext->get_laddr().get_object_prefix()); @@ -1169,6 +1178,7 @@ TransactionManager::promote_extent( orig_ext->get_length(), placement_hint_t::HOT, INIT_GENERATION, + P_ADDR_NULL, true); auto lext = promoted_extent->cast(); lext->set_laddr(orig_ext->get_laddr()); @@ -1203,6 +1213,83 @@ TransactionManager::promote_extent( t, mapping, std::move(promoted_extents)); } +TransactionManager::rewrite_extents_ret TransactionManager::rewrite_extents( + Transaction &t, + std::vector &extents, + rewrite_gen_t target_generation, + sea_time_point modify_time) +{ + LOG_PREFIX(TransactionManager::rewrite_extents); + return seastar::do_with( + P_ADDR_NULL, + L_ADDR_NULL, + [this, &t, target_generation, modify_time, &extents, FNAME] + (auto &paddr_hint, auto &next_laddr) { + return trans_intr::do_for_each( + extents, + [this, &t, target_generation, modify_time, FNAME, + &paddr_hint, &next_laddr](auto &extent) { + { + auto updated = cache->update_extent_from_transaction(t, extent); + if (!updated) { + DEBUGT("extent is already retired, skipping -- {}", t, *extent); + return rewrite_extent_iertr::now(); + } + extent = updated; + ceph_assert(!extent->is_pending_io()); + } + + assert(extent->is_valid() && !extent->is_initial_pending()); + if (extent->is_stable_dirty()) { + if (epm->can_inplace_rewrite(t, extent)) { + DEBUGT("delta overwriting extent -- {}", t, *extent); + t.add_inplace_rewrite_extent(extent); + extent->set_inplace_rewrite_generation(); + return rewrite_extent_iertr::now(); + } + if (extent->get_version() == 1 && extent->has_mutation()) { + t.get_rewrite_stats().account_n_dirty(); + } else { + // extent->get_version() > 1 or DIRTY + t.get_rewrite_stats().account_dirty(extent->get_version()); + } + extent->set_target_rewrite_generation(INIT_GENERATION); + } else { + extent->set_target_rewrite_generation(target_generation); + ceph_assert(modify_time != NULL_TIME); + extent->set_modify_time(modify_time); + } + + if (is_backref_node(extent->get_type())) { + DEBUGT("rewriting backref extent -- {}", t, *extent); + return backref_manager->rewrite_extent(t, extent); + } + + if (extent->get_type() == extent_types_t::ROOT) { + DEBUGT("rewriting root extent -- {}", t, *extent); + cache->duplicate_for_write(t, extent); + return rewrite_extent_iertr::now(); + } + + if (extent->is_logical()) { + auto ext = extent->template cast(); + if (next_laddr != ext->get_laddr()) { + paddr_hint = P_ADDR_NULL; + } + next_laddr = (ext->get_laddr() + ext->get_length()).checked_to_laddr(); + return rewrite_logical_extent(t, ext, paddr_hint + ).si_then([&paddr_hint](auto nlextents) { + for (auto &nlextent : nlextents) { + paddr_hint = nlextent->get_paddr() + nlextent->get_length(); + } + }); + } else { + DEBUGT("rewriting physical extent -- {}", t, *extent); + return lba_manager->rewrite_extent(t, extent); + } + }); + }); +} TransactionManager::demote_region_ret TransactionManager::demote_region( Transaction &t, @@ -1218,6 +1305,7 @@ TransactionManager::demote_region( demote_region_iertr::pass_further{}, crimson::ct_error::assert_all("unexpected enoent")); demote_region_res_t ret{0, 0, false}; + std::vector extents; while ((ret.demoted_size + ret.evicted_size) < max_proceed_size) { if (it.is_end() || it.get_key().get_object_prefix() != prefix) { ret.complete = true; @@ -1235,8 +1323,7 @@ TransactionManager::demote_region( auto extent = co_await read_cursor_by_type( t, it.direct_cursor, it.get_extent_type()); ret.evicted_size += extent->get_length(); - extent->set_target_rewrite_generation(epm->get_max_hot_gen() + 1); - co_await rewrite_logical_extent(t, extent); + extents.push_back(extent); it = co_await it.next(); } else { DEBUGT("skip {}", t, it); @@ -1244,6 +1331,10 @@ TransactionManager::demote_region( } } + co_await rewrite_extents( + t, extents, epm->get_max_hot_gen() + 1, + seastar::lowres_system_clock::now()); + co_return ret; } diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 533c49e77ff..4327c572f1a 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -615,7 +615,7 @@ public: auto exts = cache->alloc_new_data_extents( t, len, { - placement_hint, INIT_GENERATION, false, + placement_hint, INIT_GENERATION, false, P_ADDR_NULL, epm->get_write_policy(T::TYPE, len) }); // user must initialize the logical extent themselves @@ -948,6 +948,13 @@ public: rewrite_gen_t target_generation, sea_time_point modify_time) final; + using ExtentCallbackInterface::rewrite_extents_ret; + rewrite_extents_ret rewrite_extents( + Transaction &t, + std::vector &extents, + rewrite_gen_t target_generation, + sea_time_point modify_time) final; + using ExtentCallbackInterface::promote_extent_ret; promote_extent_ret promote_extent( Transaction &t, @@ -1658,9 +1665,11 @@ private: Transaction &t, LBAMapping mapping); - rewrite_extent_ret rewrite_logical_extent( + rewrite_extent_iertr::future> + rewrite_logical_extent( Transaction& t, - LogicalChildNodeRef extent); + LogicalChildNodeRef extent, + paddr_t hint); submit_transaction_direct_ret do_submit_transaction( Transaction &t, diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc index 291c845a18e..178b28375a8 100644 --- a/src/test/crimson/seastore/test_btree_lba_manager.cc +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -328,7 +328,7 @@ struct lba_btree_test : btree_test_base { lba_btree_update([=, this](auto &btree, auto &t) { auto extents = cache->alloc_new_data_extents( t, TestBlock::SIZE, - {placement_hint_t::HOT, 0, false, + {placement_hint_t::HOT, 0, false, P_ADDR_NULL, write_policy_t::WRITE_BACK, get_paddr()}); return seastar::do_with( std::move(extents), @@ -547,7 +547,7 @@ struct btree_lba_manager_test : btree_test_base { [=, this](auto &t) { auto extents = cache->alloc_new_data_extents( t, TestBlock::SIZE, - {placement_hint_t::HOT, 0, false, + {placement_hint_t::HOT, 0, false, P_ADDR_NULL, write_policy_t::WRITE_BACK, get_paddr()}); return seastar::do_with( std::vector( diff --git a/src/test/crimson/seastore/test_extent_allocator.cc b/src/test/crimson/seastore/test_extent_allocator.cc index 17eb105e7b2..63c86e660c1 100644 --- a/src/test/crimson/seastore/test_extent_allocator.cc +++ b/src/test/crimson/seastore/test_extent_allocator.cc @@ -58,7 +58,7 @@ struct allocator_test_t : return allocator->alloc_extent(size); } auto allocates(size_t size) { - return allocator->alloc_extents(size); + return allocator->alloc_extents(size, 0); } void free(uint64_t start, uint64_t length) { allocator->free_extent(start, length);