From 300d27065b8f4a9f728a9fb0541a3174057cfe4c Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Fri, 8 Oct 2021 10:47:13 +0800 Subject: [PATCH] crimson/os/seastore: implement placement_hint HOT/COLD/REWRITE The current strategy is to allocate non-REWRITE extents inline in order to reduce the record overhead. Observation shows that transactions from users are usually very small and don't deserve to be written in ool records, which require at least a block to store the record metadata. Signed-off-by: Yingxin Cheng --- .../os/seastore/extent_placement_manager.h | 55 ++++++------------- src/crimson/os/seastore/seastore_types.h | 6 +- .../os/seastore/transaction_manager.cc | 3 +- src/crimson/os/seastore/transaction_manager.h | 20 +++++-- 4 files changed, 37 insertions(+), 47 deletions(-) diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index 4bd917eda0888..b4b00ef491913 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -307,12 +307,14 @@ public: Transaction& t, extent_types_t type, segment_off_t length, - placement_hint_t hint = placement_hint_t::NONE) { + placement_hint_t hint) { // only logical extents should fall in this path assert(is_logical_type(type)); assert(hint < placement_hint_t::NUM_HINTS); auto dtype = get_allocator_type(hint); - bool delay = can_delay_allocation(dtype); + // FIXME: set delay for COLD extent when the record overhead is low + bool delay = (hint > placement_hint_t::COLD && + can_delay_allocation(dtype)); CachedExtentRef extent = cache.alloc_new_extent_by_type( t, type, length, delay); extent->backend_type = dtype; @@ -326,12 +328,14 @@ public: TCachedExtentRef alloc_new_extent( Transaction& t, segment_off_t length, - placement_hint_t hint = placement_hint_t::NONE) { + placement_hint_t hint) { // only logical extents should fall in this path static_assert(is_logical_type(T::TYPE)); assert(hint <
placement_hint_t::NUM_HINTS); auto dtype = get_allocator_type(hint); - bool delay = can_delay_allocation(dtype); + // FIXME: set delay for COLD extent when the record overhead is low + bool delay = (hint > placement_hint_t::COLD && + can_delay_allocation(dtype)); TCachedExtentRef extent = cache.alloc_new_extent( t, length, delay); extent->backend_type = dtype; @@ -351,9 +355,8 @@ public: LOG_PREFIX(ExtentPlacementManager::delayed_alloc_or_ool_write); DEBUGT("start", t); return seastar::do_with( - std::map>(), - std::list>(), - [this, &t](auto& alloc_map, auto& inline_list) mutable { + std::map>(), + [this, &t](auto& alloc_map) { LOG_PREFIX(ExtentPlacementManager::delayed_alloc_or_ool_write); auto& alloc_list = t.get_delayed_alloc_list(); uint64_t num_ool_extents = 0; @@ -363,38 +366,18 @@ public: t.increment_delayed_invalid_extents(); continue; } - if (should_be_inline(extent)) { - auto old_addr = extent->get_paddr(); - cache.mark_delayed_extent_inline(t, extent); - inline_list.emplace_back(old_addr, extent); - } else { - auto& allocator_ptr = get_allocator( - extent->backend_type, extent->hint - ); - alloc_map[allocator_ptr.get()].emplace_back(extent); - num_ool_extents++; - } + // For now, just do ool allocation for any delayed extent + auto& allocator_ptr = get_allocator( + extent->backend_type, extent->hint + ); + alloc_map[allocator_ptr.get()].emplace_back(extent); + num_ool_extents++; } - DEBUGT("{} inline extents, {} ool extents", - t, - inline_list.size(), - num_ool_extents); + DEBUGT("{} ool extents", t, num_ool_extents); return trans_intr::do_for_each(alloc_map, [&t](auto& p) { auto allocator = p.first; auto& extents = p.second; return allocator->alloc_ool_extents_paddr(t, extents); - }).si_then([&inline_list, this, &t] { - LOG_PREFIX(ExtentPlacementManager::delayed_alloc_or_ool_write); - DEBUGT("processing {} inline extents", t, inline_list.size()); - return trans_intr::do_for_each(inline_list, [this, &t](auto& p) { - auto old_addr = p.first; - auto& 
extent = p.second; - return lba_manager.update_mapping( - t, - extent->get_laddr(), - old_addr, - extent->get_paddr()); - }); }); }); } @@ -408,10 +391,6 @@ private: return device_type_t::SEGMENTED; } - bool should_be_inline(LogicalCachedExtentRef& extent) { - return (std::rand() % 2) == 0; - } - ExtentAllocatorRef& get_allocator( device_type_t type, placement_hint_t hint) { diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index f912f6979cfbc..1f53cc5f18055 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -245,8 +245,10 @@ constexpr objaddr_t OBJ_ADDR_MAX = std::numeric_limits::max(); constexpr objaddr_t OBJ_ADDR_NULL = OBJ_ADDR_MAX - 1; enum class placement_hint_t { - NONE, /// Denotes empty hint - NUM_HINTS /// Constant for number of hints + HOT = 0, // Most of the metadata + COLD, // Object data + REWRITE, // Cold metadata and data (probably need further splits) + NUM_HINTS // Constant for number of hints }; enum device_type_t { diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index d202b934463b5..170742df09ed3 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -323,7 +323,8 @@ TransactionManager::rewrite_logical_extent( auto nlextent = epm->alloc_new_extent_by_type( t, lextent->get_type(), - lextent->get_length())->cast(); + lextent->get_length(), + placement_hint_t::REWRITE)->cast(); lextent->get_bptr().copy_out( 0, lextent->get_length(), diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index cbdb867c854ee..80d1459b0e227 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -274,7 +274,7 @@ public: * alloc_extent * * Allocates a new block of type T with the minimum lba range of size len - * greater than hint. 
+ * greater than laddr_hint. */ using alloc_extent_iertr = LBAManager::alloc_extent_iertr; template @@ -282,22 +282,30 @@ public: template alloc_extent_ret alloc_extent( Transaction &t, - laddr_t hint, + laddr_t laddr_hint, extent_len_t len) { + placement_hint_t placement_hint; + if constexpr (T::TYPE == extent_types_t::OBJECT_DATA_BLOCK || + T::TYPE == extent_types_t::COLL_BLOCK) { + placement_hint = placement_hint_t::COLD; + } else { + placement_hint = placement_hint_t::HOT; + } auto ext = epm->alloc_new_extent( t, - len); + len, + placement_hint); return lba_manager->alloc_extent( t, - hint, + laddr_hint, len, ext->get_paddr() - ).si_then([ext=std::move(ext), len, hint, &t, this](auto &&ref) mutable { + ).si_then([ext=std::move(ext), len, laddr_hint, &t, this](auto &&ref) mutable { LOG_PREFIX(TransactionManager::alloc_extent); ext->set_pin(std::move(ref)); stats.extents_allocated_total++; stats.extents_allocated_bytes += len; - DEBUGT("new extent: {}, hint: {}", t, *ext, hint); + DEBUGT("new extent: {}, laddr_hint: {}", t, *ext, laddr_hint); return alloc_extent_iertr::make_ready_future>( std::move(ext)); }); -- 2.39.5