From: Yingxin Cheng Date: Thu, 27 Jan 2022 07:00:44 +0000 (+0800) Subject: crimson/os/seastore: allow EPM to make decisions on the general extent allocation... X-Git-Tag: v18.0.0~1401^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=32da0e0ecb70224d005deb13bda8808f2665a811;p=ceph.git crimson/os/seastore: allow EPM to make decisions on the general extent allocation path Signed-off-by: Yingxin Cheng --- diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index fd07e98bc67d..127088124153 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -855,37 +855,37 @@ CachedExtentRef Cache::alloc_new_extent_by_type( Transaction &t, ///< [in, out] current transaction extent_types_t type, ///< [in] type tag seastore_off_t length, ///< [in] length - bool delay ///< [in] whether to delay paddr alloc + placement_hint_t hint ) { LOG_PREFIX(Cache::alloc_new_extent_by_type); - SUBDEBUGT(seastore_cache, "allocate {} {}B, delay={}", - t, type, length, delay); + SUBDEBUGT(seastore_cache, "allocate {} {}B, hint={}", + t, type, length, hint); switch (type) { case extent_types_t::ROOT: ceph_assert(0 == "ROOT is never directly alloc'd"); return CachedExtentRef(); case extent_types_t::LADDR_INTERNAL: - return alloc_new_extent(t, length, delay); + return alloc_new_extent(t, length, hint); case extent_types_t::LADDR_LEAF: - return alloc_new_extent(t, length, delay); + return alloc_new_extent(t, length, hint); case extent_types_t::ONODE_BLOCK_STAGED: - return alloc_new_extent(t, length, delay); + return alloc_new_extent(t, length, hint); case extent_types_t::OMAP_INNER: - return alloc_new_extent(t, length, delay); + return alloc_new_extent(t, length, hint); case extent_types_t::OMAP_LEAF: - return alloc_new_extent(t, length, delay); + return alloc_new_extent(t, length, hint); case extent_types_t::COLL_BLOCK: - return alloc_new_extent(t, length, delay); + return alloc_new_extent(t, length, hint); 
case extent_types_t::OBJECT_DATA_BLOCK: - return alloc_new_extent(t, length, delay); + return alloc_new_extent(t, length, hint); case extent_types_t::RETIRED_PLACEHOLDER: ceph_assert(0 == "impossible"); return CachedExtentRef(); case extent_types_t::TEST_BLOCK: - return alloc_new_extent(t, length, delay); + return alloc_new_extent(t, length, hint); case extent_types_t::TEST_BLOCK_PHYSICAL: - return alloc_new_extent(t, length, delay); + return alloc_new_extent(t, length, hint); case extent_types_t::NONE: { ceph_assert(0 == "NONE is an invalid extent type"); return CachedExtentRef(); diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index f289794ff90f..524b72707ec4 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -9,15 +9,16 @@ #include "include/buffer.h" -#include "crimson/os/seastore/logging.h" -#include "crimson/os/seastore/seastore_types.h" -#include "crimson/os/seastore/transaction.h" -#include "crimson/os/seastore/segment_manager.h" #include "crimson/common/errorator.h" #include "crimson/os/seastore/cached_extent.h" +#include "crimson/os/seastore/extent_placement_manager.h" +#include "crimson/os/seastore/logging.h" +#include "crimson/os/seastore/random_block_manager.h" #include "crimson/os/seastore/root_block.h" +#include "crimson/os/seastore/seastore_types.h" #include "crimson/os/seastore/segment_cleaner.h" -#include "crimson/os/seastore/random_block_manager.h" +#include "crimson/os/seastore/segment_manager.h" +#include "crimson/os/seastore/transaction.h" namespace crimson::os::seastore { @@ -102,6 +103,10 @@ public: Cache(ExtentReader &reader); ~Cache(); + void set_epm(ExtentPlacementManager& epm) { + p_epm = &epm; + } + /// Creates empty transaction by source TransactionRef create_transaction( Transaction::src_t src, @@ -489,17 +494,20 @@ public: */ template TCachedExtentRef alloc_new_extent( - Transaction &t, ///< [in, out] current transaction - seastore_off_t length, ///< [in] length - bool 
delayed = false ///< [in] whether the paddr allocation of extent is delayed + Transaction &t, ///< [in, out] current transaction + seastore_off_t length, ///< [in] length + placement_hint_t hint = placement_hint_t::HOT ) { LOG_PREFIX(Cache::alloc_new_extent); - SUBDEBUGT(seastore_cache, "allocate {} {}B, delay={}", - t, T::TYPE, length, delayed); - auto ret = CachedExtent::make_cached_extent_ref( - alloc_cache_buf(length)); - t.add_fresh_extent(ret, delayed); + SUBTRACET(seastore_cache, "allocate {} {}B, hint={}", + t, T::TYPE, length, hint); + auto result = p_epm->alloc_new_extent(t, T::TYPE, length, hint); + auto ret = CachedExtent::make_cached_extent_ref(std::move(result.bp)); + ret->set_paddr(result.paddr); + t.add_fresh_extent(ret); ret->state = CachedExtent::extent_state_t::INITIAL_WRITE_PENDING; + SUBDEBUGT(seastore_cache, "allocated {} {}B extent at {}, hint={} -- {}", + t, T::TYPE, length, result.paddr, hint, *ret); return ret; } @@ -512,7 +520,7 @@ public: Transaction &t, ///< [in, out] current transaction extent_types_t type, ///< [in] type tag seastore_off_t length, ///< [in] length - bool delayed = false ///< [in] whether delay addr allocation + placement_hint_t hint = placement_hint_t::HOT ); /** @@ -732,6 +740,7 @@ public: private: ExtentReader &reader; ///< ref to extent reader + ExtentPlacementManager* p_epm = nullptr; RootBlockRef root; ///< ref to current root ExtentIndex extents; ///< set of live extents diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index 498bfdf570a0..71d86e00c8bb 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -1,9 +1,11 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- // vim: ts=8 sw=2 smarttab expandtab -#include "crimson/os/seastore/journal.h" #include "crimson/os/seastore/extent_placement_manager.h" +#include "crimson/os/seastore/lba_manager.h" 
+#include "crimson/os/seastore/segment_cleaner.h" + namespace { seastar::logger& logger() { return crimson::get_logger(ceph_subsys_seastore_tm); @@ -18,13 +20,11 @@ SegmentedAllocator::SegmentedAllocator( SegmentProvider& sp, SegmentManager& sm, LBAManager& lba_manager, - Journal& journal, - Cache& cache) + Journal& journal) : segment_provider(sp), segment_manager(sm), lba_manager(lba_manager), - journal(journal), - cache(cache) + journal(journal) { std::generate_n( std::back_inserter(writers), @@ -35,8 +35,7 @@ SegmentedAllocator::SegmentedAllocator( segment_provider, segment_manager, lba_manager, - journal, - cache}; + journal}; }); } diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index 764844113a37..9d56e3e02635 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -6,10 +6,11 @@ #include "seastar/core/gate.hh" #include "crimson/common/condition_variable.h" -#include "crimson/os/seastore/logging.h" -#include "crimson/os/seastore/cache.h" #include "crimson/os/seastore/cached_extent.h" -#include "crimson/os/seastore/lba_manager.h" +#include "crimson/os/seastore/journal.h" +#include "crimson/os/seastore/logging.h" +#include "crimson/os/seastore/segment_manager.h" +#include "crimson/os/seastore/transaction.h" namespace crimson::os::seastore { @@ -162,6 +163,9 @@ struct open_segment_wrapper_t : public boost::intrusive_ref_counter< using open_segment_wrapper_ref = boost::intrusive_ptr; +class LBAManager; +class SegmentProvider; + /** * SegmentedAllocator * @@ -182,13 +186,11 @@ class SegmentedAllocator : public ExtentAllocator { SegmentProvider& sp, SegmentManager& sm, LBAManager& lba_manager, - Journal& journal, - Cache& cache) + Journal& journal) : segment_provider(sp), segment_manager(sm), lba_manager(lba_manager), - journal(journal), - cache(cache) + journal(journal) {} Writer(Writer &&) = default; @@ -203,8 +205,9 @@ class 
SegmentedAllocator : public ExtentAllocator { }); } private: - using update_lba_mapping_iertr = LBAManager::update_le_mapping_iertr; - using finish_record_iertr = update_lba_mapping_iertr; + using finish_record_ertr = crimson::errorator< + crimson::ct_error::input_output_error>; + using finish_record_iertr = trans_iertr; using finish_record_ret = finish_record_iertr::future<>; finish_record_ret finish_write( Transaction& t, @@ -237,15 +240,13 @@ class SegmentedAllocator : public ExtentAllocator { crimson::condition_variable segment_rotation_guard; seastar::gate writer_guard; bool rolling_segment = false; - Cache& cache; }; public: SegmentedAllocator( SegmentProvider& sp, SegmentManager& sm, LBAManager& lba_manager, - Journal& journal, - Cache& cache); + Journal& journal); Writer &get_writer(placement_hint_t hint) { return writers[std::rand() % writers.size()]; @@ -282,58 +283,50 @@ private: std::vector writers; LBAManager& lba_manager; Journal& journal; - Cache& cache; }; class ExtentPlacementManager { public: ExtentPlacementManager( - Cache& cache, LBAManager& lba_manager - ) : cache(cache), lba_manager(lba_manager) {} + ) : lba_manager(lba_manager) {} - /** - * alloc_new_extent_by_type - * - * Create a new extent, CachedExtent::poffset may not be set - * if a delayed allocation is needed. 
- */ - CachedExtentRef alloc_new_extent_by_type( + struct alloc_result_t { + paddr_t paddr; + bufferptr bp; + }; + alloc_result_t alloc_new_extent( Transaction& t, extent_types_t type, seastore_off_t length, - placement_hint_t hint) { - // only logical extents should fall in this path - assert(is_logical_type(type)); + placement_hint_t hint + ) { assert(hint < placement_hint_t::NUM_HINTS); - auto dtype = get_allocator_type(hint); - // FIXME: set delay for COLD extent when the record overhead is low - bool delay = (hint > placement_hint_t::COLD && - can_delay_allocation(dtype)); - CachedExtentRef extent = cache.alloc_new_extent_by_type( - t, type, length, delay); - extent->hint = hint; - return extent; - } - template< - typename T, - std::enable_if_t, int> = 0> - TCachedExtentRef alloc_new_extent( - Transaction& t, - seastore_off_t length, - placement_hint_t hint) { - // only logical extents should fall in this path - static_assert(is_logical_type(T::TYPE)); - assert(hint < placement_hint_t::NUM_HINTS); - auto dtype = get_allocator_type(hint); + // XXX: bp might be extended to point to different memory (e.g. PMem) + // according to the allocator. + auto bp = ceph::bufferptr( + buffer::create_page_aligned(length)); + bp.zero(); + + if (!is_logical_type(type)) { + // TODO: implement out-of-line strategy for physical extent. + return {make_record_relative_paddr(0), + std::move(bp)}; + } + // FIXME: set delay for COLD extent when the record overhead is low + // NOTE: delay means to delay the decision about whether to write the + // extent as inline or out-of-line extents.
bool delay = (hint > placement_hint_t::COLD && - can_delay_allocation(dtype)); - TCachedExtentRef extent = cache.alloc_new_extent( - t, length, delay); - extent->hint = hint; - return extent; + can_delay_allocation(get_allocator_type(hint))); + if (delay) { + return {make_delayed_temp_paddr(0), + std::move(bp)}; + } else { + return {make_record_relative_paddr(0), + std::move(bp)}; + } } /** @@ -395,7 +388,6 @@ private: return devices[std::rand() % devices.size()]; } - Cache& cache; LBAManager& lba_manager; std::map> allocators; }; diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index 2702e4973097..bc3e5ca7674f 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -1418,7 +1418,8 @@ seastar::future> make_seastore( auto cache = std::make_unique(scanner_ref); auto lba_manager = lba_manager::create_lba_manager(*sm, *cache); - auto epm = std::make_unique(*cache, *lba_manager); + auto epm = std::make_unique(*lba_manager); + cache->set_epm(*epm); journal->set_segment_provider(&*segment_cleaner); diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h index f0d35fc9e19b..409074441284 100644 --- a/src/crimson/os/seastore/transaction.h +++ b/src/crimson/os/seastore/transaction.h @@ -116,15 +116,16 @@ public: } void add_fresh_extent( - CachedExtentRef ref, - bool delayed = false) { + CachedExtentRef ref) { ceph_assert(!is_weak()); - if (delayed) { + if (ref->get_paddr().is_delayed()) { + assert(ref->get_paddr() == make_delayed_temp_paddr(0)); assert(ref->is_logical()); ref->set_paddr(make_delayed_temp_paddr(delayed_temp_offset)); delayed_temp_offset += ref->get_length(); delayed_alloc_list.emplace_back(ref->cast()); } else { + assert(ref->get_paddr() == make_record_relative_paddr(0)); ref->set_paddr(make_record_relative_paddr(offset)); offset += ref->get_length(); inline_block_list.push_back(ref); diff --git a/src/crimson/os/seastore/transaction_manager.cc 
b/src/crimson/os/seastore/transaction_manager.cc index 25252b34afd8..db37abdd0963 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -381,7 +381,7 @@ TransactionManager::rewrite_logical_extent( auto lextent = extent->cast(); cache->retire_extent(t, extent); - auto nlextent = epm->alloc_new_extent_by_type( + auto nlextent = cache->alloc_new_extent_by_type( t, lextent->get_type(), lextent->get_length(), diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 34f32e39217f..a5bb254badb8 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -304,7 +304,7 @@ public: LOG_PREFIX(TransactionManager::alloc_extent); SUBTRACET(seastore_tm, "{} len={}, placement_hint={}, laddr_hint={}", t, T::TYPE, len, placement_hint, laddr_hint); - auto ext = epm->alloc_new_extent( + auto ext = cache->alloc_new_extent( t, len, placement_hint); @@ -551,8 +551,7 @@ public: *segment_cleaner, *sm, *lba_manager, - *journal, - *cache)); + *journal)); } ~TransactionManager();