From bb658f115edf963cc74d1a2e1ab7768cf6e68ef0 Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Fri, 1 Apr 2022 09:42:32 +0800 Subject: [PATCH] crimson/os/seastore/EPM: refactor to index writers by hint Generally, simplify the indexing from: EPM ==(device-id)=> Allocator ==> Writers to: EPM ==(hint)=> Writers This is for the following considerations: * Writer may not be specific to a device; * Faster placement decision making directly by hint; Signed-off-by: Yingxin Cheng --- .../os/seastore/extent_placement_manager.cc | 32 +-- .../os/seastore/extent_placement_manager.h | 238 ++++++------------ .../os/seastore/transaction_manager.cc | 3 + src/crimson/os/seastore/transaction_manager.h | 5 - 4 files changed, 94 insertions(+), 184 deletions(-) diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index 722ffc00511..4450dd6cff1 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -9,15 +9,7 @@ SET_SUBSYS(seastore_journal); namespace crimson::os::seastore { -SegmentedAllocator::SegmentedAllocator( - SegmentProvider &sp, - SegmentSeqAllocator &ssa) - : cold_writer{"COLD", sp, ssa}, - rewrite_writer{"REWRITE", sp, ssa} -{ -} - -SegmentedAllocator::Writer::Writer( +SegmentedOolWriter::SegmentedOolWriter( std::string name, SegmentProvider& sp, SegmentSeqAllocator &ssa) @@ -34,13 +26,13 @@ SegmentedAllocator::Writer::Writer( { } -SegmentedAllocator::Writer::write_ertr::future<> -SegmentedAllocator::Writer::write_record( +SegmentedOolWriter::alloc_write_ertr::future<> +SegmentedOolWriter::write_record( Transaction& t, record_t&& record, std::list&& extents) { - LOG_PREFIX(SegmentedAllocator::Writer::write_record); + LOG_PREFIX(SegmentedOolWriter::write_record); assert(extents.size()); assert(extents.size() == record.extents.size()); assert(!record.deltas.size()); @@ -71,12 +63,12 @@ SegmentedAllocator::Writer::write_record( }); } -SegmentedAllocator::Writer::write_iertr::future<> -SegmentedAllocator::Writer::do_write( +SegmentedOolWriter::alloc_write_iertr::future<> +SegmentedOolWriter::do_write( Transaction& t, std::list& extents) { - LOG_PREFIX(SegmentedAllocator::Writer::do_write); + LOG_PREFIX(SegmentedOolWriter::do_write); assert(!extents.empty()); if (!record_submitter.is_available()) { DEBUGT("{} extents={} wait ...", @@ -114,7 +106,7 @@ SegmentedAllocator::Writer::do_write( DEBUGT("{} extents={} submit {} extents and roll, unavailable ...", t, segment_allocator.get_name(), extents.size(), num_extents); - auto fut_write = write_ertr::now(); + auto fut_write = alloc_write_ertr::now(); if (num_extents > 0) { assert(record_submitter.check_action(record.size) != action_t::ROLL); @@ -163,7 +155,7 @@ SegmentedAllocator::Writer::do_write( if (!extents.empty()) { return do_write(t, extents); } else { - return write_iertr::now(); + return alloc_write_iertr::now(); } }); } @@ -179,13 +171,13 @@ SegmentedAllocator::Writer::do_write( write_record(t, std::move(record), std::move(pending_extents))); } -SegmentedAllocator::Writer::write_iertr::future<> -SegmentedAllocator::Writer::write( +SegmentedOolWriter::alloc_write_iertr::future<> +SegmentedOolWriter::alloc_write_ool_extents( Transaction& t, std::list& extents) { if (extents.empty()) { - return write_iertr::now(); + return alloc_write_iertr::now(); } return seastar::with_gate(write_guard, [this, &t, &extents] { return do_write(t, extents); diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index a1fdf4db7f5..30dbae086ec 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -15,7 +15,8 @@ namespace crimson::os::seastore { /** * ExtentOolWriter * - * Interface through which final write to ool segment is performed. + * Write the extents as out-of-line and allocate the physical addresses. + * Different writers write extents to different locations. */ class ExtentOolWriter { using base_ertr = crimson::errorator< @@ -26,145 +27,60 @@ public: using open_ertr = base_ertr; virtual open_ertr::future<> open() = 0; - using write_ertr = base_ertr; - using write_iertr = trans_iertr; - virtual write_iertr::future<> write( - Transaction& t, - std::list& extent) = 0; + using alloc_write_ertr = base_ertr; + using alloc_write_iertr = trans_iertr; + virtual alloc_write_iertr::future<> alloc_write_ool_extents( + Transaction &t, + std::list &extents) = 0; - using stop_ertr = base_ertr; - virtual stop_ertr::future<> stop() = 0; + using close_ertr = base_ertr; + virtual close_ertr::future<> close() = 0; }; - -/** - * ExtentAllocator - * - * Handles allocating ool extents from a specific family of targets. - */ -class ExtentAllocator { -public: - using open_ertr = ExtentOolWriter::open_ertr; - virtual open_ertr::future<> open() = 0; - - using alloc_paddr_iertr = ExtentOolWriter::write_iertr; - virtual alloc_paddr_iertr::future<> alloc_ool_extents_paddr( - Transaction& t, - std::list&) = 0; - - using stop_ertr = ExtentOolWriter::stop_ertr; - virtual stop_ertr::future<> stop() = 0; - virtual ~ExtentAllocator() {}; -}; -using ExtentAllocatorRef = std::unique_ptr; +using ExtentOolWriterRef = std::unique_ptr; class SegmentProvider; /** - * SegmentedAllocator - * - * Handles out-of-line writes to a SegmentManager device (such as a ZNS device - * or conventional flash device where sequential writes are heavily preferred). + * SegmentedOolWriter * - * Creates Writer instances - * internally to round-robin writes. Later work will partition allocations - * based on hint (age, presumably) among the created Writers. - - * Each Writer makes use of SegmentProvider to obtain a new segment for writes - * as needed. + * Different writers write extents to different out-of-line segments provided + * by the SegmentProvider. */ -class SegmentedAllocator : public ExtentAllocator { - class Writer : public ExtentOolWriter { - public: - Writer(std::string name, - SegmentProvider &sp, - SegmentSeqAllocator &ssa); - Writer(Writer &&) = default; - - open_ertr::future<> open() final { - return record_submitter.open().discard_result(); - } - - write_iertr::future<> write( - Transaction& t, - std::list& extent) final; - - stop_ertr::future<> stop() final { - return write_guard.close().then([this] { - return record_submitter.close(); - }).safe_then([this] { - write_guard = seastar::gate(); - }); - } - - private: - write_iertr::future<> do_write( - Transaction& t, - std::list& extent); - - write_ertr::future<> write_record( - Transaction& t, - record_t&& record, - std::list&& extents); - - journal::SegmentAllocator segment_allocator; - journal::RecordSubmitter record_submitter; - seastar::gate write_guard; - }; +class SegmentedOolWriter : public ExtentOolWriter { public: - SegmentedAllocator( - SegmentProvider &sp, - SegmentSeqAllocator &ssa); + SegmentedOolWriter(std::string name, + SegmentProvider &sp, + SegmentSeqAllocator &ssa); - Writer &get_writer(placement_hint_t hint) { - assert(hint >= placement_hint_t::COLD); - assert(hint < placement_hint_t::NUM_HINTS); - if (hint == placement_hint_t::COLD) { - return cold_writer; - } else { - assert(hint == placement_hint_t::REWRITE); - return rewrite_writer; - } + open_ertr::future<> open() final { + return record_submitter.open().discard_result(); } - open_ertr::future<> open() { - return cold_writer.open( - ).safe_then([this] { - return rewrite_writer.open(); - }); - } + alloc_write_iertr::future<> alloc_write_ool_extents( + Transaction &t, + std::list &extents) final; - alloc_paddr_iertr::future<> alloc_ool_extents_paddr( - Transaction& t, - std::list& extents) final { - LOG_PREFIX(SegmentedAllocator::alloc_ool_extents_paddr); - SUBDEBUGT(seastore_journal, "start", t); - return seastar::do_with( - std::map>(), - [this, extents=std::move(extents), &t](auto& alloc_map) { - for (auto& extent : extents) { - auto writer = &(get_writer(extent->hint)); - alloc_map[writer].emplace_back(extent); - } - return trans_intr::do_for_each(alloc_map, [&t](auto& p) { - auto writer = p.first; - auto& extents_to_pesist = p.second; - return writer->write(t, extents_to_pesist); - }); + close_ertr::future<> close() final { + return write_guard.close().then([this] { + return record_submitter.close(); + }).safe_then([this] { + write_guard = seastar::gate(); }); } - stop_ertr::future<> stop() { - return cold_writer.stop( - ).safe_then([this] { - return rewrite_writer.stop(); - }); - } private: - // TODO: - // - hot_writer - // - a map of hint -> writer - Writer cold_writer; - Writer rewrite_writer; + alloc_write_iertr::future<> do_write( + Transaction& t, + std::list &extent); + + alloc_write_ertr::future<> write_record( + Transaction& t, + record_t&& record, + std::list &&extents); + + journal::SegmentAllocator segment_allocator; + journal::RecordSubmitter record_submitter; + seastar::gate write_guard; }; class ExtentPlacementManager { @@ -173,6 +89,23 @@ public: devices_by_id.resize(DEVICE_ID_MAX, nullptr); } + void init_ool_writers(SegmentProvider &sp, SegmentSeqAllocator &ssa) { + // Currently only one SegmentProvider is supported, so hardcode the + // writers_by_hint for now. + writer_refs.clear(); + writers_by_hint.resize((std::size_t)placement_hint_t::NUM_HINTS, {}); + + // ool writer is not supported for placement_hint_t::HOT + writer_refs.emplace_back( + std::make_unique("COLD", sp, ssa)); + writers_by_hint[(std::size_t)placement_hint_t::COLD + ].emplace_back(writer_refs.back().get()); + writer_refs.emplace_back( + std::make_unique("REWRITE", sp, ssa)); + writers_by_hint[(std::size_t)placement_hint_t::REWRITE + ].emplace_back(writer_refs.back().get()); + } + void add_device(Device* device, bool is_primary) { auto device_id = device->get_device_id(); ceph_assert(devices_by_id[device_id] == nullptr); @@ -183,14 +116,6 @@ public: } } - void add_allocator(device_type_t type, ExtentAllocatorRef&& allocator) { - allocators[type].emplace_back(std::move(allocator)); - LOG_PREFIX(ExtentPlacementManager::add_allocator); - SUBDEBUG(seastore_journal, "allocators for {}: {}", - type, - allocators[type].size()); - } - seastore_off_t get_block_size() const { assert(primary_device != nullptr); // assume all the devices have the same block size @@ -206,9 +131,9 @@ public: open_ertr::future<> open() { LOG_PREFIX(ExtentPlacementManager::open); SUBINFO(seastore_journal, "started"); - return crimson::do_for_each(allocators, [](auto& allocators_item) { - return crimson::do_for_each(allocators_item.second, [](auto& allocator) { - return allocator->open(); + return crimson::do_for_each(writers_by_hint, [](auto& writers) { + return crimson::do_for_each(writers, [](auto& writer) { + return writer->open(); }); }); } @@ -240,8 +165,7 @@ public: // FIXME: set delay for COLD extent and improve GC // NOTE: delay means to delay the decision about whether to write the // extent as inline or out-of-line extents. - bool delay = (hint > placement_hint_t::COLD && - can_delay_allocation(get_allocator_type(hint))); + bool delay = (hint > placement_hint_t::COLD); if (delay) { return {make_delayed_temp_paddr(0), std::move(bp)}; @@ -256,7 +180,7 @@ public: * * Performs delayed allocation and do writes for out-of-line extents. */ - using alloc_paddr_iertr = ExtentOolWriter::write_iertr; + using alloc_paddr_iertr = ExtentOolWriter::alloc_write_iertr; alloc_paddr_iertr::future<> delayed_alloc_or_ool_write( Transaction& t, const std::list& delayed_extents) { @@ -264,33 +188,30 @@ public: SUBDEBUGT(seastore_journal, "start with {} delayed extents", t, delayed_extents.size()); return seastar::do_with( - std::map>(), + std::map>(), [this, &t, &delayed_extents](auto& alloc_map) { for (auto& extent : delayed_extents) { // For now, just do ool allocation for any delayed extent - auto& allocator_ptr = get_allocator( - get_allocator_type(extent->hint), extent->hint - ); - alloc_map[allocator_ptr.get()].emplace_back(extent); + auto writer_ptr = get_writer(extent->hint); + alloc_map[writer_ptr].emplace_back(extent); } return trans_intr::do_for_each(alloc_map, [&t](auto& p) { - auto allocator = p.first; + auto writer = p.first; auto& extents = p.second; - return allocator->alloc_ool_extents_paddr(t, extents); + return writer->alloc_write_ool_extents(t, extents); }); }); } - using close_ertr = ExtentOolWriter::stop_ertr; + using close_ertr = ExtentOolWriter::close_ertr; close_ertr::future<> close() { LOG_PREFIX(ExtentPlacementManager::close); SUBINFO(seastore_journal, "started"); - return crimson::do_for_each(allocators, [](auto& allocators_item) { - return crimson::do_for_each(allocators_item.second, [](auto& allocator) { - return allocator->stop(); + return crimson::do_for_each(writers_by_hint, [](auto& writers) { + return crimson::do_for_each(writers, [](auto& writer) { + return writer->close(); }); }).safe_then([this] { - allocators.clear(); devices_by_id.clear(); devices_by_id.resize(DEVICE_ID_MAX, nullptr); primary_device = nullptr; @@ -308,18 +229,17 @@ public: } private: - device_type_t get_allocator_type(placement_hint_t hint) { - return device_type_t::SEGMENTED; - } - - ExtentAllocatorRef& get_allocator( - device_type_t type, - placement_hint_t hint) { - auto& devices = allocators[type]; - return devices[std::rand() % devices.size()]; + ExtentOolWriter* get_writer(placement_hint_t hint) { + assert(hint < placement_hint_t::NUM_HINTS); + auto hint_index = static_cast(hint); + assert(hint_index < writers_by_hint.size()); + auto& writers = writers_by_hint[hint_index]; + assert(writers.size() > 0); + return writers[std::rand() % writers.size()]; } - std::map> allocators; + std::vector writer_refs; + std::vector> writers_by_hint; std::vector devices_by_id; Device* primary_device = nullptr; }; diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 2b7b235ed99..fca39a43ab5 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -547,6 +547,9 @@ TransactionManagerRef make_transaction_manager(bool detailed) detailed); auto journal = journal::make_segmented(*segment_cleaner); auto epm = std::make_unique(); + epm->init_ool_writers( + *segment_cleaner, + segment_cleaner->get_ool_segment_seq_allocator()); auto cache = std::make_unique(*epm); auto lba_manager = lba_manager::create_lba_manager(*cache); diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 75a689c09fa..1568276a522 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -538,11 +538,6 @@ public: SUBDEBUG(seastore_tm, "adding device {}, is_primary={}", dev->get_device_id(), is_primary); epm->add_device(dev, is_primary); - epm->add_allocator( - dev->get_device_type(), - std::make_unique( - *segment_cleaner, - segment_cleaner->get_ool_segment_seq_allocator())); ceph_assert(dev->get_device_type() == device_type_t::SEGMENTED); auto sm = dynamic_cast(dev); -- 2.39.5