From 7f17a1fbd328984a3c631ac9be421c4284c0d8e1 Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Thu, 31 Mar 2022 11:43:41 +0800 Subject: [PATCH] crimson/os/seastore: fix inconsistent segment allocation/reclaim with multiple devices The current cleaning mechanism does not distinguish devices, so we cannot allocate segments from a specific segment manager safely. Replace segment manager by segment manager group in SegmentAllocator and the related classes. Signed-off-by: Yingxin Cheng --- .../os/seastore/extent_placement_manager.cc | 13 ++-- .../os/seastore/extent_placement_manager.h | 6 +- src/crimson/os/seastore/journal.cc | 3 +- src/crimson/os/seastore/journal.h | 2 - .../os/seastore/journal/segment_allocator.cc | 38 +++++----- .../os/seastore/journal/segment_allocator.h | 27 +++----- .../os/seastore/journal/segmented_journal.cc | 46 +------------ .../os/seastore/journal/segmented_journal.h | 10 --- src/crimson/os/seastore/seastore.cc | 2 +- src/crimson/os/seastore/segment_cleaner.cc | 16 ++--- src/crimson/os/seastore/segment_cleaner.h | 69 ++++++++++--------- src/crimson/os/seastore/segment_manager.h | 5 -- .../os/seastore/segment_manager_group.cc | 52 ++++++++++++-- .../os/seastore/segment_manager_group.h | 49 +++++++++++++ .../os/seastore/transaction_manager.cc | 15 ++-- src/crimson/os/seastore/transaction_manager.h | 15 ++-- src/crimson/tools/store_nbd/tm_driver.cc | 2 +- .../seastore/test_btree_lba_manager.cc | 26 +++---- .../crimson/seastore/test_seastore_journal.cc | 22 +++--- .../seastore/transaction_manager_test_state.h | 4 +- 20 files changed, 221 insertions(+), 201 deletions(-) diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index ddba2186228b4..722ffc00511cb 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -10,19 +10,18 @@ SET_SUBSYS(seastore_journal); namespace crimson::os::seastore { 
SegmentedAllocator::SegmentedAllocator( - SegmentProvider& sp, - SegmentManager& sm, + SegmentProvider &sp, SegmentSeqAllocator &ssa) - : cold_writer{"COLD", sp, sm, ssa}, - rewrite_writer{"REWRITE", sp, sm, ssa} -{} + : cold_writer{"COLD", sp, ssa}, + rewrite_writer{"REWRITE", sp, ssa} +{ +} SegmentedAllocator::Writer::Writer( std::string name, SegmentProvider& sp, - SegmentManager& sm, SegmentSeqAllocator &ssa) - : segment_allocator(name, segment_type_t::OOL, sp, sm, ssa), + : segment_allocator(name, segment_type_t::OOL, sp, ssa), record_submitter(crimson::common::get_conf( "seastore_journal_iodepth_limit"), crimson::common::get_conf( diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index 8865aa9201e5e..a1fdf4db7f56a 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -76,8 +76,7 @@ class SegmentedAllocator : public ExtentAllocator { class Writer : public ExtentOolWriter { public: Writer(std::string name, - SegmentProvider& sp, - SegmentManager& sm, + SegmentProvider &sp, SegmentSeqAllocator &ssa); Writer(Writer &&) = default; @@ -113,8 +112,7 @@ class SegmentedAllocator : public ExtentAllocator { }; public: SegmentedAllocator( - SegmentProvider& sp, - SegmentManager& sm, + SegmentProvider &sp, SegmentSeqAllocator &ssa); Writer &get_writer(placement_hint_t hint) { diff --git a/src/crimson/os/seastore/journal.cc b/src/crimson/os/seastore/journal.cc index 2ed4d49739a72..98dfd7dd85378 100644 --- a/src/crimson/os/seastore/journal.cc +++ b/src/crimson/os/seastore/journal.cc @@ -7,11 +7,10 @@ namespace crimson::os::seastore::journal { JournalRef make_segmented( - SegmentManager &sm, SegmentManagerGroup &sms, SegmentProvider &provider) { - return std::make_unique(sm, sms, provider); + return std::make_unique(sms, provider); } } diff --git a/src/crimson/os/seastore/journal.h b/src/crimson/os/seastore/journal.h index 
b0448d7e9508f..b16a3874ac7ad 100644 --- a/src/crimson/os/seastore/journal.h +++ b/src/crimson/os/seastore/journal.h @@ -15,7 +15,6 @@ namespace nvme_device { class NVMeBlockDevice; } -class SegmentManager; class SegmentManagerGroup; class SegmentProvider; @@ -92,7 +91,6 @@ using JournalRef = std::unique_ptr; namespace journal { JournalRef make_segmented( - SegmentManager &sm, SegmentManagerGroup &sms, SegmentProvider &provider); diff --git a/src/crimson/os/seastore/journal/segment_allocator.cc b/src/crimson/os/seastore/journal/segment_allocator.cc index a3684d91c8253..6b2dabefbc7f0 100644 --- a/src/crimson/os/seastore/journal/segment_allocator.cc +++ b/src/crimson/os/seastore/journal/segment_allocator.cc @@ -16,13 +16,12 @@ SegmentAllocator::SegmentAllocator( std::string name, segment_type_t type, SegmentProvider &sp, - SegmentManager &sm, SegmentSeqAllocator &ssa) : name{name}, print_name{fmt::format("D?_{}", name)}, type{type}, segment_provider{sp}, - segment_manager{sm}, + sm_group{*sp.get_segment_manager_group()}, segment_seq_allocator(ssa) { ceph_assert(type != segment_type_t::NULL_SEG); @@ -36,14 +35,14 @@ SegmentAllocator::do_open() ceph_assert(!current_segment); segment_seq_t new_segment_seq = segment_seq_allocator.get_and_inc_next_segment_seq(); - auto meta = segment_manager.get_meta(); + auto meta = sm_group.get_meta(); current_segment_nonce = ceph_crc32c( new_segment_seq, reinterpret_cast(meta.seastore_id.bytes()), sizeof(meta.seastore_id.uuid)); - auto new_segment_id = segment_provider.get_segment( - get_device_id(), new_segment_seq, type); - return segment_manager.open(new_segment_id + auto new_segment_id = segment_provider.get_segment(new_segment_seq, type); + ceph_assert(new_segment_id != NULL_SEG_ID); + return sm_group.open(new_segment_id ).handle_error( open_ertr::pass_further{}, crimson::ct_error::assert_all{ @@ -67,7 +66,7 @@ SegmentAllocator::do_open() INFO("{} writing header to new segment ... 
-- {}", print_name, header); - auto header_length = segment_manager.get_block_size(); + auto header_length = get_block_size(); bufferlist bl; encode(header, bl); bufferptr bp(ceph::buffer::create_page_aligned(header_length)); @@ -117,9 +116,16 @@ SegmentAllocator::open_ret SegmentAllocator::open() { LOG_PREFIX(SegmentAllocator::open); - print_name = fmt::format("D{}_{}", - device_id_printer_t{get_device_id()}, - name); + auto& device_ids = sm_group.get_device_ids(); + ceph_assert(device_ids.size()); + std::ostringstream oss; + oss << "D"; + for (auto& device_id : device_ids) { + oss << "_" << device_id_printer_t{device_id}; + } + oss << "_" << name; + print_name = oss.str(); + INFO("{}", print_name); return do_open(); } @@ -147,7 +153,7 @@ SegmentAllocator::write(ceph::bufferlist to_write) }; TRACE("{} {}~{}", print_name, write_start_seq, write_length); assert(write_length > 0); - assert((write_length % segment_manager.get_block_size()) == 0); + assert((write_length % get_block_size()) == 0); assert(!needs_roll(write_length)); auto write_result = write_result_t{ @@ -230,20 +236,16 @@ SegmentAllocator::close_segment(bool is_rolling) current_segment_nonce, tail.journal_tail); - bufferptr bp( - ceph::buffer::create_page_aligned( - segment_manager.get_block_size())); + bufferptr bp(ceph::buffer::create_page_aligned(get_block_size())); bp.zero(); auto iter = bl.cbegin(); iter.copy(bl.length(), bp.c_str()); bl.clear(); bl.append(bp); - assert(bl.length() == - (size_t)segment_manager.get_rounded_tail_length()); + assert(bl.length() == sm_group.get_rounded_tail_length()); return seg_to_close->write( - segment_manager.get_segment_size() - - segment_manager.get_rounded_tail_length(), + sm_group.get_segment_size() - sm_group.get_rounded_tail_length(), bl ).safe_then([seg_to_close=std::move(seg_to_close)] { return seg_to_close->close(); diff --git a/src/crimson/os/seastore/journal/segment_allocator.h b/src/crimson/os/seastore/journal/segment_allocator.h index 
0a1b9812fd626..ebb7ff08430c3 100644 --- a/src/crimson/os/seastore/journal/segment_allocator.h +++ b/src/crimson/os/seastore/journal/segment_allocator.h @@ -11,7 +11,7 @@ #include "include/buffer.h" #include "crimson/common/errorator.h" -#include "crimson/os/seastore/segment_manager.h" +#include "crimson/os/seastore/segment_manager_group.h" #include "crimson/os/seastore/segment_seq_allocator.h" namespace crimson::os::seastore { @@ -33,29 +33,20 @@ class SegmentAllocator { SegmentAllocator(std::string name, segment_type_t type, SegmentProvider &sp, - SegmentManager &sm, SegmentSeqAllocator &ssa); const std::string& get_name() const { return print_name; } - device_id_t get_device_id() const { - return segment_manager.get_device_id(); - } - seastore_off_t get_block_size() const { - return segment_manager.get_block_size(); + return sm_group.get_block_size(); } extent_len_t get_max_write_length() const { - return segment_manager.get_segment_size() - - p2align(ceph::encoded_sizeof_bounded(), - size_t(segment_manager.get_block_size())); - } - - device_segment_id_t get_num_segments() const { - return segment_manager.get_num_segments(); + return sm_group.get_segment_size() - + sm_group.get_rounded_header_length() - + sm_group.get_rounded_tail_length(); } bool can_write() const { @@ -80,8 +71,10 @@ class SegmentAllocator { // returns true iff the current segment has insufficient space bool needs_roll(std::size_t length) const { assert(can_write()); - auto write_capacity = current_segment->get_write_capacity() - - segment_manager.get_rounded_tail_length(); + assert(current_segment->get_write_capacity() == + sm_group.get_segment_size()); + auto write_capacity = current_segment->get_write_capacity() - + sm_group.get_rounded_tail_length(); return length + written_to > std::size_t(write_capacity); } @@ -125,7 +118,7 @@ class SegmentAllocator { std::string print_name; const segment_type_t type; // JOURNAL or OOL SegmentProvider &segment_provider; - SegmentManager &segment_manager; 
+ SegmentManagerGroup &sm_group; SegmentRef current_segment; seastore_off_t written_to; SegmentSeqAllocator &segment_seq_allocator; diff --git a/src/crimson/os/seastore/journal/segmented_journal.cc b/src/crimson/os/seastore/journal/segmented_journal.cc index 18b29e50fab1e..dc86de3c2006e 100644 --- a/src/crimson/os/seastore/journal/segmented_journal.cc +++ b/src/crimson/os/seastore/journal/segmented_journal.cc @@ -27,7 +27,6 @@ SET_SUBSYS(seastore_journal); namespace crimson::os::seastore::journal { SegmentedJournal::SegmentedJournal( - SegmentManager &segment_manager, SegmentManagerGroup &sms, SegmentProvider &segment_provider) : segment_provider(segment_provider), @@ -36,8 +35,7 @@ SegmentedJournal::SegmentedJournal( journal_segment_allocator("JOURNAL", segment_type_t::JOURNAL, segment_provider, - segment_manager, - *segment_seq_allocator), + *segment_seq_allocator), record_submitter(crimson::common::get_conf( "seastore_journal_iodepth_limit"), crimson::common::get_conf( @@ -86,10 +84,9 @@ SegmentedJournal::prep_replay_segments( std::for_each( segments.begin(), segments.end(), - [this, FNAME](auto &seg) + [FNAME](auto &seg) { if (seg.first != seg.second.physical_segment_id || - seg.first.device_id() != journal_segment_allocator.get_device_id() || seg.second.get_type() != segment_type_t::JOURNAL) { ERROR("illegal journal segment for replay -- {}", seg.second); ceph_abort(); @@ -248,48 +245,11 @@ SegmentedJournal::replay_segment( ); } -SegmentedJournal::find_journal_segments_ret -SegmentedJournal::find_journal_segments() -{ - return seastar::do_with( - find_journal_segments_ret_bare{}, - [this](auto &ret) -> find_journal_segments_ret { - return crimson::do_for_each( - boost::counting_iterator(0), - boost::counting_iterator( - journal_segment_allocator.get_num_segments()), - [this, &ret](device_segment_id_t d_segment_id) { - segment_id_t segment_id{ - journal_segment_allocator.get_device_id(), - d_segment_id}; - return sms.read_segment_header( - segment_id - 
).safe_then([segment_id, &ret](auto &&header) { - if (header.get_type() == segment_type_t::JOURNAL) { - ret.emplace_back(std::make_pair(segment_id, std::move(header))); - } - }).handle_error( - crimson::ct_error::enoent::handle([](auto) { - return find_journal_segments_ertr::now(); - }), - crimson::ct_error::enodata::handle([](auto) { - return find_journal_segments_ertr::now(); - }), - crimson::ct_error::input_output_error::pass_further{} - ); - }).safe_then([&ret]() mutable { - return find_journal_segments_ret{ - find_journal_segments_ertr::ready_future_marker{}, - std::move(ret)}; - }); - }); -} - SegmentedJournal::replay_ret SegmentedJournal::replay( delta_handler_t &&delta_handler) { LOG_PREFIX(Journal::replay); - return find_journal_segments( + return sms.find_journal_segment_headers( ).safe_then([this, FNAME, delta_handler=std::move(delta_handler)] (auto &&segment_headers) mutable -> replay_ret { INFO("got {} segments", segment_headers.size()); diff --git a/src/crimson/os/seastore/journal/segmented_journal.h b/src/crimson/os/seastore/journal/segmented_journal.h index 973d04fd6f1ec..f3a51a4c77462 100644 --- a/src/crimson/os/seastore/journal/segmented_journal.h +++ b/src/crimson/os/seastore/journal/segmented_journal.h @@ -25,7 +25,6 @@ namespace crimson::os::seastore::journal { class SegmentedJournal : public Journal { public: SegmentedJournal( - SegmentManager &segment_manager, SegmentManagerGroup& sms, SegmentProvider& cleaner); ~SegmentedJournal() {} @@ -59,15 +58,6 @@ private: SegmentManagerGroup& sms; WritePipeline* write_pipeline = nullptr; - /// read journal segment headers from sms - using find_journal_segments_ertr = crimson::errorator< - crimson::ct_error::input_output_error>; - using find_journal_segments_ret_bare = std::vector< - std::pair>; - using find_journal_segments_ret = find_journal_segments_ertr::future< - find_journal_segments_ret_bare>; - find_journal_segments_ret find_journal_segments(); - /// return ordered vector of segments to replay 
using replay_segments_t = std::vector< std::pair>; diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index 4771c95f92e9a..6e81ce50f7c9d 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -1653,7 +1653,7 @@ seastar::future> make_seastore( return Device::make_device( device ).then([&device](DeviceRef device_obj) { - auto tm = make_transaction_manager(*device_obj, false /* detailed */); + auto tm = make_transaction_manager(false /* detailed */); auto cm = std::make_unique(*tm); return std::make_unique( device, diff --git a/src/crimson/os/seastore/segment_cleaner.cc b/src/crimson/os/seastore/segment_cleaner.cc index 82d164f9184a9..416099ca65952 100644 --- a/src/crimson/os/seastore/segment_cleaner.cc +++ b/src/crimson/os/seastore/segment_cleaner.cc @@ -238,14 +238,13 @@ void SegmentCleaner::register_metrics() } segment_id_t SegmentCleaner::get_segment( - device_id_t device_id, segment_seq_t seq, segment_type_t type) { LOG_PREFIX(SegmentCleaner::get_segment); assert(seq != NULL_SEG_SEQ); - for (auto it = segments.device_begin(device_id); - it != segments.device_end(device_id); + for (auto it = segments.begin(); + it != segments.end(); ++it) { auto seg_id = it->first; auto& segment_info = it->second; @@ -255,8 +254,7 @@ segment_id_t SegmentCleaner::get_segment( return seg_id; } } - ERROR("(TODO) handle out of space from device {} with segment_seq={}", - device_id, segment_seq_printer_t{seq}); + ERROR("out of space with segment_seq={}", segment_seq_printer_t{seq}); ceph_abort(); return NULL_SEG_ID; } @@ -502,8 +500,7 @@ SegmentCleaner::gc_reclaim_space_ret SegmentCleaner::gc_reclaim_space() }); } -SegmentCleaner::mount_ret SegmentCleaner::mount( - device_id_t pdevice_id) +SegmentCleaner::mount_ret SegmentCleaner::mount() { const auto& sms = sm_group->get_segment_managers(); logger().debug( @@ -513,7 +510,6 @@ SegmentCleaner::mount_ret SegmentCleaner::mount( journal_tail_target = 
JOURNAL_SEQ_NULL; journal_tail_committed = JOURNAL_SEQ_NULL; journal_head = JOURNAL_SEQ_NULL; - journal_device_id = pdevice_id; space_tracker.reset( detailed ? @@ -647,11 +643,11 @@ SegmentCleaner::scan_extents_ret SegmentCleaner::scan_nonfull_segment( } return seastar::now(); }), - [&cursor, header, segment_id, this](auto& handler) { + [&cursor, header, this](auto& handler) { return sm_group->scan_valid_records( cursor, header.segment_nonce, - segments[segment_id.device_id()]->segment_size, + segments.get_segment_size(), handler); } ); diff --git a/src/crimson/os/seastore/segment_cleaner.h b/src/crimson/os/seastore/segment_cleaner.h index b5a53ba87ff0a..fa7578cde0ca4 100644 --- a/src/crimson/os/seastore/segment_cleaner.h +++ b/src/crimson/os/seastore/segment_cleaner.h @@ -30,13 +30,11 @@ class segment_info_set_t { segment_manager_info_t( device_id_t device_id, device_segment_id_t num_segments, - seastore_off_t segment_size, seastore_off_t block_size, size_t empty_segments, size_t size) : device_id(device_id), num_segments(num_segments), - segment_size(segment_size), block_size(block_size), empty_segments(empty_segments), size(size), @@ -45,7 +43,6 @@ class segment_info_set_t { device_id_t device_id = 0; device_segment_id_t num_segments = 0; - seastore_off_t segment_size = 0; seastore_off_t block_size = 0; size_t empty_segments = 0; size_t size = 0; @@ -120,11 +117,14 @@ public: journal_segments = 0; avail_bytes = 0; opened_segments = 0; + segment_size = 0; } void add_segment_manager(SegmentManager& segment_manager) { device_id_t d_id = segment_manager.get_device_id(); + auto ssize = segment_manager.get_segment_size(); + ceph_assert(ssize != 0); segments.add_device( d_id, segment_manager.get_num_segments(), @@ -132,13 +132,19 @@ public: sm_infos[segment_manager.get_device_id()].emplace( d_id, segment_manager.get_num_segments(), - segment_manager.get_segment_size(), segment_manager.get_block_size(), segment_manager.get_num_segments(), segment_manager.get_size()); 
total_bytes += segment_manager.get_size(); avail_bytes += segment_manager.get_size(); + + // assume all the segment managers share the same settings as follows. + if (segment_size == 0) { + segment_size = ssize; + } else { + ceph_assert(segment_size == ssize); + } } device_segment_id_t size() const { @@ -159,13 +165,6 @@ public: return segments.end(); } - auto device_begin(device_id_t id) { - return segments.device_begin(id); - } - auto device_end(device_id_t id) { - return segments.device_end(id); - } - // the following methods are used for keeping track of // seastore disk space usage void segment_opened(segment_id_t segment) { @@ -174,15 +173,15 @@ public: ceph_assert(segments[segment].is_empty()); // must be opening a new segment auto [iter, inserted] = sm_info->open_segment_avails.emplace( - segment, sm_info->segment_size); + segment, get_segment_size()); opened_segments++; ceph_assert(inserted); } void segment_emptied(segment_id_t segment) { auto& sm_info = sm_infos[segment.device_id()]; sm_info->empty_segments++; - sm_info->avail_bytes += sm_info->segment_size; - avail_bytes += sm_info->segment_size; + sm_info->avail_bytes += get_segment_size(); + avail_bytes += get_segment_size(); } void segment_closed(segment_id_t segment) { assert(segments.contains(segment)); @@ -199,11 +198,11 @@ public: opened_segments--; } else { ceph_assert(segment_info.is_empty()); - assert(sm_info->avail_bytes >= (size_t)sm_info->segment_size); - assert(avail_bytes >= (size_t)sm_info->segment_size); + assert(sm_info->avail_bytes >= (std::size_t)get_segment_size()); + assert(avail_bytes >= (std::size_t)get_segment_size()); assert(sm_info->empty_segments > 0); - sm_info->avail_bytes -= sm_info->segment_size; - avail_bytes -= sm_info->segment_size; + sm_info->avail_bytes -= get_segment_size(); + avail_bytes -= get_segment_size(); sm_info->empty_segments--; } segment_info.set_closed(); @@ -219,7 +218,8 @@ public: offset); return; } - auto new_avail_bytes = sm_info->segment_size - 
offset.as_seg_paddr().get_segment_off(); + auto new_avail_bytes = get_segment_size() - + offset.as_seg_paddr().get_segment_off(); if (iter->second < new_avail_bytes) { crimson::get_logger(ceph_subsys_seastore_cleaner).error( "SegmentCleaner::update_segment_avail_bytes:" @@ -272,6 +272,11 @@ public: } return num; } + seastore_off_t get_segment_size() const { + assert(segment_size != 0); + return segment_size; + } + private: std::vector> sm_infos; segment_map_t segments; @@ -280,6 +285,7 @@ private: size_t total_bytes = 0; size_t avail_bytes = 0; size_t opened_segments = 0; + seastore_off_t segment_size = 0; friend class SegmentCleaner; }; @@ -290,7 +296,7 @@ private: class SegmentProvider { public: virtual segment_id_t get_segment( - device_id_t id, segment_seq_t seq, segment_type_t type) = 0; + segment_seq_t seq, segment_type_t type) = 0; virtual void close_segment(segment_id_t) {} @@ -310,6 +316,8 @@ public: virtual void update_segment_avail_bytes(paddr_t offset) = 0; + virtual SegmentManagerGroup* get_segment_manager_group() = 0; + virtual ~SegmentProvider() {} }; @@ -692,8 +700,6 @@ private: /// head of journal journal_seq_t journal_head; - device_id_t journal_device_id; - ExtentCallbackInterface *ecb = nullptr; /// populated if there is an IO blocked on hard limits @@ -714,10 +720,10 @@ public: using mount_ertr = crimson::errorator< crimson::ct_error::input_output_error>; using mount_ret = mount_ertr::future<>; - mount_ret mount(device_id_t pdevice_id); + mount_ret mount(); segment_id_t get_segment( - device_id_t id, segment_seq_t seq, segment_type_t type) final; + segment_seq_t seq, segment_type_t type) final; void close_segment(segment_id_t segment) final; @@ -754,6 +760,10 @@ public: return segments[id].get_type(); } + SegmentManagerGroup* get_segment_manager_group() final { + return sm_group.get(); + } + using release_ertr = SegmentManagerGroup::release_ertr; release_ertr::future<> maybe_release_segment(Transaction &t); @@ -1053,9 +1063,7 @@ private: } 
size_t get_bytes_available_current_segment() const { - auto& seg_addr = journal_head.offset.as_seg_paddr(); - auto segment_size = - segments[seg_addr.get_segment_id().device_id()]->segment_size; + auto segment_size = segments.get_segment_size(); return segment_size - get_bytes_used_current_segment(); } @@ -1109,12 +1117,10 @@ private: if (journal_head == JOURNAL_SEQ_NULL) { // this for calculating journal bytes in the journal // replay phase in which journal_head is not set - return segments.get_journal_segments() * segments[journal_device_id]->segment_size; + return segments.get_journal_segments() * segments.get_segment_size(); } else { assert(journal_head >= journal_tail_committed); - auto& seg_addr = journal_head.offset.as_seg_paddr(); - auto segment_size = - segments[seg_addr.get_segment_id().device_id()]->segment_size; + auto segment_size = segments.get_segment_size(); return (journal_head.segment_seq - journal_tail_committed.segment_seq + 1) * segment_size; } @@ -1319,7 +1325,6 @@ private: assert(s_type != segment_type_t::NULL_SEG); segments[segment].type = s_type; if (s_type == segment_type_t::JOURNAL) { - assert(journal_device_id == segment.device_id()); segments.new_journal_segment(); } else { assert(s_type == segment_type_t::OOL); diff --git a/src/crimson/os/seastore/segment_manager.h b/src/crimson/os/seastore/segment_manager.h index eb4cc6ccab27e..44d249dcac9fc 100644 --- a/src/crimson/os/seastore/segment_manager.h +++ b/src/crimson/os/seastore/segment_manager.h @@ -173,11 +173,6 @@ public: ceph_assert(get_size() % get_segment_size() == 0); return ((device_segment_id_t)(get_size() / get_segment_size())); } - seastore_off_t get_rounded_tail_length() const { - return p2roundup( - ceph::encoded_sizeof_bounded(), - (size_t)get_block_size()); - } virtual ~SegmentManager() {} diff --git a/src/crimson/os/seastore/segment_manager_group.cc b/src/crimson/os/seastore/segment_manager_group.cc index b691e5100583d..659f5e413a5af 100644 --- 
a/src/crimson/os/seastore/segment_manager_group.cc +++ b/src/crimson/os/seastore/segment_manager_group.cc @@ -17,9 +17,8 @@ SegmentManagerGroup::read_segment_tail(segment_id_t segment) return segment_manager.read( paddr_t::make_seg_paddr( segment, - segment_manager.get_segment_size() - - segment_manager.get_rounded_tail_length()), - segment_manager.get_rounded_tail_length() + segment_manager.get_segment_size() - get_rounded_tail_length()), + get_rounded_tail_length() ).handle_error( read_segment_header_ertr::pass_further{}, crimson::ct_error::assert_all{ @@ -59,7 +58,7 @@ SegmentManagerGroup::read_segment_header(segment_id_t segment) auto& segment_manager = *segment_managers[segment.device_id()]; return segment_manager.read( paddr_t::make_seg_paddr(segment, 0), - segment_manager.get_block_size() + get_rounded_header_length() ).handle_error( read_segment_header_ertr::pass_further{}, crimson::ct_error::assert_all{ @@ -387,4 +386,49 @@ SegmentManagerGroup::consume_next_records( }); } +SegmentManagerGroup::find_journal_segment_headers_ret +SegmentManagerGroup::find_journal_segment_headers() +{ + return seastar::do_with( + get_segment_managers(), + find_journal_segment_headers_ret_bare{}, + [this](auto &sms, auto& ret) -> find_journal_segment_headers_ret + { + return crimson::do_for_each(sms, + [this, &ret](SegmentManager *sm) + { + LOG_PREFIX(SegmentManagerGroup::find_journal_segment_headers); + auto device_id = sm->get_device_id(); + auto num_segments = sm->get_num_segments(); + INFO("processing {} with {} segments", + device_id_printer_t{device_id}, num_segments); + return crimson::do_for_each( + boost::counting_iterator(0), + boost::counting_iterator(num_segments), + [this, &ret, device_id](device_segment_id_t d_segment_id) + { + segment_id_t segment_id{device_id, d_segment_id}; + return read_segment_header(segment_id + ).safe_then([segment_id, &ret](auto &&header) { + if (header.get_type() == segment_type_t::JOURNAL) { + ret.emplace_back(std::make_pair(segment_id, 
std::move(header))); + } + }).handle_error( + crimson::ct_error::enoent::handle([](auto) { + return find_journal_segment_headers_ertr::now(); + }), + crimson::ct_error::enodata::handle([](auto) { + return find_journal_segment_headers_ertr::now(); + }), + crimson::ct_error::input_output_error::pass_further{} + ); + }); + }).safe_then([&ret]() mutable { + return find_journal_segment_headers_ret{ + find_journal_segment_headers_ertr::ready_future_marker{}, + std::move(ret)}; + }); + }); +} + } // namespace crimson::os::seastore diff --git a/src/crimson/os/seastore/segment_manager_group.h b/src/crimson/os/seastore/segment_manager_group.h index cc715e470866d..f2690c3448839 100644 --- a/src/crimson/os/seastore/segment_manager_group.h +++ b/src/crimson/os/seastore/segment_manager_group.h @@ -45,6 +45,38 @@ public: device_ids.clear(); } + /** + * get device info + * + * Assume all segment managers share the same following information. + */ + seastore_off_t get_block_size() const { + assert(device_ids.size()); + return segment_managers[*device_ids.begin()]->get_block_size(); + } + + seastore_off_t get_segment_size() const { + assert(device_ids.size()); + return segment_managers[*device_ids.begin()]->get_segment_size(); + } + + const seastore_meta_t &get_meta() const { + assert(device_ids.size()); + return segment_managers[*device_ids.begin()]->get_meta(); + } + + std::size_t get_rounded_header_length() const { + return p2roundup( + ceph::encoded_sizeof_bounded(), + (std::size_t)get_block_size()); + } + + std::size_t get_rounded_tail_length() const { + return p2roundup( + ceph::encoded_sizeof_bounded(), + (std::size_t)get_block_size()); + } + using read_segment_header_ertr = crimson::errorator< crimson::ct_error::enoent, crimson::ct_error::enodata, @@ -101,6 +133,23 @@ public: found_record_handler_t &handler ///< [in] handler for records ); ///< @return used budget + /* + * read journal segment headers + */ + using find_journal_segment_headers_ertr = crimson::errorator< + 
crimson::ct_error::input_output_error>; + using find_journal_segment_headers_ret_bare = std::vector< + std::pair>; + using find_journal_segment_headers_ret = find_journal_segment_headers_ertr::future< + find_journal_segment_headers_ret_bare>; + find_journal_segment_headers_ret find_journal_segment_headers(); + + using open_ertr = SegmentManager::open_ertr; + open_ertr::future open(segment_id_t id) { + assert(has_device(id.device_id())); + return segment_managers[id.device_id()]->open(id); + } + using release_ertr = SegmentManager::release_ertr; release_ertr::future<> release_segment(segment_id_t id) { assert(has_device(id.device_id())); diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 768007027233d..5f3b670c864ed 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -44,7 +44,6 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs() LOG_PREFIX(TransactionManager::mkfs); INFO("enter"); return segment_cleaner->mount( - epm->get_primary_device().get_device_id() ).safe_then([this] { return journal->open_for_write(); }).safe_then([this](auto addr) { @@ -84,7 +83,6 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount() INFO("enter"); cache->init(); return segment_cleaner->mount( - epm->get_primary_device().get_device_id() ).safe_then([this] { return journal->replay( [this](const auto &offsets, const auto &e, auto last_modified) { @@ -156,10 +154,10 @@ TransactionManager::close_ertr::future<> TransactionManager::close() { cache->dump_contents(); return journal->close(); }).safe_then([this] { - sms.reset(); return epm->close(); - }).safe_then([FNAME] { + }).safe_then([FNAME, this] { INFO("completed"); + sms.reset(); return seastar::now(); }); } @@ -541,9 +539,7 @@ TransactionManager::get_extent_if_live_ret TransactionManager::get_extent_if_liv TransactionManager::~TransactionManager() {} -TransactionManagerRef 
make_transaction_manager( - Device &device, - bool detailed) +TransactionManagerRef make_transaction_manager(bool detailed) { auto sms = std::make_unique(); auto& sms_ref = *sms.get(); @@ -551,10 +547,7 @@ TransactionManagerRef make_transaction_manager( SegmentCleaner::config_t::get_default(), std::move(sms), detailed); - ceph_assert(device.get_device_type() == device_type_t::SEGMENTED); - auto sm = dynamic_cast(&device); - ceph_assert(sm != nullptr); - auto journal = journal::make_segmented(*sm, sms_ref, *segment_cleaner); + auto journal = journal::make_segmented(sms_ref, *segment_cleaner); auto epm = std::make_unique(); auto cache = std::make_unique(*epm); auto lba_manager = lba_manager::create_lba_manager(*cache); diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 2e5385025b205..76f503b5d7bfe 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -539,16 +539,15 @@ public: SUBDEBUG(seastore_tm, "adding device {}, is_primary={}", dev->get_device_id(), is_primary); epm->add_device(dev, is_primary); + epm->add_allocator( + dev->get_device_type(), + std::make_unique( + *segment_cleaner, + segment_cleaner->get_ool_segment_seq_allocator())); ceph_assert(dev->get_device_type() == device_type_t::SEGMENTED); auto sm = dynamic_cast(dev); ceph_assert(sm != nullptr); - epm->add_allocator( - dev->get_device_type(), - std::make_unique( - *segment_cleaner, - *sm, - segment_cleaner->get_ool_segment_seq_allocator())); sms.add_segment_manager(sm); } @@ -581,8 +580,6 @@ public: }; using TransactionManagerRef = std::unique_ptr; -TransactionManagerRef make_transaction_manager( - Device &device, - bool detailed); +TransactionManagerRef make_transaction_manager(bool detailed); } diff --git a/src/crimson/tools/store_nbd/tm_driver.cc b/src/crimson/tools/store_nbd/tm_driver.cc index 76f4825ec8c79..94a8684a48828 100644 --- a/src/crimson/tools/store_nbd/tm_driver.cc +++ 
b/src/crimson/tools/store_nbd/tm_driver.cc @@ -131,7 +131,7 @@ seastar::future TMDriver::read( void TMDriver::init() { - tm = make_transaction_manager(*device, false /* detailed */); + tm = make_transaction_manager(false /* detailed */); tm->add_device(device.get(), true); } diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc index 66f405006e995..54d36da45c055 100644 --- a/src/test/crimson/seastore/test_btree_lba_manager.cc +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -39,11 +39,10 @@ struct btree_test_base : segment_id_t next; - btree_test_base() = default; - std::map segment_seqs; std::map segment_types; + btree_test_base() = default; seastar::lowres_system_clock::time_point get_last_modified( segment_id_t id) const final { @@ -57,30 +56,32 @@ struct btree_test_base : void update_segment_avail_bytes(paddr_t offset) final {} segment_id_t get_segment( - device_id_t id, segment_seq_t seq, - segment_type_t type) final - { + segment_type_t type + ) final { auto ret = next; next = segment_id_t{ - next.device_id(), + segment_manager->get_device_id(), next.device_segment_id() + 1}; segment_seqs[ret] = seq; segment_types[ret] = type; return ret; } - segment_seq_t get_seq(segment_id_t id) { + journal_seq_t get_journal_tail_target() const final { return journal_seq_t{}; } + + void update_journal_tail_committed(journal_seq_t committed) final {} + + SegmentManagerGroup* get_segment_manager_group() final { return sms.get(); } + + segment_seq_t get_seq(segment_id_t id) final { return segment_seqs[id]; } - segment_type_t get_type(segment_id_t id) { + segment_type_t get_type(segment_id_t id) final { return segment_types[id]; } - journal_seq_t get_journal_tail_target() const final { return journal_seq_t{}; } - void update_journal_tail_committed(journal_seq_t committed) final {} - virtual void complete_commit(Transaction &t) {} seastar::future<> submit_transaction(TransactionRef t) { @@ -100,8 +101,7 
@@ struct btree_test_base : segment_manager = segment_manager::create_test_ephemeral(); sms.reset(new SegmentManagerGroup()); auto& sms_ref = *sms.get(); - journal = journal::make_segmented( - *segment_manager, sms_ref, *this); + journal = journal::make_segmented(sms_ref, *this); epm.reset(new ExtentPlacementManager()); cache.reset(new Cache(*epm)); diff --git a/src/test/crimson/seastore/test_seastore_journal.cc b/src/test/crimson/seastore/test_seastore_journal.cc index 0daae06f73476..e9d9b203de83f 100644 --- a/src/test/crimson/seastore/test_seastore_journal.cc +++ b/src/test/crimson/seastore/test_seastore_journal.cc @@ -98,20 +98,25 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider { void update_segment_avail_bytes(paddr_t offset) final {} segment_id_t get_segment( - device_id_t id, segment_seq_t seq, - segment_type_t type) final - { + segment_type_t type + ) final { auto ret = next; next = segment_id_t{ - next.device_id(), + segment_manager->get_device_id(), next.device_segment_id() + 1}; segment_seqs[ret] = seq; segment_types[ret] = type; return ret; } - segment_seq_t get_seq(segment_id_t id) { + journal_seq_t get_journal_tail_target() const final { return journal_seq_t{}; } + + void update_journal_tail_committed(journal_seq_t paddr) final {} + + SegmentManagerGroup* get_segment_manager_group() final { return sms.get(); } + + segment_seq_t get_seq(segment_id_t id) final { return segment_seqs[id]; } @@ -119,15 +124,12 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider { return segment_types[id]; } - journal_seq_t get_journal_tail_target() const final { return journal_seq_t{}; } - void update_journal_tail_committed(journal_seq_t paddr) final {} - seastar::future<> set_up_fut() final { segment_manager = segment_manager::create_test_ephemeral(); block_size = segment_manager->get_block_size(); sms.reset(new SegmentManagerGroup()); next = segment_id_t(segment_manager->get_device_id(), 0); - journal = 
journal::make_segmented(*segment_manager, *sms, *this); + journal = journal::make_segmented(*sms, *this); journal->set_write_pipeline(&pipeline); sms->add_segment_manager(segment_manager.get()); return segment_manager->init( @@ -158,7 +160,7 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider { return journal->close( ).safe_then([this, f=std::move(f)]() mutable { journal = journal::make_segmented( - *segment_manager, *sms, *this); + *sms, *this); journal->set_write_pipeline(&pipeline); return journal->replay(std::forward(std::move(f))); }).safe_then([this] { diff --git a/src/test/crimson/seastore/transaction_manager_test_state.h b/src/test/crimson/seastore/transaction_manager_test_state.h index e987cd44fcc53..356b55976bbdc 100644 --- a/src/test/crimson/seastore/transaction_manager_test_state.h +++ b/src/test/crimson/seastore/transaction_manager_test_state.h @@ -71,7 +71,7 @@ protected: }; auto get_seastore(SeaStore::MDStoreRef mdstore, SegmentManagerRef sm) { - auto tm = make_transaction_manager(*sm, true); + auto tm = make_transaction_manager(true); auto cm = std::make_unique(*tm); return std::make_unique( "", @@ -92,7 +92,7 @@ protected: TMTestState() : EphemeralTestState() {} virtual void _init() override { - tm = make_transaction_manager(*segment_manager, true); + tm = make_transaction_manager(true); tm->add_device(segment_manager.get(), true); segment_cleaner = tm->get_segment_cleaner(); lba_manager = tm->get_lba_manager(); -- 2.39.5