From 4554f3e3ff73c2ce8978140613bfb504f380e1b4 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Wed, 25 Aug 2021 10:35:27 +0800 Subject: [PATCH] crimson/os/seastore: multi-device support Signed-off-by: Xuehan Xu --- .../os/seastore/extent_placement_manager.h | 4 + src/crimson/os/seastore/extent_reader.cc | 9 +- src/crimson/os/seastore/extent_reader.h | 7 +- src/crimson/os/seastore/seastore.cc | 129 ++++++++++++++++-- src/crimson/os/seastore/seastore.h | 3 + src/crimson/os/seastore/seastore_types.cc | 26 ++++ src/crimson/os/seastore/seastore_types.h | 39 ++++++ src/crimson/os/seastore/segment_cleaner.h | 4 +- src/crimson/os/seastore/segment_manager.h | 84 +++++++++++- .../os/seastore/segment_manager/block.cc | 77 +++++++---- .../os/seastore/segment_manager/block.h | 61 +++------ .../os/seastore/segment_manager/ephemeral.h | 35 ++++- .../os/seastore/transaction_manager.cc | 14 +- src/crimson/os/seastore/transaction_manager.h | 21 ++- src/crimson/tools/store_nbd/tm_driver.cc | 14 +- .../seastore/test_randomblock_manager.cc | 1 + .../seastore/transaction_manager_test_state.h | 23 +++- src/vstart.sh | 25 ++++ 18 files changed, 471 insertions(+), 105 deletions(-) diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index b4b00ef49191..d8df40cb8920 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -384,6 +384,10 @@ public: void add_allocator(device_type_t type, ExtentAllocatorRef&& allocator) { allocators[type].emplace_back(std::move(allocator)); + LOG_PREFIX(ExtentPlacementManager::add_allocator); + DEBUG("allocators for {}: {}", + device_type_to_string(type), + allocators[type].size()); } private: diff --git a/src/crimson/os/seastore/extent_reader.cc b/src/crimson/os/seastore/extent_reader.cc index c3b21ae57761..984be2d5198b 100644 --- a/src/crimson/os/seastore/extent_reader.cc +++ b/src/crimson/os/seastore/extent_reader.cc @@ -16,6 
+16,7 @@ namespace crimson::os::seastore { ExtentReader::read_segment_header_ret ExtentReader::read_segment_header(segment_id_t segment) { + auto& segment_manager = *segment_managers[segment.device_id()]; return segment_manager.read( paddr_t{segment, 0}, segment_manager.get_block_size() @@ -24,7 +25,7 @@ ExtentReader::read_segment_header(segment_id_t segment) crimson::ct_error::assert_all{ "Invalid error in ExtentReader::read_segment_header" } - ).safe_then([=](bufferptr bptr) -> read_segment_header_ret { + ).safe_then([=, &segment_manager](bufferptr bptr) -> read_segment_header_ret { logger().debug("segment {} bptr size {}", segment, bptr.length()); segment_header_t header; @@ -112,6 +113,8 @@ ExtentReader::scan_valid_records_ret ExtentReader::scan_valid_records( size_t budget, found_record_handler_t &handler) { + auto& segment_manager = + *segment_managers[cursor.offset.segment.device_id()]; if (cursor.offset.offset == 0) { cursor.offset.offset = segment_manager.get_block_size(); } @@ -220,6 +223,7 @@ ExtentReader::read_validate_record_metadata( paddr_t start, segment_nonce_t nonce) { + auto& segment_manager = *segment_managers[start.segment.device_id()]; auto block_size = segment_manager.get_block_size(); if (start.offset + block_size > (int64_t)segment_manager.get_segment_size()) { return read_validate_record_metadata_ret( @@ -228,7 +232,7 @@ ExtentReader::read_validate_record_metadata( } return segment_manager.read(start, block_size ).safe_then( - [=](bufferptr bptr) mutable + [=, &segment_manager](bufferptr bptr) mutable -> read_validate_record_metadata_ret { logger().debug("read_validate_record_metadata: reading {}", start); auto block_size = segment_manager.get_block_size(); @@ -308,6 +312,7 @@ ExtentReader::read_validate_data( paddr_t record_base, const record_header_t &header) { + auto& segment_manager = *segment_managers[record_base.segment.device_id()]; return segment_manager.read( record_base.add_offset(header.mdlength), header.dlength diff --git 
a/src/crimson/os/seastore/extent_reader.h b/src/crimson/os/seastore/extent_reader.h index 7f0d1ea653b6..32aa1db517d3 100644 --- a/src/crimson/os/seastore/extent_reader.h +++ b/src/crimson/os/seastore/extent_reader.h @@ -10,6 +10,7 @@ namespace crimson::os::seastore { class SegmentCleaner; +class TransactionManager; class ExtentReader { public: @@ -76,8 +77,11 @@ public: } private: - SegmentManager& segment_manager; + std::vector segment_managers; + std::vector& get_segment_managers() { + return segment_managers; + } /// read record metadata for record starting at start using read_validate_record_metadata_ertr = read_ertr; using read_validate_record_metadata_ret = @@ -105,6 +109,7 @@ private: /// validate embedded metadata checksum static bool validate_metadata(const bufferlist &bl); + friend class TransactionManager; }; using ExtentReaderRef = std::unique_ptr; diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index f06aa650d8ee..0a81fd63daf4 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -33,11 +33,13 @@ using crimson::common::local_conf; namespace crimson::os::seastore { SeaStore::SeaStore( + std::string root, SegmentManagerRef sm, TransactionManagerRef tm, CollectionManagerRef cm, OnodeManagerRef om) - : segment_manager(std::move(sm)), + : root(root), + segment_manager(std::move(sm)), transaction_manager(std::move(tm)), collection_manager(std::move(cm)), onode_manager(std::move(om)) @@ -90,6 +92,24 @@ seastar::future<> SeaStore::mount() { return segment_manager->mount( ).safe_then([this] { + transaction_manager->add_segment_manager(segment_manager.get()); + auto sec_devices = segment_manager->get_secondary_devices(); + return crimson::do_for_each(sec_devices, [this](auto& device_entry) { + device_id_t id = device_entry.first; + magic_t magic = device_entry.second.magic; + device_type_t dtype = device_entry.second.dtype; + auto sm = std::make_unique< + 
segment_manager::block::BlockSegmentManager>( + root + "/block." + device_type_to_string(dtype) + + "." + std::to_string(id)); + return sm->mount().safe_then([this, sm=std::move(sm), magic]() mutable { + assert(sm->get_magic() == magic); + transaction_manager->add_segment_manager(sm.get()); + secondaries.emplace_back(std::move(sm)); + return seastar::now(); + }); + }); + }).safe_then([this] { return transaction_manager->mount(); }).handle_error( crimson::ct_error::assert_all{ @@ -101,7 +121,15 @@ seastar::future<> SeaStore::mount() seastar::future<> SeaStore::umount() { return transaction_manager->close( - ).handle_error( + ).safe_then([this] { + return crimson::do_for_each( + secondaries, + [](auto& sm) -> SegmentManager::close_ertr::future<> { + return sm->close(); + }); + }).safe_then([this] { + return segment_manager->close(); + }).handle_error( crimson::ct_error::assert_all{ "Invalid error in SeaStore::umount" } @@ -110,11 +138,89 @@ seastar::future<> SeaStore::umount() SeaStore::mkfs_ertr::future<> SeaStore::mkfs(uuid_d new_osd_fsid) { - return segment_manager->mkfs( - seastore_meta_t{new_osd_fsid} - ).safe_then([this] { + return seastar::do_with( + secondary_device_set_t(), + [this, new_osd_fsid](auto& sds) { + auto fut = seastar::now(); + LOG_PREFIX(SeaStore::mkfs); + DEBUG("root: {}", root); + if (!root.empty()) { + fut = seastar::open_directory(root).then( + [this, &sds, new_osd_fsid](seastar::file rdir) mutable { + std::unique_ptr root_f = + std::make_unique(std::move(rdir)); + auto sub = root_f->list_directory( + [this, &sds, new_osd_fsid](auto de) mutable + -> seastar::future<> { + LOG_PREFIX(SeaStore::mkfs); + DEBUG("found file: {}", de.name); + if (de.name.find("block.") == 0 + && de.name.length() > 6 /* 6 for "block." 
*/) { + std::string entry_name = de.name; + auto dtype_end = entry_name.find_first_of('.', 6); + device_type_t dtype = + string_to_device_type( + entry_name.substr(6, dtype_end - 6)); + if (!dtype) { + // invalid device type + return seastar::now(); + } + auto id = std::stoi(entry_name.substr(dtype_end + 1)); + auto sm = std::make_unique< + segment_manager::block::BlockSegmentManager + >(root + "/" + entry_name); + magic_t magic = (magic_t)std::rand(); + sds.emplace( + (device_id_t)id, + device_spec_t{ + magic, + dtype, + (device_id_t)id}); + return sm->mkfs( + segment_manager_config_t{ + false, + magic, + dtype, + (device_id_t)id, + seastore_meta_t{new_osd_fsid}, + secondary_device_set_t()} + ).safe_then([this, sm=std::move(sm), id]() mutable { + LOG_PREFIX(SeaStore::mkfs); + DEBUG("mkfs: finished for segment manager {}", id); + secondaries.emplace_back(std::move(sm)); + return seastar::now(); + }).handle_error(crimson::ct_error::assert_all{"not possible"}); + } + return seastar::now(); + }); + return sub.done().then( + [root_f=std::move(root_f)] { + return seastar::now(); + }); + }); + } + return fut.then([this, &sds, new_osd_fsid] { + return segment_manager->mkfs( + segment_manager_config_t{ + true, + (magic_t)std::rand(), + device_type_t::SEGMENTED, + 0, + seastore_meta_t{new_osd_fsid}, + sds} + ); + }).safe_then([this] { + return crimson::do_for_each(secondaries, [this](auto& sec_sm) { + return sec_sm->mount().safe_then([this, &sec_sm] { + transaction_manager->add_segment_manager(sec_sm.get()); + return seastar::now(); + }); + }); + }); + }).safe_then([this] { return segment_manager->mount(); }).safe_then([this] { + transaction_manager->add_segment_manager(segment_manager.get()); return transaction_manager->mkfs(); }).safe_then([this] { return transaction_manager->mount(); @@ -1194,15 +1300,6 @@ std::unique_ptr make_seastore( auto epm = std::make_unique(*cache, *lba_manager); - epm->add_allocator( - device_type_t::SEGMENTED, - std::make_unique( - 
*segment_cleaner, - *sm, - *lba_manager, - *journal, - *cache)); - journal->set_segment_provider(&*segment_cleaner); auto tm = std::make_unique( @@ -1211,10 +1308,12 @@ std::unique_ptr make_seastore( std::move(journal), std::move(cache), std::move(lba_manager), - std::move(epm)); + std::move(epm), + scanner_ref); auto cm = std::make_unique(*tm); return std::make_unique( + device, std::move(sm), std::move(tm), std::move(cm), diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h index d9eacf418986..f21e2832fab4 100644 --- a/src/crimson/os/seastore/seastore.h +++ b/src/crimson/os/seastore/seastore.h @@ -42,6 +42,7 @@ class SeaStore final : public FuturizedStore { public: SeaStore( + std::string root, SegmentManagerRef sm, TransactionManagerRef tm, CollectionManagerRef cm, @@ -263,7 +264,9 @@ private: const std::optional &_start, OMapManager::omap_list_config_t config); + std::string root; SegmentManagerRef segment_manager; + std::vector secondaries; TransactionManagerRef transaction_manager; CollectionManagerRef collection_manager; OnodeManagerRef onode_manager; diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index 9e858243188f..26c1946193d3 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -209,4 +209,30 @@ bool can_delay_allocation(device_type_t type) { return type <= RANDOM_BLOCK; } +device_type_t string_to_device_type(std::string type) { + if (type == "segmented") { + return device_type_t::SEGMENTED; + } + if (type == "random_block") { + return device_type_t::RANDOM_BLOCK; + } + if (type == "pmem") { + return device_type_t::PMEM; + } + return device_type_t::NONE; +} + +std::string device_type_to_string(device_type_t dtype) { + switch (dtype) { + case device_type_t::SEGMENTED: + return "segmented"; + case device_type_t::RANDOM_BLOCK: + return "random_block"; + case device_type_t::PMEM: + return "pmem"; + default: + ceph_assert(0 
== "impossible"); + } +} + } diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 1ab1a5a88df1..b3517efcb0e1 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -369,6 +369,8 @@ enum device_type_t { }; bool can_delay_allocation(device_type_t type); +device_type_t string_to_device_type(std::string type); +std::string device_type_to_string(device_type_t type); /* Monotonically increasing identifier for the location of a * journal_record. @@ -1015,3 +1017,40 @@ WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::delta_info_t) WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::record_header_t) WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::extent_info_t) WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::segment_header_t) + +template<> +struct denc_traits { + static constexpr bool supported = true; + static constexpr bool featured = false; + static constexpr bool bounded = true; + static constexpr bool need_contiguous = false; + + static void bound_encode( + const crimson::os::seastore::device_type_t &o, + size_t& p, + uint64_t f=0) { + p += sizeof(crimson::os::seastore::device_type_t); + } + template + static std::enable_if_t> + encode( + const crimson::os::seastore::device_type_t &o, + It& p, + uint64_t f=0) { + get_pos_add(p) = o; + } + template + static std::enable_if_t> + decode( + crimson::os::seastore::device_type_t& o, + It& p, + uint64_t f=0) { + o = get_pos_add(p); + } + static void decode( + crimson::os::seastore::device_type_t& o, + ceph::buffer::list::const_iterator &p) { + p.copy(sizeof(crimson::os::seastore::device_type_t), + reinterpret_cast(&o)); + } +}; diff --git a/src/crimson/os/seastore/segment_cleaner.h b/src/crimson/os/seastore/segment_cleaner.h index 7060a55e0cd2..b38b1b65f2e0 100644 --- a/src/crimson/os/seastore/segment_cleaner.h +++ b/src/crimson/os/seastore/segment_cleaner.h @@ -683,7 +683,7 @@ public: ExtentReaderRef&& scanner, bool detailed = 
false); - void mount(SegmentManager &psm, std::vector& sms) { + void mount(device_id_t pdevice_id, std::vector& sms) { crimson::get_logger(ceph_subsys_seastore).debug( "SegmentCleaner::mount: {} segment managers", sms.size()); init_complete = false; @@ -691,7 +691,7 @@ public: journal_tail_target = journal_seq_t{}; journal_tail_committed = journal_seq_t{}; journal_head = journal_seq_t{}; - journal_device_id = psm.get_device_id(); + journal_device_id = pdevice_id; for (auto& sm : sms) { if (sm) diff --git a/src/crimson/os/seastore/segment_manager.h b/src/crimson/os/seastore/segment_manager.h index 53a5ff7a5145..437aa72ad49f 100644 --- a/src/crimson/os/seastore/segment_manager.h +++ b/src/crimson/os/seastore/segment_manager.h @@ -17,6 +17,70 @@ namespace crimson::os::seastore { +using magic_t = uint64_t; + +struct device_spec_t{ + magic_t magic; + device_type_t dtype; + device_id_t id; + DENC(device_spec_t, v, p) { + DENC_START(1, 1, p); + denc(v.magic, p); + denc(v.dtype, p); + denc(v.id, p); + DENC_FINISH(p); + } +}; + +using secondary_device_set_t = + std::map; + +struct block_sm_superblock_t { + size_t size = 0; + size_t segment_size = 0; + size_t block_size = 0; + + size_t segments = 0; + uint64_t tracker_offset = 0; + uint64_t first_segment_offset = 0; + + bool major_dev = false; + magic_t magic = 0; + device_type_t dtype = device_type_t::NONE; + device_id_t device_id = 0; + + seastore_meta_t meta; + + secondary_device_set_t secondary_devices; + DENC(block_sm_superblock_t, v, p) { + DENC_START(1, 1, p); + denc(v.size, p); + denc(v.segment_size, p); + denc(v.block_size, p); + denc(v.segments, p); + denc(v.tracker_offset, p); + denc(v.first_segment_offset, p); + denc(v.meta, p); + denc(v.major_dev, p); + denc(v.magic, p); + denc(v.dtype, p); + denc(v.device_id, p); + if (v.major_dev) { + denc(v.secondary_devices, p); + } + DENC_FINISH(p); + } +}; + +struct segment_manager_config_t { + bool major_dev = false; + magic_t magic = 0; + device_type_t dtype = 
device_type_t::NONE; + device_id_t device_id = 0; + seastore_meta_t meta; + secondary_device_set_t secondary_devices; +}; + class Segment : public boost::intrusive_ref_counter< Segment, boost::thread_unsafe_counter>{ @@ -89,9 +153,14 @@ public: using mount_ret = access_ertr::future<>; virtual mount_ret mount() = 0; + using close_ertr = crimson::errorator< + crimson::ct_error::input_output_error + >; + virtual close_ertr::future<> close() = 0; + using mkfs_ertr = access_ertr; using mkfs_ret = mkfs_ertr::future<>; - virtual mkfs_ret mkfs(seastore_meta_t meta) = 0; + virtual mkfs_ret mkfs(segment_manager_config_t meta) = 0; using open_ertr = crimson::errorator< crimson::ct_error::input_output_error, @@ -137,8 +206,21 @@ public: virtual device_id_t get_device_id() const = 0; + virtual secondary_device_set_t& get_secondary_devices() = 0; + + virtual device_spec_t get_device_spec() const = 0; + + virtual magic_t get_magic() const = 0; + virtual ~SegmentManager() {} }; using SegmentManagerRef = std::unique_ptr; } + +WRITE_CLASS_DENC( + crimson::os::seastore::device_spec_t +) +WRITE_CLASS_DENC( + crimson::os::seastore::block_sm_superblock_t +) diff --git a/src/crimson/os/seastore/segment_manager/block.cc b/src/crimson/os/seastore/segment_manager/block.cc index 12e45d88a6bd..5b395fac589b 100644 --- a/src/crimson/os/seastore/segment_manager/block.cc +++ b/src/crimson/os/seastore/segment_manager/block.cc @@ -131,7 +131,7 @@ SegmentStateTracker::read_in( static block_sm_superblock_t make_superblock( - seastore_meta_t meta, + segment_manager_config_t sm_config, const seastar::stat_data &data) { using crimson::common::get_conf; @@ -168,7 +168,12 @@ block_sm_superblock_t make_superblock( segments, data.block_size, tracker_size + data.block_size, - meta + sm_config.major_dev, + sm_config.magic, + sm_config.dtype, + sm_config.device_id, + sm_config.meta, + std::move(sm_config.secondary_devices) }; } @@ -260,12 +265,12 @@ open_device_ret open_device( }); } - + static 
BlockSegmentManager::access_ertr::future<> write_superblock(seastar::file &device, block_sm_superblock_t sb) { - assert(ceph::encoded_sizeof_bounded() < + assert(ceph::encoded_sizeof(sb) < sb.block_size); return seastar::do_with( bufferptr(ceph::buffer::create_page_aligned(sb.block_size)), @@ -284,8 +289,6 @@ static BlockSegmentManager::access_ertr::future read_superblock(seastar::file &device, seastar::stat_data sd) { - assert(ceph::encoded_sizeof_bounded() < - sd.block_size); return seastar::do_with( bufferptr(ceph::buffer::create_page_aligned(sd.block_size)), [=, &device](auto &bp) { @@ -299,7 +302,13 @@ read_superblock(seastar::file &device, seastar::stat_data sd) bl.push_back(bp); block_sm_superblock_t ret; auto bliter = bl.cbegin(); - decode(ret, bliter); + try { + decode(ret, bliter); + } catch (...) { + ceph_assert(0 == "invalid superblock"); + } + assert(ceph::encoded_sizeof(ret) < + sd.block_size); return BlockSegmentManager::access_ertr::future( BlockSegmentManager::access_ertr::ready_future_marker{}, ret); @@ -384,11 +393,11 @@ BlockSegmentManager::mount_ret BlockSegmentManager::mount() ).safe_then([=](auto p) { device = std::move(p.first); auto sd = p.second; - stats.data_read.increment( - ceph::encoded_sizeof_bounded()); return read_superblock(device, sd); }).safe_then([=](auto sb) { superblock = sb; + stats.data_read.increment( + ceph::encoded_sizeof(superblock)); tracker = std::make_unique( superblock.segments, superblock.block_size); @@ -405,11 +414,17 @@ BlockSegmentManager::mount_ret BlockSegmentManager::mount() stats.metadata_write.increment(tracker->get_size()); return tracker->write_out(device, superblock.tracker_offset); }); + }).safe_then([this] { + logger().debug("segment manager {} mounted", get_device_id()); + register_metrics(); }); } -BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs(seastore_meta_t meta) +BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs( + segment_manager_config_t sm_config) { + 
logger().debug("BlockSegmentManager::mkfs: magic={}, dtype={}, id={}", + sm_config.magic, sm_config.dtype, sm_config.device_id); return seastar::do_with( seastar::file{}, seastar::stat_data{}, @@ -426,12 +441,12 @@ BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs(seastore_meta_t meta) return maybe_create.safe_then([this] { return open_device(device_path, seastar::open_flags::rw); - }).safe_then([&, meta](auto p) { + }).safe_then([&, sm_config](auto p) { device = p.first; stat = p.second; - sb = make_superblock(meta, stat); + sb = make_superblock(sm_config, stat); stats.metadata_write.increment( - ceph::encoded_sizeof_bounded()); + ceph::encoded_sizeof(sb)); return write_superblock(device, sb); }).safe_then([&] { logger().debug("BlockSegmentManager::mkfs: superblock written"); @@ -449,6 +464,7 @@ BlockSegmentManager::mkfs_ret BlockSegmentManager::mkfs(seastore_meta_t meta) BlockSegmentManager::close_ertr::future<> BlockSegmentManager::close() { + logger().debug("closing segment manager {}", get_device_id()); metrics.clear(); return device.close(); } @@ -549,8 +565,11 @@ SegmentManager::read_ertr::future<> BlockSegmentManager::read( void BlockSegmentManager::register_metrics() { + logger().debug("{} {}", __func__, get_device_id()); namespace sm = seastar::metrics; - // TODO: add label for device_id + sm::label label("device_id"); + std::vector label_instances; + label_instances.push_back(label(get_device_id())); stats.reset(); metrics.add_group( "segment_manager", @@ -558,52 +577,62 @@ void BlockSegmentManager::register_metrics() sm::make_counter( "data_read_num", stats.data_read.num, - sm::description("total number of data read") + sm::description("total number of data read"), + label_instances ), sm::make_counter( "data_read_bytes", stats.data_read.bytes, - sm::description("total bytes of data read") + sm::description("total bytes of data read"), + label_instances ), sm::make_counter( "data_write_num", stats.data_write.num, - sm::description("total number of 
data write") + sm::description("total number of data write"), + label_instances ), sm::make_counter( "data_write_bytes", stats.data_write.bytes, - sm::description("total bytes of data write") + sm::description("total bytes of data write"), + label_instances ), sm::make_counter( "metadata_write_num", stats.metadata_write.num, - sm::description("total number of metadata write") + sm::description("total number of metadata write"), + label_instances ), sm::make_counter( "metadata_write_bytes", stats.metadata_write.bytes, - sm::description("total bytes of metadata write") + sm::description("total bytes of metadata write"), + label_instances ), sm::make_counter( "opened_segments", stats.opened_segments, - sm::description("total segments opened") + sm::description("total segments opened"), + label_instances ), sm::make_counter( "closed_segments", stats.closed_segments, - sm::description("total segments closed") + sm::description("total segments closed"), + label_instances ), sm::make_counter( "closed_segments_unused_bytes", stats.closed_segments_unused_bytes, - sm::description("total unused bytes of closed segments") + sm::description("total unused bytes of closed segments"), + label_instances ), sm::make_counter( "released_segments", stats.released_segments, - sm::description("total segments released") + sm::description("total segments released"), + label_instances ), } ); diff --git a/src/crimson/os/seastore/segment_manager/block.h b/src/crimson/os/seastore/segment_manager/block.h index 2712243e3b40..5ce2943f461a 100644 --- a/src/crimson/os/seastore/segment_manager/block.h +++ b/src/crimson/os/seastore/segment_manager/block.h @@ -16,30 +16,6 @@ namespace crimson::os::seastore::segment_manager::block { -struct block_sm_superblock_t { - size_t size = 0; - size_t segment_size = 0; - size_t block_size = 0; - - size_t segments = 0; - uint64_t tracker_offset = 0; - uint64_t first_segment_offset = 0; - - seastore_meta_t meta; - - DENC(block_sm_superblock_t, v, p) { - 
DENC_START(1, 1, p); - denc(v.size, p); - denc(v.segment_size, p); - denc(v.block_size, p); - denc(v.segments, p); - denc(v.tracker_offset, p); - denc(v.first_segment_offset, p); - denc(v.meta, p); - DENC_FINISH(p); - } -}; - using write_ertr = crimson::errorator< crimson::ct_error::input_output_error>; using read_ertr = crimson::errorator< @@ -134,20 +110,14 @@ class BlockSegmentManager final : public SegmentManager { public: mount_ret mount() final; - mkfs_ret mkfs(seastore_meta_t) final; - - using close_ertr = crimson::errorator< - crimson::ct_error::input_output_error - >; + mkfs_ret mkfs(segment_manager_config_t) final; + close_ertr::future<> close(); BlockSegmentManager( - const std::string &path, - device_id_t device_id = 0) - : device_path(path), - device_id(device_id) { - register_metrics(); - } + const std::string &path) + : device_path(path) {} + ~BlockSegmentManager(); open_ertr::future open(segment_id_t id) final; @@ -170,15 +140,27 @@ public: } device_id_t get_device_id() const final { - return device_id; + return superblock.device_id; + } + secondary_device_set_t& get_secondary_devices() final { + return superblock.secondary_devices; } - // public so tests can bypass segment interface when simpler Segment::write_ertr::future<> segment_write( paddr_t addr, ceph::bufferlist bl, bool ignore_check=false); + device_spec_t get_device_spec() const final { + return {superblock.magic, + superblock.dtype, + superblock.device_id}; + } + + magic_t get_magic() const final { + return superblock.magic; + } + private: friend class BlockSegment; using segment_state_t = Segment::segment_state_t; @@ -242,8 +224,3 @@ private: }; } - -WRITE_CLASS_DENC_BOUNDED( - crimson::os::seastore::segment_manager::block::block_sm_superblock_t -) - diff --git a/src/crimson/os/seastore/segment_manager/ephemeral.h b/src/crimson/os/seastore/segment_manager/ephemeral.h index fd8b28c43e20..10fd2c6b345c 100644 --- a/src/crimson/os/seastore/segment_manager/ephemeral.h +++ 
b/src/crimson/os/seastore/segment_manager/ephemeral.h @@ -20,12 +20,18 @@ struct ephemeral_config_t { size_t size = 0; size_t block_size = 0; size_t segment_size = 0; + magic_t magic = 0; + device_type_t dtype = device_type_t::NONE; + device_id_t id = 0; }; constexpr ephemeral_config_t DEFAULT_TEST_EPHEMERAL = { 1 << 30, 4 << 10, - 8 << 20 + 8 << 20, + 0xabcd, + device_type_t::SEGMENTED, + 0 }; std::ostream &operator<<(std::ostream &, const ephemeral_config_t &); @@ -65,17 +71,20 @@ class EphemeralSegmentManager final : public SegmentManager { Segment::close_ertr::future<> segment_close(segment_id_t id); - device_id_t device_id = 0; + secondary_device_set_t sec_device_set; public: EphemeralSegmentManager( - ephemeral_config_t config, - device_id_t device_id = 0) - : config(config), device_id(device_id) {} + ephemeral_config_t config) + : config(config) {} ~EphemeralSegmentManager(); + close_ertr::future<> close() final { + return close_ertr::now(); + } + device_id_t get_device_id() const { - return device_id; + return config.id; } using init_ertr = crimson::errorator< @@ -88,7 +97,7 @@ public: return mount_ertr::now(); } - mkfs_ret mkfs(seastore_meta_t) { + mkfs_ret mkfs(segment_manager_config_t) { return mkfs_ertr::now(); } @@ -116,6 +125,18 @@ public: return *meta; } + secondary_device_set_t& get_secondary_devices() final { + return sec_device_set; + } + + device_spec_t get_device_spec() const final { + return {config.magic, config.dtype, config.id}; + } + + magic_t get_magic() const final { + return config.magic; + } + void remount(); // public so tests can bypass segment interface when simpler diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 2bb4686bdaaa..5d52fa83d2c1 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -17,13 +17,15 @@ TransactionManager::TransactionManager( JournalRef _journal, CacheRef _cache, LBAManagerRef 
_lba_manager, - ExtentPlacementManagerRef&& epm) + ExtentPlacementManagerRef&& epm, + ExtentReader& scanner) : segment_manager(_segment_manager), segment_cleaner(std::move(_segment_cleaner)), cache(std::move(_cache)), lba_manager(std::move(_lba_manager)), journal(std::move(_journal)), - epm(std::move(epm)) + epm(std::move(epm)), + scanner(scanner) { segment_cleaner->set_extent_callback(this); journal->set_write_pipeline(&write_pipeline); @@ -33,7 +35,9 @@ TransactionManager::TransactionManager( TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs() { LOG_PREFIX(TransactionManager::mkfs); - segment_cleaner->mount(segment_manager, segment_managers); + segment_cleaner->mount( + segment_manager.get_device_id(), + scanner.get_segment_managers()); return journal->open_for_write().safe_then([this, FNAME](auto addr) { DEBUG("about to do_with"); segment_cleaner->init_mkfs(addr); @@ -64,7 +68,9 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount() { LOG_PREFIX(TransactionManager::mount); cache->init(); - segment_cleaner->mount(segment_manager, segment_managers); + segment_cleaner->mount( + segment_manager.get_device_id(), + scanner.get_segment_managers()); return segment_cleaner->init_segments().safe_then( [this](auto&& segments) { return journal->replay( diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 80d1459b0e22..7dc7b654c416 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -71,7 +71,8 @@ public: JournalRef journal, CacheRef cache, LBAManagerRef lba_manager, - ExtentPlacementManagerRef&& epm); + ExtentPlacementManagerRef&& epm, + ExtentReader& scanner); /// Writes initial metadata to disk using mkfs_ertr = base_ertr; @@ -499,17 +500,35 @@ public: return segment_cleaner->stat(); } + void add_segment_manager(SegmentManager* sm) { + LOG_PREFIX(TransactionManager::add_segment_manager); + DEBUG("adding segment manager 
{}", sm->get_device_id()); + scanner.add_segment_manager(sm); + epm->add_allocator( + device_type_t::SEGMENTED, + std::make_unique( + *segment_cleaner, + *sm, + *lba_manager, + *journal, + *cache)); + } + ~TransactionManager(); private: friend class Transaction; + // although there might be multiple devices backing seastore, + // only one of them is supposed to hold the journal. This + // segment manager is that device SegmentManager &segment_manager; SegmentCleanerRef segment_cleaner; CacheRef cache; LBAManagerRef lba_manager; JournalRef journal; ExtentPlacementManagerRef epm; + ExtentReader& scanner; WritePipeline write_pipeline; diff --git a/src/crimson/tools/store_nbd/tm_driver.cc b/src/crimson/tools/store_nbd/tm_driver.cc index 5e97007a563a..f83801730a7c 100644 --- a/src/crimson/tools/store_nbd/tm_driver.cc +++ b/src/crimson/tools/store_nbd/tm_driver.cc @@ -134,7 +134,8 @@ void TMDriver::init() SegmentCleaner::config_t::get_default(), std::move(scanner), false /* detailed */); - segment_cleaner->mount(*segment_manager); + std::vector sms; + segment_cleaner->mount(segment_manager->get_device_id(), sms); auto journal = std::make_unique(*segment_manager, scanner_ref); auto cache = std::make_unique(scanner_ref, segment_manager->get_block_size()); auto lba_manager = lba_manager::create_lba_manager(*segment_manager, *cache); @@ -158,7 +159,8 @@ void TMDriver::init() std::move(journal), std::move(cache), std::move(lba_manager), - std::move(epm)); + std::move(epm), + scanner_ref); } void TMDriver::clear() @@ -181,7 +183,13 @@ seastar::future<> TMDriver::mkfs() seastore_meta_t meta; meta.seastore_id.generate_random(); return segment_manager->mkfs( - std::move(meta) + segment_manager_config_t{ + true, + (magic_t)std::rand(), + device_type_t::SEGMENTED, + 0, + meta, + secondary_device_set_t()} ).safe_then([this] { logger().debug(""); return segment_manager->mount(); diff --git a/src/test/crimson/seastore/test_randomblock_manager.cc
b/src/test/crimson/seastore/test_randomblock_manager.cc index f4c2526203fb..2973ef487dcc 100644 --- a/src/test/crimson/seastore/test_randomblock_manager.cc +++ b/src/test/crimson/seastore/test_randomblock_manager.cc @@ -61,6 +61,7 @@ struct rbm_test_t : public seastar_test_suite_t, reader(new ExtentReader()), cache(*reader, segment_manager->get_block_size()) { + reader->add_segment_manager(segment_manager.get()); device = new nvme_device::TestMemory(DEFAULT_TEST_SIZE); rbm_manager.reset(new NVMeManager(device, std::string())); config.start = 0; diff --git a/src/test/crimson/seastore/transaction_manager_test_state.h b/src/test/crimson/seastore/transaction_manager_test_state.h index e12b79eddbb1..00e77b68b784 100644 --- a/src/test/crimson/seastore/transaction_manager_test_state.h +++ b/src/test/crimson/seastore/transaction_manager_test_state.h @@ -76,7 +76,6 @@ auto get_transaction_manager( auto segment_cleaner = std::make_unique( SegmentCleaner::config_t::get_default(), std::move(scanner), - segment_manager, true); auto journal = std::make_unique(segment_manager, scanner_ref); auto cache = std::make_unique(scanner_ref, segment_manager.get_block_size()); @@ -101,13 +100,15 @@ auto get_transaction_manager( std::move(journal), std::move(cache), std::move(lba_manager), - std::move(epm)); + std::move(epm), + scanner_ref); } auto get_seastore(SegmentManagerRef sm) { auto tm = get_transaction_manager(*sm); auto cm = std::make_unique(*tm); return std::make_unique( + "", std::move(sm), std::move(tm), std::move(cm), @@ -195,6 +196,7 @@ protected: class TestSegmentManagerWrapper final : public SegmentManager { SegmentManager &sm; device_id_t device_id = 0; + secondary_device_set_t set; public: TestSegmentManagerWrapper( SegmentManager &sm, @@ -209,10 +211,25 @@ public: return mount_ertr::now(); // we handle this above } - mkfs_ret mkfs(seastore_meta_t c) final { + mkfs_ret mkfs(segment_manager_config_t c) final { return mkfs_ertr::now(); // we handle this above } + 
close_ertr::future<> close() final { + return sm.close(); + } + + secondary_device_set_t& get_secondary_devices() final { + return sm.get_secondary_devices(); + } + + device_spec_t get_device_spec() const final { + return sm.get_device_spec(); + } + + magic_t get_magic() const final { + return sm.get_magic(); + } open_ertr::future open(segment_id_t id) final { return sm.open(id); diff --git a/src/vstart.sh b/src/vstart.sh index f66fc90d797d..ef35228e4cc7 100755 --- a/src/vstart.sh +++ b/src/vstart.sh @@ -180,6 +180,7 @@ with_mgr_restful=false filestore_path= kstore_path= declare -a block_devs +declare -a secondary_block_devs VSTART_SEC="client.vstart.sh" @@ -245,6 +246,7 @@ options: --no-parallel: dont start all OSDs in parallel --jaeger: use jaegertracing for tracing --seastore-devs: comma-separated list of blockdevs to use for seastore + --seastore-secondary-devs: comma-separated list of secondary blockdevs to use for seastore EOF usage_exit() { @@ -267,6 +269,21 @@ parse_block_devs() { done } +parse_secondary_devs() { + local opt_name=$1 + shift + local devs=$1 + shift + local dev + IFS=',' read -r -a secondary_block_devs <<< "$devs" + for dev in "${secondary_block_devs[@]}"; do + if [ ! -b $dev ] || [ !
-w $dev ]; then + echo "All $opt_name must refer to writable block devices" + exit 1 + fi + done +} + while [ $# -ge 1 ]; do case $1 in -d | --debug) @@ -475,6 +492,10 @@ case $1 in parse_block_devs --seastore-devs "$2" shift ;; + --seastore-secondary-devs) + parse_secondary_devs --seastore-secondary-devs "$2" + shift + ;; --bluestore-spdk) [ -z "$2" ] && usage_exit IFS=',' read -r -a bluestore_spdk_dev <<< "$2" @@ -952,6 +973,10 @@ EOF dd if=/dev/zero of=${block_devs[$osd]} bs=1M count=1 ln -s ${block_devs[$osd]} $CEPH_DEV_DIR/osd$osd/block fi + if [ -n "${secondary_block_devs[$osd]}" ]; then + dd if=/dev/zero of=${secondary_block_devs[$osd]} bs=1M count=1 + ln -s ${secondary_block_devs[$osd]} $CEPH_DEV_DIR/osd$osd/block.segmented.1 + fi fi if [ "$objectstore" == "bluestore" ]; then wconf <