From ad5b0c12a788bca998cb95bfbaa745d9856c56ba Mon Sep 17 00:00:00 2001 From: chunmei-liu Date: Tue, 22 Nov 2022 08:59:56 -0800 Subject: [PATCH] crimson/os: make ZNS support shard metadata Signed-off-by: chunmei-liu --- .../os/seastore/segment_manager/zns.cc | 74 +++++++++++++++--- src/crimson/os/seastore/segment_manager/zns.h | 77 +++++++++++++++---- 2 files changed, 125 insertions(+), 26 deletions(-) diff --git a/src/crimson/os/seastore/segment_manager/zns.cc b/src/crimson/os/seastore/segment_manager/zns.cc index 3ccbfecd21f1e..deaaadf66687b 100644 --- a/src/crimson/os/seastore/segment_manager/zns.cc +++ b/src/crimson/os/seastore/segment_manager/zns.cc @@ -86,7 +86,16 @@ static zns_sm_metadata_t make_metadata( size_t segment_size = zone_size; size_t zones_per_segment = segment_size / zone_size; size_t segments = (num_zones - RESERVED_ZONES) / zones_per_segment; + size_t per_shard_segments = segments / seastar::smp::count; size_t available_size = zone_capacity * segments; + size_t per_shard_available_size = zone_capacity * per_shard_segments; + std::vector shard_infos(seastar::smp::count); + for (unsigned int i = 0; i < seastar::smp::count; i++) { + shard_infos[i].size = per_shard_available_size; + shard_infos[i].segments = per_shard_segments; + shard_infos[i].first_segment_offset = zone_size * RESERVED_ZONES + + i * segment_size* per_shard_segments; + } assert(total_size == num_zones * zone_size); @@ -107,15 +116,14 @@ static zns_sm_metadata_t make_metadata( zone_capacity * zones_per_segment); zns_sm_metadata_t ret = zns_sm_metadata_t{ - available_size, + seastar::smp::count, segment_size, zone_capacity * zones_per_segment, zones_per_segment, zone_capacity, data.block_size, - segments, zone_size, - zone_size * RESERVED_ZONES, + shard_infos, meta}; ret.validate(); return ret; @@ -160,8 +168,7 @@ static seastar::future<> reset_device( { return seastar::do_with( blk_zone_range{}, - ZoneReport(nr_zones), - [&, nr_zones](auto &range, auto &zr) { + [&, nr_zones, zone_size_sects](auto &range) { range.sector = 0; range.nr_sectors = zone_size_sects * nr_zones; return device.ioctl( @@ -332,7 +339,18 @@ read_metadata(seastar::file &device, seastar::stat_data sd) }); } -ZNSSegmentManager::mount_ret ZNSSegmentManager::mount() +ZNSSegmentManager::mount_ret ZNSSegmentManager::mount() +{ + return shard_devices.invoke_on_all([](auto &local_device) { + return local_device.shard_mount( + ).handle_error( + crimson::ct_error::assert_all{ + "Invalid error in ZNSSegmentManager::mount" + }); + }); +} + +ZNSSegmentManager::mount_ret ZNSSegmentManager::shard_mount() { return open_device( device_path, seastar::open_flags::rw @@ -341,6 +359,7 @@ ZNSSegmentManager::mount_ret ZNSSegmentManager::mount() auto sd = p.second; return read_metadata(device, sd); }).safe_then([=, this](auto meta){ + shard_info = meta.shard_infos[seastar::this_shard_id()]; metadata = meta; return mount_ertr::now(); }); @@ -349,7 +368,22 @@ ZNSSegmentManager::mount_ret ZNSSegmentManager::mount() ZNSSegmentManager::mkfs_ret ZNSSegmentManager::mkfs( device_config_t config) { - LOG_PREFIX(ZNSSegmentManager::mkfs); + return shard_devices.local().primary_mkfs(config + ).safe_then([this] { + return shard_devices.invoke_on_all([](auto &local_device) { + return local_device.shard_mkfs( + ).handle_error( + crimson::ct_error::assert_all{ + "Invalid error in ZNSSegmentManager::mkfs" + }); + }); + }); +} + +ZNSSegmentManager::mkfs_ret ZNSSegmentManager::primary_mkfs( + device_config_t config) +{ + LOG_PREFIX(ZNSSegmentManager::primary_mkfs); INFO("starting, device_path {}", device_path); return seastar::do_with( seastar::file{}, @@ -409,6 +443,26 @@ ZNSSegmentManager::mkfs_ret ZNSSegmentManager::mkfs( }); } +ZNSSegmentManager::mkfs_ret ZNSSegmentManager::shard_mkfs() +{ + LOG_PREFIX(ZNSSegmentManager::shard_mkfs); + INFO("starting, device_path {}", device_path); + return open_device( + device_path, seastar::open_flags::rw + ).safe_then([=, this](auto p) { + device = std::move(p.first); + auto sd = p.second; + return read_metadata(device, sd); + }).safe_then([=, this](auto meta){ + shard_info = meta.shard_infos[seastar::this_shard_id()]; + metadata = meta; + return device.close(); + }).safe_then([FNAME] { + DEBUG("Returning from shard_mkfs."); + return mkfs_ertr::now(); + }); +} + // Return range of sectors to operate on. struct blk_zone_range make_range( segment_id_t id, @@ -479,7 +533,7 @@ ZNSSegmentManager::open_ertr::future ZNSSegmentManager::open( range = make_range( id, metadata.segment_size, - metadata.first_segment_offset); + shard_info.first_segment_offset); return blk_zone_op( device, range, @@ -506,7 +560,7 @@ ZNSSegmentManager::release_ertr::future<> ZNSSegmentManager::release( range = make_range( id, metadata.segment_size, - metadata.first_segment_offset); + shard_info.first_segment_offset); return blk_zone_op( device, range, @@ -555,7 +609,7 @@ Segment::close_ertr::future<> ZNSSegmentManager::segment_close( range = make_range( id, metadata.segment_size, - metadata.first_segment_offset); + shard_info.first_segment_offset); return blk_zone_op( device, range, diff --git a/src/crimson/os/seastore/segment_manager/zns.h b/src/crimson/os/seastore/segment_manager/zns.h index ad8c3b4fe3b68..b98ff1c89f4ad 100644 --- a/src/crimson/os/seastore/segment_manager/zns.h +++ b/src/crimson/os/seastore/segment_manager/zns.h @@ -19,16 +19,30 @@ namespace crimson::os::seastore::segment_manager::zns { - struct zns_sm_metadata_t { + struct zns_shard_info_t { size_t size = 0; + size_t segments = 0; + size_t first_segment_offset = 0; + + DENC(zns_shard_info_t, v, p) { + DENC_START(1, 1, p); + denc(v.size, p); + denc(v.segments, p); + denc(v.first_segment_offset, p); + DENC_FINISH(p); + } + }; + + struct zns_sm_metadata_t { + unsigned int shard_num = 0; size_t segment_size = 0; size_t segment_capacity = 0; size_t zones_per_segment = 0; size_t zone_capacity = 0; size_t block_size = 0; - size_t segments = 0; size_t zone_size = 0; - size_t first_segment_offset = 0; + + std::vector shard_infos; seastore_meta_t meta; @@ -40,15 +54,14 @@ namespace crimson::os::seastore::segment_manager::zns { DENC(zns_sm_metadata_t, v, p) { DENC_START(1, 1, p); - denc(v.size, p); + denc(v.shard_num, p); denc(v.segment_size, p); denc(v.segment_capacity, p); denc(v.zones_per_segment, p); denc(v.zone_capacity, p); denc(v.block_size, p); - denc(v.segments, p); denc(v.zone_size, p); - denc(v.first_segment_offset, p); + denc(v.shard_infos, p); denc(v.meta, p); denc(v.magic, p); denc(v.dtype, p); @@ -60,12 +73,15 @@ namespace crimson::os::seastore::segment_manager::zns { } void validate() const { - ceph_assert_always(size > 0); - ceph_assert_always(size <= DEVICE_OFF_MAX); + ceph_assert_always(shard_num == seastar::smp::count); + for (unsigned int i = 0; i < seastar::smp::count; i++) { + ceph_assert_always(shard_infos[i].size > 0); + ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX); + ceph_assert_always(shard_infos[i].segments > 0); + ceph_assert_always(shard_infos[i].segments <= DEVICE_SEGMENT_ID_MAX); + } ceph_assert_always(segment_capacity > 0); ceph_assert_always(segment_capacity <= SEGMENT_OFF_MAX); - ceph_assert_always(segments > 0); - ceph_assert_always(segments <= DEVICE_SEGMENT_ID_MAX); } }; @@ -102,9 +118,29 @@ namespace crimson::os::seastore::segment_manager::zns { }; class ZNSSegmentManager final : public SegmentManager{ + // interfaces used by Device public: + seastar::future<> start() { + return shard_devices.start(device_path); + } + + seastar::future<> stop() { + return shard_devices.stop(); + } + + Device& get_sharded_device() final { + return shard_devices.local(); + } + mount_ret mount() final; mkfs_ret mkfs(device_config_t meta) final; + + ZNSSegmentManager(const std::string &path) : device_path(path) {} + + ~ZNSSegmentManager() final = default; + + //interfaces used by each shard device + public: open_ertr::future open(segment_id_t id) final; close_ertr::future<> close() final; @@ -120,7 +156,7 @@ namespace crimson::os::seastore::segment_manager::zns { } size_t get_available_size() const final { - return metadata.size; + return shard_info.size; }; extent_len_t get_block_size() const final { @@ -141,10 +177,6 @@ namespace crimson::os::seastore::segment_manager::zns { magic_t get_magic() const final; - ZNSSegmentManager(const std::string &path) : device_path(path) {} - - ~ZNSSegmentManager() final = default; - Segment::write_ertr::future<> segment_write( paddr_t addr, ceph::bufferlist bl, @@ -153,6 +185,7 @@ namespace crimson::os::seastore::segment_manager::zns { private: friend class ZNSSegment; std::string device_path; + zns_shard_info_t shard_info; zns_sm_metadata_t metadata; seastar::file device; uint32_t nr_zones; @@ -188,14 +221,26 @@ namespace crimson::os::seastore::segment_manager::zns { uint64_t get_offset(paddr_t addr) { auto& seg_addr = addr.as_seg_paddr(); - return (metadata.first_segment_offset + + return (shard_info.first_segment_offset + (seg_addr.get_segment_id().device_segment_id() * metadata.segment_size)) + seg_addr.get_segment_off(); } + private: + // shard 0 mkfs + mkfs_ret primary_mkfs(device_config_t meta); + // all shards mkfs + mkfs_ret shard_mkfs(); + + mount_ret shard_mount(); + + seastar::sharded shard_devices; }; } +WRITE_CLASS_DENC_BOUNDED( + crimson::os::seastore::segment_manager::zns::zns_shard_info_t +) WRITE_CLASS_DENC_BOUNDED( crimson::os::seastore::segment_manager::zns::zns_sm_metadata_t ) -- 2.39.5