From: Joseph Sawaya
Date: Fri, 3 Dec 2021 17:28:21 +0000 (+0000)
Subject: crimson/os/seastore: add ZNSSegmentManager
X-Git-Tag: v17.1.0~232^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=84b040ba7d425fa4f3c756d402b9c4e5e6635fdd;p=ceph.git

crimson/os/seastore: add ZNSSegmentManager

This commit adds the ZNSSegmentManager, which lets SeaStore interface
with ZNS (zoned namespace) devices through the Linux blkzoned ioctl
interface (linux/blkzoned.h).

Signed-off-by: Joseph Sawaya
---

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5406669eda08..8264a083d9c4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -193,6 +193,12 @@ if(WITH_ZFS)
   set(HAVE_LIBZFS ${ZFS_FOUND})
 endif()
 
+option(WITH_ZNS "enable zns support" OFF)
+if (WITH_ZNS)
+  # TODO: add detection, need kernel header >= 5.5
+  set(HAVE_ZNS ON)
+endif()
+
 option(WITH_BLUESTORE "Bluestore OSD backend" ON)
 if(WITH_BLUESTORE)
   if(LINUX)
diff --git a/src/crimson/os/futurized_store.cc b/src/crimson/os/futurized_store.cc
index 864510b631b9..e072c0d262bd 100644
--- a/src/crimson/os/futurized_store.cc
+++ b/src/crimson/os/futurized_store.cc
@@ -16,14 +16,20 @@ FuturizedStore::create(const std::string& type,
                        const ConfigValues& values)
 {
   if (type == "cyanstore") {
-    return seastar::make_ready_future<std::unique_ptr<FuturizedStore>>(std::make_unique<CyanStore>(data));
+    return seastar::make_ready_future<std::unique_ptr<FuturizedStore>>(
+      std::make_unique<CyanStore>(data));
   } else if (type == "seastore") {
-    return seastar::make_ready_future<std::unique_ptr<FuturizedStore>>(crimson::os::seastore::make_seastore(data, values));
+    return crimson::os::seastore::make_seastore(
+      data, values
+    ).then([] (auto seastore) {
+      return seastar::make_ready_future<std::unique_ptr<FuturizedStore>>(
+        seastore.release());
+    });
   } else {
 #ifdef WITH_BLUESTORE
     // use AlienStore as a fallback. It adapts e.g. BlueStore.
-    return seastar::make_ready_future<std::unique_ptr<FuturizedStore>>(std::make_unique<AlienStore>(
-      type, data, values));
+    return seastar::make_ready_future<std::unique_ptr<FuturizedStore>>(
+      std::make_unique<AlienStore>(type, data, values));
 #else
     ceph_abort_msgf("unsupported objectstore type: %s", type.c_str());
     return {};
diff --git a/src/crimson/os/seastore/CMakeLists.txt b/src/crimson/os/seastore/CMakeLists.txt
index 6a03e8faa873..e28a1a8a7c57 100644
--- a/src/crimson/os/seastore/CMakeLists.txt
+++ b/src/crimson/os/seastore/CMakeLists.txt
@@ -1,6 +1,7 @@
-add_library(crimson-seastore STATIC
+set(crimson_seastore_srcs
   cached_extent.cc
   seastore_types.cc
+  segment_manager.cc
   segment_manager/ephemeral.cc
   segment_manager/block.cc
   transaction_manager.cc
@@ -41,6 +42,15 @@ add_library(crimson-seastore STATIC
   ../../../test/crimson/seastore/test_block.cc
   ${PROJECT_SOURCE_DIR}/src/os/Transaction.cc
   )
+
+if(HAVE_ZNS)
+  list(APPEND crimson_seastore_srcs
+    segment_manager/zns.cc)
+endif()
+
+add_library(crimson-seastore STATIC
+  ${crimson_seastore_srcs})
+
 target_link_libraries(crimson-seastore crimson)
 set_target_properties(crimson-seastore PROPERTIES
diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc
index 600ba4b58f77..f4cf18048013 100644
--- a/src/crimson/os/seastore/seastore.cc
+++ b/src/crimson/os/seastore/seastore.cc
@@ -22,6 +22,7 @@
 
 #include "crimson/os/futurized_collection.h"
 #include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/segment_manager.h"
 #include "crimson/os/seastore/segment_manager/block.h"
 #include "crimson/os/seastore/collection_manager/flat_collection_manager.h"
 #include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h"
@@ -30,6 +31,7 @@
 #include "crimson/os/seastore/onode_manager.h"
 #include "crimson/os/seastore/object_data_handler.h"
 
+
 using std::string;
 using crimson::common::local_conf;
 
@@ 
-1376,45 +1378,45 @@ uuid_d SeaStore::get_fsid() const return segment_manager->get_meta().seastore_id; } -std::unique_ptr make_seastore( +seastar::future> make_seastore( const std::string &device, const ConfigValues &config) { - auto sm = std::make_unique< - segment_manager::block::BlockSegmentManager - >(device + "/block"); - - auto scanner = std::make_unique(); - auto& scanner_ref = *scanner.get(); - auto segment_cleaner = std::make_unique( - SegmentCleaner::config_t::get_default(), - std::move(scanner), - false /* detailed */); - - auto journal = std::make_unique(*sm, scanner_ref); - auto cache = std::make_unique(scanner_ref); - auto lba_manager = lba_manager::create_lba_manager(*sm, *cache); - - auto epm = std::make_unique(*cache, *lba_manager); - - journal->set_segment_provider(&*segment_cleaner); - - auto tm = std::make_unique( - *sm, - std::move(segment_cleaner), - std::move(journal), - std::move(cache), - std::move(lba_manager), - std::move(epm), - scanner_ref); - - auto cm = std::make_unique(*tm); - return std::make_unique( - device, - std::move(sm), - std::move(tm), - std::move(cm), - std::make_unique(*tm)); + return SegmentManager::get_segment_manager( + device + ).then([&device](auto sm) { + auto scanner = std::make_unique(); + auto& scanner_ref = *scanner.get(); + auto segment_cleaner = std::make_unique( + SegmentCleaner::config_t::get_default(), + std::move(scanner), + false /* detailed */); + + auto journal = std::make_unique(*sm, scanner_ref); + auto cache = std::make_unique(scanner_ref); + auto lba_manager = lba_manager::create_lba_manager(*sm, *cache); + + auto epm = std::make_unique(*cache, *lba_manager); + + journal->set_segment_provider(&*segment_cleaner); + + auto tm = std::make_unique( + *sm, + std::move(segment_cleaner), + std::move(journal), + std::move(cache), + std::move(lba_manager), + std::move(epm), + scanner_ref); + + auto cm = std::make_unique(*tm); + return std::make_unique( + device, + std::move(sm), + std::move(tm), + std::move(cm), + std::make_unique(*tm)); + }); } } diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h index b205872141d7..e533d42e1d74 100644 --- a/src/crimson/os/seastore/seastore.h +++ b/src/crimson/os/seastore/seastore.h @@ -388,7 +388,7 @@ private: seastar::future<> write_fsid(uuid_d new_osd_fsid); }; -std::unique_ptr make_seastore( +seastar::future> make_seastore( const std::string &device, const ConfigValues &config); } diff --git a/src/crimson/os/seastore/segment_manager.cc b/src/crimson/os/seastore/segment_manager.cc new file mode 100644 index 000000000000..012990d8ac96 --- /dev/null +++ b/src/crimson/os/seastore/segment_manager.cc @@ -0,0 +1,61 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "crimson/os/seastore/segment_manager.h" +#include "crimson/os/seastore/segment_manager/block.h" +#include "crimson/common/log.h" + + +#ifdef HAVE_ZNS +#include "crimson/os/seastore/segment_manager/zns.h" +#endif + +namespace{ + seastar::logger &logger(){ + return crimson::get_logger(ceph_subsys_seastore); + } +} + +namespace crimson::os::seastore { + +seastar::future +SegmentManager::get_segment_manager( + const std::string &device) +{ +#ifdef HAVE_ZNS + return seastar::do_with( + static_cast(0), + [&](auto &nr_zones) { + return seastar::open_file_dma( + device + "/block", + seastar::open_flags::rw + ).then([&](auto file) { + return seastar::do_with( + file, + [=, &nr_zones](auto &f) -> seastar::future { + ceph_assert(f); + return 
f.ioctl(BLKGETNRZONES, (void *)&nr_zones); + }); + }).then([&](auto ret) -> crimson::os::seastore::SegmentManagerRef { + crimson::os::seastore::SegmentManagerRef sm; + logger().error("NR_ZONES: {}", nr_zones); + if (nr_zones != 0) { + return std::make_unique< + segment_manager::zns::ZNSSegmentManager + >(device + "/block"); + } else { + return std::make_unique< + segment_manager::block::BlockSegmentManager + >(device + "/block"); + } + }); + }); +#else + return seastar::make_ready_future( + std::make_unique< + segment_manager::block::BlockSegmentManager + >(device + "/block")); +#endif +} + +} diff --git a/src/crimson/os/seastore/segment_manager.h b/src/crimson/os/seastore/segment_manager.h index 437aa72ad49f..4315829618c2 100644 --- a/src/crimson/os/seastore/segment_manager.h +++ b/src/crimson/os/seastore/segment_manager.h @@ -13,6 +13,7 @@ #include "include/ceph_assert.h" #include "crimson/os/seastore/seastore_types.h" #include "include/buffer_fwd.h" +#include "crimson/common/config_proxy.h" #include "crimson/osd/exceptions.h" namespace crimson::os::seastore { @@ -141,6 +142,9 @@ public: using SegmentRef = boost::intrusive_ptr; constexpr size_t PADDR_SIZE = sizeof(paddr_t); +class SegmentManager; + +using SegmentManagerRef = std::unique_ptr; class SegmentManager { public: @@ -213,8 +217,9 @@ public: virtual magic_t get_magic() const = 0; virtual ~SegmentManager() {} + + static seastar::future get_segment_manager(const std::string &device); }; -using SegmentManagerRef = std::unique_ptr; } diff --git a/src/crimson/os/seastore/segment_manager/zns.cc b/src/crimson/os/seastore/segment_manager/zns.cc new file mode 100644 index 000000000000..bc9752b87e78 --- /dev/null +++ b/src/crimson/os/seastore/segment_manager/zns.cc @@ -0,0 +1,597 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include +#include +#include + +#include "crimson/os/seastore/segment_manager/zns.h" +#include "crimson/common/config_proxy.h" +#include "crimson/common/log.h" +#include "include/buffer.h" + +namespace { +seastar::logger &logger(){ + return crimson::get_logger(ceph_subsys_seastore); +} +} + +namespace crimson::os::seastore::segment_manager::zns { + +using open_device_ret = ZNSSegmentManager::access_ertr::future< + std::pair>; +static open_device_ret open_device( + const std::string &path, + seastar::open_flags mode) +{ + return seastar::file_stat( + path, seastar::follow_symlink::yes + ).then([mode, &path](auto stat) mutable{ + return seastar::open_file_dma(path, mode).then([=](auto file){ + logger().error( + "open_device: open successful, size {}", + stat.size); + return std::make_pair(file, stat); + }); + }).handle_exception( + [](auto e) -> open_device_ret { + logger().error( + "open_device: got error {}", + e); + return crimson::ct_error::input_output_error::make(); + } + ); +} + +static zns_sm_metadata_t make_metadata( + seastore_meta_t meta, + const seastar::stat_data &data, + size_t zone_size, + size_t zone_capacity, + size_t num_zones) +{ + using crimson::common::get_conf; + + auto config_size = get_conf( + "seastore_device_size"); + + size_t size = (data.size == 0) ? 
config_size : data.size; + + auto config_segment_size = get_conf( + "seastore_segment_size"); + logger().error("CONFIG SIZE: {}", config_segment_size); + size_t zones_per_segment = config_segment_size / zone_capacity; + + size_t segments = (num_zones - 1) * zones_per_segment; + + logger().debug( + "{}: size {}, block_size {}, allocated_size {}, configured_size {}, " + "segment_size {}", + __func__, + data.size, + data.block_size, + data.allocated_size, + config_size, + config_segment_size); + + zns_sm_metadata_t ret = zns_sm_metadata_t{ + size, + config_segment_size, + zone_capacity * zones_per_segment, + zones_per_segment, + zone_capacity, + data.block_size, + segments, + zone_size, + zone_size, + meta}; + return ret; +} + +struct ZoneReport { + struct blk_zone_report *hdr; + ZoneReport(int nr_zones) + : hdr((blk_zone_report *)malloc( + sizeof(struct blk_zone_report) + nr_zones * sizeof(struct blk_zone))){;} + ~ZoneReport(){ + free(hdr); + } + ZoneReport(const ZoneReport &) = delete; + ZoneReport(ZoneReport &&rhs) : hdr(rhs.hdr) { + rhs.hdr = nullptr; + } +}; + +static seastar::future<> reset_device( + seastar::file &device, + uint32_t zone_size, + uint32_t nr_zones) +{ + return seastar::do_with( + blk_zone_range{}, + ZoneReport(nr_zones), + [&, nr_zones] (auto &range, auto &zr){ + range.sector = 0; + range.nr_sectors = zone_size * nr_zones; + return device.ioctl( + BLKRESETZONE, + &range + ).then([&](int ret){ + return seastar::now(); + }); + } + ); +} + +static seastar::future get_zone_capacity( + seastar::file &device, + uint32_t zone_size, + uint32_t nr_zones) +{ + return seastar::do_with( + blk_zone_range{}, + ZoneReport(nr_zones), + [&] (auto &first_zone_range, auto &zr){ + first_zone_range.sector = 0; + first_zone_range.nr_sectors = zone_size; + return device.ioctl( + BLKOPENZONE, + &first_zone_range + ).then([&](int ret){ + return device.ioctl(BLKREPORTZONE, zr.hdr); + }).then([&] (int ret){ + return device.ioctl(BLKRESETZONE, &first_zone_range); + }).then([&](int ret){ + return seastar::make_ready_future(zr.hdr->zones[0].wp); + }); + } + ); +} + +static write_ertr::future<> do_write( + seastar::file &device, + uint64_t offset, + bufferptr &bptr) +{ + logger().debug( + "zns: do_write offset {} len {}", + offset, + bptr.length()); + return device.dma_write( + offset, + bptr.c_str(), + bptr.length() + ).handle_exception( + [](auto e) -> write_ertr::future { + logger().error( + "do_write: dma_write got error {}", + e); + return crimson::ct_error::input_output_error::make(); + } + ).then([length = bptr.length()](auto result) -> write_ertr::future<> { + if (result != length) { + return crimson::ct_error::input_output_error::make(); + } + return write_ertr::now(); + }); +} + +static write_ertr::future<> do_writev( + seastar::file &device, + uint64_t offset, + bufferlist&& bl, + size_t block_size) +{ + logger().error( + "block: do_writev offset {} len {}", + offset, + bl.length()); + // writev requires each buffer to be aligned to the disks' block + // size, we need to rebuild here + bl.rebuild_aligned(block_size); + + std::vector iov; + bl.prepare_iov(&iov); + return device.dma_write( + offset, + std::move(iov) + ).handle_exception( + [](auto e) -> write_ertr::future { + logger().error( + "do_writev: dma_write got error {}", + e); + return crimson::ct_error::input_output_error::make(); + } + ).then([bl=std::move(bl)/* hold the buf until the end of io */](size_t written) + -> write_ertr::future<> { + if (written != bl.length()) { + return crimson::ct_error::input_output_error::make(); + 
} + return write_ertr::now(); + }); +} + +static ZNSSegmentManager::access_ertr::future<> +write_metadata(seastar::file &device, zns_sm_metadata_t sb) +{ + assert(ceph::encoded_sizeof_bounded() < + sb.block_size); + return seastar::do_with( + bufferptr(ceph::buffer::create_page_aligned(sb.block_size)), + [=, &device](auto &bp){ + logger().error("BLOCK SIZE: {}", sb.block_size); + bufferlist bl; + encode(sb, bl); + auto iter = bl.begin(); + assert(bl.length() < sb.block_size); + logger().error("{}", bl.length()); + iter.copy(bl.length(), bp.c_str()); + logger().debug("write_metadata: doing writeout"); + return do_write(device, 0, bp); + }); +} + +static read_ertr::future<> do_read( + seastar::file &device, + uint64_t offset, + size_t len, + bufferptr &bptr) +{ + assert(len <= bptr.length()); + logger().debug( + "block: do_read offset {} len {}", + offset, + len); + return device.dma_read( + offset, + bptr.c_str(), + len + ).handle_exception( + [](auto e) -> read_ertr::future { + logger().error( + "do_read: dma_read got error {}", + e); + return crimson::ct_error::input_output_error::make(); + } + ).then([len](auto result) -> read_ertr::future<> { + if (result != len) { + return crimson::ct_error::input_output_error::make(); + } + return read_ertr::now(); + }); +} + +static +ZNSSegmentManager::access_ertr::future +read_metadata(seastar::file &device, seastar::stat_data sd) +{ + assert(ceph::encoded_sizeof_bounded() < + sd.block_size); + return seastar::do_with( + bufferptr(ceph::buffer::create_page_aligned(sd.block_size)), + [=, &device](auto &bp) { + return do_read( + device, + 0, + bp.length(), + bp + ).safe_then([=, &bp] { + bufferlist bl; + bl.push_back(bp); + zns_sm_metadata_t ret; + auto bliter = bl.cbegin(); + decode(ret, bliter); + return ZNSSegmentManager::access_ertr::future( + ZNSSegmentManager::access_ertr::ready_future_marker{}, + ret); + }); + }); +} + +ZNSSegmentManager::mount_ret ZNSSegmentManager::mount() +{ + return open_device( + device_path, seastar::open_flags::rw + ).safe_then([=](auto p) { + device = std::move(p.first); + auto sd = p.second; + return read_metadata(device, sd); + }).safe_then([=](auto meta){ + metadata = meta; + return mount_ertr::now(); + }); +} + +ZNSSegmentManager::mkfs_ret ZNSSegmentManager::mkfs( + segment_manager_config_t config) +{ + logger().error("ZNSSegmentManager::mkfs: starting"); + return seastar::do_with( + seastar::file{}, + seastar::stat_data{}, + zns_sm_metadata_t{}, + size_t(), + size_t(), + [=](auto &device, auto &stat, auto &sb, auto &zone_size, auto &nr_zones){ + logger().error("ZNSSegmentManager::mkfs path {}", device_path); + return open_device( + device_path, + seastar::open_flags::rw + ).safe_then([=, &device, &stat, &sb, &zone_size, &nr_zones](auto p){ + device = p.first; + stat = p.second; + return device.ioctl( + BLKGETNRZONES, + (void *)&nr_zones + ).then([&](int ret){ + if (nr_zones == 0) { + return seastar::make_exception_future( + std::system_error(std::make_error_code(std::errc::io_error))); + } + return device.ioctl(BLKGETZONESZ, (void *)&zone_size); + }).then([&] (int ret){ + return reset_device(device, zone_size, nr_zones); + }).then([&] { + return get_zone_capacity(device, zone_size, nr_zones); + }).then([&, config] (auto zone_capacity){ + sb = make_metadata( + config.meta, + stat, + zone_size, + zone_capacity, + nr_zones); + metadata = sb; + stats.metadata_write.increment( + ceph::encoded_sizeof_bounded()); + logger().error("WROTE TO STATS"); + return write_metadata(device, sb); + }).finally([&] { + 
logger().error("CLOSING DEVICE"); + return device.close(); + }).safe_then([] { + logger().error("RETURNING FROM MKFS"); + return mkfs_ertr::now(); + }); + }); + }); +} + +struct blk_zone_range make_range( + segment_id_t id, + size_t segment_size, + size_t block_size, + size_t first_segment_offset) +{ + return blk_zone_range{ + (id.device_segment_id() * segment_size + first_segment_offset), + (segment_size) + }; +} + +using blk_open_zone_ertr = crimson::errorator< + crimson::ct_error::input_output_error>; +using blk_open_zone_ret = blk_open_zone_ertr::future<>; +blk_open_zone_ret blk_open_zone(seastar::file &device, blk_zone_range &range){ + return device.ioctl( + BLKOPENZONE, + &range + ).then_wrapped([=](auto f) -> blk_open_zone_ret{ + if (f.failed()) { + return crimson::ct_error::input_output_error::make(); + } + else { + int ret = f.get(); + if (ret == 0) { + return seastar::now(); + } else { + return crimson::ct_error::input_output_error::make(); + } + } + }); +} + +ZNSSegmentManager::open_ertr::future ZNSSegmentManager::open( + segment_id_t id) +{ + return seastar::do_with( + blk_zone_range{}, + [=] (auto &range){ + range = make_range( + id, + metadata.zone_size, + metadata.block_size, + metadata.first_segment_offset); + return blk_open_zone( + device, + range + ); + } + ).safe_then([=] { + logger().error("open _segment: open successful"); + return open_ertr::future( + open_ertr::ready_future_marker{}, + SegmentRef(new ZNSSegment(*this, id)) + ); + }); +} + +using blk_close_zone_ertr = crimson::errorator< + crimson::ct_error::input_output_error>; +using blk_close_zone_ret = blk_close_zone_ertr::future<>; +blk_close_zone_ret blk_close_zone( + seastar::file &device, + blk_zone_range &range) +{ + return device.ioctl( + BLKCLOSEZONE, + &range + ).then_wrapped([=](auto f) -> blk_open_zone_ret{ + if (f.failed()) { + return crimson::ct_error::input_output_error::make(); + } + else { + int ret = f.get(); + if (ret == 0) { + return seastar::now(); + } else { + return crimson::ct_error::input_output_error::make(); + } + } + }); +} + +ZNSSegmentManager::release_ertr::future<> ZNSSegmentManager::release( + segment_id_t id) +{ + return seastar::do_with( + blk_zone_range{}, + [=] (auto &range){ + range = make_range( + id, + metadata.zone_size, + metadata.block_size, + metadata.first_segment_offset); + return blk_close_zone( + device, + range + ); + } + ).safe_then([=] { + logger().error("release _segment: release successful"); + return release_ertr::now(); + }); +} + +SegmentManager::read_ertr::future<> ZNSSegmentManager::read( + paddr_t addr, + size_t len, + ceph::bufferptr &out) +{ + auto& seg_addr = addr.as_seg_paddr(); + if (seg_addr.get_segment_id().device_segment_id() >= get_num_segments()) { + logger().error( + "ZNSSegmentManager::read: invalid segment {}", + addr); + return crimson::ct_error::invarg::make(); + } + + if (seg_addr.get_segment_off() + len > metadata.zone_size) { + logger().error( + "ZNSSegmentManager::read: invalid offset {}~{}!", + addr, + len); + return crimson::ct_error::invarg::make(); + } + return do_read( + device, + get_offset(addr), + len, + out); +} + +Segment::close_ertr::future<> ZNSSegmentManager::segment_close( + segment_id_t id, segment_off_t write_pointer) +{ + return seastar::do_with( + blk_zone_range{}, + [=] (auto &range){ + range = make_range( + id, + metadata.zone_size, + metadata.block_size, + metadata.first_segment_offset); + return blk_close_zone( + device, + range + ); + } + ).safe_then([=] { + logger().error("open _segment: open successful"); + return 
Segment::close_ertr::now(); + }); +} + +Segment::write_ertr::future<> ZNSSegmentManager::segment_write( + paddr_t addr, + ceph::bufferlist bl, + bool ignore_check) +{ + assert(addr.get_device_id() == get_device_id()); + assert((bl.length() % metadata.block_size) == 0); + auto& seg_addr = addr.as_seg_paddr(); + logger().debug( + "BlockSegmentManager::segment_write: " + "segment_write to segment {} at offset {}, physical offset {}, len {}", + seg_addr.get_segment_id(), + seg_addr.get_segment_off(), + get_offset(addr), + bl.length()); + stats.data_write.increment(bl.length()); + return do_writev( + device, + get_offset(addr), + std::move(bl), + metadata.block_size); +} + +device_id_t ZNSSegmentManager::get_device_id() const +{ + return metadata.device_id; +}; + +secondary_device_set_t& ZNSSegmentManager::get_secondary_devices() +{ + return metadata.secondary_devices; +}; + +device_spec_t ZNSSegmentManager::get_device_spec() const +{ + auto spec = device_spec_t(); + spec.magic = metadata.magic; + spec.dtype = metadata.dtype; + spec.id = metadata.device_id; + return spec; +}; + +magic_t ZNSSegmentManager::get_magic() const +{ + return metadata.magic; +}; + +segment_off_t ZNSSegment::get_write_capacity() const +{ + return manager.get_segment_size(); +} + +SegmentManager::close_ertr::future<> ZNSSegmentManager::close() +{ + if (device) { + return device.close(); + } + return seastar::now(); +} + +Segment::close_ertr::future<> ZNSSegment::close() +{ + return manager.segment_close(id, write_pointer); +} + +Segment::write_ertr::future<> ZNSSegment::write( + segment_off_t offset, ceph::bufferlist bl) +{ + if (offset < write_pointer || offset % manager.metadata.block_size != 0) { + logger().error( + "ZNSSegmentManager::ZNSSegment::write: " + "invalid segment write on segment {} to offset {}", + id, + offset); + return crimson::ct_error::invarg::make(); + } + if (offset + bl.length() > manager.metadata.segment_size) + return crimson::ct_error::enospc::make(); + + write_pointer = offset + bl.length(); + return manager.segment_write(paddr_t::make_seg_paddr(id, offset), bl); +} + +} diff --git a/src/crimson/os/seastore/segment_manager/zns.h b/src/crimson/os/seastore/segment_manager/zns.h new file mode 100644 index 000000000000..0bb804373c90 --- /dev/null +++ b/src/crimson/os/seastore/segment_manager/zns.h @@ -0,0 +1,181 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#pragma once + +#include + +#include +#include + +#include +#include +#include + +#include "crimson/common/layout.h" + +#include "crimson/os/seastore/segment_manager.h" + +#include "include/uuid.h" + +namespace crimson::os::seastore::segment_manager::zns { + + struct zns_sm_metadata_t { + size_t size = 0; + size_t segment_size = 0; + size_t segment_capacity = 0; + size_t zones_per_segment = 0; + size_t zone_capacity = 0; + size_t block_size = 0; + size_t segments = 0; + size_t zone_size = 0; + uint64_t first_segment_offset = 0; + seastore_meta_t meta; + + bool major_dev = false; + magic_t magic = 0; + device_type_t dtype = device_type_t::NONE; + device_id_t device_id = 0; + secondary_device_set_t secondary_devices; + + DENC(zns_sm_metadata_t, v, p) { + DENC_START(1, 1, p); + denc(v.size, p); + denc(v.segment_size, p); + denc(v.zone_capacity, p); + denc(v.zones_per_segment, p); + denc(v.block_size, p); + denc(v.segments, p); + denc(v.zone_size, p); + denc(v.first_segment_offset, p); + denc(v.meta, p); + denc(v.magic, p); + denc(v.dtype, p); + denc(v.device_id, p); + if (v.major_dev) { + 
denc(v.secondary_devices, p); + } + DENC_FINISH(p); + } + }; + + using write_ertr = crimson::errorator; + using read_ertr = crimson::errorator; + + class ZNSSegmentManager; + + class ZNSSegment final : public Segment { + public: + ZNSSegment(ZNSSegmentManager &man, segment_id_t i) : manager(man), id(i){}; + + segment_id_t get_segment_id() const final { return id; } + segment_off_t get_write_capacity() const final; + segment_off_t get_write_ptr() const final { return write_pointer; } + close_ertr::future<> close() final; + write_ertr::future<> write(segment_off_t offset, ceph::bufferlist bl) final; + + ~ZNSSegment() {} + private: + friend class ZNSSegmentManager; + ZNSSegmentManager &manager; + const segment_id_t id; + segment_off_t write_pointer = 0; + }; + + class ZNSSegmentManager final : public SegmentManager{ + public: + mount_ret mount() final; + mkfs_ret mkfs(segment_manager_config_t meta) final; + open_ertr::future open(segment_id_t id) final; + close_ertr::future<> close() final; + + release_ertr::future<> release(segment_id_t id) final; + + read_ertr::future<> read( + paddr_t addr, + size_t len, + ceph::bufferptr &out) final; + + size_t get_size() const final { + return metadata.size; + }; + + segment_off_t get_block_size() const final { + return metadata.block_size; + }; + + segment_off_t get_segment_size() const final { + return metadata.segment_size; + }; + + const seastore_meta_t &get_meta() const { + return metadata.meta; + }; + + device_id_t get_device_id() const final; + + secondary_device_set_t& get_secondary_devices() final; + + device_spec_t get_device_spec() const final; + + magic_t get_magic() const final; + + ZNSSegmentManager(const std::string &path) : device_path(path) {} + + ~ZNSSegmentManager() final = default; + + Segment::write_ertr::future<> segment_write( + paddr_t addr, + ceph::bufferlist bl, + bool ignore_check=false); + + private: + friend class ZNSSegment; + std::string device_path; + zns_sm_metadata_t metadata; + seastar::file device; + uint32_t nr_zones; + struct effort_t { + uint64_t num = 0; + uint64_t bytes = 0; + + void increment(uint64_t read_bytes) { + ++num; + bytes += read_bytes; + } + }; + + struct zns_sm_stats { + effort_t data_read = {}; + effort_t data_write = {}; + effort_t metadata_write = {}; + uint64_t opened_segments = 0; + uint64_t closed_segments = 0; + uint64_t closed_segments_unused_bytes = 0; + uint64_t released_segments = 0; + + void reset() { + *this = zns_sm_stats{}; + } + } stats; + + void register_metrics(); + seastar::metrics::metric_group metrics; + + Segment::close_ertr::future<> segment_close( + segment_id_t id, segment_off_t write_pointer); + + uint64_t get_offset(paddr_t addr) { + auto& seg_addr = addr.as_seg_paddr(); + const auto default_sector_size = 512; + return (metadata.first_segment_offset + + (seg_addr.get_segment_id().device_segment_id() * + metadata.zone_size)) * default_sector_size + + seg_addr.get_segment_off(); + } + }; + +} + +WRITE_CLASS_DENC_BOUNDED( + crimson::os::seastore::segment_manager::zns::zns_sm_metadata_t +) diff --git a/src/include/config-h.in.cmake b/src/include/config-h.in.cmake index 01681c494612..3499e64893dd 100644 --- a/src/include/config-h.in.cmake +++ b/src/include/config-h.in.cmake @@ -108,6 +108,9 @@ /* Define to 1 if you have libxfs */ #cmakedefine HAVE_LIBXFS 1 +/* Define to 1 if zns support enabled */ +#cmakedefine HAVE_ZNS + /* SPDK conditional compilation */ #cmakedefine HAVE_SPDK
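
For reference, the zone probing in this patch boils down to two blkzoned ioctls: BLKGETNRZONES reports how many zones the device exposes (zero means the device is not zoned, which is the check SegmentManager::get_segment_manager() uses to fall back to BlockSegmentManager), and BLKGETZONESZ reports the zone size in 512-byte sectors, the same unit ZNSSegmentManager::get_offset() works in. The standalone sketch below is not part of the patch; the file name probe_zns.cc and the usage are illustrative only. It issues the same two ioctls through plain POSIX open()/ioctl() instead of seastar::file::ioctl().

// probe_zns.cc -- minimal, hypothetical sketch (not part of this patch).
// It mirrors the zone-probing ioctls used by get_segment_manager() and
// ZNSSegmentManager::mkfs(), using plain POSIX calls instead of seastar.
// Build: g++ -std=c++17 probe_zns.cc -o probe_zns
// (needs linux/blkzoned.h; see the kernel-header TODO in the CMake change)
#include <cstdio>
#include <cstdint>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/blkzoned.h>

int main(int argc, char **argv)
{
  if (argc != 2) {
    std::fprintf(stderr, "usage: %s <block device>\n", argv[0]);
    return 1;
  }
  int fd = ::open(argv[1], O_RDONLY);
  if (fd < 0) {
    std::perror("open");
    return 1;
  }

  // Number of zones; 0 means the device is not zoned, which is the same
  // test get_segment_manager() uses before choosing ZNSSegmentManager.
  uint32_t nr_zones = 0;
  if (::ioctl(fd, BLKGETNRZONES, &nr_zones) < 0) {
    std::perror("BLKGETNRZONES");
    ::close(fd);
    return 1;
  }

  // Zone size, reported in 512-byte sectors -- the unit the kernel zone
  // ioctls (and ZNSSegmentManager::get_offset()) work in.
  uint32_t zone_sectors = 0;
  if (nr_zones != 0 && ::ioctl(fd, BLKGETZONESZ, &zone_sectors) < 0) {
    std::perror("BLKGETZONESZ");
    ::close(fd);
    return 1;
  }

  std::printf("%s: zoned=%s nr_zones=%u zone_size=%u sectors\n",
              argv[1], nr_zones ? "yes" : "no", nr_zones, zone_sectors);
  ::close(fd);
  return 0;
}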