From: Chunmei Liu
Date: Sat, 18 Oct 2025 00:17:44 +0000 (+0000)
Subject: crimson/os/seastore: support other devices
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=35bbc3a8ec1d64af57c14effa8acdcd8b4f965fc;p=ceph-ci.git

crimson/os/seastore: support other devices

Signed-off-by: Chunmei Liu
---

diff --git a/src/crimson/os/seastore/random_block_manager.h b/src/crimson/os/seastore/random_block_manager.h
index 88a7ff71e75..66064b38a4d 100644
--- a/src/crimson/os/seastore/random_block_manager.h
+++ b/src/crimson/os/seastore/random_block_manager.h
@@ -71,7 +71,6 @@ struct rbm_superblock_t {
   }
 
   void validate() const {
-    ceph_assert(shard_num == seastar::smp::count);
     ceph_assert(block_size > 0);
     for (unsigned int i = 0; i < seastar::smp::count; i ++) {
       ceph_assert(shard_infos[i].size > block_size &&
diff --git a/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc b/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
index 8938b8f344e..61127a8163a 100644
--- a/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
+++ b/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
@@ -19,9 +19,30 @@ SET_SUBSYS(seastore_device);
 
 namespace crimson::os::seastore::random_block_device::nvme {
 
+seastar::future<> NVMeBlockDevice::start(unsigned int shard_nums)
+{
+  device_shard_nums = shard_nums;
+  auto num_shard_services = (device_shard_nums + seastar::smp::count - 1 ) / seastar::smp::count;
+  LOG_PREFIX(NVMeBlockDevice::start);
+  DEBUG("device_shard_nums={} seastar::smp={}, num_shard_services={}", device_shard_nums, seastar::smp::count, num_shard_services);
+  return shard_devices.start(num_shard_services, device_path);
+
+}
+
+seastar::future<> NVMeBlockDevice::stop()
+{
+  return shard_devices.stop();
+}
+
+Device& NVMeBlockDevice::get_sharded_device(unsigned int store_index)
+{
+  assert(store_index < shard_devices.local().mshard_devices.size());
+  return *shard_devices.local().mshard_devices[store_index];
+}
+
 NVMeBlockDevice::mkfs_ret NVMeBlockDevice::mkfs(device_config_t config)
 {
   using crimson::common::get_conf;
-  co_await shard_devices.local().do_primary_mkfs(config,
+  co_await shard_devices.local().mshard_devices[0]->do_primary_mkfs(config,
     seastar::smp::count,
     get_conf<Option::size_t>("seastore_cbjournal_size")
   );
@@ -76,10 +97,12 @@ NVMeBlockDevice::mount_ret NVMeBlockDevice::mount()
   LOG_PREFIX(NVMeBlockDevice::mount);
   DEBUG("mount");
   co_await shard_devices.invoke_on_all([](auto &local_device) {
-    return local_device.do_shard_mount(
-    ).handle_error(
-      crimson::ct_error::assert_all{
-        "Invalid error in NVMeBlockDevice::do_shard_mount"
+    return seastar::do_for_each(local_device.mshard_devices, [](auto& mshard_device) {
+      return mshard_device->do_shard_mount(
+      ).handle_error(
+        crimson::ct_error::assert_all{
+          "Invalid error in NVMeBlockDevice::do_shard_mount"
+        });
       });
   });
diff --git a/src/crimson/os/seastore/random_block_manager/nvme_block_device.h b/src/crimson/os/seastore/random_block_manager/nvme_block_device.h
index f8535e8417a..b148cdb0e24 100644
--- a/src/crimson/os/seastore/random_block_manager/nvme_block_device.h
+++ b/src/crimson/os/seastore/random_block_manager/nvme_block_device.h
@@ -212,7 +212,9 @@ public:
    * atomic_write_unit does not require fsync().
   */
-  NVMeBlockDevice(std::string device_path) : device_path(device_path) {}
+  NVMeBlockDevice(std::string device_path, unsigned int store_index = 0)
+    : RBMDevice(store_index),
+      device_path(device_path) {}
   ~NVMeBlockDevice() = default;
 
   open_ertr::future<> open(
@@ -282,17 +284,11 @@ public:
     return device_path;
   }
 
-  seastar::future<> start() final {
-    return shard_devices.start(device_path);
-  }
+  seastar::future<> start(unsigned int shard_nums) final;
 
-  seastar::future<> stop() final {
-    return shard_devices.stop();
-  }
+  seastar::future<> stop() final;
 
-  Device& get_sharded_device() final {
-    return shard_devices.local();
-  }
+  Device& get_sharded_device(unsigned int store_index = 0) final;
 
   uint64_t get_preffered_write_granularity() const { return write_granularity; }
   uint64_t get_preffered_write_alignment() const { return write_alignment; }
@@ -372,7 +368,26 @@ private:
   int namespace_id; // TODO: multi namespaces
 
   std::string device_path;
-  seastar::sharded<NVMeBlockDevice> shard_devices;
+
+  class MultiShardDevices {
+  public:
+    std::vector<std::unique_ptr<NVMeBlockDevice>> mshard_devices;
+
+  public:
+    MultiShardDevices(size_t count,
+                      const std::string path)
+      : mshard_devices() {
+      mshard_devices.reserve(count);
+      for (size_t store_index = 0; store_index < count; ++store_index) {
+        mshard_devices.emplace_back(std::make_unique<NVMeBlockDevice>(
+          path, store_index));
+      }
+    }
+    ~MultiShardDevices() {
+      mshard_devices.clear();
+    }
+  };
+  seastar::sharded<MultiShardDevices> shard_devices;
 };
 
 }
diff --git a/src/crimson/os/seastore/random_block_manager/rbm_device.cc b/src/crimson/os/seastore/random_block_manager/rbm_device.cc
index 91d42170666..3e0663c061a 100644
--- a/src/crimson/os/seastore/random_block_manager/rbm_device.cc
+++ b/src/crimson/os/seastore/random_block_manager/rbm_device.cc
@@ -176,7 +176,7 @@ RBMDevice::mount_ret RBMDevice::do_shard_mount()
       return std::nullopt;
     }));
 
   if (!st) {
-    co_await mount_ertr::future<>(
+    co_return co_await mount_ertr::future<>(
       crimson::ct_error::input_output_error::make()
     );
@@ -190,11 +190,45 @@ RBMDevice::mount_ret RBMDevice::do_shard_mount()
       "Invalid error read_rbm_superblock in RBMDevice::do_shard_mount"}
   );
   LOG_PREFIX(RBMDevice::do_shard_mount);
-  shard_info = s.shard_infos[seastar::this_shard_id()];
+  if(seastar::this_shard_id() + seastar::smp::count * store_index >= s.shard_num) {
+    INFO("{} shard_id {} out of range {}",
+      device_id_printer_t{get_device_id()},
+      seastar::this_shard_id() + seastar::smp::count * store_index,
+      s.shard_num);
+    shard_status = false;
+    co_return;
+  }
+  shard_info = s.shard_infos[seastar::this_shard_id() + seastar::smp::count * store_index];
   INFO("{} read {}", device_id_printer_t{get_device_id()}, shard_info);
   s.validate();
 }
 
+read_ertr::future<unsigned int> RBMDevice::get_shard_nums()
+{
+  co_await open(get_device_path(),
+    seastar::open_flags::rw | seastar::open_flags::dsync
+  ).handle_error(
+    crimson::ct_error::assert_all{
+      "Invalid error open in RBMDevice::get_shard_nums"}
+  );
+
+  auto st = co_await stat_device(
+  ).handle_error(
+    crimson::ct_error::assert_all{
+      "Invalid error stat_device in RBMDevice::get_shard_nums"}
+  );
+
+  assert(st.block_size > 0);
+  super.block_size = st.block_size;
+  auto sb = co_await read_rbm_superblock(RBM_START_ADDRESS
+  ).handle_error(
+    crimson::ct_error::assert_all{
+      "Invalid error in RBMDevice::get_shard_nums"}
+  );
+
+  co_return sb.shard_num;
+}
+
 EphemeralRBMDeviceRef create_test_ephemeral(uint64_t journal_size, uint64_t data_size) {
   return EphemeralRBMDeviceRef(
     new EphemeralRBMDevice(journal_size + data_size +
diff --git a/src/crimson/os/seastore/random_block_manager/rbm_device.h b/src/crimson/os/seastore/random_block_manager/rbm_device.h
index ad79b61a5cf..fa9b33ac361 100644
--- a/src/crimson/os/seastore/random_block_manager/rbm_device.h
+++ b/src/crimson/os/seastore/random_block_manager/rbm_device.h
@@ -80,8 +80,12 @@ public:
 protected:
   rbm_superblock_t super;
   rbm_shard_info_t shard_info;
+  unsigned int device_shard_nums = 0;
+  unsigned int store_index = 0;
+  bool shard_status = true;
 public:
-  RBMDevice() {}
+  RBMDevice(unsigned int store_index = 0)
+    : store_index(store_index) {}
   virtual ~RBMDevice() = default;
 
   template <typename T>
@@ -115,6 +119,8 @@ public:
   std::size_t get_available_size() const { return super.size; }
   extent_len_t get_block_size() const { return super.block_size; }
 
+  read_ertr::future<unsigned int> get_shard_nums() final;
+
   virtual read_ertr::future<> read(
     uint64_t offset,
     bufferptr &bptr) = 0;
diff --git a/src/crimson/os/seastore/segment_manager/zbd.cc b/src/crimson/os/seastore/segment_manager/zbd.cc
index cb343e0fa0c..5213ce3f3b0 100644
--- a/src/crimson/os/seastore/segment_manager/zbd.cc
+++ b/src/crimson/os/seastore/segment_manager/zbd.cc
@@ -45,6 +45,27 @@ template <> struct fmt::formatter: fmt::formatter {
 
 namespace crimson::os::seastore::segment_manager::zbd {
 
+seastar::future<> ZBDSegmentManager::start(unsigned int shard_nums)
+{
+  LOG_PREFIX(ZBDSegmentManager::start);
+  device_shard_nums = shard_nums;
+  auto num_shard_services = (device_shard_nums + seastar::smp::count - 1 ) / seastar::smp::count;
+  INFO("device_shard_nums={} seastar::smp={}, num_shard_services={}", device_shard_nums, seastar::smp::count, num_shard_services);
+  return shard_devices.start(num_shard_services, device_path);
+
+}
+
+seastar::future<> ZBDSegmentManager::stop()
+{
+  return shard_devices.stop();
+}
+
+Device& ZBDSegmentManager::get_sharded_device(unsigned int store_index)
+{
+  assert(store_index < shard_devices.local().mshard_devices.size());
+  return *shard_devices.local().mshard_devices[store_index];
+}
+
 using open_device_ret = ZBDSegmentManager::access_ertr::future<
   std::pair<seastar::file, seastar::stat_data>>;
 static open_device_ret open_device(
@@ -399,13 +420,31 @@ read_metadata(seastar::file &device, seastar::stat_data sd)
   });
 }
 
+ZBDSegmentManager::read_ertr::future<unsigned int> ZBDSegmentManager::get_shard_nums()
+{
+  return open_device(
+    device_path, seastar::open_flags::rw
+  ).safe_then([this](auto p) {
+    device = std::move(p.first);
+    auto sd = p.second;
+    return read_metadata(device, sd);
+  }).safe_then([this](auto meta){
+    return read_ertr::make_ready_future<unsigned int>(meta.shard_num);
+  }).handle_error(
+    crimson::ct_error::assert_all{
+      "Invalid error in ZBDSegmentManager::get_shard_nums"
+    });
+}
+
 ZBDSegmentManager::mount_ret ZBDSegmentManager::mount()
 {
   return shard_devices.invoke_on_all([](auto &local_device) {
-    return local_device.shard_mount(
-    ).handle_error(
-      crimson::ct_error::assert_all{
-        "Invalid error in ZBDSegmentManager::mount"
+    return seastar::do_for_each(local_device.mshard_devices, [](auto& mshard_device) {
+      return mshard_device->shard_mount(
+      ).handle_error(
+        crimson::ct_error::assert_all{
+          "Invalid error in ZBDSegmentManager::mount"
+        });
       });
   });
 }
@@ -419,7 +458,15 @@ ZBDSegmentManager::mount_ret ZBDSegmentManager::shard_mount()
     auto sd = p.second;
     return read_metadata(device, sd);
   }).safe_then([=, this](auto meta){
-    shard_info = meta.shard_infos[seastar::this_shard_id()];
+    if(seastar::this_shard_id() + seastar::smp::count * store_index >= meta.shard_num) {
+      INFO("{} shard_id {} out of range {}",
+        device_id_printer_t{get_device_id()},
+        seastar::this_shard_id() + seastar::smp::count * store_index,
+        meta.shard_num);
+      shard_status = false;
+      return mount_ertr::now();
+    }
+    shard_info = meta.shard_infos[seastar::this_shard_id() + seastar::smp::count * store_index];
     metadata = meta;
     return mount_ertr::now();
   });
@@ -428,13 +475,15 @@ ZBDSegmentManager::mount_ret ZBDSegmentManager::shard_mount()
 ZBDSegmentManager::mkfs_ret ZBDSegmentManager::mkfs(
   device_config_t config)
 {
-  return shard_devices.local().primary_mkfs(config
+  return shard_devices.local().mshard_devices[0]->primary_mkfs(config
   ).safe_then([this] {
     return shard_devices.invoke_on_all([](auto &local_device) {
-      return local_device.shard_mkfs(
-      ).handle_error(
-        crimson::ct_error::assert_all{
-          "Invalid error in ZBDSegmentManager::mkfs"
+      return seastar::do_for_each(local_device.mshard_devices, [](auto& mshard_device) {
+        return mshard_device->shard_mkfs(
+        ).handle_error(
+          crimson::ct_error::assert_all{
+            "Invalid error in ZBDSegmentManager::mkfs"
+          });
         });
     });
   });
diff --git a/src/crimson/os/seastore/segment_manager/zbd.h b/src/crimson/os/seastore/segment_manager/zbd.h
index d82974783d4..0da82d7883d 100644
--- a/src/crimson/os/seastore/segment_manager/zbd.h
+++ b/src/crimson/os/seastore/segment_manager/zbd.h
@@ -74,7 +74,6 @@ namespace crimson::os::seastore::segment_manager::zbd {
     }
 
     void validate() const {
-      ceph_assert_always(shard_num == seastar::smp::count);
       for (unsigned int i = 0; i < seastar::smp::count; i++) {
         ceph_assert_always(shard_infos[i].size > 0);
         ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX);
@@ -121,22 +120,18 @@ namespace crimson::os::seastore::segment_manager::zbd {
   class ZBDSegmentManager final : public SegmentManager{
     // interfaces used by Device
   public:
-    seastar::future<> start() {
-      return shard_devices.start(device_path);
-    }
+    seastar::future<> start(unsigned int shard_nums) final;
 
-    seastar::future<> stop() {
-      return shard_devices.stop();
-    }
+    seastar::future<> stop() final;
 
-    Device& get_sharded_device() final {
-      return shard_devices.local();
-    }
+    Device& get_sharded_device(unsigned int store_index = 0) final;
 
     mount_ret mount() final;
     mkfs_ret mkfs(device_config_t meta) final;
 
-    ZBDSegmentManager(const std::string &path) : device_path(path) {}
+    ZBDSegmentManager(const std::string &path, unsigned int store_index = 0)
+      : device_path(path),
+        store_index(store_index) {}
 
     ~ZBDSegmentManager() final = default;
 
@@ -152,6 +147,8 @@ namespace crimson::os::seastore::segment_manager::zbd {
       size_t len,
       ceph::bufferptr &out) final;
 
+    read_ertr::future<unsigned int> get_shard_nums() final;
+
     device_type_t get_device_type() const final {
       return device_type_t::ZBD;
     }
@@ -214,7 +211,7 @@ namespace crimson::os::seastore::segment_manager::zbd {
       }
     } stats;
 
-    void register_metrics();
+    void register_metrics(unsigned int store_index);
     seastar::metrics::metric_group metrics;
 
     Segment::close_ertr::future<> segment_close(
@@ -234,7 +231,28 @@ namespace crimson::os::seastore::segment_manager::zbd {
 
     mount_ret shard_mount();
 
-    seastar::sharded<ZBDSegmentManager> shard_devices;
+    unsigned int device_shard_nums = 0;
+    unsigned int store_index = 0;
+    bool shard_status = true;
+    class MultiShardDevices {
+    public:
+      std::vector<std::unique_ptr<ZBDSegmentManager>> mshard_devices;
+
+    public:
+      MultiShardDevices(size_t count,
+                        const std::string path)
+        : mshard_devices() {
+        mshard_devices.reserve(count);
+        for (size_t store_index = 0; store_index < count; ++store_index) {
+          mshard_devices.emplace_back(std::make_unique<ZBDSegmentManager>(
+            path, store_index));
+        }
+      }
+      ~MultiShardDevices() {
+        mshard_devices.clear();
+      }
+    };
+    seastar::sharded<MultiShardDevices> shard_devices;
   };
 
 }
diff --git a/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc b/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc
index 8962346c1e1..3ad41e29dc7 100644
--- a/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc
+++ b/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc
@@ -58,7 +58,7 @@ TEST_F(nvdev_test_t, write_and_verify_test)
   run_async([this] {
     device.reset(new random_block_device::nvme::NVMeBlockDevice(dev_path));
     local_conf().set_val("seastore_cbjournal_size", "1048576").get();
-    device->start().get();
+    device->start(seastar::smp::count).get();
     device->mkfs(
       device_config_t{
         true,