From bb77b5dc20776f86c08cd2d1e6a28381fecf1881 Mon Sep 17 00:00:00 2001 From: Zhang Song Date: Thu, 31 Jul 2025 16:06:13 +0800 Subject: [PATCH] crimson/os/seastore: set backend type explicitly Signed-off-by: Zhang Song Signed-off-by: Xuehan Xu --- src/common/options/crimson.yaml.in | 15 ++++ src/crimson/os/seastore/device.cc | 22 ++++-- src/crimson/os/seastore/device.h | 9 ++- .../os/seastore/extent_placement_manager.cc | 1 + .../random_block_manager/block_rb_manager.cc | 4 +- src/crimson/os/seastore/seastore.cc | 76 ++++++++++++------- src/crimson/os/seastore/seastore_types.cc | 18 ++++- src/crimson/os/seastore/seastore_types.h | 7 +- .../os/seastore/segment_manager/ephemeral.cc | 2 + src/crimson/tools/store_nbd/tm_driver.cc | 19 +++-- .../seastore/nvmedevice/test_nvmedevice.cc | 1 + src/vstart.sh | 35 +++++++-- 12 files changed, 156 insertions(+), 53 deletions(-) diff --git a/src/common/options/crimson.yaml.in b/src/common/options/crimson.yaml.in index baca736abb9..accc8234725 100644 --- a/src/common/options/crimson.yaml.in +++ b/src/common/options/crimson.yaml.in @@ -239,6 +239,21 @@ options: level: dev desc: The main device type seastore uses (SSD or RANDOM_BLOCK_SSD) default: SSD +- name: seastore_main_backend_type + type: str + level: dev + desc: The backend used by main device (SEGMENTED or RANDOM_BLOCK) + default: SEGMENTED +- name: seastore_secondary_device_type + type: str + level: dev + desc: The secondary device type seastore uses (RANDOM_BLOCK_SSD, SSD or HDD) + default: HDD +- name: seastore_secondary_backend_type + type: str + level: dev + desc: The backend used by secondary device (SEGMENTED or RANDOM_BLOCK) + default: SEGMENTED - name: seastore_cbjournal_size type: size level: dev diff --git a/src/crimson/os/seastore/device.cc b/src/crimson/os/seastore/device.cc index bd6231fa94b..2e653a48a23 100644 --- a/src/crimson/os/seastore/device.cc +++ b/src/crimson/os/seastore/device.cc @@ -58,6 +58,8 @@ void device_superblock_t::validate() const } ceph_assert(block_size > 0); ceph_assert(config.spec.magic != 0); + // allow HDD devices use segmented backend + ceph_assert(config.spec.btype != backend_type_t::NONE); ceph_assert(config.spec.id <= DEVICE_ID_MAX_VALID); if (!config.major_dev) { ceph_assert(config.secondary_devices.empty()); @@ -114,19 +116,23 @@ void device_superblock_t::validate() const } seastar::future -Device::make_device(const std::string& device, device_type_t dtype) +Device::make_device( + const std::string& device, + device_type_t dtype, + backend_type_t btype) { - if (get_default_backend_of_device(dtype) == backend_type_t::SEGMENTED) { + if (btype == backend_type_t::SEGMENTED) { return SegmentManager::get_segment_manager(device, dtype ).then([](DeviceRef ret) { return ret; }); - } - assert(get_default_backend_of_device(dtype) == backend_type_t::RANDOM_BLOCK); - return get_rb_device(device - ).then([](DeviceRef ret) { - return ret; - }); + } else { + ceph_assert(btype != backend_type_t::NONE); + return get_rb_device(device + ).then([](DeviceRef ret) { + return ret; + }); + } } check_create_device_ret check_create_device( diff --git a/src/crimson/os/seastore/device.h b/src/crimson/os/seastore/device.h index d3f7dfba041..5c837ccd7fa 100644 --- a/src/crimson/os/seastore/device.h +++ b/src/crimson/os/seastore/device.h @@ -23,11 +23,13 @@ using magic_t = uint64_t; struct device_spec_t { magic_t magic = 0; device_type_t dtype = device_type_t::NONE; + backend_type_t btype = backend_type_t::NONE; device_id_t id = DEVICE_ID_NULL; DENC(device_spec_t, v, p) { DENC_START(1, 1, p); denc(v.magic, p); denc(v.dtype, p); + denc(v.btype, p); denc(v.id, p); DENC_FINISH(p); } @@ -55,12 +57,14 @@ struct device_config_t { uuid_d new_osd_fsid, device_id_t id, device_type_t d_type, + backend_type_t b_type, secondary_device_set_t sds) { return device_config_t{ true, device_spec_t{ (magic_t)std::rand(), d_type, + b_type, id}, seastore_meta_t{new_osd_fsid}, sds}; @@ -69,12 +73,14 @@ struct device_config_t { uuid_d new_osd_fsid, device_id_t id, device_type_t d_type, + backend_type_t b_type, magic_t magic) { return device_config_t{ false, device_spec_t{ magic, d_type, + b_type, id}, seastore_meta_t{new_osd_fsid}, secondary_device_set_t()}; @@ -329,7 +335,8 @@ public: static seastar::future make_device( const std::string &device, - device_type_t dtype); + device_type_t dtype, + backend_type_t btype); // interfaces used by each device shard public: diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index eb926072d70..fc9fc163e80 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -282,6 +282,7 @@ void ExtentPlacementManager::init( std::move(cold_cleaner), hot_tier_generations, pinboard); + ceph_assert(get_main_backend_type() != backend_type_t::NONE); if (cold_segment_cleaner) { ceph_assert(get_main_backend_type() == backend_type_t::SEGMENTED); ceph_assert(background_process.has_cold_tier()); diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc index db8775a1b18..74bcbddb917 100644 --- a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc +++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc @@ -31,7 +31,7 @@ device_config_t get_rbm_ephemeral_device_config( ++secondary_index) { device_id_t secondary_id = static_cast(secondary_index); secondary_devices.insert({ - secondary_index, device_spec_t{magic, type, secondary_id} + secondary_index, device_spec_t{magic, type, backend_type_t::RANDOM_BLOCK, secondary_id} }); } } else { // index > 0 @@ -41,7 +41,7 @@ device_config_t get_rbm_ephemeral_device_config( device_id_t id = static_cast(DEVICE_ID_RANDOM_BLOCK_MIN + index); seastore_meta_t meta = {}; return {is_major_device, - device_spec_t{magic, type, id}, + device_spec_t{magic, type, backend_type_t::RANDOM_BLOCK, id}, meta, secondary_devices}; } diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index e7b2a742ce5..6666005dda5 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -280,8 +280,11 @@ seastar::future SeaStore::start() assert(d_type == device_type_t::SSD || d_type == device_type_t::RANDOM_BLOCK_SSD); + type = get_conf("seastore_main_backend_type"); + auto b_type = string_to_backend_type(type); + INFO("main device type: {}, main backend type: {}", d_type, b_type); ceph_assert(root != ""); - DeviceRef device_obj = co_await Device::make_device(root, d_type); + DeviceRef device_obj = co_await Device::make_device(root, d_type, b_type); device = std::move(device_obj); co_await get_shard_nums(); co_await device->start(store_shard_nums); @@ -354,8 +357,11 @@ Device::access_ertr::future<> SeaStore::_mount() device_id_t id = device_entry.first; [[maybe_unused]] magic_t magic = device_entry.second.magic; device_type_t dtype = device_entry.second.dtype; - std::string path = fmt::format("{}/block.{}.{}", root, dtype, std::to_string(id)); - DeviceRef sec_dev = co_await Device::make_device(path, dtype); + backend_type_t btype = device_entry.second.btype; + auto btype_conf_str = get_conf("seastore_secondary_backend_type"); + ceph_assert(string_to_backend_type(btype_conf_str) == btype); + std::string path = fmt::format("{}/block.{}", root, std::to_string(id)); + DeviceRef sec_dev = co_await Device::make_device(path, dtype, btype); co_await sec_dev->start(store_shard_nums); co_await sec_dev->mount(); auto sec_block_size = sec_dev->get_sharded_device(0).get_block_size(); @@ -549,6 +555,19 @@ seastar::future<> SeaStore::prepare_meta(uuid_d new_osd_fsid) co_await write_meta("mkfs_done", "yes"); } +std::optional parse_device_id(const seastar::sstring &name) { + auto prefix_len = sizeof("block.") - 1; + if (name.starts_with("block.") && name.length() > prefix_len) { + int id = 0; + std::string id_str = name.substr(prefix_len); + std::istringstream iss(id_str); + iss >> id; + assert(id < std::numeric_limits::max()); + return std::make_optional(id); + } + return std::nullopt; +} + Device::access_ertr::future<> SeaStore::_mkfs(uuid_d new_osd_fsid) { LOG_PREFIX(SeaStore::_mkfs); @@ -561,6 +580,12 @@ Device::access_ertr::future<> SeaStore::_mkfs(uuid_d new_osd_fsid) co_return; } DEBUG("mkfs_done does not exist, starting mkfs"); + auto dtype_str = get_conf("seastore_secondary_device_type"); + auto dtype = string_to_device_type(dtype_str); + auto btype_str = get_conf("seastore_secondary_backend_type"); + auto btype = string_to_backend_type(btype_str); + ceph_assert(!root.empty()); + INFO("secondary device type: {}, secondary backend type: {}", dtype, btype); secondary_device_set_t sds; if (!root.empty()) { seastar::file rdir = co_await seastar::open_directory(root); @@ -569,43 +594,40 @@ Device::access_ertr::future<> SeaStore::_mkfs(uuid_d new_osd_fsid) while (auto de = co_await lister()) { auto& entry = *de; DEBUG("found file: {}", entry.name); - if (entry.name.find("block.") == 0 && entry.name.length() > 6 ) { - // 6 for "block." - std::string entry_name = entry.name; - auto dtype_end = entry_name.find_first_of('.', 6); - device_type_t dtype = - string_to_device_type( - entry_name.substr(6, dtype_end - 6)); - if (dtype == device_type_t::NONE) { - // invalid device type - co_return; - } - auto id = std::stoi(entry_name.substr(dtype_end + 1)); - std::string path = fmt::format("{}/{}", root, entry_name); - DeviceRef sec_dev = co_await Device::make_device(path, dtype); - auto p_sec_dev = sec_dev.get(); - secondaries.emplace_back(std::move(sec_dev)); - co_await p_sec_dev->start(store_shard_nums); - magic_t magic = (magic_t)std::rand(); - sds.emplace((device_id_t)id, device_spec_t{magic, dtype, (device_id_t)id}); - co_await p_sec_dev->mkfs( - device_config_t::create_secondary(new_osd_fsid, id, dtype, magic) - ).handle_error(crimson::ct_error::assert_all{"not possible"}); - co_await set_secondaries(); + auto p = parse_device_id(entry.name); + if (!p) { + continue; } + std::string path = fmt::format("{}/{}", root, entry.name); + DeviceRef sec_dev = co_await Device::make_device(path, dtype, btype); + auto p_sec_dev = sec_dev.get(); + secondaries.emplace_back(std::move(sec_dev)); + co_await p_sec_dev->start(store_shard_nums); + magic_t magic = (magic_t)std::rand(); + auto id = *p; + sds.emplace((device_id_t)id, + device_spec_t{magic, dtype, btype, (device_id_t)id}); + co_await p_sec_dev->mkfs( + device_config_t::create_secondary(new_osd_fsid, id, dtype, btype, magic) + ).handle_error(crimson::ct_error::assert_all{"not possible"}); + co_await set_secondaries(); } co_await rdir.close(); } device_id_t id = 0; device_type_t d_type = device->get_device_type(); + backend_type_t b_type = device->get_backend_type(); assert(d_type == device_type_t::SSD || d_type == device_type_t::RANDOM_BLOCK_SSD); + assert(b_type != backend_type_t::NONE); if (d_type == device_type_t::RANDOM_BLOCK_SSD) { id = static_cast(DEVICE_ID_RANDOM_BLOCK_MIN); } DEBUG("creating primary device"); - co_await device->mkfs(device_config_t::create_primary(new_osd_fsid, id, d_type, sds)); + co_await device->mkfs( + device_config_t::create_primary( + new_osd_fsid, id, d_type, b_type, sds)); DEBUG("mounting {} secondaries", secondaries.size()); for (auto& sec_dev : secondaries) { co_await sec_dev->mount(); diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index adddf6fd3fd..6ac435cd069 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -1270,10 +1270,24 @@ std::ostream& operator<<(std::ostream& out, device_type_t t) } } +backend_type_t string_to_backend_type(const std::string &str) { + if (str == "SEGMENTED") { + return backend_type_t::SEGMENTED; + } else if (str == "RANDOM_BLOCK") { + return backend_type_t::RANDOM_BLOCK; + } else { + ceph_abort("backend str not valid"); + return backend_type_t::SEGMENTED; + } +} + std::ostream& operator<<(std::ostream& out, backend_type_t btype) { - if (btype == backend_type_t::SEGMENTED) { + switch (btype) { + case backend_type_t::NONE: + return out << "NONE"; + case backend_type_t::SEGMENTED: return out << "SEGMENTED"; - } else { + case backend_type_t::RANDOM_BLOCK: return out << "RANDOM_BLOCK"; } } diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 77306ffba44..dbf662a6bec 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -974,7 +974,8 @@ std::ostream& operator<<(std::ostream& out, device_type_t t); bool can_delay_allocation(device_type_t type); device_type_t string_to_device_type(std::string type); -enum class backend_type_t { +enum class backend_type_t : uint8_t { + NONE, SEGMENTED, // SegmentManager: SSD, ZBD, HDD RANDOM_BLOCK // RBMDevice: RANDOM_BLOCK_SSD }; @@ -983,7 +984,7 @@ std::ostream& operator<<(std::ostream& out, backend_type_t); constexpr backend_type_t get_default_backend_of_device(device_type_t dtype) { assert(dtype != device_type_t::NONE && - dtype != device_type_t::NUM_TYPES); + dtype != device_type_t::NUM_TYPES); if (dtype >= device_type_t::HDD && dtype <= device_type_t::EPHEMERAL_MAIN) { return backend_type_t::SEGMENTED; @@ -992,6 +993,8 @@ constexpr backend_type_t get_default_backend_of_device(device_type_t dtype) { } } +backend_type_t string_to_backend_type(const std::string &str); + /** * Monotonically increasing identifier for the location of a * journal_record. diff --git a/src/crimson/os/seastore/segment_manager/ephemeral.cc b/src/crimson/os/seastore/segment_manager/ephemeral.cc index 393c20ed042..3144b9b9de4 100644 --- a/src/crimson/os/seastore/segment_manager/ephemeral.cc +++ b/src/crimson/os/seastore/segment_manager/ephemeral.cc @@ -59,6 +59,7 @@ device_config_t get_ephemeral_device_config( device_spec_t{ magic, get_sec_dtype(secondary_index), + backend_type_t::SEGMENTED, secondary_id } }); @@ -73,6 +74,7 @@ device_config_t get_ephemeral_device_config( device_spec_t{ magic, get_sec_dtype(index), + backend_type_t::SEGMENTED, id }, meta, diff --git a/src/crimson/tools/store_nbd/tm_driver.cc b/src/crimson/tools/store_nbd/tm_driver.cc index 894dd27fa70..edc6ccfa7b7 100644 --- a/src/crimson/tools/store_nbd/tm_driver.cc +++ b/src/crimson/tools/store_nbd/tm_driver.cc @@ -168,7 +168,10 @@ seastar::future<> TMDriver::mkfs() { assert(config.path); logger().debug("mkfs"); - return Device::make_device(*config.path, device_type_t::SSD + return Device::make_device( + *config.path, + device_type_t::SSD, + backend_type_t::SEGMENTED ).then([this](DeviceRef dev) { device = std::move(dev); seastore_meta_t meta; @@ -176,9 +179,12 @@ seastar::future<> TMDriver::mkfs() return device->mkfs( device_config_t{ true, - (magic_t)std::rand(), - device_type_t::SSD, - 0, + device_spec_t{ + (magic_t)std::rand(), + device_type_t::SSD, + backend_type_t::SEGMENTED, + 0 + }, meta, secondary_device_set_t()}); }).safe_then([this] { @@ -210,7 +216,10 @@ seastar::future<> TMDriver::mount() { return (config.mkfs ? mkfs() : seastar::now() ).then([this] { - return Device::make_device(*config.path, device_type_t::SSD); + return Device::make_device( + *config.path, + device_type_t::SSD, + backend_type_t::SEGMENTED); }).then([this](DeviceRef dev) { device = std::move(dev); return device->mount(); diff --git a/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc b/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc index 3ad41e29dc7..fb995765ee4 100644 --- a/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc +++ b/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc @@ -65,6 +65,7 @@ TEST_F(nvdev_test_t, write_and_verify_test) device_spec_t{ (magic_t)std::rand(), device_type_t::RANDOM_BLOCK_SSD, + backend_type_t::RANDOM_BLOCK, static_cast(DEVICE_ID_RANDOM_BLOCK_MIN)}, seastore_meta_t{uuid_d()}, secondary_device_set_t()} diff --git a/src/vstart.sh b/src/vstart.sh index 27bd1502268..5938bc75785 100755 --- a/src/vstart.sh +++ b/src/vstart.sh @@ -199,7 +199,10 @@ declare -a bluestore_db_devs declare -a bluestore_wal_devs declare -a secondary_block_devs declare -a cpu_table -secondary_block_devs_type="SSD" +seastore_main_device_type="SSD" +seastore_main_backend_type="SEGMENTED" +seastore_secondary_device_type="SSD" +seastore_secondary_backend_type="SEGMENTED" VSTART_SEC="client.vstart.sh" @@ -270,7 +273,10 @@ options: --seastore-device-size: set total size of seastore --seastore-devs: comma-separated list of blockdevs to use for seastore --seastore-secondary-devs: comma-separated list of secondary blockdevs to use for seastore - --seastore-secondary-devs-type: device type of all secondary blockdevs. HDD, SSD(default), ZNS or RANDOM_BLOCK_SSD + --seastore-main-device-type: device type of main blockdevs. (SSD or RANDOM_BLOCK_SSD) + --seastore-main-backend-type: the driver used by main blockdevs (SEGMENTED or RANDOM_BLOCK) + --seastore-secondary-device-type: device type of all secondary blockdevs. HDD, SSD(default), ZNS or RANDOM_BLOCK_SSD + --seastore-secondary-backend-type: the driver used by secondary blockdevs (SEGMENTED or RANDOM_BLOCK) --crimson-smp: number of cores to use for crimson --crimson-alien-num-threads: number of alien-tp threads --crimson-reactor-physical-only: use only one cpu per physical core for seastar reactors @@ -605,12 +611,24 @@ case $1 in parse_block_devs --seastore-devs "$2" shift ;; + --seastore-main-device-type) + seastore_main_device_type="$2" + shift + ;; + --seastore-main-backend-type) + seastore_main_backend_type="$2" + shift + ;; --seastore-secondary-devs) parse_secondary_devs --seastore-devs "$2" shift ;; - --seastore-secondary-devs-type) - secondary_block_devs_type="$2" + --seastore-secondary-device-type) + seastore_secondary_device_type="$2" + shift + ;; + --seastore-secondary-backend-type) + seastore_secondary_backend_type="$2" shift ;; --crimson-smp) @@ -936,6 +954,11 @@ EOF SEASTORE_OPTS=" seastore device size = $seastore_size" fi + SEASTORE_OPTS+=" + seastore_main_device_type=$seastore_main_device_type + seastore_main_backend_type=$seastore_main_backend_type + seastore_secondary_device_type=$seastore_secondary_device_type + seastore_secondary_backend_type=$seastore_secondary_backend_type" fi wconf <