From: Ronen Friedman Date: Mon, 9 Mar 2026 17:23:18 +0000 (+0000) Subject: crimson/osd: use a unified super-block for devices X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=97dcba15eb506b0cede1f5a3e5d467e1ffe1d541;p=ceph.git crimson/osd: use a unified super-block for devices This commit refactors the on-disk super-block structure used by SeaStore into a unified format that can accommodate all three device types (HDD, ZBD, RBM). All devices now have a 60-byte header at address 0, similar to the existing BlueStore layout. A 23-byte magic field (the string "CRIMSON_DEVICE", NUL-padded to 23 bytes) is placed at the beginning of the header, followed by 37 bytes of null padding (to match the 60-byte label prefix used by BlueStore), and then the DENC-encoded device_superblock_t structure starting at offset 60. A unified device_config_t is now used for all device types. The per-shard data structure is also unified: it now holds the superset of the fields needed by each device type, with fields unused by a given type left at zero. We are also adding a check for the super-block magic value in the RBMDevice::read_rbm_superblock() method, similar to the existing check in SegmentManager::read_segment_manager_superblock(). 
Signed-off-by: Ronen Friedman --- diff --git a/src/crimson/os/seastore/device.cc b/src/crimson/os/seastore/device.cc index 632f6d9fc059..bd6231fa94b6 100644 --- a/src/crimson/os/seastore/device.cc +++ b/src/crimson/os/seastore/device.cc @@ -3,6 +3,8 @@ #include "device.h" +#include + #include "segment_manager.h" #include "random_block_manager.h" #include "random_block_manager/rbm_device.h" @@ -34,6 +36,83 @@ std::ostream& operator<<(std::ostream& out, const device_config_t& conf) return out << "))"; } +std::ostream& operator<<(std::ostream& out, const device_shard_info_t& si) +{ + fmt::print(out, "{}", si); + return out; +} + +std::ostream& operator<<(std::ostream& out, const device_superblock_t& sb) +{ + fmt::print(out, "{}", sb); + return out; +} + +void device_superblock_t::validate() const +{ + // NOTE: magic is validated at the read site, outside this struct. + ceph_assert(version == CRIMSON_DEVICE_SUPERBLOCK_VERSION); + if (crimson::common::get_conf( + "seastore_require_partition_count_match_reactor_count")) { + ceph_assert(shard_num == seastar::smp::count); + } + ceph_assert(block_size > 0); + ceph_assert(config.spec.magic != 0); + ceph_assert(config.spec.id <= DEVICE_ID_MAX_VALID); + if (!config.major_dev) { + ceph_assert(config.secondary_devices.empty()); + } + for (const auto& [k, v] : config.secondary_devices) { + ceph_assert(k != config.spec.id); + ceph_assert(k <= DEVICE_ID_MAX_VALID); + ceph_assert(k == v.id); + ceph_assert(v.magic != 0); + ceph_assert(v.dtype > device_type_t::NONE); + ceph_assert(v.dtype < device_type_t::NUM_TYPES); + } + if (config.spec.dtype == device_type_t::ZBD) { + // ZBD: check zone/segment geometry + ceph_assert(segment_capacity > 0); + ceph_assert_always(segment_capacity <= SEGMENT_OFF_MAX); + } + auto backend = get_default_backend_of_device(config.spec.dtype); + if (backend == backend_type_t::SEGMENTED) { + ceph_assert(segment_size > 0 && segment_size % block_size == 0); + ceph_assert_always(segment_size <= SEGMENT_OFF_MAX); 
+ ceph_assert(shard_infos.size() >= shard_num); + for (unsigned int i = 0; i < shard_num; i++) { + ceph_assert(shard_infos[i].size > 0); + ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX); + ceph_assert(shard_infos[i].segments > 0); + ceph_assert_always(shard_infos[i].segments <= DEVICE_SEGMENT_ID_MAX); + if (config.spec.dtype != device_type_t::ZBD) { + // HDD: check tracker and first-segment offsets + ceph_assert(shard_infos[i].size > segment_size && + shard_infos[i].size % block_size == 0); + ceph_assert(shard_infos[i].tracker_offset > 0 && + shard_infos[i].tracker_offset % block_size == 0); + ceph_assert(shard_infos[i].first_segment_offset > + shard_infos[i].tracker_offset && + shard_infos[i].first_segment_offset % block_size == 0); + } + } + } else { + // RBM + ceph_assert(total_size > 0); + ceph_assert(get_default_backend_of_device(config.spec.dtype) == + backend_type_t::RANDOM_BLOCK); + ceph_assert(shard_infos.size() >= shard_num); + for (unsigned int i = 0; i < shard_num; i++) { + ceph_assert(shard_infos[i].size > block_size && + shard_infos[i].size % block_size == 0); + ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX); + ceph_assert(journal_size > 0 && journal_size % block_size == 0); + ceph_assert(shard_infos[i].start_offset < total_size && + shard_infos[i].start_offset % block_size == 0); + } + } +} + seastar::future Device::make_device(const std::string& device, device_type_t dtype) { diff --git a/src/crimson/os/seastore/device.h b/src/crimson/os/seastore/device.h index da37ebf198ff..d3f7dfba0411 100644 --- a/src/crimson/os/seastore/device.h +++ b/src/crimson/os/seastore/device.h @@ -3,10 +3,15 @@ #pragma once +#include +#include #include +#include +#include #include "include/buffer_fwd.h" +#include "common/fmt_common.h" #include "crimson/common/errorator.h" #include "crimson/os/seastore/seastore_types.h" #include "crimson/common/smp_helpers.h" @@ -78,6 +83,211 @@ struct device_config_t { std::ostream& operator<<(std::ostream&, const 
device_config_t&); +/* ----------------------------------------------------------------------- + * Unified superblock written at offset 0 on every Crimson device type: + * HDD (block-segment), ZBD (zoned-block), RBM (random-block / NVMe) + * ----------------------------------------------------------------------- + */ + +/// On-disk superblock magic type (23 bytes, matching classic BlueStore prefix size). +/// Written outside the DENC envelope at device offset 0. +using superblock_magic_t = std::array; + +/// Magic identifying all Crimson device superblocks: "CRIMSON_DEVICE\0..." +constexpr superblock_magic_t CRIMSON_DEVICE_SUPERBLOCK_MAGIC = {{ + std::byte{'C'}, std::byte{'R'}, std::byte{'I'}, std::byte{'M'}, + std::byte{'S'}, std::byte{'O'}, std::byte{'N'}, std::byte{'_'}, + std::byte{'D'}, std::byte{'E'}, std::byte{'V'}, std::byte{'I'}, + std::byte{'C'}, std::byte{'E'}, std::byte{0}, std::byte{0}, + std::byte{0}, std::byte{0}, std::byte{0}, std::byte{0}, + std::byte{0}, std::byte{0}, std::byte{0} +}}; + +/// Size constants for the on-disk prefix (magic + null padding) that +/// precedes the DENC-encoded superblock, matching the classic BlueStore +/// 60-byte label prefix layout. +constexpr size_t SUPERBLOCK_MAGIC_SIZE = sizeof(CRIMSON_DEVICE_SUPERBLOCK_MAGIC); // 23 +constexpr size_t SUPERBLOCK_MAGIC_PAD = 37; // matching the UID block in BlueStore's +constexpr size_t SUPERBLOCK_HEADER_PREFIX = SUPERBLOCK_MAGIC_SIZE + SUPERBLOCK_MAGIC_PAD; +static_assert(SUPERBLOCK_HEADER_PREFIX == 60); + +/// Current superblock format version +constexpr uint8_t CRIMSON_DEVICE_SUPERBLOCK_VERSION = 1; + +/// Feature bits stored in device_superblock_t::feature +enum class device_feature_t : uint64_t { + NVME_END_TO_END_PROTECTION = 1, +}; + +/// Unified per-shard layout info for all device types. +/// Fields unused by a given device type are left at their zero default. 
+struct device_shard_info_t { + size_t size = 0; ///< usable shard size in bytes (all) + size_t segments = 0; ///< number of segments (HDD/ZBD; 0 for RBM) + uint64_t first_segment_offset = 0; ///< byte offset of first segment (HDD/ZBD) + uint64_t tracker_offset = 0; ///< byte offset of segment-state tracker (HDD) + uint64_t start_offset = 0; ///< byte offset of shard start (RBM) + + DENC(device_shard_info_t, v, p) { + DENC_START(1, 1, p); + denc(v.size, p); + denc(v.segments, p); + denc(v.first_segment_offset, p); + denc(v.tracker_offset, p); + denc(v.start_offset, p); + DENC_FINISH(p); + } + + auto fmt_print_ctx(auto& ctx) const -> decltype(ctx.out()) { + return fmt::format_to(ctx.out(), + "device_shard_info(size={:#x}, segments={}, " + "first_segment_offset={:#x}, tracker_offset={:#x}, start_offset={:#x})", + size, segments, first_segment_offset, tracker_offset, start_offset); + } +}; + +std::ostream& operator<<(std::ostream&, const device_shard_info_t&); + +/// Unified on-disk superblock for all Crimson device types. +/// Fields specific to a device type are zero for other types. +struct device_superblock_t { + // --- Fixed header (all device types) --- + // NOTE: the magic string is written/read separately at device offset 0, + // outside this DENC-encoded structure (see SUPERBLOCK_HEADER_PREFIX). 
+ uint16_t version = CRIMSON_DEVICE_SUPERBLOCK_VERSION; + uint16_t shard_num = 0; + size_t segment_size = 0; ///< logical segment size in bytes (HDD/ZBD; 0 for RBM) + size_t block_size = 0; + device_config_t config; + + // --- Device-type-specific size information (union concept) --- + size_t total_size = 0; ///< total device capacity in bytes (RBM) + uint64_t journal_size = 0; ///< journal area size in bytes (RBM) + size_t segment_capacity = 0; ///< usable bytes/segment = zone_capacity*zones_per_segment (ZBD) + size_t zones_per_segment = 0; ///< zones per segment (ZBD) + size_t zone_size = 0; ///< physical zone size in bytes (ZBD) + size_t zone_capacity = 0; ///< usable zone capacity in bytes (ZBD) + + // --- Per-shard information --- + std::vector shard_infos; + + // --- RBM-specific remaining fields --- + checksum_t crc = 0; + uint64_t feature = 0; ///< device_feature_t bits + uint32_t nvme_block_size = 0; ///< NVMe logical block size (E2E protection) + + DENC(device_superblock_t, v, p) { + DENC_START(1, 1, p); + denc(v.version, p); + denc(v.shard_num, p); + denc(v.segment_size, p); + denc(v.block_size, p); + denc(v.config, p); + denc(v.total_size, p); + denc(v.journal_size, p); + denc(v.segment_capacity, p); + denc(v.zones_per_segment, p); + denc(v.zone_size, p); + denc(v.zone_capacity, p); + denc(v.shard_infos, p); + denc(v.crc, p); + denc(v.feature, p); + denc(v.nvme_block_size, p); + DENC_FINISH(p); + } + + void validate() const; + + /// Create a superblock for a segmented (HDD/SSD) device. + static device_superblock_t make_segmented( + uint16_t shard_num, + size_t segment_size, + size_t block_size, + device_config_t config, + std::vector shard_infos) + { + device_superblock_t sb; + sb.shard_num = shard_num; + sb.segment_size = segment_size; + sb.block_size = block_size; + sb.config = std::move(config); + sb.shard_infos = std::move(shard_infos); + return sb; + } + + /// Create a superblock for a ZBD (zone-based) segmented device. 
+ static device_superblock_t make_zbd( + uint16_t shard_num, + size_t segment_size, + size_t block_size, + device_config_t config, + size_t zone_size, + size_t zone_capacity, + size_t zones_per_segment, + std::vector shard_infos) + { + device_superblock_t sb; + sb.shard_num = shard_num; + sb.segment_size = segment_size; + sb.block_size = block_size; + sb.config = std::move(config); + sb.segment_capacity = zone_capacity * zones_per_segment; + sb.zones_per_segment = zones_per_segment; + sb.zone_size = zone_size; + sb.zone_capacity = zone_capacity; + sb.shard_infos = std::move(shard_infos); + return sb; + } + + /// Create a superblock for an RBM (random-block) device. + static device_superblock_t make_rbm( + uint16_t shard_num, + size_t block_size, + size_t total_size, + uint64_t journal_size, + device_config_t config, + std::vector shard_infos) + { + device_superblock_t sb; + sb.shard_num = shard_num; + sb.block_size = block_size; + sb.total_size = total_size; + sb.journal_size = journal_size; + sb.config = std::move(config); + sb.shard_infos = std::move(shard_infos); + return sb; + } + + bool is_end_to_end_data_protection() const { + return feature & (uint64_t)device_feature_t::NVME_END_TO_END_PROTECTION; + } + void set_end_to_end_data_protection() { + feature |= (uint64_t)device_feature_t::NVME_END_TO_END_PROTECTION; + } + + auto fmt_print_ctx(auto& ctx) const -> decltype(ctx.out()) { + fmt::format_to(ctx.out(), + "device_superblock(version={}, shard_num={}, " + "segment_size={:#x}, block_size={:#x}, config={}, " + "total_size={:#x}, journal_size={:#x}, " + "segment_capacity={:#x}, zones_per_segment={}, " + "zone_size={:#x}, zone_capacity={:#x}, " + "crc={}, feature={:#x}, nvme_block_size={}, shards:[", + (unsigned)version, shard_num, + segment_size, block_size, config, + total_size, journal_size, + segment_capacity, zones_per_segment, + zone_size, zone_capacity, + crc, feature, nvme_block_size); + for (const auto& si : shard_infos) { + fmt::format_to(ctx.out(), 
"{},", si); + } + return fmt::format_to(ctx.out(), "])"); + } +}; + +std::ostream& operator<<(std::ostream&, const device_superblock_t&); + class Device; using DeviceRef = std::unique_ptr; @@ -184,6 +394,8 @@ check_create_device_ret check_create_device( WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::device_spec_t) WRITE_CLASS_DENC(crimson::os::seastore::device_config_t) +WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::device_shard_info_t) +WRITE_CLASS_DENC(crimson::os::seastore::device_superblock_t) #if FMT_VERSION >= 90000 template <> struct fmt::formatter : fmt::ostream_formatter {}; diff --git a/src/crimson/os/seastore/random_block_manager.h b/src/crimson/os/seastore/random_block_manager.h index a0ff7e4808d7..dadd2578f45d 100644 --- a/src/crimson/os/seastore/random_block_manager.h +++ b/src/crimson/os/seastore/random_block_manager.h @@ -27,74 +27,6 @@ struct alloc_paddr_result { extent_len_t len; }; -struct rbm_shard_info_t { - std::size_t size = 0; - uint64_t start_offset = 0; - - DENC(rbm_shard_info_t, v, p) { - DENC_START(1, 1, p); - denc(v.size, p); - denc(v.start_offset, p); - DENC_FINISH(p); - } -}; - -enum class rbm_feature_t : uint64_t { - RBM_NVME_END_TO_END_PROTECTION = 1, -}; - -struct rbm_superblock_t { - size_t size = 0; - size_t block_size = 0; - uint64_t feature = 0; - uint64_t journal_size = 0; - checksum_t crc = 0; - device_config_t config; - uint32_t shard_num = 0; - // Must be assigned if ent-to-end-data-protection features is enabled - uint32_t nvme_block_size = 0; - std::vector shard_infos; - - DENC(rbm_superblock_t, v, p) { - DENC_START(1, 1, p); - denc(v.size, p); - denc(v.block_size, p); - denc(v.feature, p); - - denc(v.journal_size, p); - denc(v.crc, p); - denc(v.config, p); - denc(v.shard_num, p); - denc(v.nvme_block_size, p); - denc(v.shard_infos, p); - DENC_FINISH(p); - } - - void validate() const { - ceph_assert(block_size > 0); - for (unsigned int i = 0; i < seastar::smp::count; i ++) { - ceph_assert(shard_infos[i].size > 
block_size && - shard_infos[i].size % block_size == 0); - ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX); - ceph_assert(journal_size > 0 && - journal_size % block_size == 0); - ceph_assert(shard_infos[i].start_offset < size && - shard_infos[i].start_offset % block_size == 0); - } - ceph_assert(config.spec.magic != 0); - ceph_assert(get_default_backend_of_device(config.spec.dtype) == - backend_type_t::RANDOM_BLOCK); - ceph_assert(config.spec.id <= DEVICE_ID_MAX_VALID); - } - - bool is_end_to_end_data_protection() const { - return (feature & (uint64_t)rbm_feature_t::RBM_NVME_END_TO_END_PROTECTION); - } - void set_end_to_end_data_protection() { - feature |= (uint64_t)rbm_feature_t::RBM_NVME_END_TO_END_PROTECTION; - } -}; - enum class rbm_extent_state_t { FREE, // not allocated RESERVED, // extent is reserved by alloc_new_extent, but is not persistent @@ -184,18 +116,6 @@ namespace random_block_device { seastar::future> get_rb_device(const std::string &device); -std::ostream &operator<<(std::ostream &out, const rbm_superblock_t &header); -std::ostream &operator<<(std::ostream &out, const rbm_shard_info_t &shard); +std::ostream &operator<<(std::ostream &out, const rbm_extent_state_t &state); } -WRITE_CLASS_DENC_BOUNDED( - crimson::os::seastore::rbm_shard_info_t -) -WRITE_CLASS_DENC_BOUNDED( - crimson::os::seastore::rbm_superblock_t -) - -#if FMT_VERSION >= 90000 -template<> struct fmt::formatter : fmt::ostream_formatter {}; -template<> struct fmt::formatter : fmt::ostream_formatter {}; -#endif diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc index 26d15d4412dc..db8775a1b182 100644 --- a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc +++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc @@ -205,28 +205,4 @@ void BlockRBManager::prefill_fragmented_device() } #endif -std::ostream &operator<<(std::ostream &out, const rbm_superblock_t 
&header) -{ - out << " rbm_superblock_t(size=" << header.size - << ", block_size=" << header.block_size - << ", feature=" << header.feature - << ", journal_size=" << header.journal_size - << ", crc=" << header.crc - << ", config=" << header.config - << ", shard_num=" << header.shard_num - << ", end_to_end_data_protection=" << header.is_end_to_end_data_protection() - << ", device_block_size=" << header.nvme_block_size; - for (auto p : header.shard_infos) { - out << p; - } - return out << ")"; -} - -std::ostream &operator<<(std::ostream &out, const rbm_shard_info_t &shard) -{ - out << " rbm_shard_info_t(size=" << shard.size - << ", start_offset=" << shard.start_offset; - return out << ")"; -} - } diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.h b/src/crimson/os/seastore/random_block_manager/block_rb_manager.h index 46d2ce88aa50..03b2285d8ceb 100644 --- a/src/crimson/os/seastore/random_block_manager/block_rb_manager.h +++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.h @@ -37,9 +37,10 @@ public: /* * Ondisk layout (TODO) * - * --------------------------------------------------------------------------- - * | rbm_superblock_t | metadatas | ... 
| data blocks | - * --------------------------------------------------------------------------- + * ------------------------------------------------------------------------------- + * | 23B magic | 37B null padding | DENC-encoded superblock header | data blocks | + * | | | header (device_superblock_t) | | + * ------------------------------------------------------------------------------- */ read_ertr::future<> read(paddr_t addr, bufferptr &buffer) override; diff --git a/src/crimson/os/seastore/random_block_manager/rbm_device.cc b/src/crimson/os/seastore/random_block_manager/rbm_device.cc index 0b759e05c4d4..86e2b6bb45d1 100644 --- a/src/crimson/os/seastore/random_block_manager/rbm_device.cc +++ b/src/crimson/os/seastore/random_block_manager/rbm_device.cc @@ -44,26 +44,30 @@ RBMDevice::mkfs_ret RBMDevice::do_primary_mkfs(device_config_t config, ); } - super.block_size = (*st).block_size; - super.size = (*st).size; - super.config = std::move(config); - super.journal_size = journal_size; - ceph_assert_always(super.journal_size > 0); - ceph_assert_always(super.size >= super.journal_size); + const size_t cur_block_size = (*st).block_size; + const size_t cur_total_size = (*st).size; + ceph_assert_always(journal_size > 0); + ceph_assert_always(cur_total_size >= journal_size); ceph_assert_always(shard_num > 0); - std::vector shard_infos(shard_num); + const size_t aligned_size = + (cur_total_size / shard_num) - + ((cur_total_size / shard_num) % cur_block_size); + + std::vector shard_infos(shard_num); for (int i = 0; i < shard_num; i++) { - uint64_t aligned_size = - (super.size / shard_num) - - ((super.size / shard_num) % super.block_size); shard_infos[i].size = aligned_size; shard_infos[i].start_offset = i * aligned_size; - assert(shard_infos[i].size > super.journal_size); + assert(shard_infos[i].size > journal_size); } - super.shard_infos = shard_infos; - super.shard_num = shard_num; shard_info = shard_infos[seastar::this_shard_id()]; + super = 
device_superblock_t::make_rbm( + shard_num, + cur_block_size, + cur_total_size, + journal_size, + std::move(config), + std::move(shard_infos)); DEBUG("super {} ", super); // write super block @@ -100,24 +104,46 @@ write_ertr::future<> RBMDevice::write_rbm_superblock() bufferlist bl; encode(super, bl); - auto iter = bl.begin(); auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size)); - assert(bl.length() < super.block_size); - iter.copy(bl.length(), bp.c_str()); + bp.zero(); + // magic at offset 0, followed by 37 bytes of null padding (just zeroed) + std::memcpy(bp.c_str(), + CRIMSON_DEVICE_SUPERBLOCK_MAGIC.data(), SUPERBLOCK_MAGIC_SIZE); + // DENC-encoded superblock at offset 60 + assert(SUPERBLOCK_HEADER_PREFIX + bl.length() < super.block_size); + auto iter = bl.begin(); + iter.copy(bl.length(), bp.c_str() + SUPERBLOCK_HEADER_PREFIX); co_return co_await write(RBM_START_ADDRESS, bp); } -read_ertr::future RBMDevice::read_rbm_superblock( +read_ertr::future RBMDevice::read_rbm_superblock( rbm_abs_addr addr) { LOG_PREFIX(RBMDevice::read_rbm_superblock); assert(super.block_size > 0); auto bptr = bufferptr(ceph::buffer::create_page_aligned(super.block_size)); co_await read(addr, bptr); + + // verify magic at offset 0 + superblock_magic_t disk_magic; + std::memcpy(disk_magic.data(), bptr.c_str(), SUPERBLOCK_MAGIC_SIZE); + if (disk_magic != CRIMSON_DEVICE_SUPERBLOCK_MAGIC) { + ERROR("invalid superblock magic in read_rbm_superblock, got: {:02x}", + fmt::join( + std::views::transform(disk_magic, + [](std::byte b) { return std::to_integer(b); }), + " ")); + co_return co_await read_ertr::future( + crimson::ct_error::input_output_error::make() + ); + } + + // decode DENC superblock from offset 60 bufferlist bl; - bl.append(bptr); + bl.append(bptr.c_str() + SUPERBLOCK_HEADER_PREFIX, + bptr.length() - SUPERBLOCK_HEADER_PREFIX); auto p = bl.cbegin(); - rbm_superblock_t super_block; + device_superblock_t super_block; bool err = false; try { decode(super_block, p); 
@@ -127,7 +153,7 @@ read_ertr::future RBMDevice::read_rbm_superblock( err = true; } if (err) { - co_return co_await read_ertr::future( + co_return co_await read_ertr::future( crimson::ct_error::input_output_error::make() ); } @@ -135,15 +161,16 @@ read_ertr::future RBMDevice::read_rbm_superblock( bufferlist meta_b_header; super_block.crc = 0; encode(super_block, meta_b_header); - assert(ceph::encoded_sizeof(super_block) < - super_block.block_size); + assert(SUPERBLOCK_HEADER_PREFIX + + ceph::encoded_sizeof(super_block) < + super_block.block_size); // Do CRC verification only if data protection is not supported. if (super_block.is_end_to_end_data_protection() == false) { if (meta_b_header.crc32c(-1) != crc) { DEBUG("bad crc on super block, expected {} != actual {} ", meta_b_header.crc32c(-1), crc); - co_return co_await read_ertr::future( + co_return co_await read_ertr::future( crimson::ct_error::input_output_error::make() ); } @@ -153,7 +180,7 @@ read_ertr::future RBMDevice::read_rbm_superblock( super_block.crc = crc; super = super_block; DEBUG("got {} ", super); - co_return co_await read_ertr::future( + co_return co_await read_ertr::future( read_ertr::ready_future_marker{}, super_block ); @@ -231,7 +258,8 @@ read_ertr::future RBMDevice::get_shard_nums() EphemeralRBMDeviceRef create_test_ephemeral(uint64_t journal_size, uint64_t data_size) { return EphemeralRBMDeviceRef( - new EphemeralRBMDevice(journal_size + data_size + + new EphemeralRBMDevice( + (journal_size + data_size) * seastar::smp::count + random_block_device::RBMDevice::get_shard_reserved_size(), EphemeralRBMDevice::TEST_BLOCK_SIZE)); } @@ -331,7 +359,7 @@ EphemeralRBMDevice::mount_ret EphemeralRBMDevice::mount() { } EphemeralRBMDevice::mkfs_ret EphemeralRBMDevice::mkfs(device_config_t config) { - return do_primary_mkfs(config, 1, DEFAULT_TEST_CBJOURNAL_SIZE); + return do_primary_mkfs(config, seastar::smp::count, DEFAULT_TEST_CBJOURNAL_SIZE); } } diff --git 
a/src/crimson/os/seastore/random_block_manager/rbm_device.h b/src/crimson/os/seastore/random_block_manager/rbm_device.h index 2bf2336b9de5..3e87960849db 100644 --- a/src/crimson/os/seastore/random_block_manager/rbm_device.h +++ b/src/crimson/os/seastore/random_block_manager/rbm_device.h @@ -84,8 +84,8 @@ public: return _readv(rbm_addr, std::move(ptrs)); } protected: - rbm_superblock_t super; - rbm_shard_info_t shard_info; + device_superblock_t super; + device_shard_info_t shard_info; uint32_t device_shard_nums = 0; store_index_t store_index = 0; bool shard_status = true; @@ -126,7 +126,7 @@ public: secondary_device_set_t& get_secondary_devices() final { return super.config.secondary_devices; } - std::size_t get_available_size() const { return super.size; } + std::size_t get_available_size() const { return super.total_size; } extent_len_t get_block_size() const { return super.block_size; } read_ertr::future get_shard_nums() final; @@ -179,7 +179,7 @@ public: write_ertr::future<> write_rbm_superblock(); - read_ertr::future read_rbm_superblock(rbm_abs_addr addr); + read_ertr::future read_rbm_superblock(rbm_abs_addr addr); using stat_device_ret = read_ertr::future; diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index aecf25faa754..7db5b930c48b 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -123,11 +123,6 @@ constexpr device_id_t DEVICE_ID_SEGMENTED_MIN = 0; constexpr device_id_t DEVICE_ID_RANDOM_BLOCK_MIN = 1 << (std::numeric_limits::digits - 1); -// TODO this Signature is only applicable for segment devices(SSD/HDD) not -// for other two devices like ZBD/RANDOM_BLOCK_SSD -constexpr const char SEASTORE_SUPERBLOCK_SIGN[] = "seastore block device\n"; -constexpr std::size_t SEASTORE_SUPERBLOCK_SIGN_LEN = sizeof(SEASTORE_SUPERBLOCK_SIGN) - 1; - struct device_id_printer_t { device_id_t id; }; diff --git a/src/crimson/os/seastore/segment_manager.cc 
b/src/crimson/os/seastore/segment_manager.cc index 9de3d7191fe7..e87c9639ec0c 100644 --- a/src/crimson/os/seastore/segment_manager.cc +++ b/src/crimson/os/seastore/segment_manager.cc @@ -13,33 +13,6 @@ SET_SUBSYS(seastore_device); namespace crimson::os::seastore { -std::ostream& operator<<(std::ostream& out, const block_shard_info_t& sf) -{ - out << "(" - << "size=0x" << std::hex << sf.size << std::dec - << ", segments=" << sf.segments - << ", tracker_offset=0x" << std::hex << sf.tracker_offset - << ", first_segment_offset=0x" << sf.first_segment_offset << std::dec - <<")"; - return out; -} - -std::ostream& operator<<(std::ostream& out, const block_sm_superblock_t& sb) -{ - out << "superblock(" - << "shard_num=" << sb.shard_num - << ", segment_size=0x" << std::hex << sb.segment_size - << ", block_size=0x" << sb.block_size << std::dec - << ", shard_info:"; - for (auto &sf : sb.shard_infos) { - out << sf - << ","; - } - out << "config=" << sb.config - << ")"; - return out; -} - std::ostream& operator<<(std::ostream &out, Segment::segment_state_t s) { using state_t = Segment::segment_state_t; diff --git a/src/crimson/os/seastore/segment_manager.h b/src/crimson/os/seastore/segment_manager.h index 5de78d597a52..0653e73c0a89 100644 --- a/src/crimson/os/seastore/segment_manager.h +++ b/src/crimson/os/seastore/segment_manager.h @@ -21,80 +21,6 @@ namespace crimson::os::seastore { using std::vector; -struct block_shard_info_t { - std::size_t size; - std::size_t segments; - uint64_t tracker_offset; - uint64_t first_segment_offset; - - DENC(block_shard_info_t, v, p) { - DENC_START(1, 1, p); - denc(v.size, p); - denc(v.segments, p); - denc(v.tracker_offset, p); - denc(v.first_segment_offset, p); - DENC_FINISH(p); - } -}; - -struct block_sm_superblock_t { - uint32_t shard_num = 0; - size_t segment_size = 0; - size_t block_size = 0; - - std::vector shard_infos; - - device_config_t config; - - DENC(block_sm_superblock_t, v, p) { - DENC_START(1, 1, p); - denc(v.shard_num, p); - 
denc(v.segment_size, p); - denc(v.block_size, p); - denc(v.shard_infos, p); - denc(v.config, p); - DENC_FINISH(p); - } - - void validate() const { - if(crimson::common::get_conf("seastore_require_partition_count_match_reactor_count")) { - ceph_assert(shard_num == seastar::smp::count); - } - ceph_assert(block_size > 0); - ceph_assert(segment_size > 0 && - segment_size % block_size == 0); - ceph_assert_always(segment_size <= SEGMENT_OFF_MAX); - for (unsigned int i = 0; i < shard_num; i ++) { - ceph_assert(shard_infos[i].size > segment_size && - shard_infos[i].size % block_size == 0); - ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX); - ceph_assert(shard_infos[i].segments > 0); - ceph_assert_always(shard_infos[i].segments <= DEVICE_SEGMENT_ID_MAX); - ceph_assert(shard_infos[i].tracker_offset > 0 && - shard_infos[i].tracker_offset % block_size == 0); - ceph_assert(shard_infos[i].first_segment_offset > shard_infos[i].tracker_offset && - shard_infos[i].first_segment_offset % block_size == 0); - } - ceph_assert(config.spec.magic != 0); - ceph_assert(get_default_backend_of_device(config.spec.dtype) == - backend_type_t::SEGMENTED); - ceph_assert(config.spec.id <= DEVICE_ID_MAX_VALID); - if (!config.major_dev) { - ceph_assert(config.secondary_devices.size() == 0); - } - for (const auto& [k, v] : config.secondary_devices) { - ceph_assert(k != config.spec.id); - ceph_assert(k <= DEVICE_ID_MAX_VALID); - ceph_assert(k == v.id); - ceph_assert(v.magic != 0); - ceph_assert(v.dtype > device_type_t::NONE); - ceph_assert(v.dtype < device_type_t::NUM_TYPES); - } - } -}; - -std::ostream& operator<<(std::ostream&, const block_shard_info_t&); -std::ostream& operator<<(std::ostream&, const block_sm_superblock_t&); class Segment : public boost::intrusive_ref_counter< Segment, @@ -204,15 +130,3 @@ public: }; } - -WRITE_CLASS_DENC( - crimson::os::seastore::block_shard_info_t -) -WRITE_CLASS_DENC( - crimson::os::seastore::block_sm_superblock_t -) - -#if FMT_VERSION >= 90000 -template 
<> struct fmt::formatter : fmt::ostream_formatter {}; -template <> struct fmt::formatter : fmt::ostream_formatter {}; -#endif diff --git a/src/crimson/os/seastore/segment_manager/block.cc b/src/crimson/os/seastore/segment_manager/block.cc index d4f2129338b4..fcc1eced9096 100644 --- a/src/crimson/os/seastore/segment_manager/block.cc +++ b/src/crimson/os/seastore/segment_manager/block.cc @@ -238,7 +238,7 @@ SegmentStateTracker::read_in( } using std::vector; static -block_sm_superblock_t make_superblock( +device_superblock_t make_superblock( device_id_t device_id, device_config_t sm_config, const seastar::stat_data &data) @@ -262,7 +262,7 @@ block_sm_superblock_t make_superblock( size_t segments = (size - tracker_off - total_tracker_size) / config_segment_size; size_t segments_per_shard = segments / seastar::smp::count; - vector shard_infos(seastar::smp::count); + vector shard_infos(seastar::smp::count); for (unsigned int i = 0; i < seastar::smp::count; i++) { shard_infos[i].size = segments_per_shard * config_segment_size; shard_infos[i].segments = segments_per_shard; @@ -280,13 +280,12 @@ block_sm_superblock_t make_superblock( INFO("shard {} infos: {}", i, shard_infos[i]); } - return block_sm_superblock_t{ + return device_superblock_t::make_segmented( seastar::smp::count, config_segment_size, data.block_size, - shard_infos, - std::move(sm_config) - }; + std::move(sm_config), + std::move(shard_infos)); } using open_device_ret = @@ -326,31 +325,33 @@ BlockSegmentManager::access_ertr::future<> write_superblock( device_id_t device_id, seastar::file &device, - block_sm_superblock_t sb) + device_superblock_t sb) { LOG_PREFIX(block_write_superblock); DEBUG("{} write {}", device_id_printer_t{device_id}, sb); sb.validate(); - assert(ceph::encoded_sizeof(sb) < - sb.block_size); + assert(SUPERBLOCK_HEADER_PREFIX + + ceph::encoded_sizeof(sb) < sb.block_size); return seastar::do_with( bufferptr(ceph::buffer::create_page_aligned(sb.block_size)), [=, &device](auto &bp) { - // 
Encode SEASTORE_SUPERBLOCK_SIGN at offset 0 before - // encoding anything else + bp.zero(); + // magic at offset 0, followed by 37 bytes of null padding (already zero) + std::memcpy(bp.c_str(), + CRIMSON_DEVICE_SUPERBLOCK_MAGIC.data(), SUPERBLOCK_MAGIC_SIZE); + // DENC-encoded superblock at offset 60 bufferlist bl; - bl.append(SEASTORE_SUPERBLOCK_SIGN); encode(sb, bl); auto iter = bl.begin(); - assert(bl.length() < sb.block_size); - iter.copy(bl.length(), bp.c_str()); + assert(SUPERBLOCK_HEADER_PREFIX + bl.length() < sb.block_size); + iter.copy(bl.length(), bp.c_str() + SUPERBLOCK_HEADER_PREFIX); return do_write(device_id, device, 0, bp); }); } static -BlockSegmentManager::access_ertr::future +BlockSegmentManager::access_ertr::future read_superblock(seastar::file &device, seastar::stat_data sd) { LOG_PREFIX(block_read_superblock); @@ -366,28 +367,32 @@ read_superblock(seastar::file &device, seastar::stat_data sd) bp.length(), bp ).safe_then([=, &bp] { + // verify magic at offset 0 + superblock_magic_t disk_magic; + std::memcpy(disk_magic.data(), bp.c_str(), SUPERBLOCK_MAGIC_SIZE); + if (disk_magic != CRIMSON_DEVICE_SUPERBLOCK_MAGIC) { + ERROR("invalid superblock magic, got: {:02x}", + fmt::join( + std::views::transform(disk_magic, + [](std::byte b) { return std::to_integer(b); }), + " ")); + ceph_abort_msg("invalid superblock magic"); + } + // decode DENC superblock from offset 60 bufferlist bl; - bl.push_back(bp); - block_sm_superblock_t ret; + bl.append(bp.c_str() + SUPERBLOCK_HEADER_PREFIX, + bp.length() - SUPERBLOCK_HEADER_PREFIX); + device_superblock_t ret; auto bliter = bl.cbegin(); - // Validate the magic prefix - std::string sb_magic; - bliter.copy(SEASTORE_SUPERBLOCK_SIGN_LEN, sb_magic); - if (sb_magic != SEASTORE_SUPERBLOCK_SIGN) { - ERROR("invalid superblock signature: got '{}' expected '{}'", - sb_magic, SEASTORE_SUPERBLOCK_SIGN); - ceph_abort_msg("invalid superblock signature"); - } - try { decode(ret, bliter); } catch (...) 
{ ERROR("got decode error!"); ceph_assert(0 == "invalid superblock"); } - assert(ceph::encoded_sizeof(ret) + - SEASTORE_SUPERBLOCK_SIGN_LEN <= sd.block_size); - return BlockSegmentManager::access_ertr::future( + assert(SUPERBLOCK_HEADER_PREFIX + + ceph::encoded_sizeof(ret) <= sd.block_size); + return BlockSegmentManager::access_ertr::future( BlockSegmentManager::access_ertr::ready_future_marker{}, ret); }); @@ -548,7 +553,7 @@ BlockSegmentManager::mount_ret BlockSegmentManager::shard_mount() device_id_printer_t{get_device_id()}, seastar::this_shard_id() + seastar::smp::count * store_index, sb.shard_num); - store_active = false; + shard_status = false; return mount_ertr::now(); } shard_info = sb.shard_infos[seastar::this_shard_id() + seastar::smp::count * store_index]; @@ -556,7 +561,7 @@ BlockSegmentManager::mount_ret BlockSegmentManager::shard_mount() sb.validate(); superblock = sb; stats.data_read.increment( - ceph::encoded_sizeof(superblock)); + ceph::encoded_sizeof(superblock)); tracker = std::make_unique( shard_info.segments, superblock.block_size); @@ -610,7 +615,7 @@ BlockSegmentManager::mkfs_ret BlockSegmentManager::primary_mkfs( seastar::file device; seastar::stat_data stat; - block_sm_superblock_t sb; + device_superblock_t sb; std::unique_ptr tracker; using crimson::common::get_conf; @@ -625,7 +630,7 @@ BlockSegmentManager::mkfs_ret BlockSegmentManager::primary_mkfs( std::ignore = device.close(); }); sb = make_superblock(get_device_id(), sm_config, stat); - stats.metadata_write.increment(ceph::encoded_sizeof(sb)); + stats.metadata_write.increment(ceph::encoded_sizeof(sb)); co_await write_superblock(get_device_id(), device, sb); INFO("{} complete", device_id_printer_t{get_device_id()}); } @@ -831,7 +836,7 @@ SegmentManager::read_ertr::future<> BlockSegmentManager::read( void BlockSegmentManager::register_metrics(store_index_t store_index) { LOG_PREFIX(BlockSegmentManager::register_metrics); - if (!store_active) { + if (!shard_status) { INFO("{} shard {} is 
not active, skip registering metrics", device_id_printer_t{get_device_id()}, store_index); return; diff --git a/src/crimson/os/seastore/segment_manager/block.h b/src/crimson/os/seastore/segment_manager/block.h index 770284aad666..dd9184898ad3 100644 --- a/src/crimson/os/seastore/segment_manager/block.h +++ b/src/crimson/os/seastore/segment_manager/block.h @@ -220,8 +220,8 @@ private: std::string device_path; std::unique_ptr tracker; - block_shard_info_t shard_info; - block_sm_superblock_t superblock; + device_shard_info_t shard_info; + device_superblock_t superblock; seastar::file device; void set_device_id(device_id_t id) { @@ -260,7 +260,8 @@ private: uint32_t device_shard_nums = 0; store_index_t store_index = 0; - bool store_active = true; + bool shard_status = true; + class MultiShardDevices { public: std::vector> mshard_devices; diff --git a/src/crimson/os/seastore/segment_manager/zbd.cc b/src/crimson/os/seastore/segment_manager/zbd.cc index 2d4ce8c1bfbc..2372d9b6d023 100644 --- a/src/crimson/os/seastore/segment_manager/zbd.cc +++ b/src/crimson/os/seastore/segment_manager/zbd.cc @@ -91,9 +91,9 @@ static open_device_ret open_device( ); } -static zbd_sm_metadata_t make_metadata( +static device_superblock_t make_metadata( uint64_t total_size, - seastore_meta_t meta, + device_config_t config, const seastar::stat_data &data, size_t zone_size_sectors, size_t zone_capacity_sectors, @@ -142,7 +142,7 @@ static zbd_sm_metadata_t make_metadata( per_shard_segments, per_shard_available_size); - std::vector shard_infos(seastar::smp::count); + std::vector shard_infos(seastar::smp::count); for (unsigned int i = 0; i < seastar::smp::count; i++) { shard_infos[i].size = per_shard_available_size; shard_infos[i].segments = per_shard_segments; @@ -152,16 +152,15 @@ static zbd_sm_metadata_t make_metadata( i, shard_infos[i].first_segment_offset); } - zbd_sm_metadata_t ret = zbd_sm_metadata_t{ + auto ret = device_superblock_t::make_zbd( seastar::smp::count, segment_size, - 
zone_capacity * zones_per_segment, - zones_per_segment, - zone_capacity, data.block_size, + std::move(config), zone_size, - shard_infos, - meta}; + zone_capacity, + zones_per_segment, + std::move(shard_infos)); ret.validate(); return ret; } @@ -343,21 +342,26 @@ static write_ertr::future<> do_writev( } static ZBDSegmentManager::access_ertr::future<> -write_metadata(seastar::file &device, zbd_sm_metadata_t sb) +write_metadata(seastar::file &device, device_superblock_t sb) { - assert(ceph::encoded_sizeof_bounded() < - sb.block_size); + assert(SUPERBLOCK_HEADER_PREFIX + + ceph::encoded_sizeof(sb) < sb.block_size); return seastar::do_with( bufferptr(ceph::buffer::create_page_aligned(sb.block_size)), [=, &device](auto &bp) { LOG_PREFIX(ZBDSegmentManager::write_metadata); DEBUG("block_size 0x{:x}", sb.block_size); + bp.zero(); + // magic at offset 0, followed by 37 bytes of null padding (just zero'ed) + std::memcpy(bp.c_str(), + CRIMSON_DEVICE_SUPERBLOCK_MAGIC.data(), SUPERBLOCK_MAGIC_SIZE); + // DENC-encoded superblock at offset 60 bufferlist bl; encode(sb, bl); auto iter = bl.begin(); - assert(bl.length() < sb.block_size); + assert(SUPERBLOCK_HEADER_PREFIX + bl.length() < sb.block_size); DEBUG("buffer length 0x{:x}", bl.length()); - iter.copy(bl.length(), bp.c_str()); + iter.copy(bl.length(), bp.c_str() + SUPERBLOCK_HEADER_PREFIX); DEBUG("doing writeout"); return do_write(device, 0, bp); }); @@ -430,11 +434,9 @@ static read_ertr::future<> do_readv( } static -ZBDSegmentManager::access_ertr::future +ZBDSegmentManager::access_ertr::future read_metadata(seastar::file &device, seastar::stat_data sd) { - assert(ceph::encoded_sizeof_bounded() < - sd.block_size); return seastar::do_with( bufferptr(ceph::buffer::create_page_aligned(sd.block_size)), [=, &device](auto &bp) { @@ -444,13 +446,32 @@ read_metadata(seastar::file &device, seastar::stat_data sd) bp.length(), bp ).safe_then([=, &bp] { + LOG_PREFIX(ZBDSegmentManager::read_metadata); + // verify magic at offset 0 + 
superblock_magic_t disk_magic; + std::memcpy(disk_magic.data(), bp.c_str(), SUPERBLOCK_MAGIC_SIZE); + if (disk_magic != CRIMSON_DEVICE_SUPERBLOCK_MAGIC) { + ERROR("invalid superblock magic, got: {:02x}", + fmt::join( + std::views::transform(disk_magic, + [](std::byte b) { return std::to_integer(b); }), + " ")); + ceph_abort_msg("invalid superblock magic"); + } + // decode DENC superblock from offset 60 bufferlist bl; - bl.push_back(bp); - zbd_sm_metadata_t ret; + bl.append(bp.c_str() + SUPERBLOCK_HEADER_PREFIX, + bp.length() - SUPERBLOCK_HEADER_PREFIX); + device_superblock_t ret; auto bliter = bl.cbegin(); - decode(ret, bliter); + try { + decode(ret, bliter); + } catch (...) { + ERROR("got decode error!"); + ceph_abort_msg("failed to decode superblock"); + } ret.validate(); - return ZBDSegmentManager::access_ertr::future( + return ZBDSegmentManager::access_ertr::future( ZBDSegmentManager::access_ertr::ready_future_marker{}, ret); }); @@ -535,7 +556,7 @@ ZBDSegmentManager::mkfs_ret ZBDSegmentManager::primary_mkfs( return seastar::do_with( seastar::file{}, seastar::stat_data{}, - zbd_sm_metadata_t{}, + device_superblock_t{}, size_t(), size_t(), size_t(), @@ -581,7 +602,7 @@ ZBDSegmentManager::mkfs_ret ZBDSegmentManager::primary_mkfs( zone_size_sects, zone_capacity_sects); sb = make_metadata( size, - config.meta, + config, stat, zone_size_sects, zone_capacity_sects, @@ -589,7 +610,7 @@ ZBDSegmentManager::mkfs_ret ZBDSegmentManager::primary_mkfs( nr_zones); metadata = sb; stats.metadata_write.increment( - ceph::encoded_sizeof_bounded()); + ceph::encoded_sizeof(sb)); DEBUG("Wrote to stats."); return write_metadata(device, sb); }).finally([&, FNAME] { @@ -836,17 +857,17 @@ Segment::write_ertr::future<> ZBDSegmentManager::segment_write( device_id_t ZBDSegmentManager::get_device_id() const { - return metadata.device_id; + return metadata.config.spec.id; }; secondary_device_set_t& ZBDSegmentManager::get_secondary_devices() { - return metadata.secondary_devices; + return 
metadata.config.secondary_devices; }; magic_t ZBDSegmentManager::get_magic() const { - return metadata.magic; + return metadata.config.spec.magic; }; segment_off_t ZBDSegment::get_write_capacity() const diff --git a/src/crimson/os/seastore/segment_manager/zbd.h b/src/crimson/os/seastore/segment_manager/zbd.h index 382223b138f7..c5c110bc433b 100644 --- a/src/crimson/os/seastore/segment_manager/zbd.h +++ b/src/crimson/os/seastore/segment_manager/zbd.h @@ -20,71 +20,6 @@ namespace crimson::os::seastore::segment_manager::zbd { - struct zbd_shard_info_t { - size_t size = 0; - size_t segments = 0; - size_t first_segment_offset = 0; - - DENC(zbd_shard_info_t, v, p) { - DENC_START(1, 1, p); - denc(v.size, p); - denc(v.segments, p); - denc(v.first_segment_offset, p); - DENC_FINISH(p); - } - }; - - struct zbd_sm_metadata_t { - uint32_t shard_num = 0; - size_t segment_size = 0; - size_t segment_capacity = 0; - size_t zones_per_segment = 0; - size_t zone_capacity = 0; - size_t block_size = 0; - size_t zone_size = 0; - - std::vector shard_infos; - - seastore_meta_t meta; - - bool major_dev = false; - magic_t magic = 0; - device_type_t dtype = device_type_t::NONE; - device_id_t device_id = 0; - secondary_device_set_t secondary_devices; - - DENC(zbd_sm_metadata_t, v, p) { - DENC_START(1, 1, p); - denc(v.shard_num, p); - denc(v.segment_size, p); - denc(v.segment_capacity, p); - denc(v.zones_per_segment, p); - denc(v.zone_capacity, p); - denc(v.block_size, p); - denc(v.zone_size, p); - denc(v.shard_infos, p); - denc(v.meta, p); - denc(v.magic, p); - denc(v.dtype, p); - denc(v.device_id, p); - if (v.major_dev) { - denc(v.secondary_devices, p); - } - DENC_FINISH(p); - } - - void validate() const { - for (unsigned int i = 0; i < seastar::smp::count; i++) { - ceph_assert_always(shard_infos[i].size > 0); - ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX); - ceph_assert_always(shard_infos[i].segments > 0); - ceph_assert_always(shard_infos[i].segments <= DEVICE_SEGMENT_ID_MAX); - 
} - ceph_assert_always(segment_capacity > 0); - ceph_assert_always(segment_capacity <= SEGMENT_OFF_MAX); - } - }; - using write_ertr = crimson::errorator; using read_ertr = crimson::errorator; @@ -117,7 +52,7 @@ namespace crimson::os::seastore::segment_manager::zbd { write_ertr::future<> write_padding_bytes(size_t padding_bytes); }; - class ZBDSegmentManager final : public SegmentManager{ + class ZBDSegmentManager final : public SegmentManager { // interfaces used by Device public: seastar::future<> start(uint32_t shard_nums) override; @@ -145,7 +80,8 @@ namespace crimson::os::seastore::segment_manager::zbd { read_ertr::future<> read( paddr_t addr, size_t len, - ceph::bufferptr &out) final; + ceph::bufferptr &out) override; + read_ertr::future<> readv( paddr_t addr, std::vector ptrs) override; @@ -154,7 +90,7 @@ namespace crimson::os::seastore::segment_manager::zbd { device_type_t get_device_type() const override { return device_type_t::ZBD; - } + }; size_t get_available_size() const override { return shard_info.size; @@ -169,7 +105,7 @@ namespace crimson::os::seastore::segment_manager::zbd { }; const seastore_meta_t &get_meta() const { - return metadata.meta; + return metadata.config.meta; }; device_id_t get_device_id() const override; @@ -186,10 +122,11 @@ namespace crimson::os::seastore::segment_manager::zbd { private: friend class ZBDSegment; std::string device_path; - zbd_shard_info_t shard_info; - zbd_sm_metadata_t metadata; + device_shard_info_t shard_info; + device_superblock_t metadata; seastar::file device; uint32_t nr_zones; + struct effort_t { uint64_t num = 0; uint64_t bytes = 0; @@ -237,6 +174,7 @@ namespace crimson::os::seastore::segment_manager::zbd { uint32_t device_shard_nums = 0; store_index_t store_index = 0; bool shard_status = true; + class MultiShardDevices { public: std::vector> mshard_devices; @@ -260,9 +198,3 @@ namespace crimson::os::seastore::segment_manager::zbd { } -WRITE_CLASS_DENC_BOUNDED( - 
crimson::os::seastore::segment_manager::zbd::zbd_shard_info_t -) -WRITE_CLASS_DENC_BOUNDED( - crimson::os::seastore::segment_manager::zbd::zbd_sm_metadata_t -) diff --git a/src/crimson/tools/objectstore/crimson_objectstore_tool.cc b/src/crimson/tools/objectstore/crimson_objectstore_tool.cc index 39e14022005c..338c2b2a2ddd 100644 --- a/src/crimson/tools/objectstore/crimson_objectstore_tool.cc +++ b/src/crimson/tools/objectstore/crimson_objectstore_tool.cc @@ -456,7 +456,7 @@ public: explicit SeastoreMetaReader(const std::string& path, const std::string& device_type) : m_data_path(path), m_device_type(device_type) {} - tl::expected load_seastore_superblock() { + tl::expected load_seastore_superblock() { try { std::string block_path = m_data_path + "/block"; @@ -479,26 +479,22 @@ public: auto bliter = bl.cbegin(); - // fmt::println(std::cout, "Device type: {}", m_device_type); - - if (m_device_type != "RANDOM_BLOCK_SSD") { - // TODO this Signature is only applicable for segment devices(SSD/HDD) not - // for other two devices like ZBD/RANDOM_BLOCK_SSD - constexpr const char SEASTORE_SUPERBLOCK_SIGN[] = "seastore block device\n"; - constexpr std::size_t SEASTORE_SUPERBLOCK_SIGN_LEN = sizeof(SEASTORE_SUPERBLOCK_SIGN) - 1; - - // Validate the magic prefix - std::string sb_magic; - bliter.copy(SEASTORE_SUPERBLOCK_SIGN_LEN, sb_magic); - if (sb_magic != SEASTORE_SUPERBLOCK_SIGN) { - return tl::unexpected("invalid superblock signature " + block_path); - } + // Check magic string before attempting full decode, so that a + // non-crimson device produces a clear error instead of a decode + // exception. + std::string magic; + try { + denc(magic, bliter); + } catch (...) 
{} + if (magic != crimson::os::seastore::CRIMSON_DEVICE_SUPERBLOCK_MAGIC) { + return tl::unexpected("invalid superblock signature in " + block_path); } - crimson::os::seastore::block_sm_superblock_t superblock; + bliter = bl.cbegin(); + crimson::os::seastore::device_superblock_t superblock; decode(superblock, bliter); - ceph_assert(ceph::encoded_sizeof(superblock) < + ceph_assert(ceph::encoded_sizeof(superblock) < block_size); return superblock;