]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd: use a unified super-block for devices
authorRonen Friedman <rfriedma@redhat.com>
Mon, 9 Mar 2026 17:23:18 +0000 (17:23 +0000)
committerRonen Friedman <rfriedma@redhat.com>
Fri, 24 Apr 2026 08:16:22 +0000 (08:16 +0000)
This commit refactors the on-hardware super-block structure
used by the seastore to a unified format that
can accommodate all three device types (HDD, ZBD, RBM).

All devices now have a 60-byte header at offset 0,
similar to the existing BlueStore layout. A 23-byte magic
field (the string "CRIMSON_DEVICE", NUL-padded to 23 bytes)
is placed at the beginning of the header, followed by
37 bytes of null padding (so that the prefix matches the
60-byte BlueStore label prefix). The DENC-encoded
device_superblock_t structure follows, starting at offset 60.

A unified device_config_t is now used for all device types.

The per-shard data structure is also unified: it now holds the superset
of the fields needed by each device type, and fields that a given device
type does not use are left at zero.

We are also adding a check for the super-block magic value in the
RBMDevice::read_rbm_superblock() method, similar to the existing check
in SegmentManager::read_segment_manager_superblock().

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
15 files changed:
src/crimson/os/seastore/device.cc
src/crimson/os/seastore/device.h
src/crimson/os/seastore/random_block_manager.h
src/crimson/os/seastore/random_block_manager/block_rb_manager.cc
src/crimson/os/seastore/random_block_manager/block_rb_manager.h
src/crimson/os/seastore/random_block_manager/rbm_device.cc
src/crimson/os/seastore/random_block_manager/rbm_device.h
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/segment_manager.cc
src/crimson/os/seastore/segment_manager.h
src/crimson/os/seastore/segment_manager/block.cc
src/crimson/os/seastore/segment_manager/block.h
src/crimson/os/seastore/segment_manager/zbd.cc
src/crimson/os/seastore/segment_manager/zbd.h
src/crimson/tools/objectstore/crimson_objectstore_tool.cc

index 632f6d9fc05953e784e5c963b0e7ff54d9eb55a4..bd6231fa94b6de2205b93d3361892d26d10ee6e9 100644 (file)
@@ -3,6 +3,8 @@
 
 #include "device.h"
 
+#include <seastar/core/smp.hh>
+
 #include "segment_manager.h"
 #include "random_block_manager.h"
 #include "random_block_manager/rbm_device.h"
@@ -34,6 +36,83 @@ std::ostream& operator<<(std::ostream& out, const device_config_t& conf)
   return out << "))";
 }
 
+/// Stream a device_shard_info_t by rendering it with fmt's "{}" format
+/// directly into `out`; returns `out` so calls can be chained.
+std::ostream& operator<<(std::ostream& out, const device_shard_info_t& si)
+{
+  fmt::print(out, "{}", si);
+  return out;
+}
+
+/// Stream a device_superblock_t by rendering it with fmt's "{}" format
+/// directly into `out`; returns `out` so calls can be chained.
+std::ostream& operator<<(std::ostream& out, const device_superblock_t& sb)
+{
+  fmt::print(out, "{}", sb);
+  return out;
+}
+
+/// Sanity-check this superblock; every violated invariant fails an
+/// assertion (ceph_assert / ceph_assert_always).
+///
+/// The checks run in three groups:
+///  1. header and device-config checks common to all device types,
+///  2. zone/segment geometry, for ZBD devices only,
+///  3. per-shard layout checks, selected by the backend that
+///     get_default_backend_of_device() reports for config.spec.dtype
+///     (SEGMENTED, otherwise RANDOM_BLOCK).
+void device_superblock_t::validate() const
+{
+  // NOTE: magic is validated at the read site, outside this struct.
+  ceph_assert(version == CRIMSON_DEVICE_SUPERBLOCK_VERSION);
+  if (crimson::common::get_conf<bool>(
+          "seastore_require_partition_count_match_reactor_count")) {
+    ceph_assert(shard_num == seastar::smp::count);
+  }
+  ceph_assert(block_size > 0);
+  ceph_assert(config.spec.magic != 0);
+  ceph_assert(config.spec.id <= DEVICE_ID_MAX_VALID);
+  if (!config.major_dev) {
+    // only the major device may list secondary devices
+    ceph_assert(config.secondary_devices.empty());
+  }
+  for (const auto& [k, v] : config.secondary_devices) {
+    ceph_assert(k != config.spec.id);
+    ceph_assert(k <= DEVICE_ID_MAX_VALID);
+    ceph_assert(k == v.id);
+    ceph_assert(v.magic != 0);
+    ceph_assert(v.dtype > device_type_t::NONE);
+    ceph_assert(v.dtype < device_type_t::NUM_TYPES);
+  }
+  if (config.spec.dtype == device_type_t::ZBD) {
+    // ZBD: check zone/segment geometry
+    ceph_assert(segment_capacity > 0);
+    ceph_assert_always(segment_capacity <= SEGMENT_OFF_MAX);
+  }
+  const auto backend = get_default_backend_of_device(config.spec.dtype);
+  if (backend == backend_type_t::SEGMENTED) {
+    ceph_assert(segment_size > 0 && segment_size % block_size == 0);
+    ceph_assert_always(segment_size <= SEGMENT_OFF_MAX);
+    // the loop below indexes shard_infos[0 .. shard_num)
+    ceph_assert(shard_infos.size() >= shard_num);
+    for (unsigned int i = 0; i < shard_num; i++) {
+      const auto& si = shard_infos[i];
+      ceph_assert(si.size > 0);
+      ceph_assert_always(si.size <= DEVICE_OFF_MAX);
+      ceph_assert(si.segments > 0);
+      ceph_assert_always(si.segments <= DEVICE_SEGMENT_ID_MAX);
+      if (config.spec.dtype != device_type_t::ZBD) {
+        // HDD: check tracker and first-segment offsets
+        ceph_assert(si.size > segment_size &&
+                    si.size % block_size == 0);
+        ceph_assert(si.tracker_offset > 0 &&
+                    si.tracker_offset % block_size == 0);
+        ceph_assert(si.first_segment_offset > si.tracker_offset &&
+                    si.first_segment_offset % block_size == 0);
+      }
+    }
+  } else {
+    // RBM
+    ceph_assert(total_size > 0);
+    // reuse the backend computed above instead of querying it again
+    ceph_assert(backend == backend_type_t::RANDOM_BLOCK);
+    // journal_size is a device-wide field: check it once, outside the
+    // per-shard loop, so it is also enforced when shard_num is 0.
+    ceph_assert(journal_size > 0 && journal_size % block_size == 0);
+    // the loop below indexes shard_infos[0 .. shard_num)
+    ceph_assert(shard_infos.size() >= shard_num);
+    for (unsigned int i = 0; i < shard_num; i++) {
+      const auto& si = shard_infos[i];
+      ceph_assert(si.size > block_size &&
+                  si.size % block_size == 0);
+      ceph_assert_always(si.size <= DEVICE_OFF_MAX);
+      ceph_assert(si.start_offset < total_size &&
+                  si.start_offset % block_size == 0);
+    }
+  }
+}
+
 seastar::future<DeviceRef>
 Device::make_device(const std::string& device, device_type_t dtype)
 {
index da37ebf198ff1ecf63da25afcc9fc193f22bbe62..d3f7dfba0411de2cc7257107e341036b2c83225c 100644 (file)
@@ -3,10 +3,15 @@
 
 #pragma once
 
+#include <array>
+#include <cstddef>
 #include <memory>
+#include <string>
+#include <string_view>
 
 #include "include/buffer_fwd.h"
 
+#include "common/fmt_common.h"
 #include "crimson/common/errorator.h"
 #include "crimson/os/seastore/seastore_types.h"
 #include "crimson/common/smp_helpers.h"
@@ -78,6 +83,211 @@ struct device_config_t {
 
 std::ostream& operator<<(std::ostream&, const device_config_t&);
 
+/* -----------------------------------------------------------------------
+ * Unified superblock written at offset 0 on every Crimson device type:
+ *   HDD (block-segment), ZBD (zoned-block), RBM (random-block / NVMe)
+ * -----------------------------------------------------------------------
+ */
+
+/// On-disk superblock magic type (23 bytes, matching classic BlueStore prefix size).
+/// Written outside the DENC envelope at device offset 0.
+using superblock_magic_t = std::array<std::byte, 23>;
+
+/// Magic identifying all Crimson device superblocks: the 14 characters
+/// "CRIMSON_DEVICE" followed by 9 zero bytes (23 bytes in total).
+constexpr superblock_magic_t CRIMSON_DEVICE_SUPERBLOCK_MAGIC = {{
+    std::byte{'C'}, std::byte{'R'}, std::byte{'I'}, std::byte{'M'},
+    std::byte{'S'}, std::byte{'O'}, std::byte{'N'}, std::byte{'_'},
+    std::byte{'D'}, std::byte{'E'}, std::byte{'V'}, std::byte{'I'},
+    std::byte{'C'}, std::byte{'E'}, std::byte{0},   std::byte{0},
+    std::byte{0},   std::byte{0},   std::byte{0},   std::byte{0},
+    std::byte{0},   std::byte{0},   std::byte{0}
+}};
+
+/// Size constants for the on-disk prefix (magic + null padding) that
+/// precedes the DENC-encoded superblock, matching the classic BlueStore
+/// 60-byte label prefix layout.
+constexpr size_t SUPERBLOCK_MAGIC_SIZE = sizeof(CRIMSON_DEVICE_SUPERBLOCK_MAGIC); // 23
+// Zero padding after the magic; same size as the UUID block in BlueStore's
+// label prefix (presumably a 36-char UUID plus newline -- TODO confirm).
+constexpr size_t SUPERBLOCK_MAGIC_PAD  = 37;
+constexpr size_t SUPERBLOCK_HEADER_PREFIX = SUPERBLOCK_MAGIC_SIZE + SUPERBLOCK_MAGIC_PAD;
+static_assert(SUPERBLOCK_HEADER_PREFIX == 60);
+
+/// Current superblock format version
+constexpr uint8_t CRIMSON_DEVICE_SUPERBLOCK_VERSION = 1;
+
+/// Feature bits stored in device_superblock_t::feature
+enum class device_feature_t : uint64_t {
+  NVME_END_TO_END_PROTECTION = 1,
+};
+
+/// Unified per-shard layout info for all device types.
+/// Fields unused by a given device type are left at their zero default.
+/// One instance per shard is stored in device_superblock_t::shard_infos.
+// NOTE(review): the size_t members are passed to denc() as-is; presumably
+// this relies on size_t being 64 bits wide on every host that reads the
+// device -- confirm.
+struct device_shard_info_t {
+  size_t size = 0;                    ///< usable shard size in bytes (all)
+  size_t segments = 0;                ///< number of segments (HDD/ZBD; 0 for RBM)
+  uint64_t first_segment_offset = 0;  ///< byte offset of first segment (HDD/ZBD)
+  uint64_t tracker_offset = 0;        ///< byte offset of segment-state tracker (HDD)
+  uint64_t start_offset = 0;          ///< byte offset of shard start (RBM)
+
+  // Versioned DENC encoding; the fields are encoded in the order listed here.
+  DENC(device_shard_info_t, v, p) {
+    DENC_START(1, 1, p);
+    denc(v.size, p);
+    denc(v.segments, p);
+    denc(v.first_segment_offset, p);
+    denc(v.tracker_offset, p);
+    denc(v.start_offset, p);
+    DENC_FINISH(p);
+  }
+
+  /// Write a one-line, human-readable rendering of all fields to ctx.out()
+  /// and return the advanced output iterator (sizes/offsets in hex).
+  auto fmt_print_ctx(auto& ctx) const -> decltype(ctx.out()) {
+    return fmt::format_to(ctx.out(),
+      "device_shard_info(size={:#x}, segments={}, "
+      "first_segment_offset={:#x}, tracker_offset={:#x}, start_offset={:#x})",
+      size, segments, first_segment_offset, tracker_offset, start_offset);
+  }
+};
+
+std::ostream& operator<<(std::ostream&, const device_shard_info_t&);
+
+/// Unified on-disk superblock for all Crimson device types.
+/// Fields specific to a device type are zero for other types.
+///
+/// Build instances with make_segmented(), make_zbd() or make_rbm(); each
+/// factory fills only the fields its device type uses. validate() asserts
+/// the invariants of the populated fields.
+struct device_superblock_t {
+  // --- Fixed header (all device types) ---
+  // NOTE: the magic string is written/read separately at device offset 0,
+  // outside this DENC-encoded structure (see SUPERBLOCK_HEADER_PREFIX).
+  uint16_t version = CRIMSON_DEVICE_SUPERBLOCK_VERSION;
+  uint16_t shard_num = 0;
+  size_t segment_size = 0;   ///< logical segment size in bytes (HDD/ZBD; 0 for RBM)
+  size_t block_size = 0;
+  device_config_t config;
+
+  // --- Device-type-specific size information (union concept) ---
+  size_t total_size = 0;          ///< total device capacity in bytes (RBM)
+  uint64_t journal_size = 0;      ///< journal area size in bytes (RBM)
+  size_t segment_capacity = 0;    ///< usable bytes/segment = zone_capacity*zones_per_segment (ZBD)
+  size_t zones_per_segment = 0;   ///< zones per segment (ZBD)
+  size_t zone_size = 0;           ///< physical zone size in bytes (ZBD)
+  size_t zone_capacity = 0;       ///< usable zone capacity in bytes (ZBD)
+
+  // --- Per-shard information ---
+  std::vector<device_shard_info_t> shard_infos;
+
+  // --- RBM-specific remaining fields ---
+  checksum_t crc = 0;
+  uint64_t feature = 0;          ///< device_feature_t bits
+  uint32_t nvme_block_size = 0;  ///< NVMe logical block size (E2E protection)
+
+  // Versioned DENC encoding; the fields are encoded in the order listed here.
+  DENC(device_superblock_t, v, p) {
+    DENC_START(1, 1, p);
+    denc(v.version, p);
+    denc(v.shard_num, p);
+    denc(v.segment_size, p);
+    denc(v.block_size, p);
+    denc(v.config, p);
+    denc(v.total_size, p);
+    denc(v.journal_size, p);
+    denc(v.segment_capacity, p);
+    denc(v.zones_per_segment, p);
+    denc(v.zone_size, p);
+    denc(v.zone_capacity, p);
+    denc(v.shard_infos, p);
+    denc(v.crc, p);
+    denc(v.feature, p);
+    denc(v.nvme_block_size, p);
+    DENC_FINISH(p);
+  }
+
+  /// Assert the structural invariants of this superblock (defined in device.cc).
+  void validate() const;
+
+  /// Create a superblock for a segmented (HDD/SSD) device.
+  /// All ZBD- and RBM-specific fields keep their zero defaults.
+  static device_superblock_t make_segmented(
+    uint16_t shard_num,
+    size_t segment_size,
+    size_t block_size,
+    device_config_t config,
+    std::vector<device_shard_info_t> shard_infos)
+  {
+    device_superblock_t sb;
+    sb.shard_num = shard_num;
+    sb.segment_size = segment_size;
+    sb.block_size = block_size;
+    sb.config = std::move(config);
+    sb.shard_infos = std::move(shard_infos);
+    return sb;
+  }
+
+  /// Create a superblock for a ZBD (zone-based) segmented device.
+  /// segment_capacity is derived as zone_capacity * zones_per_segment.
+  static device_superblock_t make_zbd(
+    uint16_t shard_num,
+    size_t segment_size,
+    size_t block_size,
+    device_config_t config,
+    size_t zone_size,
+    size_t zone_capacity,
+    size_t zones_per_segment,
+    std::vector<device_shard_info_t> shard_infos)
+  {
+    device_superblock_t sb;
+    sb.shard_num = shard_num;
+    sb.segment_size = segment_size;
+    sb.block_size = block_size;
+    sb.config = std::move(config);
+    sb.segment_capacity = zone_capacity * zones_per_segment;
+    sb.zones_per_segment = zones_per_segment;
+    sb.zone_size = zone_size;
+    sb.zone_capacity = zone_capacity;
+    sb.shard_infos = std::move(shard_infos);
+    return sb;
+  }
+
+  /// Create a superblock for an RBM (random-block) device.
+  /// segment_size and all ZBD-specific fields keep their zero defaults.
+  static device_superblock_t make_rbm(
+    uint16_t shard_num,
+    size_t block_size,
+    size_t total_size,
+    uint64_t journal_size,
+    device_config_t config,
+    std::vector<device_shard_info_t> shard_infos)
+  {
+    device_superblock_t sb;
+    sb.shard_num = shard_num;
+    sb.block_size = block_size;
+    sb.total_size = total_size;
+    sb.journal_size = journal_size;
+    sb.config = std::move(config);
+    sb.shard_infos = std::move(shard_infos);
+    return sb;
+  }
+
+  /// True when the NVME_END_TO_END_PROTECTION bit is set in `feature`.
+  bool is_end_to_end_data_protection() const {
+    return (feature &
+            static_cast<uint64_t>(
+              device_feature_t::NVME_END_TO_END_PROTECTION)) != 0;
+  }
+  /// Set the NVME_END_TO_END_PROTECTION bit in `feature`.
+  void set_end_to_end_data_protection() {
+    feature |= static_cast<uint64_t>(
+      device_feature_t::NVME_END_TO_END_PROTECTION);
+  }
+
+  /// Write a human-readable rendering of all fields, followed by every
+  /// entry of shard_infos, and return the advanced output iterator.
+  auto fmt_print_ctx(auto& ctx) const -> decltype(ctx.out()) {
+    // Feed the iterator returned by each format_to() call into the next
+    // call instead of restarting from ctx.out(), so the output stays
+    // contiguous for any output-iterator type, not only appender-style ones.
+    auto out = fmt::format_to(ctx.out(),
+      "device_superblock(version={}, shard_num={}, "
+      "segment_size={:#x}, block_size={:#x}, config={}, "
+      "total_size={:#x}, journal_size={:#x}, "
+      "segment_capacity={:#x}, zones_per_segment={}, "
+      "zone_size={:#x}, zone_capacity={:#x}, "
+      "crc={}, feature={:#x}, nvme_block_size={}, shards:[",
+      static_cast<unsigned>(version), shard_num,
+      segment_size, block_size, config,
+      total_size, journal_size,
+      segment_capacity, zones_per_segment,
+      zone_size, zone_capacity,
+      crc, feature, nvme_block_size);
+    for (const auto& si : shard_infos) {
+      out = fmt::format_to(out, "{},", si);
+    }
+    return fmt::format_to(out, "])");
+  }
+};
+
+std::ostream& operator<<(std::ostream&, const device_superblock_t&);
+
 class Device;
 using DeviceRef = std::unique_ptr<Device>;
 
@@ -184,6 +394,8 @@ check_create_device_ret check_create_device(
 
 WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::device_spec_t)
 WRITE_CLASS_DENC(crimson::os::seastore::device_config_t)
+WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::device_shard_info_t)
+WRITE_CLASS_DENC(crimson::os::seastore::device_superblock_t)
 
 #if FMT_VERSION >= 90000
 template <> struct fmt::formatter<crimson::os::seastore::device_config_t> : fmt::ostream_formatter {};
index a0ff7e4808d74eae2812d5a3690850a6b24344d6..dadd2578f45d0540ec05f2498baffd2a754f820d 100644 (file)
@@ -27,74 +27,6 @@ struct alloc_paddr_result {
   extent_len_t len;
 };
 
-struct rbm_shard_info_t {
-  std::size_t size = 0;
-  uint64_t start_offset = 0;
-
-  DENC(rbm_shard_info_t, v, p) {
-    DENC_START(1, 1, p);
-    denc(v.size, p);
-    denc(v.start_offset, p);
-    DENC_FINISH(p);
-  }
-};
-
-enum class rbm_feature_t : uint64_t {
-  RBM_NVME_END_TO_END_PROTECTION = 1,
-};
-
-struct rbm_superblock_t {
-  size_t size = 0;
-  size_t block_size = 0;
-  uint64_t feature = 0;
-  uint64_t journal_size = 0;
-  checksum_t crc = 0;
-  device_config_t config;
-  uint32_t shard_num = 0;
-  // Must be assigned if ent-to-end-data-protection features is enabled
-  uint32_t nvme_block_size = 0;
-  std::vector<rbm_shard_info_t> shard_infos;
-
-  DENC(rbm_superblock_t, v, p) {
-    DENC_START(1, 1, p);
-    denc(v.size, p);
-    denc(v.block_size, p);
-    denc(v.feature, p);
-
-    denc(v.journal_size, p);
-    denc(v.crc, p);
-    denc(v.config, p);
-    denc(v.shard_num, p);
-    denc(v.nvme_block_size, p);
-    denc(v.shard_infos, p);
-    DENC_FINISH(p);
-  }
-
-  void validate() const {
-    ceph_assert(block_size > 0);
-    for (unsigned int i = 0; i < seastar::smp::count; i ++) {
-      ceph_assert(shard_infos[i].size > block_size &&
-                  shard_infos[i].size % block_size == 0);
-      ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX);
-      ceph_assert(journal_size > 0 &&
-                  journal_size % block_size == 0);
-      ceph_assert(shard_infos[i].start_offset < size &&
-                 shard_infos[i].start_offset % block_size == 0);
-    }
-    ceph_assert(config.spec.magic != 0);
-    ceph_assert(get_default_backend_of_device(config.spec.dtype) ==
-               backend_type_t::RANDOM_BLOCK);
-    ceph_assert(config.spec.id <= DEVICE_ID_MAX_VALID);
-  }
-
-  bool is_end_to_end_data_protection() const {
-    return (feature & (uint64_t)rbm_feature_t::RBM_NVME_END_TO_END_PROTECTION);
-  }
-  void set_end_to_end_data_protection() {
-    feature |= (uint64_t)rbm_feature_t::RBM_NVME_END_TO_END_PROTECTION;
-  }
-};
-
 enum class rbm_extent_state_t {
   FREE,                // not allocated
   RESERVED,    // extent is reserved by alloc_new_extent, but is not persistent
@@ -184,18 +116,6 @@ namespace random_block_device {
 seastar::future<std::unique_ptr<random_block_device::RBMDevice>> 
   get_rb_device(const std::string &device);
 
-std::ostream &operator<<(std::ostream &out, const rbm_superblock_t &header);
-std::ostream &operator<<(std::ostream &out, const rbm_shard_info_t &shard);
+std::ostream &operator<<(std::ostream &out, const rbm_extent_state_t &state);
 }
 
-WRITE_CLASS_DENC_BOUNDED(
-  crimson::os::seastore::rbm_shard_info_t
-)
-WRITE_CLASS_DENC_BOUNDED(
-  crimson::os::seastore::rbm_superblock_t
-)
-
-#if FMT_VERSION >= 90000
-template<> struct fmt::formatter<crimson::os::seastore::rbm_superblock_t> : fmt::ostream_formatter {};
-template<> struct fmt::formatter<crimson::os::seastore::rbm_shard_info_t> : fmt::ostream_formatter {};
-#endif
index 26d15d4412dce66f2e2ffd3c50d2a7c63768242e..db8775a1b182249f2bd7cedf352e5a5040b5fe54 100644 (file)
@@ -205,28 +205,4 @@ void BlockRBManager::prefill_fragmented_device()
 }
 #endif
 
-std::ostream &operator<<(std::ostream &out, const rbm_superblock_t &header)
-{
-  out << " rbm_superblock_t(size=" << header.size
-       << ", block_size=" << header.block_size
-       << ", feature=" << header.feature
-       << ", journal_size=" << header.journal_size
-       << ", crc=" << header.crc
-       << ", config=" << header.config
-       << ", shard_num=" << header.shard_num
-       << ", end_to_end_data_protection=" << header.is_end_to_end_data_protection()
-       << ", device_block_size=" << header.nvme_block_size;
-  for (auto p : header.shard_infos) {
-    out << p;
-  }
-  return out << ")";
-}
-
-std::ostream &operator<<(std::ostream &out, const rbm_shard_info_t &shard)
-{
-  out << " rbm_shard_info_t(size=" << shard.size
-      << ", start_offset=" << shard.start_offset;
-  return out << ")";
-}
-
 }
index 46d2ce88aa50ee4603eb5ba67aba93cfb7e9708f..03b2285d8ceb431b6f8d040d0f1a5cabb1f9df97 100644 (file)
@@ -37,9 +37,10 @@ public:
   /*
    * Ondisk layout (TODO)
    *
-   * ---------------------------------------------------------------------------
-   * | rbm_superblock_t | metadatas |        ...      |    data blocks    |
-   * ---------------------------------------------------------------------------
+   * -------------------------------------------------------------------------------
+   * | 23B magic | 37B null padding | DENC-encoded superblock header | data blocks |
+   * |           |                  | (device_superblock_t)          |             |
+   * -------------------------------------------------------------------------------
    */
 
   read_ertr::future<> read(paddr_t addr, bufferptr &buffer) override;
index 0b759e05c4d455ab8400e021f7e45c98786fd83e..86e2b6bb45d1487bd3300e603921640b8ed06e04 100644 (file)
@@ -44,26 +44,30 @@ RBMDevice::mkfs_ret RBMDevice::do_primary_mkfs(device_config_t config,
     );
   }
 
-  super.block_size = (*st).block_size;
-  super.size = (*st).size;
-  super.config = std::move(config);
-  super.journal_size = journal_size;
-  ceph_assert_always(super.journal_size > 0);
-  ceph_assert_always(super.size >= super.journal_size);
+  const size_t cur_block_size = (*st).block_size;
+  const size_t cur_total_size = (*st).size;
+  ceph_assert_always(journal_size > 0);
+  ceph_assert_always(cur_total_size >= journal_size);
   ceph_assert_always(shard_num > 0);
 
-  std::vector<rbm_shard_info_t> shard_infos(shard_num);
+  const size_t aligned_size =
+    (cur_total_size / shard_num) -
+    ((cur_total_size / shard_num) % cur_block_size);
+
+  std::vector<device_shard_info_t> shard_infos(shard_num);
   for (int i = 0; i < shard_num; i++) {
-    uint64_t aligned_size = 
-      (super.size / shard_num) -
-      ((super.size / shard_num) % super.block_size);
     shard_infos[i].size = aligned_size;
     shard_infos[i].start_offset = i * aligned_size;
-    assert(shard_infos[i].size > super.journal_size);
+    assert(shard_infos[i].size > journal_size);
   }
-  super.shard_infos = shard_infos;
-  super.shard_num = shard_num;
   shard_info = shard_infos[seastar::this_shard_id()];
+  super = device_superblock_t::make_rbm(
+    shard_num,
+    cur_block_size,
+    cur_total_size,
+    journal_size,
+    std::move(config),
+    std::move(shard_infos));
   DEBUG("super {} ", super);
 
   // write super block
@@ -100,24 +104,46 @@ write_ertr::future<> RBMDevice::write_rbm_superblock()
 
   bufferlist bl;
   encode(super, bl);
-  auto iter = bl.begin();
   auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
-  assert(bl.length() < super.block_size);
-  iter.copy(bl.length(), bp.c_str());
+  bp.zero();
+  // magic at offset 0, followed by 37 bytes of null padding (just zeroed)
+  std::memcpy(bp.c_str(),
+             CRIMSON_DEVICE_SUPERBLOCK_MAGIC.data(), SUPERBLOCK_MAGIC_SIZE);
+  // DENC-encoded superblock at offset 60
+  assert(SUPERBLOCK_HEADER_PREFIX + bl.length() < super.block_size);
+  auto iter = bl.begin();
+  iter.copy(bl.length(), bp.c_str() + SUPERBLOCK_HEADER_PREFIX);
   co_return co_await write(RBM_START_ADDRESS, bp);
 }
 
-read_ertr::future<rbm_superblock_t> RBMDevice::read_rbm_superblock(
+read_ertr::future<device_superblock_t> RBMDevice::read_rbm_superblock(
   rbm_abs_addr addr)
 {
   LOG_PREFIX(RBMDevice::read_rbm_superblock);
   assert(super.block_size > 0);
   auto bptr = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
   co_await read(addr, bptr);
+
+  // verify magic at offset 0
+  superblock_magic_t disk_magic;
+  std::memcpy(disk_magic.data(), bptr.c_str(), SUPERBLOCK_MAGIC_SIZE);
+  if (disk_magic != CRIMSON_DEVICE_SUPERBLOCK_MAGIC) {
+    ERROR("invalid superblock magic in read_rbm_superblock, got: {:02x}",
+      fmt::join(
+        std::views::transform(disk_magic,
+          [](std::byte b) { return std::to_integer<uint8_t>(b); }),
+        " "));
+    co_return co_await read_ertr::future<device_superblock_t>(
+      crimson::ct_error::input_output_error::make()
+    );
+  }
+
+  // decode DENC superblock from offset 60
   bufferlist bl;
-  bl.append(bptr);
+  bl.append(bptr.c_str() + SUPERBLOCK_HEADER_PREFIX,
+           bptr.length() - SUPERBLOCK_HEADER_PREFIX);
   auto p = bl.cbegin();
-  rbm_superblock_t super_block;
+  device_superblock_t super_block;
   bool err = false;
   try {
     decode(super_block, p);
@@ -127,7 +153,7 @@ read_ertr::future<rbm_superblock_t> RBMDevice::read_rbm_superblock(
     err = true;
   }
   if (err) {
-    co_return co_await read_ertr::future<rbm_superblock_t>(
+    co_return co_await read_ertr::future<device_superblock_t>(
       crimson::ct_error::input_output_error::make()
     );
   }
@@ -135,15 +161,16 @@ read_ertr::future<rbm_superblock_t> RBMDevice::read_rbm_superblock(
   bufferlist meta_b_header;
   super_block.crc = 0;
   encode(super_block, meta_b_header);
-  assert(ceph::encoded_sizeof<rbm_superblock_t>(super_block) <
-      super_block.block_size);
+  assert(SUPERBLOCK_HEADER_PREFIX +
+        ceph::encoded_sizeof<device_superblock_t>(super_block) <
+        super_block.block_size);
 
   // Do CRC verification only if data protection is not supported.
   if (super_block.is_end_to_end_data_protection() == false) {
     if (meta_b_header.crc32c(-1) != crc) {
       DEBUG("bad crc on super block, expected {} != actual {} ",
            meta_b_header.crc32c(-1), crc);
-      co_return co_await read_ertr::future<rbm_superblock_t>(
+      co_return co_await read_ertr::future<device_superblock_t>(
        crimson::ct_error::input_output_error::make()
       );
     }
@@ -153,7 +180,7 @@ read_ertr::future<rbm_superblock_t> RBMDevice::read_rbm_superblock(
   super_block.crc = crc;
   super = super_block;
   DEBUG("got {} ", super);
-  co_return co_await read_ertr::future<rbm_superblock_t>(
+  co_return co_await read_ertr::future<device_superblock_t>(
     read_ertr::ready_future_marker{},
     super_block
   );
@@ -231,7 +258,8 @@ read_ertr::future<uint32_t> RBMDevice::get_shard_nums()
 
 EphemeralRBMDeviceRef create_test_ephemeral(uint64_t journal_size, uint64_t data_size) {
   return EphemeralRBMDeviceRef(
-    new EphemeralRBMDevice(journal_size + data_size + 
+    new EphemeralRBMDevice(
+      (journal_size + data_size) * seastar::smp::count +
        random_block_device::RBMDevice::get_shard_reserved_size(),
        EphemeralRBMDevice::TEST_BLOCK_SIZE));
 }
@@ -331,7 +359,7 @@ EphemeralRBMDevice::mount_ret EphemeralRBMDevice::mount() {
 }
 
 EphemeralRBMDevice::mkfs_ret EphemeralRBMDevice::mkfs(device_config_t config) {
-  return do_primary_mkfs(config, 1, DEFAULT_TEST_CBJOURNAL_SIZE);
+  return do_primary_mkfs(config, seastar::smp::count, DEFAULT_TEST_CBJOURNAL_SIZE);
 }
 
 }
index 2bf2336b9de5cd4fc47b3e54e710b45769b55cff..3e87960849db592cecbc5c7066701097d858e911 100644 (file)
@@ -84,8 +84,8 @@ public:
     return _readv(rbm_addr, std::move(ptrs));
   }
 protected:
-  rbm_superblock_t super;
-  rbm_shard_info_t shard_info;
+  device_superblock_t super;
+  device_shard_info_t shard_info;
   uint32_t device_shard_nums = 0;
   store_index_t store_index = 0;
   bool shard_status = true;
@@ -126,7 +126,7 @@ public:
   secondary_device_set_t& get_secondary_devices() final {
     return super.config.secondary_devices;
   }
-  std::size_t get_available_size() const { return super.size; }
+  std::size_t get_available_size() const { return super.total_size; }
   extent_len_t get_block_size() const { return super.block_size; }
 
   read_ertr::future<uint32_t> get_shard_nums() final;
@@ -179,7 +179,7 @@ public:
 
   write_ertr::future<> write_rbm_superblock();
 
-  read_ertr::future<rbm_superblock_t> read_rbm_superblock(rbm_abs_addr addr);
+  read_ertr::future<device_superblock_t> read_rbm_superblock(rbm_abs_addr addr);
 
   using stat_device_ret =
     read_ertr::future<seastar::stat_data>;
index aecf25faa75480bd28486f2c2a1ceeec3aa68d3f..7db5b930c48b6ef0a9dfb80b613ebab732ee7d42 100644 (file)
@@ -123,11 +123,6 @@ constexpr device_id_t DEVICE_ID_SEGMENTED_MIN = 0;
 constexpr device_id_t DEVICE_ID_RANDOM_BLOCK_MIN = 
   1 << (std::numeric_limits<device_id_t>::digits - 1);
 
-// TODO this Signature is only applicable for segment devices(SSD/HDD) not
-// for other two devices like ZBD/RANDOM_BLOCK_SSD
-constexpr const char SEASTORE_SUPERBLOCK_SIGN[] = "seastore block device\n";
-constexpr std::size_t SEASTORE_SUPERBLOCK_SIGN_LEN = sizeof(SEASTORE_SUPERBLOCK_SIGN) - 1;
-
 struct device_id_printer_t {
   device_id_t id;
 };
index 9de3d7191fe79c73780f672d5493f10ef2edc791..e87c9639ec0cf0713b3a1c8b3abfde930ba2a2a8 100644 (file)
@@ -13,33 +13,6 @@ SET_SUBSYS(seastore_device);
 
 namespace crimson::os::seastore {
 
-std::ostream& operator<<(std::ostream& out, const block_shard_info_t& sf)
-{
-  out << "("
-      << "size=0x" << std::hex << sf.size << std::dec
-      << ", segments=" << sf.segments
-      << ", tracker_offset=0x" << std::hex << sf.tracker_offset
-      << ", first_segment_offset=0x" << sf.first_segment_offset << std::dec
-      <<")";
-  return out;
-}
-
-std::ostream& operator<<(std::ostream& out, const block_sm_superblock_t& sb)
-{
-  out << "superblock("
-      << "shard_num=" << sb.shard_num
-      << ", segment_size=0x" << std::hex << sb.segment_size
-      << ", block_size=0x" << sb.block_size << std::dec
-      << ", shard_info:";
-  for (auto &sf : sb.shard_infos) {
-    out << sf
-        << ",";
-  }
-  out << "config=" << sb.config
-      << ")";
-  return out;
-}
-
 std::ostream& operator<<(std::ostream &out, Segment::segment_state_t s)
 {
   using state_t = Segment::segment_state_t;
index 5de78d597a524e873fc25658f3028c6352d082f9..0653e73c0a8963d9e8fd666905f8a2fbaa0462c9 100644 (file)
 namespace crimson::os::seastore {
 
 using std::vector;
-struct block_shard_info_t {
-  std::size_t size;
-  std::size_t segments;
-  uint64_t tracker_offset;
-  uint64_t first_segment_offset;
-
-  DENC(block_shard_info_t, v, p) {
-    DENC_START(1, 1, p);
-    denc(v.size, p);
-    denc(v.segments, p);
-    denc(v.tracker_offset, p);
-    denc(v.first_segment_offset, p);
-    DENC_FINISH(p);
-  }
-};
-
-struct block_sm_superblock_t {
-  uint32_t shard_num = 0;
-  size_t segment_size = 0;
-  size_t block_size = 0;
-
-  std::vector<block_shard_info_t> shard_infos;
-
-  device_config_t config;
-
-  DENC(block_sm_superblock_t, v, p) {
-    DENC_START(1, 1, p);
-    denc(v.shard_num, p);
-    denc(v.segment_size, p);
-    denc(v.block_size, p);
-    denc(v.shard_infos, p);
-    denc(v.config, p);
-    DENC_FINISH(p);
-  }
-
-  void validate() const {
-    if(crimson::common::get_conf<bool>("seastore_require_partition_count_match_reactor_count")) {
-      ceph_assert(shard_num == seastar::smp::count);
-    }
-    ceph_assert(block_size > 0);
-    ceph_assert(segment_size > 0 &&
-                segment_size % block_size == 0);
-    ceph_assert_always(segment_size <= SEGMENT_OFF_MAX);
-    for (unsigned int i = 0; i < shard_num; i ++) {
-      ceph_assert(shard_infos[i].size > segment_size &&
-                  shard_infos[i].size % block_size == 0);
-      ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX);
-      ceph_assert(shard_infos[i].segments > 0);
-      ceph_assert_always(shard_infos[i].segments <= DEVICE_SEGMENT_ID_MAX);
-      ceph_assert(shard_infos[i].tracker_offset > 0 &&
-                  shard_infos[i].tracker_offset % block_size == 0);
-      ceph_assert(shard_infos[i].first_segment_offset > shard_infos[i].tracker_offset &&
-                  shard_infos[i].first_segment_offset % block_size == 0);
-    }
-    ceph_assert(config.spec.magic != 0);
-    ceph_assert(get_default_backend_of_device(config.spec.dtype) ==
-               backend_type_t::SEGMENTED);
-    ceph_assert(config.spec.id <= DEVICE_ID_MAX_VALID);
-    if (!config.major_dev) {
-      ceph_assert(config.secondary_devices.size() == 0);
-    }
-    for (const auto& [k, v] : config.secondary_devices) {
-      ceph_assert(k != config.spec.id);
-      ceph_assert(k <= DEVICE_ID_MAX_VALID);
-      ceph_assert(k == v.id);
-      ceph_assert(v.magic != 0);
-      ceph_assert(v.dtype > device_type_t::NONE);
-      ceph_assert(v.dtype < device_type_t::NUM_TYPES);
-    }
-  }
-};
-
-std::ostream& operator<<(std::ostream&, const block_shard_info_t&);
-std::ostream& operator<<(std::ostream&, const block_sm_superblock_t&);
 
 class Segment : public boost::intrusive_ref_counter<
   Segment,
@@ -204,15 +130,3 @@ public:
 };
 
 }
-
-WRITE_CLASS_DENC(
-  crimson::os::seastore::block_shard_info_t
-)
-WRITE_CLASS_DENC(
-  crimson::os::seastore::block_sm_superblock_t
-)
-
-#if FMT_VERSION >= 90000
-template <> struct fmt::formatter<crimson::os::seastore::block_shard_info_t> : fmt::ostream_formatter {};
-template <> struct fmt::formatter<crimson::os::seastore::block_sm_superblock_t> : fmt::ostream_formatter {};
-#endif
index d4f2129338b4105b9b58bb636d7640c1759e5177..fcc1eced90969cb2e8a32fe4b8d4ccc2c2f6ed51 100644 (file)
@@ -238,7 +238,7 @@ SegmentStateTracker::read_in(
 }
 using std::vector;
 static
-block_sm_superblock_t make_superblock(
+device_superblock_t make_superblock(
   device_id_t device_id,
   device_config_t sm_config,
   const seastar::stat_data &data)
@@ -262,7 +262,7 @@ block_sm_superblock_t make_superblock(
   size_t segments = (size - tracker_off - total_tracker_size) / config_segment_size;
   size_t segments_per_shard = segments / seastar::smp::count;
 
-  vector<block_shard_info_t> shard_infos(seastar::smp::count);
+  vector<device_shard_info_t> shard_infos(seastar::smp::count);
   for (unsigned int i = 0; i < seastar::smp::count; i++) {
     shard_infos[i].size = segments_per_shard * config_segment_size;
     shard_infos[i].segments = segments_per_shard;
@@ -280,13 +280,12 @@ block_sm_superblock_t make_superblock(
     INFO("shard {} infos: {}", i, shard_infos[i]);
   }
 
-  return block_sm_superblock_t{
+  return device_superblock_t::make_segmented(
     seastar::smp::count,
     config_segment_size,
     data.block_size,
-    shard_infos,
-    std::move(sm_config)
-  };
+    std::move(sm_config),
+    std::move(shard_infos));
 }
 
 using open_device_ret = 
@@ -326,31 +325,33 @@ BlockSegmentManager::access_ertr::future<>
 write_superblock(
     device_id_t device_id,
     seastar::file &device,
-    block_sm_superblock_t sb)
+    device_superblock_t sb)
 {
   LOG_PREFIX(block_write_superblock);
   DEBUG("{} write {}", device_id_printer_t{device_id}, sb);
   sb.validate();
-  assert(ceph::encoded_sizeof<block_sm_superblock_t>(sb) <
-        sb.block_size);
+  assert(SUPERBLOCK_HEADER_PREFIX +
+        ceph::encoded_sizeof<device_superblock_t>(sb) < sb.block_size);
   return seastar::do_with(
     bufferptr(ceph::buffer::create_page_aligned(sb.block_size)),
     [=, &device](auto &bp)
   {
-    //  Encode SEASTORE_SUPERBLOCK_SIGN at offset 0 before
-    //  encoding anything else
+    bp.zero();
+    // magic at offset 0, followed by 37 bytes of null padding (already zero)
+    std::memcpy(bp.c_str(),
+               CRIMSON_DEVICE_SUPERBLOCK_MAGIC.data(), SUPERBLOCK_MAGIC_SIZE);
+    // DENC-encoded superblock at offset 60
     bufferlist bl;
-    bl.append(SEASTORE_SUPERBLOCK_SIGN);
     encode(sb, bl);
     auto iter = bl.begin();
-    assert(bl.length() < sb.block_size);
-    iter.copy(bl.length(), bp.c_str());
+    assert(SUPERBLOCK_HEADER_PREFIX + bl.length() < sb.block_size);
+    iter.copy(bl.length(), bp.c_str() + SUPERBLOCK_HEADER_PREFIX);
     return do_write(device_id, device, 0, bp);
   });
 }
 
 static
-BlockSegmentManager::access_ertr::future<block_sm_superblock_t>
+BlockSegmentManager::access_ertr::future<device_superblock_t>
 read_superblock(seastar::file &device, seastar::stat_data sd)
 {
   LOG_PREFIX(block_read_superblock);
@@ -366,28 +367,32 @@ read_superblock(seastar::file &device, seastar::stat_data sd)
       bp.length(),
       bp
     ).safe_then([=, &bp] {
+      // verify magic at offset 0
+      superblock_magic_t disk_magic;
+      std::memcpy(disk_magic.data(), bp.c_str(), SUPERBLOCK_MAGIC_SIZE);
+      if (disk_magic != CRIMSON_DEVICE_SUPERBLOCK_MAGIC) {
+        ERROR("invalid superblock magic, got: {:02x}",
+          fmt::join(
+            std::views::transform(disk_magic,
+              [](std::byte b) { return std::to_integer<uint8_t>(b); }),
+            " "));
+        ceph_abort_msg("invalid superblock magic");
+      }
+      // decode DENC superblock from offset 60
       bufferlist bl;
-      bl.push_back(bp);
-      block_sm_superblock_t ret;
+      bl.append(bp.c_str() + SUPERBLOCK_HEADER_PREFIX,
+               bp.length() - SUPERBLOCK_HEADER_PREFIX);
+      device_superblock_t ret;
       auto bliter = bl.cbegin();
-      // Validate the magic prefix
-      std::string sb_magic;
-      bliter.copy(SEASTORE_SUPERBLOCK_SIGN_LEN, sb_magic);
-      if (sb_magic != SEASTORE_SUPERBLOCK_SIGN) {
-        ERROR("invalid superblock signature: got '{}' expected '{}'",
-             sb_magic, SEASTORE_SUPERBLOCK_SIGN);
-        ceph_abort_msg("invalid superblock signature");
-      }
-
       try {
         decode(ret, bliter);
       } catch (...) {
         ERROR("got decode error!");
         ceph_assert(0 == "invalid superblock");
       }
-      assert(ceph::encoded_sizeof<block_sm_superblock_t>(ret) +
-            SEASTORE_SUPERBLOCK_SIGN_LEN <= sd.block_size);
-      return BlockSegmentManager::access_ertr::future<block_sm_superblock_t>(
+      assert(SUPERBLOCK_HEADER_PREFIX +
+            ceph::encoded_sizeof<device_superblock_t>(ret) <= sd.block_size);
+      return BlockSegmentManager::access_ertr::future<device_superblock_t>(
         BlockSegmentManager::access_ertr::ready_future_marker{},
         ret);
     });
@@ -548,7 +553,7 @@ BlockSegmentManager::mount_ret BlockSegmentManager::shard_mount()
       device_id_printer_t{get_device_id()},
         seastar::this_shard_id() + seastar::smp::count * store_index,
         sb.shard_num);
-      store_active = false;
+      shard_status = false;
       return mount_ertr::now();
     }
     shard_info = sb.shard_infos[seastar::this_shard_id() + seastar::smp::count * store_index];
@@ -556,7 +561,7 @@ BlockSegmentManager::mount_ret BlockSegmentManager::shard_mount()
     sb.validate();
     superblock = sb;
     stats.data_read.increment(
-        ceph::encoded_sizeof<block_sm_superblock_t>(superblock));
+        ceph::encoded_sizeof<device_superblock_t>(superblock));
     tracker = std::make_unique<SegmentStateTracker>(
       shard_info.segments,
       superblock.block_size);
@@ -610,7 +615,7 @@ BlockSegmentManager::mkfs_ret BlockSegmentManager::primary_mkfs(
 
   seastar::file device;
   seastar::stat_data stat;
-  block_sm_superblock_t sb;
+  device_superblock_t sb;
   std::unique_ptr<SegmentStateTracker> tracker;
 
   using crimson::common::get_conf;
@@ -625,7 +630,7 @@ BlockSegmentManager::mkfs_ret BlockSegmentManager::primary_mkfs(
     std::ignore = device.close();
   });
   sb = make_superblock(get_device_id(), sm_config, stat);
-  stats.metadata_write.increment(ceph::encoded_sizeof<block_sm_superblock_t>(sb));
+  stats.metadata_write.increment(ceph::encoded_sizeof<device_superblock_t>(sb));
   co_await write_superblock(get_device_id(), device, sb);
   INFO("{} complete", device_id_printer_t{get_device_id()});
 }
@@ -831,7 +836,7 @@ SegmentManager::read_ertr::future<> BlockSegmentManager::read(
 void BlockSegmentManager::register_metrics(store_index_t store_index)
 {
   LOG_PREFIX(BlockSegmentManager::register_metrics);
-  if (!store_active) {
+  if (!shard_status) {
     INFO("{} shard {} is not active, skip registering metrics",
          device_id_printer_t{get_device_id()}, store_index);
     return;
index 770284aad66629e6a770ec4449faba4eea42b237..dd9184898ad37bb90945d1c0ab61d8c036b9ef70 100644 (file)
@@ -220,8 +220,8 @@ private:
 
   std::string device_path;
   std::unique_ptr<SegmentStateTracker> tracker;
-  block_shard_info_t shard_info;
-  block_sm_superblock_t superblock;
+  device_shard_info_t shard_info;
+  device_superblock_t superblock;
   seastar::file device;
 
   void set_device_id(device_id_t id) {
@@ -260,7 +260,8 @@ private:
 
   uint32_t device_shard_nums = 0;
   store_index_t store_index = 0;
-  bool store_active = true;
+  bool shard_status = true;
+
   class MultiShardDevices {
     public:
       std::vector<std::unique_ptr<BlockSegmentManager>> mshard_devices;
index 2d4ce8c1bfbcd187e750ebb141ed8a07ba4d72cc..2372d9b6d02384b8579244ad992f78cdc7ea28da 100644 (file)
@@ -91,9 +91,9 @@ static open_device_ret open_device(
   );
 }
 
-static zbd_sm_metadata_t make_metadata(
+static device_superblock_t make_metadata(
   uint64_t total_size,
-  seastore_meta_t meta,
+  device_config_t config,
   const seastar::stat_data &data,
   size_t zone_size_sectors,
   size_t zone_capacity_sectors,
@@ -142,7 +142,7 @@ static zbd_sm_metadata_t make_metadata(
     per_shard_segments,
     per_shard_available_size);
 
-  std::vector<zbd_shard_info_t> shard_infos(seastar::smp::count);
+  std::vector<device_shard_info_t> shard_infos(seastar::smp::count);
   for (unsigned int i = 0; i < seastar::smp::count; i++) {
     shard_infos[i].size = per_shard_available_size;
     shard_infos[i].segments = per_shard_segments;
@@ -152,16 +152,15 @@ static zbd_sm_metadata_t make_metadata(
          i, shard_infos[i].first_segment_offset);
   }
 
-  zbd_sm_metadata_t ret = zbd_sm_metadata_t{
+  auto ret = device_superblock_t::make_zbd(
     seastar::smp::count,
     segment_size,
-    zone_capacity * zones_per_segment,
-    zones_per_segment,
-    zone_capacity,
     data.block_size,
+    std::move(config),
     zone_size,
-    shard_infos,
-    meta};
+    zone_capacity,
+    zones_per_segment,
+    std::move(shard_infos));
   ret.validate();
   return ret;
 }
@@ -343,21 +342,26 @@ static write_ertr::future<> do_writev(
 }
 
 static ZBDSegmentManager::access_ertr::future<>
-write_metadata(seastar::file &device, zbd_sm_metadata_t sb)
+write_metadata(seastar::file &device, device_superblock_t sb)
 {
-  assert(ceph::encoded_sizeof_bounded<zbd_sm_metadata_t>() <
-        sb.block_size);
+  assert(SUPERBLOCK_HEADER_PREFIX +
+        ceph::encoded_sizeof<device_superblock_t>(sb) < sb.block_size);
   return seastar::do_with(
     bufferptr(ceph::buffer::create_page_aligned(sb.block_size)),
     [=, &device](auto &bp) {
       LOG_PREFIX(ZBDSegmentManager::write_metadata);
       DEBUG("block_size 0x{:x}", sb.block_size);
+      bp.zero();
+      // magic at offset 0, followed by 37 bytes of null padding (just zeroed)
+      std::memcpy(bp.c_str(),
+                 CRIMSON_DEVICE_SUPERBLOCK_MAGIC.data(), SUPERBLOCK_MAGIC_SIZE);
+      // DENC-encoded superblock at offset 60
       bufferlist bl;
       encode(sb, bl);
       auto iter = bl.begin();
-      assert(bl.length() < sb.block_size);
+      assert(SUPERBLOCK_HEADER_PREFIX + bl.length() < sb.block_size);
       DEBUG("buffer length 0x{:x}", bl.length());
-      iter.copy(bl.length(), bp.c_str());
+      iter.copy(bl.length(), bp.c_str() + SUPERBLOCK_HEADER_PREFIX);
       DEBUG("doing writeout");
       return do_write(device, 0, bp);
     });
@@ -430,11 +434,9 @@ static read_ertr::future<> do_readv(
 }
 
 static
-ZBDSegmentManager::access_ertr::future<zbd_sm_metadata_t>
+ZBDSegmentManager::access_ertr::future<device_superblock_t>
 read_metadata(seastar::file &device, seastar::stat_data sd)
 {
-  assert(ceph::encoded_sizeof_bounded<zbd_sm_metadata_t>() <
-        sd.block_size);
   return seastar::do_with(
     bufferptr(ceph::buffer::create_page_aligned(sd.block_size)),
     [=, &device](auto &bp) {
@@ -444,13 +446,32 @@ read_metadata(seastar::file &device, seastar::stat_data sd)
        bp.length(),
        bp
       ).safe_then([=, &bp] {
+        LOG_PREFIX(ZBDSegmentManager::read_metadata);
+       // verify magic at offset 0
+       superblock_magic_t disk_magic;
+       std::memcpy(disk_magic.data(), bp.c_str(), SUPERBLOCK_MAGIC_SIZE);
+       if (disk_magic != CRIMSON_DEVICE_SUPERBLOCK_MAGIC) {
+          ERROR("invalid superblock magic, got: {:02x}",
+            fmt::join(
+              std::views::transform(disk_magic,
+                [](std::byte b) { return std::to_integer<uint8_t>(b); }),
+              " "));
+         ceph_abort_msg("invalid superblock magic");
+       }
+       // decode DENC superblock from offset 60
        bufferlist bl;
-       bl.push_back(bp);
-       zbd_sm_metadata_t ret;
+       bl.append(bp.c_str() + SUPERBLOCK_HEADER_PREFIX,
+                 bp.length() - SUPERBLOCK_HEADER_PREFIX);
+       device_superblock_t ret;
        auto bliter = bl.cbegin();
-       decode(ret, bliter);
+        try {
+         decode(ret, bliter);
+        } catch (...) {
+          ERROR("got decode error!");
+          ceph_abort_msg("failed to decode superblock");
+        }
         ret.validate();
-       return ZBDSegmentManager::access_ertr::future<zbd_sm_metadata_t>(
+       return ZBDSegmentManager::access_ertr::future<device_superblock_t>(
          ZBDSegmentManager::access_ertr::ready_future_marker{},
          ret);
       });
@@ -535,7 +556,7 @@ ZBDSegmentManager::mkfs_ret ZBDSegmentManager::primary_mkfs(
   return seastar::do_with(
     seastar::file{},
     seastar::stat_data{},
-    zbd_sm_metadata_t{},
+    device_superblock_t{},
     size_t(),
     size_t(),
     size_t(),
@@ -581,7 +602,7 @@ ZBDSegmentManager::mkfs_ret ZBDSegmentManager::primary_mkfs(
                 zone_size_sects, zone_capacity_sects);
          sb = make_metadata(
             size,
-           config.meta,
+           config,
            stat,
            zone_size_sects,
            zone_capacity_sects,
@@ -589,7 +610,7 @@ ZBDSegmentManager::mkfs_ret ZBDSegmentManager::primary_mkfs(
            nr_zones);
          metadata = sb;
          stats.metadata_write.increment(
-           ceph::encoded_sizeof_bounded<zbd_sm_metadata_t>());
+           ceph::encoded_sizeof<device_superblock_t>(sb));
          DEBUG("Wrote to stats.");
          return write_metadata(device, sb);
        }).finally([&, FNAME] {
@@ -836,17 +857,17 @@ Segment::write_ertr::future<> ZBDSegmentManager::segment_write(
 
 device_id_t ZBDSegmentManager::get_device_id() const
 {
-  return metadata.device_id;
+  return metadata.config.spec.id;
 };
 
 secondary_device_set_t& ZBDSegmentManager::get_secondary_devices()
 {
-  return metadata.secondary_devices;
+  return metadata.config.secondary_devices;
 };
 
 magic_t ZBDSegmentManager::get_magic() const
 {
-  return metadata.magic;
+  return metadata.config.spec.magic;
 };
 
 segment_off_t ZBDSegment::get_write_capacity() const
index 382223b138f7cb4e4c8b019da59fb658eec383c1..c5c110bc433b52b1000712c4ac3e678e9c1455cf 100644 (file)
 
 namespace crimson::os::seastore::segment_manager::zbd {
 
-  struct zbd_shard_info_t {
-    size_t size = 0;
-    size_t segments = 0;
-    size_t first_segment_offset = 0;
-
-    DENC(zbd_shard_info_t, v, p) {
-      DENC_START(1, 1, p);
-      denc(v.size, p);
-      denc(v.segments, p);
-      denc(v.first_segment_offset, p);
-      DENC_FINISH(p);
-    }
-  };
-
-  struct zbd_sm_metadata_t {
-    uint32_t shard_num = 0;
-    size_t segment_size = 0;
-    size_t segment_capacity = 0;
-    size_t zones_per_segment = 0;
-    size_t zone_capacity = 0;
-    size_t block_size = 0;
-    size_t zone_size = 0;
-
-    std::vector<zbd_shard_info_t> shard_infos;
-
-    seastore_meta_t meta;
-    
-    bool major_dev = false;
-    magic_t magic = 0;
-    device_type_t dtype = device_type_t::NONE;
-    device_id_t device_id = 0;
-    secondary_device_set_t secondary_devices;
-
-    DENC(zbd_sm_metadata_t, v, p) {
-      DENC_START(1, 1, p);
-      denc(v.shard_num, p);
-      denc(v.segment_size, p);
-      denc(v.segment_capacity, p);
-      denc(v.zones_per_segment, p);
-      denc(v.zone_capacity, p);
-      denc(v.block_size, p);
-      denc(v.zone_size, p);
-      denc(v.shard_infos, p);
-      denc(v.meta, p);
-      denc(v.magic, p);
-      denc(v.dtype, p);
-      denc(v.device_id, p);
-      if (v.major_dev) {
-       denc(v.secondary_devices, p);
-      }
-      DENC_FINISH(p);
-    }
-
-    void validate() const {
-      for (unsigned int i = 0; i < seastar::smp::count; i++) {
-        ceph_assert_always(shard_infos[i].size > 0);
-        ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX);
-        ceph_assert_always(shard_infos[i].segments > 0);
-        ceph_assert_always(shard_infos[i].segments <= DEVICE_SEGMENT_ID_MAX);
-      }
-      ceph_assert_always(segment_capacity > 0);
-      ceph_assert_always(segment_capacity <= SEGMENT_OFF_MAX);
-    }
-  };
-
   using write_ertr = crimson::errorator<crimson::ct_error::input_output_error>;
   using read_ertr = crimson::errorator<crimson::ct_error::input_output_error>;
 
@@ -117,7 +52,7 @@ namespace crimson::os::seastore::segment_manager::zbd {
     write_ertr::future<> write_padding_bytes(size_t padding_bytes);
   };
 
-  class ZBDSegmentManager final : public SegmentManager{
+  class ZBDSegmentManager final : public SegmentManager {
   // interfaces used by Device
   public:
     seastar::future<> start(uint32_t shard_nums) override;
@@ -145,7 +80,8 @@ namespace crimson::os::seastore::segment_manager::zbd {
     read_ertr::future<> read(
       paddr_t addr, 
       size_t len, 
-      ceph::bufferptr &out) final;
+      ceph::bufferptr &out) override;
+
     read_ertr::future<> readv(
       paddr_t addr,
       std::vector<bufferptr> ptrs) override;
@@ -154,7 +90,7 @@ namespace crimson::os::seastore::segment_manager::zbd {
 
     device_type_t get_device_type() const override {
       return device_type_t::ZBD;
-    }
+    };
 
     size_t get_available_size() const override {
       return shard_info.size;
@@ -169,7 +105,7 @@ namespace crimson::os::seastore::segment_manager::zbd {
     };
 
     const seastore_meta_t &get_meta() const {
-      return metadata.meta;
+      return metadata.config.meta;
     };
 
     device_id_t get_device_id() const override;
@@ -186,10 +122,11 @@ namespace crimson::os::seastore::segment_manager::zbd {
   private:
     friend class ZBDSegment;
     std::string device_path;
-    zbd_shard_info_t shard_info;
-    zbd_sm_metadata_t metadata;
+    device_shard_info_t shard_info;
+    device_superblock_t metadata;
     seastar::file device;
     uint32_t nr_zones;
+
     struct effort_t {
       uint64_t num = 0;
       uint64_t bytes = 0;
@@ -237,6 +174,7 @@ namespace crimson::os::seastore::segment_manager::zbd {
     uint32_t device_shard_nums = 0;
     store_index_t store_index = 0;
     bool shard_status = true;
+
     class MultiShardDevices {
     public:
       std::vector<std::unique_ptr<ZBDSegmentManager>> mshard_devices;
@@ -260,9 +198,3 @@ namespace crimson::os::seastore::segment_manager::zbd {
 
 }
 
-WRITE_CLASS_DENC_BOUNDED(
-  crimson::os::seastore::segment_manager::zbd::zbd_shard_info_t
-)
-WRITE_CLASS_DENC_BOUNDED(
-  crimson::os::seastore::segment_manager::zbd::zbd_sm_metadata_t
-)
index 39e14022005cd1f878c4d873a270bceabb04fbd4..338c2b2a2ddd84332c1265aeb8a38be2937184e5 100644 (file)
@@ -456,7 +456,7 @@ public:
   explicit SeastoreMetaReader(const std::string& path, const std::string& device_type) :
     m_data_path(path), m_device_type(device_type) {}
 
-  tl::expected<crimson::os::seastore::block_sm_superblock_t, std::string> load_seastore_superblock() {
+  tl::expected<crimson::os::seastore::device_superblock_t, std::string> load_seastore_superblock() {
     try {
       std::string block_path = m_data_path + "/block";
 
@@ -479,26 +479,22 @@ public:
 
       auto bliter = bl.cbegin();
 
-      // fmt::println(std::cout, "Device type: {}", m_device_type);
-
-      if (m_device_type != "RANDOM_BLOCK_SSD") {
-        // TODO this Signature is only applicable for segment devices(SSD/HDD) not
-        // for other two devices like ZBD/RANDOM_BLOCK_SSD
-        constexpr const char SEASTORE_SUPERBLOCK_SIGN[] = "seastore block device\n";
-        constexpr std::size_t SEASTORE_SUPERBLOCK_SIGN_LEN = sizeof(SEASTORE_SUPERBLOCK_SIGN) - 1;
-
-        // Validate the magic prefix
-        std::string sb_magic;
-        bliter.copy(SEASTORE_SUPERBLOCK_SIGN_LEN, sb_magic);
-        if (sb_magic != SEASTORE_SUPERBLOCK_SIGN) {
-          return tl::unexpected("invalid superblock signature " + block_path);
-        }
+      // Check magic string before attempting full decode, so that a
+      // non-crimson device produces a clear error instead of a decode
+      // exception.
+      std::string magic;
+      try {
+        denc(magic, bliter);
+      } catch (...) {}
+      if (magic != crimson::os::seastore::CRIMSON_DEVICE_SUPERBLOCK_MAGIC) {
+        return tl::unexpected("invalid superblock signature in " + block_path);
       }
 
-      crimson::os::seastore::block_sm_superblock_t superblock;
+      bliter = bl.cbegin();
+      crimson::os::seastore::device_superblock_t superblock;
       decode(superblock, bliter);
 
-      ceph_assert(ceph::encoded_sizeof<crimson::os::seastore::block_sm_superblock_t>(superblock) <
+      ceph_assert(ceph::encoded_sizeof<crimson::os::seastore::device_superblock_t>(superblock) <
                   block_size);
 
       return superblock;