git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore: support other devices
author Chunmei Liu <chunmei.liu@ibm.com>
Sat, 18 Oct 2025 00:17:44 +0000 (00:17 +0000)
committer chunmei liu <chunmei.liu@ibm.com>
Tue, 3 Feb 2026 22:42:01 +0000 (14:42 -0800)
Signed-off-by: Chunmei Liu <chunmei.liu@ibm.com>
src/crimson/os/seastore/random_block_manager.h
src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
src/crimson/os/seastore/random_block_manager/nvme_block_device.h
src/crimson/os/seastore/random_block_manager/rbm_device.cc
src/crimson/os/seastore/random_block_manager/rbm_device.h
src/crimson/os/seastore/segment_manager/zbd.cc
src/crimson/os/seastore/segment_manager/zbd.h
src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc

index 88a7ff71e75a14650ed26fd1a256e953fb2f6da9..66064b38a4d75f60bf0e3d82c08c75644b6cad53 100644 (file)
@@ -71,7 +71,6 @@ struct rbm_superblock_t {
   }
 
   void validate() const {
-    ceph_assert(shard_num == seastar::smp::count);
     ceph_assert(block_size > 0);
     for (unsigned int i = 0; i < seastar::smp::count; i ++) {
       ceph_assert(shard_infos[i].size > block_size &&
index 8938b8f344e844fe8e7226c9a2f6a34f2fe47b65..61127a8163aa5298f1cdc0453e35c3991959efc0 100644 (file)
@@ -19,9 +19,30 @@ SET_SUBSYS(seastore_device);
 
 namespace crimson::os::seastore::random_block_device::nvme {
 
+seastar::future<> NVMeBlockDevice::start(unsigned int shard_nums)
+{
+  LOG_PREFIX(NVMeBlockDevice::start);
+  device_shard_nums = shard_nums;
+  // Ceiling division: instances each reactor creates so that smp::count * instances >= shard_nums.
+  const auto per_reactor = (device_shard_nums + seastar::smp::count - 1) / seastar::smp::count;
+  DEBUG("device_shard_nums={} seastar::smp={}, num_shard_services={}", device_shard_nums, seastar::smp::count, per_reactor);
+  return shard_devices.start(per_reactor, device_path);
+}
+
+seastar::future<> NVMeBlockDevice::stop()
+{
+  return shard_devices.stop();  // stops the sharded MultiShardDevices service launched by start()
+}
+
+Device& NVMeBlockDevice::get_sharded_device(unsigned int store_index)
+{  // Returns the store_index-th device held by the calling reactor's local MultiShardDevices.
+  assert(store_index < shard_devices.local().mshard_devices.size());  // the bound is enforced by this assert only
+  return *shard_devices.local().mshard_devices[store_index];
+}
+
 NVMeBlockDevice::mkfs_ret NVMeBlockDevice::mkfs(device_config_t config) {
   using crimson::common::get_conf;
-  co_await shard_devices.local().do_primary_mkfs(config,
+  co_await shard_devices.local().mshard_devices[0]->do_primary_mkfs(config,
     seastar::smp::count,
     get_conf<Option::size_t>("seastore_cbjournal_size") 
   );
@@ -76,10 +97,12 @@ NVMeBlockDevice::mount_ret NVMeBlockDevice::mount()
   LOG_PREFIX(NVMeBlockDevice::mount);
   DEBUG("mount");
   co_await shard_devices.invoke_on_all([](auto &local_device) {
-    return local_device.do_shard_mount(
-    ).handle_error(
-      crimson::ct_error::assert_all{
-       "Invalid error in NVMeBlockDevice::do_shard_mount"
+    return seastar::do_for_each(local_device.mshard_devices, [](auto& mshard_device) {
+      return mshard_device->do_shard_mount(
+      ).handle_error(
+        crimson::ct_error::assert_all{
+          "Invalid error in NVMeBlockDevice::do_shard_mount"
+      });
     });
   });
 
index f8535e8417aff62d1548fe68c735a7e15d8667cb..b148cdb0e24247cdad9f22d4f790310d24f71786 100644 (file)
@@ -212,7 +212,9 @@ public:
    * atomic_write_unit does not require fsync().
    */
 
-  NVMeBlockDevice(std::string device_path) : device_path(device_path) {}
+  NVMeBlockDevice(std::string device_path, unsigned int store_index = 0)
+    : RBMDevice(store_index),
+      device_path(device_path) {}
   ~NVMeBlockDevice() = default;
 
   open_ertr::future<> open(
@@ -282,17 +284,11 @@ public:
     return device_path;
   }
 
-  seastar::future<> start() final {
-    return shard_devices.start(device_path);
-  }
+  seastar::future<> start(unsigned int shard_nums) final;
 
-  seastar::future<> stop() final {
-    return shard_devices.stop();
-  }
+  seastar::future<> stop() final;
 
-  Device& get_sharded_device() final {
-    return shard_devices.local();
-  }
+  Device& get_sharded_device(unsigned int store_index = 0) final;
 
   uint64_t get_preffered_write_granularity() const { return write_granularity; }
   uint64_t get_preffered_write_alignment() const { return write_alignment; }
@@ -372,7 +368,26 @@ private:
 
   int namespace_id; // TODO: multi namespaces
   std::string device_path;
-  seastar::sharded<NVMeBlockDevice> shard_devices;
+
+  // Container instantiated once per reactor through seastar::sharded; owns that reactor's devices.
+  class MultiShardDevices {
+  public:
+    // Element i is the device constructed with store_index == i.
+    std::vector<std::unique_ptr<NVMeBlockDevice>> mshard_devices;
+
+    // Creates `count` devices that all refer to the same device `path`.
+    MultiShardDevices(size_t count, const std::string& path)
+    {
+      mshard_devices.reserve(count);
+      for (size_t store_index = 0; store_index < count; ++store_index) {
+        mshard_devices.emplace_back(
+          std::make_unique<NVMeBlockDevice>(path, store_index));
+      }
+    }
+    // The unique_ptr elements release their devices; no manual clear() is needed.
+    ~MultiShardDevices() = default;
+  };
+  seastar::sharded<MultiShardDevices> shard_devices;
 };
 
 }
index 91d42170666c2fa71d17a55de158c9f4ec19b0d4..3e0663c061af42aef5147380479ed537a1ae8ed5 100644 (file)
@@ -176,7 +176,7 @@ RBMDevice::mount_ret RBMDevice::do_shard_mount()
     return std::nullopt;
   }));
   if (!st) {
-    co_await mount_ertr::future<>(
+    co_return co_await mount_ertr::future<>(
       crimson::ct_error::input_output_error::make()
     );
 
@@ -190,11 +190,45 @@ RBMDevice::mount_ret RBMDevice::do_shard_mount()
     "Invalid error read_rbm_superblock in RBMDevice::do_shard_mount"}
   );
   LOG_PREFIX(RBMDevice::do_shard_mount);
-  shard_info = s.shard_infos[seastar::this_shard_id()];
+  if(seastar::this_shard_id() + seastar::smp::count * store_index >= s.shard_num) {
+    INFO("{} shard_id {} out of range {}",
+         device_id_printer_t{get_device_id()},
+         seastar::this_shard_id() + seastar::smp::count * store_index,
+         s.shard_num);
+    shard_status = false;
+    co_return;
+  }
+  shard_info = s.shard_infos[seastar::this_shard_id() + seastar::smp::count * store_index];
   INFO("{} read {}", device_id_printer_t{get_device_id()}, shard_info);
   s.validate();
 }
 
+read_ertr::future<unsigned int> RBMDevice::get_shard_nums()
+{  // Opens the device, reads the RBM superblock at RBM_START_ADDRESS and returns its shard_num.
+  co_await open(get_device_path(),
+    seastar::open_flags::rw | seastar::open_flags::dsync
+  ).handle_error(
+    crimson::ct_error::assert_all{
+    "Invalid error open in RBMDevice::get_shard_nums"}
+  );
+  // NOTE(review): the device opened above is not closed in this function -- confirm the caller closes it.
+  auto st = co_await stat_device(
+  ).handle_error(
+    crimson::ct_error::assert_all{
+      "Invalid error stat_device in RBMDevice::get_shard_nums"}
+  );
+
+  assert(st.block_size > 0);
+  super.block_size = st.block_size;  // presumably read_rbm_superblock() relies on super.block_size -- verify
+  auto sb = co_await read_rbm_superblock(RBM_START_ADDRESS
+  ).handle_error(
+    crimson::ct_error::assert_all{
+      "Invalid error read_rbm_superblock in RBMDevice::get_shard_nums"}
+  );
+
+  co_return sb.shard_num;
+}
+
 EphemeralRBMDeviceRef create_test_ephemeral(uint64_t journal_size, uint64_t data_size) {
   return EphemeralRBMDeviceRef(
     new EphemeralRBMDevice(journal_size + data_size + 
index ad79b61a5cfa97b21d65bbf3e566e62d72e777ab..fa9b33ac3612347ea711031c702f2ab9a0588134 100644 (file)
@@ -80,8 +80,12 @@ public:
 protected:
   rbm_superblock_t super;
   rbm_shard_info_t shard_info;
+  unsigned int device_shard_nums = 0;
+  unsigned int store_index = 0;
+  bool shard_status = true;
 public:
-  RBMDevice() {}
+  RBMDevice(unsigned int store_index = 0)
+  : store_index(store_index) {}
   virtual ~RBMDevice() = default;
 
   template <typename T>
@@ -115,6 +119,8 @@ public:
   std::size_t get_available_size() const { return super.size; }
   extent_len_t get_block_size() const { return super.block_size; }
 
+  read_ertr::future<unsigned int> get_shard_nums() final;
+
   virtual read_ertr::future<> read(
     uint64_t offset,
     bufferptr &bptr) = 0;
index cb343e0fa0ca1f23ab837caff86e94492b955c88..5213ce3f3b070d46212f2dc7356a14949625171c 100644 (file)
@@ -45,6 +45,27 @@ template <> struct fmt::formatter<z_op>: fmt::formatter<std::string_view> {
 
 namespace crimson::os::seastore::segment_manager::zbd {
 
+seastar::future<> ZBDSegmentManager::start(unsigned int shard_nums)
+{
+  LOG_PREFIX(ZBDSegmentManager::start);
+  device_shard_nums = shard_nums;
+  // Ceiling division: instances each reactor creates so that smp::count * instances >= shard_nums.
+  const auto per_reactor = (device_shard_nums + seastar::smp::count - 1) / seastar::smp::count;
+  INFO("device_shard_nums={} seastar::smp={}, num_shard_services={}", device_shard_nums, seastar::smp::count, per_reactor);
+  return shard_devices.start(per_reactor, device_path);
+}
+
+seastar::future<> ZBDSegmentManager::stop()
+{
+  return shard_devices.stop();  // stops the sharded MultiShardDevices service launched by start()
+}
+
+Device& ZBDSegmentManager::get_sharded_device(unsigned int store_index)
+{  // Returns the store_index-th segment manager held by the calling reactor's local MultiShardDevices.
+  assert(store_index < shard_devices.local().mshard_devices.size());  // the bound is enforced by this assert only
+  return *shard_devices.local().mshard_devices[store_index];
+}
+
 using open_device_ret = ZBDSegmentManager::access_ertr::future<
   std::pair<seastar::file, seastar::stat_data>>;
 static open_device_ret open_device(
@@ -399,13 +420,31 @@ read_metadata(seastar::file &device, seastar::stat_data sd)
     });
 }
 
+ZBDSegmentManager::read_ertr::future<unsigned int> ZBDSegmentManager::get_shard_nums()
+{  // Opens device_path, reads the on-disk metadata and yields its shard_num.
+  return open_device(
+    device_path, seastar::open_flags::rw
+  ).safe_then([this](auto p) {
+    device = std::move(p.first);  // NOTE(review): the opened file stays in `device`; not closed here -- confirm the caller closes it
+    auto sd = p.second;
+    return read_metadata(device, sd);
+  }).safe_then([](auto meta) {
+    return read_ertr::make_ready_future<unsigned int>(meta.shard_num);  // value type must match the declared future<unsigned int>
+  }).handle_error(
+    crimson::ct_error::assert_all{
+      "Invalid error in ZBDSegmentManager::get_shard_nums"
+  });
+}
+
 ZBDSegmentManager::mount_ret ZBDSegmentManager::mount()
 {
   return shard_devices.invoke_on_all([](auto &local_device) {
-    return local_device.shard_mount(
-    ).handle_error(
-      crimson::ct_error::assert_all{
-        "Invalid error in ZBDSegmentManager::mount"
+    return seastar::do_for_each(local_device.mshard_devices, [](auto& mshard_device) {
+      return mshard_device->shard_mount(
+      ).handle_error(
+        crimson::ct_error::assert_all{
+          "Invalid error in ZBDSegmentManager::mount"
+      });
     });
   });
 }
@@ -419,7 +458,15 @@ ZBDSegmentManager::mount_ret ZBDSegmentManager::shard_mount()
     auto sd = p.second;
     return read_metadata(device, sd);
   }).safe_then([=, this](auto meta){
-    shard_info = meta.shard_infos[seastar::this_shard_id()];
+    // Slot of this instance in the on-disk shard table; slots >= meta.shard_num are flagged and load no shard_info.
+    const auto shard_slot = seastar::this_shard_id() + seastar::smp::count * store_index;
+    if (shard_slot >= meta.shard_num) {
+      INFO("{} shard_id {} out of range {}",
+        device_id_printer_t{get_device_id()}, shard_slot, meta.shard_num);
+      shard_status = false;
+      return mount_ertr::now();
+    }
+    shard_info = meta.shard_infos[shard_slot];
     metadata = meta;
     return mount_ertr::now();
   });
@@ -428,13 +475,15 @@ ZBDSegmentManager::mount_ret ZBDSegmentManager::shard_mount()
 ZBDSegmentManager::mkfs_ret ZBDSegmentManager::mkfs(
   device_config_t config)
 {
-  return shard_devices.local().primary_mkfs(config
+  return shard_devices.local().mshard_devices[0]->primary_mkfs(config
     ).safe_then([this] {
     return shard_devices.invoke_on_all([](auto &local_device) {
-      return local_device.shard_mkfs(
-      ).handle_error(
-        crimson::ct_error::assert_all{
-          "Invalid error in ZBDSegmentManager::mkfs"
+      return seastar::do_for_each(local_device.mshard_devices, [](auto& mshard_device) {
+        return mshard_device->shard_mkfs(
+        ).handle_error(
+          crimson::ct_error::assert_all{
+            "Invalid error in ZBDSegmentManager::mkfs"
+        });
       });
     });
   });
index d82974783d4240480c7e225f880b7aa2f76a3e6b..0da82d7883d0ca1977724938b2cf813cd7f7c2dc 100644 (file)
@@ -74,7 +74,6 @@ namespace crimson::os::seastore::segment_manager::zbd {
     }
 
     void validate() const {
-      ceph_assert_always(shard_num == seastar::smp::count);
       for (unsigned int i = 0; i < seastar::smp::count; i++) {
         ceph_assert_always(shard_infos[i].size > 0);
         ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX);
@@ -121,22 +120,18 @@ namespace crimson::os::seastore::segment_manager::zbd {
   class ZBDSegmentManager final : public SegmentManager{
   // interfaces used by Device
   public:
-    seastar::future<> start() {
-      return shard_devices.start(device_path);
-    }
+    seastar::future<> start(unsigned int shard_nums) final;  // parameter type must match the out-of-line definition in zbd.cc
 
-    seastar::future<> stop() {
-      return shard_devices.stop();
-    }
+    seastar::future<> stop() final;
 
-    Device& get_sharded_device() final {
-      return shard_devices.local();
-    }
+    Device& get_sharded_device(unsigned int store_index = 0) final;
 
     mount_ret mount() final;
     mkfs_ret mkfs(device_config_t meta) final;
 
-    ZBDSegmentManager(const std::string &path) : device_path(path) {}
+    ZBDSegmentManager(const std::string &path, unsigned int store_index = 0)
+    : device_path(path),
+      store_index(store_index) {}
 
     ~ZBDSegmentManager() final = default;
 
@@ -152,6 +147,8 @@ namespace crimson::os::seastore::segment_manager::zbd {
       size_t len, 
       ceph::bufferptr &out) final;
 
+    read_ertr::future<unsigned int> get_shard_nums() final;
+
     device_type_t get_device_type() const final {
       return device_type_t::ZBD;
     }
@@ -214,7 +211,7 @@ namespace crimson::os::seastore::segment_manager::zbd {
       }
     } stats;
 
-    void register_metrics();
+    void register_metrics(unsigned int store_index);
     seastar::metrics::metric_group metrics;
 
     Segment::close_ertr::future<> segment_close(
@@ -234,7 +231,28 @@ namespace crimson::os::seastore::segment_manager::zbd {
 
     mount_ret shard_mount();
 
-    seastar::sharded<ZBDSegmentManager> shard_devices;
+    unsigned int device_shard_nums = 0;
+    unsigned int store_index = 0;
+    bool shard_status = true;
+    // Container instantiated once per reactor through seastar::sharded; owns that reactor's segment managers.
+    class MultiShardDevices {
+    public:
+      // Element i is the segment manager constructed with store_index == i.
+      std::vector<std::unique_ptr<ZBDSegmentManager>> mshard_devices;
+
+      // Creates `count` segment managers that all refer to the same device `path`.
+      MultiShardDevices(size_t count, const std::string& path)
+      {
+        mshard_devices.reserve(count);
+        for (size_t store_index = 0; store_index < count; ++store_index) {
+          mshard_devices.emplace_back(
+            std::make_unique<ZBDSegmentManager>(path, store_index));
+        }
+      }
+      // The unique_ptr elements release their segment managers; no manual clear() is needed.
+      ~MultiShardDevices() = default;
+    };
+  seastar::sharded<MultiShardDevices> shard_devices;
   };
 
 }
index 8962346c1e193e43ee4429df56f3d6965b023f5b..3ad41e29dc79dcdcebefe0d03bfdcc7547a2d60f 100644 (file)
@@ -58,7 +58,7 @@ TEST_F(nvdev_test_t, write_and_verify_test)
   run_async([this] {
     device.reset(new random_block_device::nvme::NVMeBlockDevice(dev_path));
     local_conf().set_val("seastore_cbjournal_size", "1048576").get();
-    device->start().get();
+    device->start(seastar::smp::count).get();
     device->mkfs(
       device_config_t{
        true,