crimson/os: make ZNS support shard metadata 48717/head
author      chunmei-liu <chunmei.liu@intel.com>
            Tue, 22 Nov 2022 16:59:56 +0000 (08:59 -0800)
committer   chunmei <chunmei.liu@intel.com>
            Thu, 20 Apr 2023 20:08:00 +0000 (20:08 +0000)
Signed-off-by: chunmei-liu <chunmei.liu@intel.com>
src/crimson/os/seastore/segment_manager/zns.cc
src/crimson/os/seastore/segment_manager/zns.h

index 3ccbfecd21f1e907056c0590dc8f8feffeaf6571..deaaadf66687b34d37baa65c13275d720252a559 100644
--- a/src/crimson/os/seastore/segment_manager/zns.cc
+++ b/src/crimson/os/seastore/segment_manager/zns.cc
@@ -86,7 +86,16 @@ static zns_sm_metadata_t make_metadata(
   size_t segment_size = zone_size;
   size_t zones_per_segment = segment_size / zone_size;
   size_t segments = (num_zones - RESERVED_ZONES) / zones_per_segment;
+  size_t per_shard_segments = segments / seastar::smp::count;
   size_t available_size = zone_capacity * segments;
+  size_t per_shard_available_size = zone_capacity * per_shard_segments;
+  std::vector<zns_shard_info_t> shard_infos(seastar::smp::count);
+  for (unsigned int i = 0; i < seastar::smp::count; i++) {
+    shard_infos[i].size = per_shard_available_size;
+    shard_infos[i].segments = per_shard_segments;
+    shard_infos[i].first_segment_offset = zone_size * RESERVED_ZONES
+      + i * segment_size * per_shard_segments;
+  }
 
   assert(total_size == num_zones * zone_size);
 
@@ -107,15 +116,14 @@ static zns_sm_metadata_t make_metadata(
     zone_capacity * zones_per_segment);
 
   zns_sm_metadata_t ret = zns_sm_metadata_t{
-    available_size,
+    seastar::smp::count,
     segment_size,
     zone_capacity * zones_per_segment,
     zones_per_segment,
     zone_capacity,
     data.block_size,
-    segments,
     zone_size,
-    zone_size * RESERVED_ZONES,
+    shard_infos,
     meta};
   ret.validate();
   return ret;
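
Because zns_sm_metadata_t is brace-initialized positionally, the values above must follow the member declaration order in zns.h (shown in the second file's diff below). Annotated, the same initializer reads:

    zns_sm_metadata_t ret = zns_sm_metadata_t{
      seastar::smp::count,                // shard_num
      segment_size,                       // segment_size
      zone_capacity * zones_per_segment,  // segment_capacity
      zones_per_segment,                  // zones_per_segment
      zone_capacity,                      // zone_capacity
      data.block_size,                    // block_size
      zone_size,                          // zone_size
      shard_infos,                        // shard_infos
      meta};                              // meta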
@@ -160,8 +168,7 @@ static seastar::future<> reset_device(
 {
   return seastar::do_with(
     blk_zone_range{},
-    ZoneReport(nr_zones),
-    [&, nr_zones](auto &range, auto &zr) {
+    [&, nr_zones, zone_size_sects](auto &range) {
       range.sector = 0;
       range.nr_sectors = zone_size_sects * nr_zones;
       return device.ioctl(
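
The reset_device() change drops the unused ZoneReport state from do_with and instead captures zone_size_sects by value. A minimal sketch of the pattern, assuming Linux's BLKRESETZONE ioctl and Seastar's file::ioctl(): do_with owns the state that later continuations reference by address (range), while trivially copyable values ride along in the lambda capture.

    #include <cstddef>
    #include <cstdint>
    #include <linux/blkzoned.h>        // blk_zone_range, BLKRESETZONE
    #include <seastar/core/file.hh>
    #include <seastar/core/future.hh>

    seastar::future<> reset_all_zones(seastar::file& device,
                                      uint32_t nr_zones,
                                      std::size_t zone_size_sects) {
      return seastar::do_with(
        blk_zone_range{},
        [&device, nr_zones, zone_size_sects](auto& range) {
          range.sector = 0;
          range.nr_sectors = zone_size_sects * nr_zones;
          // do_with keeps `range` alive until the returned future resolves.
          return device.ioctl(BLKRESETZONE, &range).discard_result();
        });
    }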
@@ -332,7 +339,18 @@ read_metadata(seastar::file &device, seastar::stat_data sd)
     });
 }
 
-ZNSSegmentManager::mount_ret ZNSSegmentManager::mount() 
+ZNSSegmentManager::mount_ret ZNSSegmentManager::mount()
+{
+  return shard_devices.invoke_on_all([](auto &local_device) {
+    return local_device.shard_mount(
+    ).handle_error(
+      crimson::ct_error::assert_all{
+        "Invalid error in ZNSSegmentManager::mount"
+    });
+  });
+}
+
+ZNSSegmentManager::mount_ret ZNSSegmentManager::shard_mount()
 {
   return open_device(
     device_path, seastar::open_flags::rw
@@ -341,6 +359,7 @@ ZNSSegmentManager::mount_ret ZNSSegmentManager::mount()
     auto sd = p.second;
     return read_metadata(device, sd);
   }).safe_then([=, this](auto meta){
+    shard_info = meta.shard_infos[seastar::this_shard_id()];
     metadata = meta;
     return mount_ertr::now();
   });
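
mount() is now a thin fan-out: the facade asks every reactor's copy to shard_mount(), and each copy keeps only its own slice of the metadata. A minimal sketch of the seastar::sharded pattern with a hypothetical ShardService (the names are illustrative, not from the patch):

    #include <seastar/core/future.hh>
    #include <seastar/core/sharded.hh>

    struct ShardService {
      seastar::future<> local_mount() {
        // Each shard would read the shared metadata and keep only
        // meta.shard_infos[seastar::this_shard_id()], as shard_mount() does.
        return seastar::make_ready_future<>();
      }
      seastar::future<> stop() {  // required by seastar::sharded
        return seastar::make_ready_future<>();
      }
    };

    seastar::future<> mount_all(seastar::sharded<ShardService>& devices) {
      // Runs the lambda on every reactor's copy and resolves
      // once all shards have finished.
      return devices.invoke_on_all([](ShardService& local) {
        return local.local_mount();
      });
    }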
@@ -349,7 +368,22 @@ ZNSSegmentManager::mount_ret ZNSSegmentManager::mount()
 ZNSSegmentManager::mkfs_ret ZNSSegmentManager::mkfs(
   device_config_t config)
 {
-  LOG_PREFIX(ZNSSegmentManager::mkfs);
+  return shard_devices.local().primary_mkfs(config
+    ).safe_then([this] {
+    return shard_devices.invoke_on_all([](auto &local_device) {
+      return local_device.shard_mkfs(
+      ).handle_error(
+        crimson::ct_error::assert_all{
+          "Invalid error in ZNSSegmentManager::mkfs"
+      });
+    });
+  });
+}
+
+ZNSSegmentManager::mkfs_ret ZNSSegmentManager::primary_mkfs(
+  device_config_t config)
+{
+  LOG_PREFIX(ZNSSegmentManager::primary_mkfs);
   INFO("starting, device_path {}", device_path);
   return seastar::do_with(
     seastar::file{},
@@ -409,6 +443,26 @@ ZNSSegmentManager::mkfs_ret ZNSSegmentManager::mkfs(
     });
 }
 
+ZNSSegmentManager::mkfs_ret ZNSSegmentManager::shard_mkfs()
+{
+  LOG_PREFIX(ZNSSegmentManager::shard_mkfs);
+  INFO("starting, device_path {}", device_path);
+  return open_device(
+    device_path, seastar::open_flags::rw
+  ).safe_then([=, this](auto p) {
+    device = std::move(p.first);
+    auto sd = p.second;
+    return read_metadata(device, sd);
+  }).safe_then([=, this](auto meta){
+    shard_info = meta.shard_infos[seastar::this_shard_id()];
+    metadata = meta;
+    return device.close();
+  }).safe_then([FNAME] {
+    DEBUG("Returning from shard_mkfs.");
+    return mkfs_ertr::now();
+  });
+}
+
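
mkfs() is split into two phases: primary_mkfs() runs once, on the shard that called mkfs(), and writes a single zns_sm_metadata_t describing every shard's slice; only after that future resolves does invoke_on_all() run shard_mkfs() on each reactor, which re-opens the device, reads the metadata back, and caches shard_infos[this_shard_id()]. In outline, using plain then() in place of the errorator's safe_then() and the same kind of hypothetical service as the previous sketch:

    struct MkfsService {
      seastar::future<> primary_phase() {  // write metadata once
        return seastar::make_ready_future<>();
      }
      seastar::future<> shard_phase() {    // read back, cache own slice
        return seastar::make_ready_future<>();
      }
      seastar::future<> stop() { return seastar::make_ready_future<>(); }
    };

    seastar::future<> mkfs_all(seastar::sharded<MkfsService>& devices) {
      // Phase 1: exactly one instance writes the on-disk metadata.
      return devices.local().primary_phase().then([&devices] {
        // Phase 2: every shard re-reads it and keeps its own slice.
        return devices.invoke_on_all([](MkfsService& local) {
          return local.shard_phase();
        });
      });
    }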
 // Return range of sectors to operate on.
 struct blk_zone_range make_range(
   segment_id_t id,
@@ -479,7 +533,7 @@ ZNSSegmentManager::open_ertr::future<SegmentRef> ZNSSegmentManager::open(
       range = make_range(
        id,
        metadata.segment_size,
-        metadata.first_segment_offset);
+        shard_info.first_segment_offset);
       return blk_zone_op(
        device,
        range,
@@ -506,7 +560,7 @@ ZNSSegmentManager::release_ertr::future<> ZNSSegmentManager::release(
       range = make_range(
        id,
        metadata.segment_size,
-        metadata.first_segment_offset);
+        shard_info.first_segment_offset);
       return blk_zone_op(
        device,
        range,
@@ -555,7 +609,7 @@ Segment::close_ertr::future<> ZNSSegmentManager::segment_close(
       range = make_range(
        id,
        metadata.segment_size,
-        metadata.first_segment_offset);
+        shard_info.first_segment_offset);
       return blk_zone_op(
        device,
        range,
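
open(), release(), and segment_close() now build their zone ranges from the shard-local base offset rather than a global one. A worked example with the assumed numbers from the first sketch (segment_size = zone_size = 256 MiB, RESERVED_ZONES = 1, per_shard_segments = 255):

    #include <cstdint>

    // shard 0: first_segment_offset = 256 MiB * 1             =   256 MiB
    // shard 1: first_segment_offset = 256 MiB + 255 * 256 MiB = 65536 MiB
    //
    // A zone op or I/O on device_segment_id k of a shard then targets
    // bytes starting at (mirroring get_offset() in zns.h below):
    uint64_t byte_offset(uint64_t first_segment_offset,  // from shard_info
                         uint64_t segment_size,          // from metadata
                         uint64_t device_segment_id,
                         uint64_t segment_off) {
      return first_segment_offset
        + device_segment_id * segment_size
        + segment_off;
    }
    // e.g. shard 1, segment 3, intra-segment offset 4096:
    //   65536 MiB + 3 * 256 MiB + 4096 = 66304 MiB + 4 KiB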
index ad8c3b4fe3b6815ca9796038f4689733be95d8c0..b98ff1c89f4ad182074406e35e850dfef86dc055 100644
--- a/src/crimson/os/seastore/segment_manager/zns.h
+++ b/src/crimson/os/seastore/segment_manager/zns.h
 
 namespace crimson::os::seastore::segment_manager::zns {
 
-  struct zns_sm_metadata_t {
+  struct zns_shard_info_t {
     size_t size = 0;
+    size_t segments = 0;
+    size_t first_segment_offset = 0;
+
+    DENC(zns_shard_info_t, v, p) {
+      DENC_START(1, 1, p);
+      denc(v.size, p);
+      denc(v.segments, p);
+      denc(v.first_segment_offset, p);
+      DENC_FINISH(p);
+    }
+  };
+
+  struct zns_sm_metadata_t {
+    unsigned int shard_num = 0;
     size_t segment_size = 0;
     size_t segment_capacity = 0;
     size_t zones_per_segment = 0;
     size_t zone_capacity = 0;
     size_t block_size = 0;
-    size_t segments = 0;
     size_t zone_size = 0;
-    size_t first_segment_offset = 0;
+
+    std::vector<zns_shard_info_t> shard_infos;
 
     seastore_meta_t meta;
     
@@ -40,15 +54,14 @@ namespace crimson::os::seastore::segment_manager::zns {
 
     DENC(zns_sm_metadata_t, v, p) {
       DENC_START(1, 1, p);
-      denc(v.size, p);
+      denc(v.shard_num, p);
       denc(v.segment_size, p);
       denc(v.segment_capacity, p);
       denc(v.zones_per_segment, p);
       denc(v.zone_capacity, p);
       denc(v.block_size, p);
-      denc(v.segments, p);
       denc(v.zone_size, p);
-      denc(v.first_segment_offset, p);
+      denc(v.shard_infos, p);
       denc(v.meta, p);
       denc(v.magic, p);
       denc(v.dtype, p);
@@ -60,12 +73,15 @@ namespace crimson::os::seastore::segment_manager::zns {
     }
 
     void validate() const {
-      ceph_assert_always(size > 0);
-      ceph_assert_always(size <= DEVICE_OFF_MAX);
+      ceph_assert_always(shard_num == seastar::smp::count);
+      for (unsigned int i = 0; i < seastar::smp::count; i++) {
+        ceph_assert_always(shard_infos[i].size > 0);
+        ceph_assert_always(shard_infos[i].size <= DEVICE_OFF_MAX);
+        ceph_assert_always(shard_infos[i].segments > 0);
+        ceph_assert_always(shard_infos[i].segments <= DEVICE_SEGMENT_ID_MAX);
+      }
       ceph_assert_always(segment_capacity > 0);
       ceph_assert_always(segment_capacity <= SEGMENT_OFF_MAX);
-      ceph_assert_always(segments > 0);
-      ceph_assert_always(segments <= DEVICE_SEGMENT_ID_MAX);
     }
   };
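
One consequence of the reworked validate(): the shard_num == seastar::smp::count assertion ties the on-disk layout to the reactor count it was formatted with, so metadata written by an N-shard mkfs can only be validated, and hence mounted, by a process running exactly N reactors.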
 
@@ -102,9 +118,29 @@ namespace crimson::os::seastore::segment_manager::zns {
   };
 
   class ZNSSegmentManager final : public SegmentManager{
+  // interfaces used by Device
   public:
+    seastar::future<> start() {
+      return shard_devices.start(device_path);
+    }
+
+    seastar::future<> stop() {
+      return shard_devices.stop();
+    }
+
+    Device& get_sharded_device() final {
+      return shard_devices.local();
+    }
+
     mount_ret mount() final;
     mkfs_ret mkfs(device_config_t meta) final;
+
+    ZNSSegmentManager(const std::string &path) : device_path(path) {}
+
+    ~ZNSSegmentManager() final = default;
+
+  // interfaces used by each shard device
+  public:
     open_ertr::future<SegmentRef> open(segment_id_t id) final;
     close_ertr::future<> close() final;
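
The class now wears two hats, as the section comments above mark: the Device-facing facade (start/stop/get_sharded_device, mount, mkfs) and the per-shard worker API. A sketch of the intended lifecycle from a caller's point of view (hypothetical usage, not from the patch):

    seastar::future<> run(ZNSSegmentManager& sm) {
      // start() instantiates one copy per reactor inside the sharded<>
      // member; stop() tears them down again when we're done.
      return sm.start().then([&sm] {
        return sm.mount().handle_error(
          crimson::ct_error::assert_all{"mount failed in this sketch"});
      }).finally([&sm] {
        return sm.stop();
      });
    }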
 
@@ -120,7 +156,7 @@ namespace crimson::os::seastore::segment_manager::zns {
     }
 
     size_t get_available_size() const final {
-      return metadata.size;
+      return shard_info.size;
     };
 
     extent_len_t get_block_size() const final {
@@ -141,10 +177,6 @@ namespace crimson::os::seastore::segment_manager::zns {
 
     magic_t get_magic() const final;
 
-    ZNSSegmentManager(const std::string &path) : device_path(path) {}
-
-    ~ZNSSegmentManager() final = default;
-
     Segment::write_ertr::future<> segment_write(
     paddr_t addr,
     ceph::bufferlist bl,
@@ -153,6 +185,7 @@ namespace crimson::os::seastore::segment_manager::zns {
   private:
     friend class ZNSSegment;
     std::string device_path;
+    zns_shard_info_t shard_info;
     zns_sm_metadata_t metadata;
     seastar::file device;
     uint32_t nr_zones;
@@ -188,14 +221,26 @@ namespace crimson::os::seastore::segment_manager::zns {
 
     uint64_t get_offset(paddr_t addr) {
       auto& seg_addr = addr.as_seg_paddr();
-      return (metadata.first_segment_offset +
+      return (shard_info.first_segment_offset +
              (seg_addr.get_segment_id().device_segment_id() * 
               metadata.segment_size)) + seg_addr.get_segment_off();
     }
+  private:
+    // shard 0 mkfs
+    mkfs_ret primary_mkfs(device_config_t meta);
+    // all shards mkfs
+    mkfs_ret shard_mkfs();
+
+    mount_ret shard_mount();
+
+    seastar::sharded<ZNSSegmentManager> shard_devices;
   };
 
 }
 
+WRITE_CLASS_DENC_BOUNDED(
+  crimson::os::seastore::segment_manager::zns::zns_shard_info_t
+)
 WRITE_CLASS_DENC_BOUNDED(
   crimson::os::seastore::segment_manager::zns::zns_sm_metadata_t
 )
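
WRITE_CLASS_DENC_BOUNDED registers denc_traits for both types, which is what lets denc(v.shard_infos, p) above encode the nested vector. A hypothetical round-trip through the standard Ceph encode/decode helpers:

    #include "include/buffer.h"

    using crimson::os::seastore::segment_manager::zns::zns_sm_metadata_t;

    void roundtrip(const zns_sm_metadata_t& in) {
      ceph::bufferlist bl;
      encode(in, bl);      // DENC-generated encoder
      zns_sm_metadata_t out;
      auto p = bl.cbegin();
      decode(out, p);      // shard_infos round-trips as a vector
      // out.validate() would now assert shard_num == smp::count.
    }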