]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
seastore: add discard and preferred write granularity/alignment features
authorJinyong Ha <jy200.ha@samsung.com>
Fri, 8 Oct 2021 08:29:59 +0000 (17:29 +0900)
committermyoungwon oh <ohmyoungwon@gmail.com>
Fri, 12 Nov 2021 04:28:29 +0000 (13:28 +0900)
Discard informs the SSD of invalid LBAs, and the SSD uses this hint to optimize
internal garbage collection.
Preferred write granularity/alignment are I/O guidelines provided by the SSD.
If the user submits I/O that follows these guidelines, I/O latency and internal
WAF (write amplification factor) might be improved.

Signed-off-by: Jinyong Ha <jy200.ha@samsung.com>
src/crimson/os/seastore/random_block_manager/nvmedevice.cc
src/crimson/os/seastore/random_block_manager/nvmedevice.h

index ac3e81c56ca26c239298a7480b09c88a357519f7..5cf85e9406bb73df4503475e1a155ac0734c72d3 100644 (file)
@@ -37,6 +37,11 @@ open_ertr::future<> PosixNVMeDevice::open(
             block_size = (1 << id_namespace_data.lbaf0.lbads);
             data_protection_type = id_namespace_data.dps.protection_type;
             data_protection_enabled = (data_protection_type > 0);
+            if (id_namespace_data.nsfeat.opterf == 1){
+              // NPWG and NPWA are 0's based values (a raw value of 0 means one block)
+              write_granularity = block_size * (id_namespace_data.npwg + 1);
+              write_alignment = block_size * (id_namespace_data.npwa + 1);
+            }
             return seastar::now();
           });
         });
@@ -101,6 +106,63 @@ seastar::future<> PosixNVMeDevice::close() {
   return device.close();
 }
 
+nvme_command_ertr::future<nvme_identify_controller_data_t>
+PosixNVMeDevice::identify_controller() {  // Sends an Identify admin command with CNS_CONTROLLER and resolves to the filled data structure.
+  return seastar::do_with(  // NOTE(review): do_with presumably keeps both objects alive until the returned future resolves - confirm against Seastar docs
+    nvme_admin_command_t(),
+    nvme_identify_controller_data_t(),
+    [this](auto &admin_command, auto &data) {
+    admin_command.common.opcode = nvme_admin_command_t::OPCODE_IDENTIFY;
+    admin_command.common.addr = (uint64_t)&data;  // the command carries the address of `data` as its buffer
+    admin_command.common.data_len = sizeof(data);
+    admin_command.identify.cns = nvme_identify_command_t::CNS_CONTROLLER;
+
+    return pass_admin(admin_command).safe_then([&data](auto status) {  // `status` from pass_admin is not inspected here
+      return seastar::make_ready_future<nvme_identify_controller_data_t>(
+        std::move(data));  // result is moved out of the do_with-held buffer
+      });
+  });
+}
+
+discard_ertr::future<> PosixNVMeDevice::discard(uint64_t offset, uint64_t len) {  // Forwards offset/len unchanged to device.discard().
+  return device.discard(offset, len);
+}
+
+nvme_command_ertr::future<nvme_identify_namespace_data_t>
+PosixNVMeDevice::identify_namespace() {  // Gets the namespace id via get_nsid(), then sends an Identify admin command with CNS_NAMESPACE for it.
+  return get_nsid().safe_then([this](auto nsid) {
+    return seastar::do_with(  // NOTE(review): do_with presumably keeps both objects alive until the returned future resolves - confirm against Seastar docs
+      nvme_admin_command_t(),
+      nvme_identify_namespace_data_t(),
+      [this, nsid](auto &admin_command, auto &data) {
+      admin_command.common.opcode = nvme_admin_command_t::OPCODE_IDENTIFY;
+      admin_command.common.addr = (uint64_t)&data;  // the command carries the address of `data` as its buffer
+      admin_command.common.data_len = sizeof(data);
+      admin_command.common.nsid = nsid;  // value produced by get_nsid() above
+      admin_command.identify.cns = nvme_identify_command_t::CNS_NAMESPACE;
+
+      return pass_admin(admin_command).safe_then([&data](auto status){  // `status` from pass_admin is not inspected here
+        return seastar::make_ready_future<nvme_identify_namespace_data_t>(
+          std::move(data));  // result is moved out of the do_with-held buffer
+      });
+    });
+  });
+}
+
+nvme_command_ertr::future<int> PosixNVMeDevice::get_nsid() {  // Issues the NVME_IOCTL_ID ioctl with a null argument; identify_namespace() uses the int result as the nsid.
+  return device.ioctl(NVME_IOCTL_ID, nullptr);
+}
+
+nvme_command_ertr::future<int> PosixNVMeDevice::pass_admin(  // Submits admin_cmd to the device through the NVME_IOCTL_ADMIN_CMD ioctl.
+  nvme_admin_command_t& admin_cmd) {  // passed by reference; its address is handed to the ioctl, so the caller's object is used directly
+  return device.ioctl(NVME_IOCTL_ADMIN_CMD, &admin_cmd);
+}
+
+nvme_command_ertr::future<int> PosixNVMeDevice::pass_through_io(  // Submits io_cmd to the device through the NVME_IOCTL_IO_CMD ioctl.
+  nvme_io_command_t& io_cmd) {  // passed by reference; its address is handed to the ioctl, so the caller's object is used directly
+  return device.ioctl(NVME_IOCTL_IO_CMD, &io_cmd);
+}
+
 open_ertr::future<> TestMemory::open(
   const std::string &in_path,
    seastar::open_flags mode) {
index a16cde8b9a0d54252615399d0c70b42329bc7e96..b602cdc0415ec257b57137390e6205d4493f3799 100644 (file)
@@ -95,6 +95,17 @@ struct dps_t {
   uint8_t reserved : 4;
 };
 
+// Namespace Features (NSFEAT)
+// Indicates features of the namespace; the bit-fields below total 8 bits (1+1+1+1+1+3).
+struct nsfeat_t {
+  uint8_t thinp : 1;  // NOTE(review): presumably thin provisioning - confirm against the NVMe spec
+  uint8_t nsabp : 1;  // NOTE(review): meaning not shown in this file - confirm against the NVMe spec
+  uint8_t dae : 1;  // NOTE(review): meaning not shown in this file - confirm against the NVMe spec
+  uint8_t uid_reuse : 1;  // NOTE(review): meaning not shown in this file - confirm against the NVMe spec
+  uint8_t opterf : 1; // Support NPWG, NPWA; open() only derives write granularity/alignment when this bit is 1
+  uint8_t reserved : 3;  // remaining bits, not read by the code in this change
+};
+
 // LBA Format (LBAF)
 // Indicates LBA format (metadata size, data size, performance)
 struct lbaf_t {
@@ -107,10 +118,15 @@ struct lbaf_t {
 struct nvme_identify_namespace_data_t {
   union {
     struct {
-      uint8_t unused[28];   // [27:0]
+      uint8_t unused[24];   // [23:0]
+      nsfeat_t nsfeat;      // [24]
+      uint8_t unused2[3];   // [27:25]
       dpc_t dpc;            // [28]
       dps_t dps;            // [29]
-      uint8_t unused2[98];  // [127:30]
+      uint8_t unused3[34];  // [63:30]
+      uint16_t npwg;        // [65:64]
+      uint16_t npwa;        // [67:66]
+      uint8_t unused4[60];  // [127:68]
       lbaf_t lbaf0;         // [131:128]
     };
     uint8_t raw[4096];
@@ -239,7 +255,6 @@ public:
     bufferptr &bptr,
     uint16_t stream = 0) = 0;
 
-  // TODO
   virtual discard_ertr::future<> discard(
     uint64_t offset,
     uint64_t len) { return seastar::now(); }
@@ -326,11 +341,21 @@ public:
 
   seastar::future<> close() override;
 
-private:
-  // identify_controller/namespace are used to get SSD internal information such
-  // as supported features
+  discard_ertr::future<> discard(
+    uint64_t offset,
+    uint64_t len) override;
+
+  nvme_command_ertr::future<int> pass_admin(
+    nvme_admin_command_t& admin_cmd) override;
+  nvme_command_ertr::future<int> pass_through_io(
+    nvme_io_command_t& io_cmd) override;
+
+  bool support_multistream = false;
   uint8_t data_protection_type = 0;
 
+private:
+  // identify_controller/namespace are used to get SSD internal information such
+  // as supported features, NPWG and NPWA;
   nvme_command_ertr::future<nvme_identify_controller_data_t> identify_controller();
   nvme_command_ertr::future<nvme_identify_namespace_data_t> identify_namespace();
   nvme_command_ertr::future<int> get_nsid();
@@ -367,5 +392,4 @@ public:
   char *buf;
   size_t size;
 };
-
 }