From: Jinyong Ha Date: Fri, 8 Oct 2021 08:29:59 +0000 (+0900) Subject: seastore : add discard and preferred write granularity/alignment features X-Git-Tag: v17.1.0~359^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0d25fdcc71b9f4f2aa5ea7c5482d64fb798be0a6;p=ceph.git seastore : add discard and preferred write granularity/alignment features Discard informs the SSD of invalid LBAs, and the SSD uses this hint to optimize internal garbage collection. Preferred write granularity/alignment are I/O guidelines provided by the SSD. If the user submits I/O that follows these guidelines, I/O latency and internal WAF might be optimized. Signed-off-by: Jinyong Ha --- diff --git a/src/crimson/os/seastore/random_block_manager/nvmedevice.cc b/src/crimson/os/seastore/random_block_manager/nvmedevice.cc index ac3e81c56ca2..5cf85e9406bb 100644 --- a/src/crimson/os/seastore/random_block_manager/nvmedevice.cc +++ b/src/crimson/os/seastore/random_block_manager/nvmedevice.cc @@ -37,6 +37,11 @@ open_ertr::future<> PosixNVMeDevice::open( block_size = (1 << id_namespace_data.lbaf0.lbads); data_protection_type = id_namespace_data.dps.protection_type; data_protection_enabled = (data_protection_type > 0); + if (id_namespace_data.nsfeat.opterf == 1){ + // NPWG and NPWA is 0'based value + write_granularity = block_size * (id_namespace_data.npwg + 1); + write_alignment = block_size * (id_namespace_data.npwa + 1); + } return seastar::now(); }); }); @@ -101,6 +106,63 @@ seastar::future<> PosixNVMeDevice::close() { return device.close(); } +nvme_command_ertr::future +PosixNVMeDevice::identify_controller() { + return seastar::do_with( + nvme_admin_command_t(), + nvme_identify_controller_data_t(), + [this](auto &admin_command, auto &data) { + admin_command.common.opcode = nvme_admin_command_t::OPCODE_IDENTIFY; + admin_command.common.addr = (uint64_t)&data; + admin_command.common.data_len = sizeof(data); + admin_command.identify.cns = nvme_identify_command_t::CNS_CONTROLLER; + + return
pass_admin(admin_command).safe_then([&data](auto status) { + return seastar::make_ready_future( + std::move(data)); + }); + }); +} + +discard_ertr::future<> PosixNVMeDevice::discard(uint64_t offset, uint64_t len) { + return device.discard(offset, len); +} + +nvme_command_ertr::future +PosixNVMeDevice::identify_namespace() { + return get_nsid().safe_then([this](auto nsid) { + return seastar::do_with( + nvme_admin_command_t(), + nvme_identify_namespace_data_t(), + [this, nsid](auto &admin_command, auto &data) { + admin_command.common.opcode = nvme_admin_command_t::OPCODE_IDENTIFY; + admin_command.common.addr = (uint64_t)&data; + admin_command.common.data_len = sizeof(data); + admin_command.common.nsid = nsid; + admin_command.identify.cns = nvme_identify_command_t::CNS_NAMESPACE; + + return pass_admin(admin_command).safe_then([&data](auto status){ + return seastar::make_ready_future( + std::move(data)); + }); + }); + }); +} + +nvme_command_ertr::future PosixNVMeDevice::get_nsid() { + return device.ioctl(NVME_IOCTL_ID, nullptr); +} + +nvme_command_ertr::future PosixNVMeDevice::pass_admin( + nvme_admin_command_t& admin_cmd) { + return device.ioctl(NVME_IOCTL_ADMIN_CMD, &admin_cmd); +} + +nvme_command_ertr::future PosixNVMeDevice::pass_through_io( + nvme_io_command_t& io_cmd) { + return device.ioctl(NVME_IOCTL_IO_CMD, &io_cmd); +} + open_ertr::future<> TestMemory::open( const std::string &in_path, seastar::open_flags mode) { diff --git a/src/crimson/os/seastore/random_block_manager/nvmedevice.h b/src/crimson/os/seastore/random_block_manager/nvmedevice.h index a16cde8b9a0d..b602cdc0415e 100644 --- a/src/crimson/os/seastore/random_block_manager/nvmedevice.h +++ b/src/crimson/os/seastore/random_block_manager/nvmedevice.h @@ -95,6 +95,17 @@ struct dps_t { uint8_t reserved : 4; }; +// Namespace Features (NSFEAT) +// Indicates features of namespace +struct nsfeat_t { + uint8_t thinp : 1; + uint8_t nsabp : 1; + uint8_t dae : 1; + uint8_t uid_reuse : 1; + uint8_t opterf : 1; // 
Support NPWG, NPWA + uint8_t reserved : 3; +}; + // LBA Format (LBAF) // Indicates LBA format (metadata size, data size, performance) struct lbaf_t { @@ -107,10 +118,15 @@ struct lbaf_t { struct nvme_identify_namespace_data_t { union { struct { - uint8_t unused[28]; // [27:0] + uint8_t unused[24]; // [23:0] + nsfeat_t nsfeat; // [24] + uint8_t unused2[3]; // [27:25] dpc_t dpc; // [28] dps_t dps; // [29] - uint8_t unused2[98]; // [127:30] + uint8_t unused3[34]; // [63:30] + uint16_t npwg; // [65:64] + uint16_t npwa; // [67:66] + uint8_t unused4[60]; // [127:68] lbaf_t lbaf0; // [131:128] }; uint8_t raw[4096]; @@ -239,7 +255,6 @@ public: bufferptr &bptr, uint16_t stream = 0) = 0; - // TODO virtual discard_ertr::future<> discard( uint64_t offset, uint64_t len) { return seastar::now(); } @@ -326,11 +341,21 @@ public: seastar::future<> close() override; -private: - // identify_controller/namespace are used to get SSD internal information such - // as supported features + discard_ertr::future<> discard( + uint64_t offset, + uint64_t len) override; + + nvme_command_ertr::future pass_admin( + nvme_admin_command_t& admin_cmd) override; + nvme_command_ertr::future pass_through_io( + nvme_io_command_t& io_cmd) override; + + bool support_multistream = false; uint8_t data_protection_type = 0; +private: + // identify_controller/namespace are used to get SSD internal information such + // as supported features, NPWG and NPWA; nvme_command_ertr::future identify_controller(); nvme_command_ertr::future identify_namespace(); nvme_command_ertr::future get_nsid(); @@ -367,5 +392,4 @@ public: char *buf; size_t size; }; - }