From: myoungwon oh Date: Tue, 28 May 2024 10:35:45 +0000 (+0000) Subject: crimson/os/seastore: add nvme_read and nvme_write to be used when end to end data... X-Git-Tag: v20.0.0~1459^2~4 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=5f86c84118e6b60f9e5d23d4e5dea8efab6b858a;p=ceph.git crimson/os/seastore: add nvme_read and nvme_write to be used when end to end data protection is enabled Signed-off-by: Myoungwon Oh --- diff --git a/src/crimson/os/seastore/random_block_manager.h b/src/crimson/os/seastore/random_block_manager.h index fca76c313966e..5e8a4d7c55345 100644 --- a/src/crimson/os/seastore/random_block_manager.h +++ b/src/crimson/os/seastore/random_block_manager.h @@ -51,6 +51,8 @@ struct rbm_superblock_t { checksum_t crc = 0; device_config_t config; unsigned int shard_num = 0; + // Must be assigned if end-to-end-data-protection feature is enabled + uint32_t nvme_block_size = 0; std::vector shard_infos; DENC(rbm_superblock_t, v, p) { @@ -63,6 +65,7 @@ struct rbm_superblock_t { denc(v.crc, p); denc(v.config, p); denc(v.shard_num, p); + denc(v.nvme_block_size, p); denc(v.shard_infos, p); DENC_FINISH(p); } diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc index afe1128bc9257..2b303fbc4d902 100644 --- a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc +++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc @@ -210,7 +210,8 @@ std::ostream &operator<<(std::ostream &out, const rbm_superblock_t &header) << ", crc=" << header.crc << ", config=" << header.config << ", shard_num=" << header.shard_num - << ", end_to_end_data_protection=" << header.is_end_to_end_data_protection(); + << ", end_to_end_data_protection=" << header.is_end_to_end_data_protection() + << ", device_block_size=" << header.nvme_block_size; for (auto p : header.shard_infos) { out << p; } diff --git 
a/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc b/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc index f0a23cb4077d2..2576ee7466b9b 100644 --- a/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc +++ b/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc @@ -133,6 +133,13 @@ write_ertr::future<> NVMeBlockDevice::write( if (stream >= stream_id_count) { supported_stream = WRITE_LIFE_NOT_SET; } + if (is_end_to_end_data_protection()) { + return seastar::do_with( + std::move(bptr), + [this, offset] (auto &bptr) { + return nvme_write(offset, bptr.length(), bptr.c_str()); + }); + } return seastar::do_with( std::move(bptr), [this, offset, length, supported_stream] (auto& bptr) { @@ -159,9 +166,15 @@ read_ertr::future<> NVMeBlockDevice::read( offset, bptr.length()); auto length = bptr.length(); - + if (length == 0) { + return read_ertr::now(); + } assert((length % super.block_size) == 0); + if (is_end_to_end_data_protection()) { + return nvme_read(offset, length, bptr.c_str()); + } + return device.dma_read(offset, bptr.c_str(), length).handle_exception( [](auto e) -> read_ertr::future { logger().error("read: dma_read got error{}", e); @@ -188,6 +201,13 @@ write_ertr::future<> NVMeBlockDevice::writev( if (stream >= stream_id_count) { supported_stream = WRITE_LIFE_NOT_SET; } + if (is_end_to_end_data_protection()) { + return seastar::do_with( + std::move(bl), + [this, offset] (auto &bl) { + return nvme_write(offset, bl.length(), bl.c_str()); + }); + } bl.rebuild_aligned(super.block_size); return seastar::do_with( @@ -256,6 +276,7 @@ discard_ertr::future<> NVMeBlockDevice::discard(uint64_t offset, uint64_t len) { nvme_command_ertr::future NVMeBlockDevice::identify_namespace(seastar::file f) { return get_nsid(f).safe_then([this, f](auto nsid) { + namespace_id = nsid; return seastar::do_with( nvme_admin_command_t(), nvme_identify_namespace_data_t(), @@ -314,6 +335,7 @@ nvme_command_ertr::future<> 
NVMeBlockDevice::try_enable_end_to_end_protection() if (id_namespace_data.lbaf[i].ms == nvme_identify_namespace_data_t::METASIZE_FOR_CHECKSUM_OFFLOAD) { lba_format_index = i; + super.nvme_block_size = (1 << id_namespace_data.lbaf[i].lbads); break; } } @@ -362,4 +384,60 @@ nvme_command_ertr::future<> NVMeBlockDevice::initialize_nvme_features() { return nvme_command_ertr::now(); } +write_ertr::future<> NVMeBlockDevice::nvme_write( + uint64_t offset, size_t len, void *buffer_ptr) { + return seastar::do_with( + nvme_io_command_t(), + [this, offset, len, buffer_ptr] (auto &cmd) { + cmd.common.opcode = nvme_io_command_t::OPCODE_WRITE; + cmd.common.nsid = namespace_id; + cmd.common.data_len = len; + // To perform checksum offload, we need to set PRACT to 1 and PRCHK to 4 + // according to NVMe spec. + cmd.rw.prinfo_pract = nvme_rw_command_t::PROTECT_INFORMATION_ACTION_ENABLE; + cmd.rw.prinfo_prchk = nvme_rw_command_t::PROTECT_INFORMATION_CHECK_GUARD; + cmd.common.addr = (__u64)(uintptr_t)buffer_ptr; + ceph_assert(super.nvme_block_size > 0); + auto lba_shift = ffsll(super.nvme_block_size) - 1; + cmd.rw.s_lba = offset >> lba_shift; + cmd.rw.nlb = (len >> lba_shift) - 1; + return pass_through_io(cmd + ).safe_then([] (auto ret) { + if (ret != 0) { + logger().error( + "write nvm command with checksum offload fails : {}", ret); + ceph_abort(); + } + return nvme_command_ertr::now(); + }); + }); +} + +read_ertr::future<> NVMeBlockDevice::nvme_read( + uint64_t offset, size_t len, void *buffer_ptr) { + return seastar::do_with( + nvme_io_command_t(), + [this, offset, len, buffer_ptr] (auto &cmd) { + cmd.common.opcode = nvme_io_command_t::OPCODE_READ; + cmd.common.nsid = namespace_id; + cmd.common.data_len = len; + cmd.rw.prinfo_pract = nvme_rw_command_t::PROTECT_INFORMATION_ACTION_ENABLE; + cmd.rw.prinfo_prchk = nvme_rw_command_t::PROTECT_INFORMATION_CHECK_GUARD; + cmd.common.addr = (__u64)(uintptr_t)buffer_ptr; + ceph_assert(super.nvme_block_size > 0); + auto lba_shift = 
ffsll(super.nvme_block_size) - 1; + cmd.rw.s_lba = offset >> lba_shift; + cmd.rw.nlb = (len >> lba_shift) - 1; + return pass_through_io(cmd + ).safe_then([] (auto ret) { + if (ret != 0) { + logger().error( + "read nvm command with checksum offload fails : {}", ret); + ceph_abort(); + } + return nvme_command_ertr::now(); + }); + }); +} + } diff --git a/src/crimson/os/seastore/random_block_manager/nvme_block_device.h b/src/crimson/os/seastore/random_block_manager/nvme_block_device.h index 1a9d6297efb05..01755d047a0c0 100644 --- a/src/crimson/os/seastore/random_block_manager/nvme_block_device.h +++ b/src/crimson/os/seastore/random_block_manager/nvme_block_device.h @@ -170,6 +170,11 @@ struct nvme_rw_command_t { uint32_t dspec : 16; static const uint32_t DTYPE_STREAM = 1; + + static const uint8_t PROTECT_INFORMATION_ACTION_ENABLE = 1; + static const uint8_t PROTECT_INFORMATION_CHECK_GUARD = 4; + static const uint8_t PROTECT_INFORMATION_CHECK_APPLICATION_TAG = 2; + static const uint8_t PROTECT_INFORMATION_CHECK_LOGICAL_REFERENCE_TAG = 1; }; struct nvme_io_command_t { @@ -178,7 +183,7 @@ struct nvme_io_command_t { nvme_rw_command_t rw; }; static const uint8_t OPCODE_WRITE = 0x01; - static const uint8_t OPCODE_READ = 0x01; + static const uint8_t OPCODE_READ = 0x02; }; /* @@ -224,6 +229,9 @@ public: uint64_t offset, bufferptr &bptr) final; + read_ertr::future<> nvme_read( + uint64_t offset, size_t len, void *buffer_ptr); + close_ertr::future<> close() override; discard_ertr::future<> discard( @@ -241,6 +249,9 @@ public: ceph::bufferlist bl, uint16_t stream = 0) final; + write_ertr::future<> nvme_write( + uint64_t offset, size_t len, void *buffer_ptr); + stat_device_ret stat_device() final { return seastar::file_stat(device_path, seastar::follow_symlink::yes ).handle_exception([](auto e) -> stat_device_ret { @@ -376,6 +387,7 @@ private: uint64_t write_alignment = 4096; uint32_t atomic_write_unit = 4096; + int namespace_id; // TODO: multi namespaces std::string device_path; 
seastar::sharded shard_devices; };