if (stream >= stream_id_count) {
supported_stream = WRITE_LIFE_NOT_SET;
}
+ if (is_end_to_end_data_protection()) {
+ return seastar::do_with(
+ std::move(bptr),
+ [this, offset] (auto &bptr) {
+ return nvme_write(offset, bptr.length(), bptr.c_str());
+ });
+ }
return seastar::do_with(
std::move(bptr),
[this, offset, length, supported_stream] (auto& bptr) {
offset,
bptr.length());
auto length = bptr.length();
-
+ if (length == 0) {
+ return read_ertr::now();
+ }
assert((length % super.block_size) == 0);
+ if (is_end_to_end_data_protection()) {
+ return nvme_read(offset, length, bptr.c_str());
+ }
+
return device.dma_read(offset, bptr.c_str(), length).handle_exception(
[](auto e) -> read_ertr::future<size_t> {
logger().error("read: dma_read got error{}", e);
if (stream >= stream_id_count) {
supported_stream = WRITE_LIFE_NOT_SET;
}
+ if (is_end_to_end_data_protection()) {
+ return seastar::do_with(
+ std::move(bl),
+ [this, offset] (auto &bl) {
+ return nvme_write(offset, bl.length(), bl.c_str());
+ });
+ }
bl.rebuild_aligned(super.block_size);
return seastar::do_with(
nvme_command_ertr::future<nvme_identify_namespace_data_t>
NVMeBlockDevice::identify_namespace(seastar::file f) {
return get_nsid(f).safe_then([this, f](auto nsid) {
+ namespace_id = nsid;
return seastar::do_with(
nvme_admin_command_t(),
nvme_identify_namespace_data_t(),
if (id_namespace_data.lbaf[i].ms ==
nvme_identify_namespace_data_t::METASIZE_FOR_CHECKSUM_OFFLOAD) {
lba_format_index = i;
+ super.nvme_block_size = (1 << id_namespace_data.lbaf[i].lbads);
break;
}
}
return nvme_command_ertr::now();
}
+// Issue an NVMe WRITE as a pass-through I/O command with end-to-end data
+// protection (checksum offload) enabled: PRACT=1 makes the controller
+// generate/insert the protection info, PRCHK=GUARD makes it verify the
+// guard (CRC) field, so no checksum work is done on the host CPU.
+// @param offset      byte offset on the device; converted to a starting LBA
+//                    below, so it must be nvme_block_size aligned
+// @param len         byte length of the transfer; must be a non-zero multiple
+//                    of nvme_block_size (NLB below is computed from it)
+// @param buffer_ptr  source buffer; NOTE(review): must stay alive until the
+//                    command completes -- callers pin it via seastar::do_with,
+//                    confirm at call sites
+// @return resolved future on success; a non-zero completion status is treated
+//         as fatal (ceph_abort) rather than propagated as an error
+write_ertr::future<> NVMeBlockDevice::nvme_write(
+ uint64_t offset, size_t len, void *buffer_ptr) {
+ return seastar::do_with(
+ nvme_io_command_t(),
+ [this, offset, len, buffer_ptr] (auto &cmd) {
+ cmd.common.opcode = nvme_io_command_t::OPCODE_WRITE;
+ cmd.common.nsid = namespace_id;
+ cmd.common.data_len = len;
+ // To perform checksum offload, we need to set PRACT to 1 and PRCHK to 4
+ // according to NVMe spec.
+ cmd.rw.prinfo_pract = nvme_rw_command_t::PROTECT_INFORMATION_ACTION_ENABLE;
+ cmd.rw.prinfo_prchk = nvme_rw_command_t::PROTECT_INFORMATION_CHECK_GUARD;
+ cmd.common.addr = (__u64)(uintptr_t)buffer_ptr;
+ // nvme_block_size is filled in from the identify-namespace LBA format;
+ // ffsll(x) - 1 is log2(x) for a power-of-two x. TODO(review): confirm
+ // nvme_block_size is always a power of two for supported formats.
+ ceph_assert(super.nvme_block_size > 0);
+ auto lba_shift = ffsll(super.nvme_block_size) - 1;
+ cmd.rw.s_lba = offset >> lba_shift;
+ // NLB is a 0-based block count in the NVMe spec, hence the -1.
+ cmd.rw.nlb = (len >> lba_shift) - 1;
+ return pass_through_io(cmd
+ ).safe_then([] (auto ret) {
+ if (ret != 0) {
+ // Non-zero NVMe completion status: abort rather than return an error,
+ // since a failed protected write indicates data-integrity loss.
+ logger().error(
+ "write nvm command with checksum offload fails : {}", ret);
+ ceph_abort();
+ }
+ return nvme_command_ertr::now();
+ });
+ });
+}
+
+// Issue an NVMe READ as a pass-through I/O command with end-to-end data
+// protection (checksum offload) enabled: PRACT=1 strips the protection info
+// on transfer and PRCHK=GUARD makes the controller verify the guard (CRC)
+// field, so the host does no checksum verification.
+// @param offset      byte offset on the device; converted to a starting LBA
+//                    below, so it must be nvme_block_size aligned
+// @param len         byte length to read; must be a non-zero multiple of
+//                    nvme_block_size (NLB below is computed from it)
+// @param buffer_ptr  destination buffer; NOTE(review): must stay alive until
+//                    the command completes -- callers pin it via
+//                    seastar::do_with, confirm at call sites
+// @return resolved future on success; a non-zero completion status is treated
+//         as fatal (ceph_abort) rather than propagated as an error
+read_ertr::future<> NVMeBlockDevice::nvme_read(
+ uint64_t offset, size_t len, void *buffer_ptr) {
+ return seastar::do_with(
+ nvme_io_command_t(),
+ [this, offset, len, buffer_ptr] (auto &cmd) {
+ cmd.common.opcode = nvme_io_command_t::OPCODE_READ;
+ cmd.common.nsid = namespace_id;
+ cmd.common.data_len = len;
+ // PRACT=1 / PRCHK=GUARD: controller checks the guard tag per NVMe spec
+ // (same settings as nvme_write above).
+ cmd.rw.prinfo_pract = nvme_rw_command_t::PROTECT_INFORMATION_ACTION_ENABLE;
+ cmd.rw.prinfo_prchk = nvme_rw_command_t::PROTECT_INFORMATION_CHECK_GUARD;
+ cmd.common.addr = (__u64)(uintptr_t)buffer_ptr;
+ // nvme_block_size comes from the identify-namespace LBA format;
+ // ffsll(x) - 1 is log2(x) for a power-of-two x. TODO(review): confirm
+ // nvme_block_size is always a power of two for supported formats.
+ ceph_assert(super.nvme_block_size > 0);
+ auto lba_shift = ffsll(super.nvme_block_size) - 1;
+ cmd.rw.s_lba = offset >> lba_shift;
+ // NLB is a 0-based block count in the NVMe spec, hence the -1.
+ cmd.rw.nlb = (len >> lba_shift) - 1;
+ return pass_through_io(cmd
+ ).safe_then([] (auto ret) {
+ if (ret != 0) {
+ // Non-zero NVMe completion status: abort rather than return an error,
+ // since a failed guard check means the data read is not trustworthy.
+ logger().error(
+ "read nvm command with checksum offload fails : {}", ret);
+ ceph_abort();
+ }
+ return nvme_command_ertr::now();
+ });
+ });
+}
+
}
uint32_t dspec : 16;
static const uint32_t DTYPE_STREAM = 1;
+
+ static const uint8_t PROTECT_INFORMATION_ACTION_ENABLE = 1;
+ static const uint8_t PROTECT_INFORMATION_CHECK_GUARD = 4;
+ static const uint8_t PROTECT_INFORMATION_CHECK_APPLICATION_TAG = 2;
+ static const uint8_t PROTECT_INFORMATION_CHECK_LOGICAL_REFERENCE_TAG = 1;
};
struct nvme_io_command_t {
nvme_rw_command_t rw;
};
static const uint8_t OPCODE_WRITE = 0x01;
- static const uint8_t OPCODE_READ = 0x01;
+ static const uint8_t OPCODE_READ = 0x02;
};
/*
uint64_t offset,
bufferptr &bptr) final;
+ read_ertr::future<> nvme_read(
+ uint64_t offset, size_t len, void *buffer_ptr);
+
close_ertr::future<> close() override;
discard_ertr::future<> discard(
ceph::bufferlist bl,
uint16_t stream = 0) final;
+ write_ertr::future<> nvme_write(
+ uint64_t offset, size_t len, void *buffer_ptr);
+
stat_device_ret stat_device() final {
return seastar::file_stat(device_path, seastar::follow_symlink::yes
).handle_exception([](auto e) -> stat_device_ret {
uint64_t write_alignment = 4096;
uint32_t atomic_write_unit = 4096;
+ int namespace_id; // TODO: multi namespaces
std::string device_path;
seastar::sharded<NVMeBlockDevice> shard_devices;
};