From 5590291673ae74a3bb6291b8a181341818a83c6a Mon Sep 17 00:00:00 2001 From: myoungwon oh Date: Sat, 7 May 2022 16:55:04 +0900 Subject: [PATCH] seastore/cbjournal: add writev to submit writes with aligned buffers Signed-off-by: Myoungwon Oh --- .../journal/circular_bounded_journal.cc | 7 +-- .../random_block_manager/nvmedevice.cc | 62 +++++++++++++++++++ .../random_block_manager/nvmedevice.h | 16 ++++- 3 files changed, 78 insertions(+), 7 deletions(-) diff --git a/src/crimson/os/seastore/journal/circular_bounded_journal.cc b/src/crimson/os/seastore/journal/circular_bounded_journal.cc index 1629a4b69410b..e337246a15e75 100644 --- a/src/crimson/os/seastore/journal/circular_bounded_journal.cc +++ b/src/crimson/os/seastore/journal/circular_bounded_journal.cc @@ -292,16 +292,11 @@ CircularBoundedJournal::write_ertr::future<> CircularBoundedJournal::device_writ if (offset + length > get_journal_end()) { return crimson::ct_error::erange::make(); } - bl.rebuild_aligned(get_block_size()); DEBUG( "overwrite in CircularBoundedJournal, offset {}, length {}", offset, length); - auto write_length = length < get_block_size() ? 
get_block_size() : length; - auto bptr = bufferptr(ceph::buffer::create_page_aligned(write_length)); - auto iter = bl.cbegin(); - iter.copy(bl.length(), bptr.c_str()); - return device->write(offset, bptr + return device->writev(offset, bl /* alignment now happens on the device side: PosixNVMeDevice::writev calls bl.rebuild_aligned(block_size). NOTE(review): the removed lines padded writes shorter than get_block_size() up to one block - confirm writev still handles that case */ ).handle_error( write_ertr::pass_further{}, crimson::ct_error::assert_all{ "Invalid error device->write" } diff --git a/src/crimson/os/seastore/random_block_manager/nvmedevice.cc b/src/crimson/os/seastore/random_block_manager/nvmedevice.cc index a29fefe4d4e3b..a0ebd4a2b5a3b 100644 --- a/src/crimson/os/seastore/random_block_manager/nvmedevice.cc +++ b/src/crimson/os/seastore/random_block_manager/nvmedevice.cc @@ -7,6 +7,7 @@ #include #include "crimson/common/log.h" +#include "crimson/common/errorator-loop.h" #include "include/buffer.h" #include "nvmedevice.h" @@ -132,6 +133,52 @@ read_ertr::future<> PosixNVMeDevice::read( }); } +write_ertr::future<> PosixNVMeDevice::writev( /* Writes bl to the device starting at offset: one dma_write per entry of bl.prepare_iovs(), dispatched through write_ertr::parallel_for_each */ + uint64_t offset, + ceph::bufferlist bl, + uint16_t stream) { + logger().debug( + "block: write offset {} len {}", + offset, + bl.length()); + + uint16_t supported_stream = stream; + if (stream >= stream_id_count) { /* stream ids at or above stream_id_count fall back to WRITE_LIFE_NOT_SET */ + supported_stream = WRITE_LIFE_NOT_SET; + } + bl.rebuild_aligned(block_size); + + return seastar::do_with( /* do_with holds the iovs and the moved-in bl until the returned future resolves */ + bl.prepare_iovs(), + std::move(bl), + [this, supported_stream, offset](auto& iovs, auto& bl) + { + return write_ertr::parallel_for_each( + iovs, + [this, supported_stream, offset](auto& p) mutable + { + auto off = offset + p.offset; + auto len = p.length; + auto& iov = p.iov; + return io_device[supported_stream].dma_write(off, std::move(iov) + ).handle_exception( /* a thrown exception is logged and mapped to input_output_error */ + [this, off, len](auto e) -> write_ertr::future + { + logger().error("D{} poffset={}~{} dma_write got error -- {}", + get_device_id(), off, len, e); + return crimson::ct_error::input_output_error::make(); + }).then([this, off, len](size_t written) -> write_ertr::future<> { + if (written != len) { /* a short write is logged and also reported as input_output_error */ + logger().error("D{} poffset={}~{} dma_write len={} inconsistent", + get_device_id(), 
off, len, written); + return crimson::ct_error::input_output_error::make(); + } + return write_ertr::now(); + }); + }); + }); +} + Device::close_ertr::future<> PosixNVMeDevice::close() { logger().debug(" close "); return device.close().then([this]() { @@ -258,4 +305,19 @@ Device::close_ertr::future<> TestMemory::close() { logger().debug(" close "); return close_ertr::now(); } + +write_ertr::future<> TestMemory::writev( /* Copies bl.length() bytes of bl into buf at offset after asserting buf is set; stream is not used. NOTE(review): nothing here checks that offset + bl.length() fits within size - confirm callers guarantee it */ + uint64_t offset, + ceph::bufferlist bl, + uint16_t stream) { + ceph_assert(buf); + logger().debug( + "TestMemory: write offset {} len {}", + offset, + bl.length()); + + bl.begin().copy(bl.length(), buf + offset); + return write_ertr::now(); +} + } diff --git a/src/crimson/os/seastore/random_block_manager/nvmedevice.h b/src/crimson/os/seastore/random_block_manager/nvmedevice.h index 03b7a428be6d0..d097ccc837524 100644 --- a/src/crimson/os/seastore/random_block_manager/nvmedevice.h +++ b/src/crimson/os/seastore/random_block_manager/nvmedevice.h @@ -336,7 +336,11 @@ public: virtual open_ertr::future<> open( const std::string& path, seastar::open_flags mode) = 0; - //virtual seastar::future<> close() = 0; + + virtual write_ertr::future<> writev( /* Writes the contents of bl starting at offset; stream defaults to 0 */ + uint64_t offset, + ceph::bufferlist bl, + uint16_t stream = 0) = 0; /* * For passsing through nvme IO or Admin command to SSD @@ -428,6 +432,11 @@ public: return mount_ertr::now(); } + write_ertr::future<> writev( + uint64_t offset, + ceph::bufferlist bl, + uint16_t stream = 0) final; + nvme_command_ertr::future pass_admin( nvme_admin_command_t& admin_cmd) override; nvme_command_ertr::future pass_through_io( @@ -489,6 +498,11 @@ public: close_ertr::future<> close() override; + write_ertr::future<> writev( + uint64_t offset, + ceph::bufferlist bl, + uint16_t stream = 0) final; + char *buf; size_t size; }; -- 2.39.5