git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
seastore/cbjournal: add writev to submit writes with aligned buffers
author myoungwon oh <ohmyoungwon@gmail.com>
Sat, 7 May 2022 07:55:04 +0000 (16:55 +0900)
committer myoungwon oh <ohmyoungwon@gmail.com>
Thu, 19 May 2022 00:49:22 +0000 (09:49 +0900)
Signed-off-by: Myoungwon Oh <myoungwon.oh@samsung.com>
src/crimson/os/seastore/journal/circular_bounded_journal.cc
src/crimson/os/seastore/random_block_manager/nvmedevice.cc
src/crimson/os/seastore/random_block_manager/nvmedevice.h

index 1629a4b69410b08b95859af0ec73da88e954066f..e337246a15e75cc4624accf19adfef268432d530 100644 (file)
@@ -292,16 +292,11 @@ CircularBoundedJournal::write_ertr::future<> CircularBoundedJournal::device_writ
   if (offset + length > get_journal_end()) {
     return crimson::ct_error::erange::make();
   }
-  bl.rebuild_aligned(get_block_size());
   DEBUG(
     "overwrite in CircularBoundedJournal, offset {}, length {}",
     offset,
     length);
-  auto write_length = length < get_block_size() ? get_block_size() : length;
-  auto bptr = bufferptr(ceph::buffer::create_page_aligned(write_length));
-  auto iter = bl.cbegin();
-  iter.copy(bl.length(), bptr.c_str());
-  return device->write(offset, bptr
+  return device->writev(offset, bl
   ).handle_error(
     write_ertr::pass_further{},
     crimson::ct_error::assert_all{ "Invalid error device->write" }
index a29fefe4d4e3b137b347b3965ecd7542701ba162..a0ebd4a2b5a3b174223cdba22dc3e365eb46281c 100644 (file)
@@ -7,6 +7,7 @@
 #include <fcntl.h>
 
 #include "crimson/common/log.h"
+#include "crimson/common/errorator-loop.h"
 
 #include "include/buffer.h"
 #include "nvmedevice.h"
@@ -132,6 +133,52 @@ read_ertr::future<> PosixNVMeDevice::read(
     });
 }
 
+write_ertr::future<> PosixNVMeDevice::writev(  // Writes `bl` to the device as one dma_write per prepared iov.
+  uint64_t offset,  // base offset; each iov is written at offset + p.offset
+  ceph::bufferlist bl,  // taken by value: realigned below, then handed to do_with
+  uint16_t stream) {  // index into io_device; out-of-range values are replaced below
+  logger().debug(
+    "block: write offset {} len {}",
+    offset,
+    bl.length());
+
+  uint16_t supported_stream = stream;
+  if (stream >= stream_id_count) {  // stream index not available: fall back
+    supported_stream = WRITE_LIFE_NOT_SET;
+  }
+  bl.rebuild_aligned(block_size);  // realign to block_size before building iovs (presumably required by dma_write -- confirm)
+
+  return seastar::do_with(  // keeps the iovs and the bufferlist around while the writes are in flight
+    bl.prepare_iovs(),  // NOTE(review): relies on do_with taking its arguments by reference, so `bl` is not yet moved-from here -- confirm
+    std::move(bl),
+    [this, supported_stream, offset](auto& iovs, auto& bl)  // `bl` is not read in the body; presumably held so the memory behind iovs stays valid
+  {
+    return write_ertr::parallel_for_each(  // issue one write per iov entry
+      iovs,
+      [this, supported_stream, offset](auto& p) mutable  // NOTE(review): no capture appears to be modified; `mutable` may be unnecessary
+    {
+      auto off = offset + p.offset;  // absolute device offset of this iov
+      auto len = p.length;  // byte count expected from dma_write
+      auto& iov = p.iov;
+      return io_device[supported_stream].dma_write(off, std::move(iov)
+      ).handle_exception(  // any exception is logged and mapped to input_output_error
+        [this, off, len](auto e) -> write_ertr::future<size_t>
+      {
+        logger().error("D{} poffset={}~{} dma_write got error -- {}",
+              get_device_id(), off, len, e);
+        return crimson::ct_error::input_output_error::make();
+      }).then([this, off, len](size_t written) -> write_ertr::future<> {
+        if (written != len) {  // short write is also reported as input_output_error
+          logger().error("D{} poffset={}~{} dma_write len={} inconsistent",
+                get_device_id(), off, len, written);
+          return crimson::ct_error::input_output_error::make();
+        }
+        return write_ertr::now();
+      });
+    });
+  });
+}
+
 Device::close_ertr::future<> PosixNVMeDevice::close() {
   logger().debug(" close ");
   return device.close().then([this]() {
@@ -258,4 +305,19 @@ Device::close_ertr::future<> TestMemory::close() {
   logger().debug(" close ");
   return close_ertr::now();
 }
+
+// Copies the whole of `bl` into the in-memory buffer `buf`, starting at byte
+// position `offset`, and returns write_ertr::now(). `buf` must be non-null.
+// `stream` is not used, and no check is made here that offset + length fits
+// inside the buffer.
+write_ertr::future<> TestMemory::writev(
+  uint64_t offset,
+  ceph::bufferlist bl,
+  uint16_t stream) {
+  ceph_assert(buf);
+  const auto nbytes = bl.length();
+  logger().debug(
+    "TestMemory: write offset {} len {}",
+    offset,
+    nbytes);
+
+  // Flatten every segment of the list into the destination in one pass.
+  char* const dest = buf + offset;
+  auto src = bl.begin();
+  src.copy(nbytes, dest);
+  return write_ertr::now();
+}
+
 }
index 03b7a428be6d08f3209dc3df0a36190bc2a79d62..d097ccc8375241858ca1db01e7c4c981ba12b5d5 100644 (file)
@@ -336,7 +336,11 @@ public:
   virtual open_ertr::future<> open(
       const std::string& path,
       seastar::open_flags mode) = 0;
-  //virtual seastar::future<> close() = 0;
+
+  virtual write_ertr::future<> writev(  // write a whole bufferlist starting at `offset`
+    uint64_t offset,
+    ceph::bufferlist bl,  // passed by value
+    uint16_t stream = 0) = 0;  // pure virtual; `stream` defaults to 0 when omitted
 
   /*
   * For passing through nvme IO or Admin command to SSD
@@ -428,6 +432,11 @@ public:
     return mount_ertr::now();
   }
 
+  write_ertr::future<> writev(  // bufferlist write; presumably the PosixNVMeDevice override -- confirm against the enclosing class
+    uint64_t offset,
+    ceph::bufferlist bl,
+    uint16_t stream = 0) final;  // `final`: cannot be overridden further; `stream` defaults to 0
+
   nvme_command_ertr::future<int> pass_admin(
     nvme_admin_command_t& admin_cmd) override;
   nvme_command_ertr::future<int> pass_through_io(
@@ -489,6 +498,11 @@ public:
 
   close_ertr::future<> close() override;
 
+  write_ertr::future<> writev(  // bufferlist write; presumably the TestMemory override (class holds `buf`/`size`) -- confirm
+    uint64_t offset,
+    ceph::bufferlist bl,
+    uint16_t stream = 0) final;  // `final`: cannot be overridden further; `stream` defaults to 0
+
   char *buf;
   size_t size;
 };