]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
os/bluestore: fix aio pwritev lost data problem.
authorkungf <yang.wang@easystack.cn>
Tue, 16 Apr 2019 11:21:41 +0000 (19:21 +0800)
committerSage Weil <sage@redhat.com>
Mon, 29 Apr 2019 17:46:30 +0000 (12:46 -0500)
On Linux, write() (and similar system calls) will transfer at most
0x7ffff000 (2,147,479,552) bytes, so an aio pwritev of more than
0x7ffff000 bytes is capped and the remaining data is lost. To avoid this,
we have to split the data across multiple aio submissions.

Signed-off-by: kungf <yang.wang@easystack.cn>
src/os/bluestore/KernelDevice.cc
src/os/bluestore/KernelDevice.h

index 711d5e72d7ec11276bb7101da7bc5468b3cebcaf..ec163c7f1c08f8b78cc2f74dbf2426685e5e753c 100644 (file)
@@ -845,9 +845,6 @@ int KernelDevice::aio_write(
 
 #ifdef HAVE_LIBAIO
   if (aio && dio && !buffered) {
-    ioc->pending_aios.push_back(aio_t(ioc, choose_fd(false, write_hint)));
-    ++ioc->num_pending;
-    aio_t& aio = ioc->pending_aios.back();
     if (cct->_conf->bdev_inject_crash &&
        rand() % cct->_conf->bdev_inject_crash == 0) {
       derr << __func__ << " bdev_inject_crash: dropping io 0x" << std::hex
@@ -855,16 +852,48 @@ int KernelDevice::aio_write(
           << dendl;
       // generate a real io so that aio_wait behaves properly, but make it
       // a read instead of write, and toss the result.
+      ioc->pending_aios.push_back(aio_t(ioc, choose_fd(false, write_hint)));
+      ++ioc->num_pending;
+      auto& aio = ioc->pending_aios.back();
       aio.pread(off, len);
       ++injecting_crash;
     } else {
-      bl.prepare_iov(&aio.iov);
-      dout(30) << aio << dendl;
-      aio.bl.claim_append(bl);
-      aio.pwritev(off, len);
+      if (bl.length() <= RW_IO_MAX) {
+       // fast path (non-huge write)
+       ioc->pending_aios.push_back(aio_t(ioc, choose_fd(false, write_hint)));
+       ++ioc->num_pending;
+       auto& aio = ioc->pending_aios.back();
+       bl.prepare_iov(&aio.iov);
+       aio.bl.claim_append(bl);
+       aio.pwritev(off, len);
+       dout(30) << aio << dendl;
+       dout(5) << __func__ << " 0x" << std::hex << off << "~" << len
+               << std::dec << " aio " << &aio << dendl;
+      } else {
+       // write in RW_IO_MAX-sized chunks
+       uint64_t prev_len = 0;
+       while (prev_len < bl.length()) {
+         bufferlist tmp;
+         if (prev_len + RW_IO_MAX < bl.length()) {
+           tmp.substr_of(bl, prev_len, RW_IO_MAX);
+         } else {
+           tmp.substr_of(bl, prev_len, bl.length() - prev_len);
+         }
+         auto len = tmp.length();
+         ioc->pending_aios.push_back(aio_t(ioc, choose_fd(false, write_hint)));
+         ++ioc->num_pending;
+         auto& aio = ioc->pending_aios.back();
+         tmp.prepare_iov(&aio.iov);
+         aio.bl.claim_append(tmp);
+         aio.pwritev(off + prev_len, len);
+         dout(30) << aio << dendl;
+         dout(5) << __func__ << " 0x" << std::hex << off + prev_len
+                 << "~" << len
+                 << std::dec << " aio " << &aio << " (piece)" << dendl;
+         prev_len += len;
+       }
+      }
     }
-    dout(5) << __func__ << " 0x" << std::hex << off << "~" << len
-           << std::dec << " aio " << &aio << dendl;
   } else
 #endif
   {
index e24b45b3ac08401651816c67b960466477c6eacf..ec8cf2f8dc40034e96374b1e9fcd4824ff951150 100644 (file)
 #include "ceph_aio.h"
 #include "BlockDevice.h"
 
+#ifndef RW_IO_MAX
+#define RW_IO_MAX 0x7FFFF000
+#endif
+
+
 class KernelDevice : public BlockDevice {
   std::vector<int> fd_directs, fd_buffereds;
   bool enable_wrt = true;