]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: fix aio pwritev lost data problem.
author Sage Weil <sage@redhat.com>
Wed, 10 Jul 2019 15:36:50 +0000 (10:36 -0500)
committer Sage Weil <sage@redhat.com>
Wed, 10 Jul 2019 15:36:50 +0000 (10:36 -0500)
On Linux, write() (and similar system calls) will transfer at most
0x7ffff000 (2,147,479,552) bytes, so an aio pwritev of more than 0x7ffff000
bytes is truncated. We therefore have to split the data across multiple aio submissions.

Signed-off-by: kungf <yang.wang@easystack.cn>
(cherry picked from commit 4d33114a40d5ae0d541c36175977ca22789a3b88)

# Conflicts:
# src/os/bluestore/KernelDevice.cc
- fd_direct vs choose_fd()

src/os/bluestore/KernelDevice.cc
src/os/bluestore/KernelDevice.h

index b8d1ebf1951456ba831b735ebbc07c0d70ac59af..95275f18cce6d0b2625310590abde5172e69b371 100644 (file)
@@ -756,9 +756,6 @@ int KernelDevice::aio_write(
 
 #ifdef HAVE_LIBAIO
   if (aio && dio && !buffered) {
-    ioc->pending_aios.push_back(aio_t(ioc, fd_direct));
-    ++ioc->num_pending;
-    aio_t& aio = ioc->pending_aios.back();
     if (cct->_conf->bdev_inject_crash &&
        rand() % cct->_conf->bdev_inject_crash == 0) {
       derr << __func__ << " bdev_inject_crash: dropping io 0x" << std::hex
@@ -766,16 +763,48 @@ int KernelDevice::aio_write(
           << dendl;
       // generate a real io so that aio_wait behaves properly, but make it
       // a read instead of write, and toss the result.
+      ioc->pending_aios.push_back(aio_t(ioc, fd_direct));
+      ++ioc->num_pending;
+      auto& aio = ioc->pending_aios.back();
       aio.pread(off, len);
       ++injecting_crash;
     } else {
-      bl.prepare_iov(&aio.iov);
-      dout(30) << aio << dendl;
-      aio.bl.claim_append(bl);
-      aio.pwritev(off, len);
+      if (bl.length() <= RW_IO_MAX) {
+       // fast path (non-huge write)
+       ioc->pending_aios.push_back(aio_t(ioc, fd_direct));
+       ++ioc->num_pending;
+       auto& aio = ioc->pending_aios.back();
+       bl.prepare_iov(&aio.iov);
+       aio.bl.claim_append(bl);
+       aio.pwritev(off, len);
+       dout(30) << aio << dendl;
+       dout(5) << __func__ << " 0x" << std::hex << off << "~" << len
+               << std::dec << " aio " << &aio << dendl;
+      } else {
+       // write in RW_IO_MAX-sized chunks
+       uint64_t prev_len = 0;
+       while (prev_len < bl.length()) {
+         bufferlist tmp;
+         if (prev_len + RW_IO_MAX < bl.length()) {
+           tmp.substr_of(bl, prev_len, RW_IO_MAX);
+         } else {
+           tmp.substr_of(bl, prev_len, bl.length() - prev_len);
+         }
+         auto len = tmp.length();
+         ioc->pending_aios.push_back(aio_t(ioc, fd_direct));
+         ++ioc->num_pending;
+         auto& aio = ioc->pending_aios.back();
+         tmp.prepare_iov(&aio.iov);
+         aio.bl.claim_append(tmp);
+         aio.pwritev(off + prev_len, len);
+         dout(30) << aio << dendl;
+         dout(5) << __func__ << " 0x" << std::hex << off + prev_len
+                 << "~" << len
+                 << std::dec << " aio " << &aio << " (piece)" << dendl;
+         prev_len += len;
+       }
+      }
     }
-    dout(5) << __func__ << " 0x" << std::hex << off << "~" << len
-           << std::dec << " aio " << &aio << dendl;
   } else
 #endif
   {
index 9f461ac0630216f18c823db683764cdbca21ebaa..7ce7ad670853db3d9269d9d1eea59076af5ed9be 100644 (file)
 #include "aio.h"
 #include "BlockDevice.h"
 
+#ifndef RW_IO_MAX
+#define RW_IO_MAX 0x7FFFF000
+#endif
+
+
 class KernelDevice : public BlockDevice {
   int fd_direct, fd_buffered;
   std::string path;