From beed83f76499bf3b0957def411b1fb8be32cab59 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 13 Apr 2017 10:29:26 -0400 Subject: [PATCH] os/bluestore/KernelBlockDevice: sync write() method Avoid aio machinery (and context switching) and do a simple synchronous (O_DIRECT) write. Signed-off-by: Sage Weil --- src/os/bluestore/BlockDevice.h | 4 ++ src/os/bluestore/KernelDevice.cc | 89 ++++++++++++++++++++++---------- src/os/bluestore/KernelDevice.h | 3 ++ src/os/bluestore/NVMEDevice.cc | 9 ++++ src/os/bluestore/NVMEDevice.h | 1 + 5 files changed, 79 insertions(+), 27 deletions(-) diff --git a/src/os/bluestore/BlockDevice.h b/src/os/bluestore/BlockDevice.h index 157add61fcc..d0ec9a1b462 100644 --- a/src/os/bluestore/BlockDevice.h +++ b/src/os/bluestore/BlockDevice.h @@ -108,6 +108,10 @@ public: uint64_t len, char *buf, bool buffered) = 0; + virtual int write( + uint64_t off, + bufferlist& bl, + bool buffered) = 0; virtual int aio_read( uint64_t off, diff --git a/src/os/bluestore/KernelDevice.cc b/src/os/bluestore/KernelDevice.cc index 77eaf3602bc..0041ec53a8b 100644 --- a/src/os/bluestore/KernelDevice.cc +++ b/src/os/bluestore/KernelDevice.cc @@ -527,6 +527,66 @@ void KernelDevice::aio_submit(IOContext *ioc) } } +int KernelDevice::_sync_write(uint64_t off, bufferlist &bl, bool buffered) +{ + uint64_t len = bl.length(); + dout(5) << __func__ << " 0x" << std::hex << off << "~" << len + << std::dec << (buffered ? " (buffered)" : " (direct)") << dendl; + if (cct->_conf->bdev_inject_crash && + rand() % cct->_conf->bdev_inject_crash == 0) { + derr << __func__ << " bdev_inject_crash: dropping io 0x" << std::hex + << off << "~" << len << std::dec << dendl; + ++injecting_crash; + return 0; + } + vector<iovec> iov; + bl.prepare_iov(&iov); + int r = ::pwritev(buffered ? 
fd_buffered : fd_direct, + &iov[0], iov.size(), off); + + if (r < 0) { + r = -errno; + derr << __func__ << " pwritev error: " << cpp_strerror(r) << dendl; + return r; + } + if (buffered) { + // initiate IO (but do not wait) + r = ::sync_file_range(fd_buffered, off, len, SYNC_FILE_RANGE_WRITE); + if (r < 0) { + r = -errno; + derr << __func__ << " sync_file_range error: " << cpp_strerror(r) << dendl; + return r; + } + } + return 0; +} + +int KernelDevice::write( + uint64_t off, + bufferlist &bl, + bool buffered) +{ + uint64_t len = bl.length(); + dout(20) << __func__ << " 0x" << std::hex << off << "~" << len << std::dec + << (buffered ? " (buffered)" : " (direct)") + << dendl; + assert(off % block_size == 0); + assert(len % block_size == 0); + assert(len > 0); + assert(off < size); + assert(off + len <= size); + + if ((!buffered || bl.get_num_buffers() >= IOV_MAX) && + bl.rebuild_aligned_size_and_memory(block_size, block_size)) { + dout(20) << __func__ << " rebuilding buffer to be aligned" << dendl; + } + dout(40) << "data: "; + bl.hexdump(*_dout); + *_dout << dendl; + + return _sync_write(off, bl, buffered); +} + int KernelDevice::aio_write( uint64_t off, bufferlist &bl, @@ -581,35 +641,10 @@ int KernelDevice::aio_write( } else #endif { - dout(5) << __func__ << " 0x" << std::hex << off << "~" << len - << std::dec << " buffered" << dendl; - if (cct->_conf->bdev_inject_crash && - rand() % cct->_conf->bdev_inject_crash == 0) { - derr << __func__ << " bdev_inject_crash: dropping io 0x" << std::hex - << off << "~" << len << std::dec << dendl; - ++injecting_crash; - return 0; - } - vector<iovec> iov; - bl.prepare_iov(&iov); - int r = ::pwritev(buffered ? 
fd_buffered : fd_direct, - &iov[0], iov.size(), off); + int r = _sync_write(off, bl, buffered); _aio_log_finish(ioc, off, len); - - if (r < 0) { - r = -errno; - derr << __func__ << " pwritev error: " << cpp_strerror(r) << dendl; + if (r < 0) return r; - } - if (buffered) { - // initiate IO (but do not wait) - r = ::sync_file_range(fd_buffered, off, len, SYNC_FILE_RANGE_WRITE); - if (r < 0) { - r = -errno; - derr << __func__ << " sync_file_range error: " << cpp_strerror(r) << dendl; - return r; - } - } } return 0; } diff --git a/src/os/bluestore/KernelDevice.h b/src/os/bluestore/KernelDevice.h index 2cc2ebdccee..faccde3d5ba 100644 --- a/src/os/bluestore/KernelDevice.h +++ b/src/os/bluestore/KernelDevice.h @@ -60,6 +60,8 @@ class KernelDevice : public BlockDevice { void _aio_log_start(IOContext *ioc, uint64_t offset, uint64_t length); void _aio_log_finish(IOContext *ioc, uint64_t offset, uint64_t length); + int _sync_write(uint64_t off, bufferlist& bl, bool buffered); /* synchronous pwritev() write shared by write() and aio_write(); 0 on success, -errno on failure */ + int _lock(); int direct_read_unaligned(uint64_t off, uint64_t len, char *buf); @@ -93,6 +95,7 @@ public: IOContext *ioc) override; int read_random(uint64_t off, uint64_t len, char *buf, bool buffered) override; + int write(uint64_t off, bufferlist& bl, bool buffered) override; int aio_write(uint64_t off, bufferlist& bl, IOContext *ioc, bool buffered) override; diff --git a/src/os/bluestore/NVMEDevice.cc b/src/os/bluestore/NVMEDevice.cc index 0ffdff3dda5..e33e8bec36f 100644 --- a/src/os/bluestore/NVMEDevice.cc +++ b/src/os/bluestore/NVMEDevice.cc @@ -1024,6 +1024,15 @@ int NVMEDevice::aio_write( return 0; } +int NVMEDevice::write(uint64_t off, bufferlist &bl, bool buffered) +{ + // FIXME: there is presumably a more efficient way to do this... 
+ IOContext ioc(NULL); + int r = aio_write(off, bl, &ioc, buffered); + ioc.aio_wait(); + return r; /* report aio_write()'s status rather than an unconditional 0 */ +} + int NVMEDevice::read(uint64_t off, uint64_t len, bufferlist *pbl, IOContext *ioc, bool buffered) diff --git a/src/os/bluestore/NVMEDevice.h b/src/os/bluestore/NVMEDevice.h index 318c3415a94..f670e308e43 100644 --- a/src/os/bluestore/NVMEDevice.h +++ b/src/os/bluestore/NVMEDevice.h @@ -229,6 +229,7 @@ class NVMEDevice : public BlockDevice { int aio_write(uint64_t off, bufferlist& bl, IOContext *ioc, bool buffered) override; + int write(uint64_t off, bufferlist& bl, bool buffered) override; int flush() override; int read_random(uint64_t off, uint64_t len, char *buf, bool buffered) override; -- 2.39.5