From 8e5891306cfd8995b233b12633f845cc1234048e Mon Sep 17 00:00:00 2001 From: Jianjian Huo Date: Thu, 19 May 2016 15:03:41 -0700 Subject: [PATCH] os/bluestore: convert read_buffered() to read_random() with buffered flag The new function handles unaligned random reads for both buffered and direct reads. Signed-off-by: Jianjian Huo --- src/os/bluestore/BlockDevice.h | 3 +- src/os/bluestore/BlueFS.cc | 2 +- src/os/bluestore/KernelDevice.cc | 62 +++++++++++++++++++++++++++----- src/os/bluestore/KernelDevice.h | 4 ++- src/os/bluestore/NVMEDevice.cc | 2 +- src/os/bluestore/NVMEDevice.h | 2 +- 6 files changed, 61 insertions(+), 14 deletions(-) diff --git a/src/os/bluestore/BlockDevice.h b/src/os/bluestore/BlockDevice.h index 6ccc79dc9d5ef..f3ba54af4fbfe 100644 --- a/src/os/bluestore/BlockDevice.h +++ b/src/os/bluestore/BlockDevice.h @@ -93,7 +93,8 @@ public: virtual int read(uint64_t off, uint64_t len, bufferlist *pbl, IOContext *ioc, bool buffered) = 0; - virtual int read_buffered(uint64_t off, uint64_t len, char *buf) = 0; + virtual int read_random(uint64_t off, uint64_t len, char *buf, + bool buffered) = 0; virtual int aio_write(uint64_t off, bufferlist& bl, IOContext *ioc, bool buffered) = 0; diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index d8ed6e1d94bc8..ff1dc71b94475 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -738,7 +738,7 @@ int BlueFS::_read_random( } dout(20) << __func__ << " read buffered " << x_off << "~" << l << " of " << *p << dendl; - int r = bdev[p->bdev]->read_buffered(p->offset + x_off, l, out); + int r = bdev[p->bdev]->read_random(p->offset + x_off, l, out, true); assert(r == 0); off += l; len -= l; diff --git a/src/os/bluestore/KernelDevice.cc b/src/os/bluestore/KernelDevice.cc index 5e4d563a29cc5..2302633ad7556 100644 --- a/src/os/bluestore/KernelDevice.cc +++ b/src/os/bluestore/KernelDevice.cc @@ -25,6 +25,7 @@ #include "common/errno.h" #include "common/debug.h" #include "common/blkdev.h" 
+#include "common/align.h" #define dout_subsys ceph_subsys_bdev #undef dout_prefix @@ -495,26 +496,69 @@ int KernelDevice::read(uint64_t off, uint64_t len, bufferlist *pbl, return r < 0 ? r : 0; } -int KernelDevice::read_buffered(uint64_t off, uint64_t len, char *buf) +int KernelDevice::direct_read_unaligned(uint64_t off, uint64_t len, char *buf) +{ + uint64_t aligned_off = align_down(off, block_size); + uint64_t aligned_len = align_up(off+len, block_size) - aligned_off; + bufferptr p = buffer::create_page_aligned(aligned_len); + int r = 0; + + r = ::pread(fd_direct, p.c_str(), aligned_len, aligned_off); + if (r < 0) { + r = -errno; + goto out; + } + assert((uint64_t)r == aligned_len); + memcpy(buf, p.c_str() + (off - aligned_off), len); + + dout(40) << __func__ << " data: "; + bufferlist bl; + bl.append(buf, len); + bl.hexdump(*_dout); + *_dout << dendl; + + out: + return r < 0 ? r : 0; +} + +int KernelDevice::read_random(uint64_t off, uint64_t len, char *buf, + bool buffered) { dout(5) << __func__ << " 0x" << std::hex << off << "~" << len << std::dec << dendl; assert(len > 0); assert(off < size); assert(off + len <= size); - int r = 0; - char *t = buf; - uint64_t left = len; - while (left > 0) { - r = ::pread(fd_buffered, t, left, off); + + //if it's direct io and unaligned, we have to use a internal buffer + if (!buffered && ((off % block_size != 0) + || (len % block_size != 0) + || (uintptr_t(buf) % CEPH_PAGE_SIZE != 0))) + return direct_read_unaligned(off, len, buf); + + if (buffered) { + //buffered read + char *t = buf; + uint64_t left = len; + while (left > 0) { + r = ::pread(fd_buffered, t, left, off); + if (r < 0) { + r = -errno; + goto out; + } + off += r; + t += r; + left -= r; + } + } else { + //direct and aligned read + r = ::pread(fd_direct, buf, len, off); if (r < 0) { r = -errno; goto out; } - off += r; - t += r; - left -= r; + assert((uint64_t)r == len); } dout(40) << __func__ << " data: "; diff --git a/src/os/bluestore/KernelDevice.h 
b/src/os/bluestore/KernelDevice.h index 0a1cc36a7af82..48cb0d44d93d0 100644 --- a/src/os/bluestore/KernelDevice.h +++ b/src/os/bluestore/KernelDevice.h @@ -62,6 +62,8 @@ class KernelDevice : public BlockDevice { int _lock(); + int direct_read_unaligned(uint64_t off, uint64_t len, char *buf); + public: KernelDevice(aio_callback_t cb, void *cbpriv); @@ -77,7 +79,7 @@ public: int read(uint64_t off, uint64_t len, bufferlist *pbl, IOContext *ioc, bool buffered) override; - int read_buffered(uint64_t off, uint64_t len, char *buf) override; + int read_random(uint64_t off, uint64_t len, char *buf, bool buffered) override; int aio_write(uint64_t off, bufferlist& bl, IOContext *ioc, diff --git a/src/os/bluestore/NVMEDevice.cc b/src/os/bluestore/NVMEDevice.cc index 71667549f047a..c5ba1e4808644 100644 --- a/src/os/bluestore/NVMEDevice.cc +++ b/src/os/bluestore/NVMEDevice.cc @@ -938,7 +938,7 @@ int NVMEDevice::read(uint64_t off, uint64_t len, bufferlist *pbl, return r; } -int NVMEDevice::read_buffered(uint64_t off, uint64_t len, char *buf) +int NVMEDevice::read_random(uint64_t off, uint64_t len, char *buf, bool buffered) { assert(len > 0); assert(off < size); diff --git a/src/os/bluestore/NVMEDevice.h b/src/os/bluestore/NVMEDevice.h index 62294def241d2..75b751ae533d9 100644 --- a/src/os/bluestore/NVMEDevice.h +++ b/src/os/bluestore/NVMEDevice.h @@ -228,7 +228,7 @@ class NVMEDevice : public BlockDevice { IOContext *ioc, bool buffered) override; int flush() override; - int read_buffered(uint64_t off, uint64_t len, char *buf) override; + int read_random(uint64_t off, uint64_t len, char *buf, bool buffered) override; // for managing buffered readers/writers int invalidate_cache(uint64_t off, uint64_t len) override; -- 2.39.5