From 5e433fee58623c39c7452b338024f244a06336cf Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Wed, 29 Apr 2020 19:59:03 +0300 Subject: [PATCH] os/bluestore: fix large (>2GB) writes when bluefs_buffered_io = true Fixes: https://tracker.ceph.com/issues/45337 Signed-off-by: Igor Fedotov (cherry picked from commit d9b0250e215b4755f38b57dbaabecef1063db8d6) --- src/os/bluestore/KernelDevice.cc | 40 +++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/src/os/bluestore/KernelDevice.cc b/src/os/bluestore/KernelDevice.cc index 8481d67a47446..0fe58822525b6 100644 --- a/src/os/bluestore/KernelDevice.cc +++ b/src/os/bluestore/KernelDevice.cc @@ -773,18 +773,42 @@ int KernelDevice::_sync_write(uint64_t off, bufferlist &bl, bool buffered, int w } vector iov; bl.prepare_iov(&iov); - int r = ::pwritev(choose_fd(buffered, write_hint), - &iov[0], iov.size(), off); - if (r < 0) { - r = -errno; - derr << __func__ << " pwritev error: " << cpp_strerror(r) << dendl; - return r; - } + auto left = len; + auto o = off; + size_t idx = 0; + do { + auto r = ::pwritev(choose_fd(buffered, write_hint), + &iov[idx], iov.size() - idx, o); + + if (r < 0) { + r = -errno; + derr << __func__ << " pwritev error: " << cpp_strerror(r) << dendl; + return r; + } + o += r; + left -= r; + if (left) { + // skip fully processed IOVs + while (idx < iov.size() && (size_t)r >= iov[idx].iov_len) { + r -= iov[idx++].iov_len; + } + // update partially processed one if any + if (r) { + ceph_assert(idx < iov.size()); + ceph_assert((size_t)r < iov[idx].iov_len); + iov[idx].iov_base = static_cast(iov[idx].iov_base) + r; + iov[idx].iov_len -= r; + r = 0; + } + ceph_assert(r == 0); + } + } while (left); + #ifdef HAVE_SYNC_FILE_RANGE if (buffered) { // initiate IO and wait till it completes - r = ::sync_file_range(fd_buffereds[WRITE_LIFE_NOT_SET], off, len, SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER|SYNC_FILE_RANGE_WAIT_BEFORE); + auto r = ::sync_file_range(fd_buffereds[WRITE_LIFE_NOT_SET], off, len, SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER|SYNC_FILE_RANGE_WAIT_BEFORE); if (r < 0) { r = -errno; derr << __func__ << " sync_file_range error: " << cpp_strerror(r) << dendl; -- 2.39.5