From: Haodong Tang Date: Fri, 30 Jun 2017 05:08:15 +0000 (+0800) Subject: os/bluestore/KernelDevice: batch aio submit X-Git-Tag: v12.1.1~174^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1ed9ae8fe15acce7d41f95456872dc73cecb1ba2;p=ceph.git os/bluestore/KernelDevice: batch aio submit reduce unnecessary system calls (io_submit). Signed-off-by: Haodong Tang --- diff --git a/src/os/bluestore/KernelDevice.cc b/src/os/bluestore/KernelDevice.cc index c6b9e5ad2898..032e21362533 100644 --- a/src/os/bluestore/KernelDevice.cc +++ b/src/os/bluestore/KernelDevice.cc @@ -481,54 +481,46 @@ void KernelDevice::aio_submit(IOContext *ioc) << " pending " << ioc->num_pending.load() << " running " << ioc->num_running.load() << dendl; + if (ioc->num_pending.load() == 0) { return; } + // move these aside, and get our end iterator position now, as the // aios might complete as soon as they are submitted and queue more // wal aio's. list::iterator e = ioc->running_aios.begin(); ioc->running_aios.splice(e, ioc->pending_aios); - list::iterator p = ioc->running_aios.begin(); int pending = ioc->num_pending.load(); ioc->num_running += pending; ioc->num_pending -= pending; assert(ioc->num_pending.load() == 0); // we should be only thread doing this + assert(ioc->pending_aios.size() == 0); + + if (cct->_conf->bdev_debug_aio) { + list::iterator p = ioc->running_aios.begin(); + while (p != e) { + for (auto& io : p->iov) + dout(30) << __func__ << " iov " << (void*)io.iov_base + << " len " << io.iov_len << dendl; - bool done = false; - while (!done) { - aio_t& aio = *p; - aio.priv = static_cast(ioc); - dout(20) << __func__ << " aio " << &aio << " fd " << aio.fd - << " 0x" << std::hex << aio.offset << "~" << aio.length - << std::dec << dendl; - for (auto& io : aio.iov) - dout(30) << __func__ << " iov " << (void*)io.iov_base - << " len " << io.iov_len << dendl; - - // be careful: as soon as we submit aio we race with completion. 
- // since we are holding a ref take care not to dereference txc at - // all after that point. - list::iterator cur = p; - ++p; - done = (p == e); - - // do not dereference txc (or it's contents) after we submit (if - // done == true and we don't loop) - int retries = 0; - if (cct->_conf->bdev_debug_aio) { std::lock_guard l(debug_queue_lock); - debug_aio_link(*cur); - } - int r = aio_queue.submit(*cur, &retries); - if (retries) - derr << __func__ << " retries " << retries << dendl; - if (r) { - derr << " aio submit got " << cpp_strerror(r) << dendl; - assert(r == 0); + debug_aio_link(*p++); } } + + void *priv = static_cast(ioc); + int r, retries = 0; + r = aio_queue.submit_batch(ioc->running_aios.begin(), e, + ioc->num_running.load(), priv, &retries); + + if (retries) + derr << __func__ << " retries " << retries << dendl; + if (r < 0) { + derr << " aio submit got " << cpp_strerror(r) << dendl; + assert(r == 0); + } } int KernelDevice::_sync_write(uint64_t off, bufferlist &bl, bool buffered) diff --git a/src/os/fs/aio.cc b/src/os/fs/aio.cc index a5edf6266655..cfe0c5cf8c89 100644 --- a/src/os/fs/aio.cc +++ b/src/os/fs/aio.cc @@ -5,14 +5,16 @@ #if defined(HAVE_LIBAIO) + int aio_queue_t::submit(aio_t &aio, int *retries) { // 2^16 * 125us = ~8 seconds, so max sleep is ~16 seconds int attempts = 16; int delay = 125; iocb *piocb = &aio.iocb; + int r; while (true) { - int r = io_submit(ctx, 1, &piocb); + r = io_submit(ctx, 1, &piocb); if (r < 0) { if (r == -EAGAIN && attempts-- > 0) { usleep(delay); @@ -20,12 +22,43 @@ int aio_queue_t::submit(aio_t &aio, int *retries) (*retries)++; continue; } - return r; } assert(r == 1); break; } - return 0; + return r; +} + +int aio_queue_t::submit_batch(aio_iter begin, aio_iter end, + uint16_t aios_size, void *priv, + int *retries) +{ + // 2^16 * 125us = ~8 seconds, so max sleep is ~16 seconds + int attempts = 16; + int delay = 125; + + aio_iter cur = begin; + struct iocb *piocb[aios_size]; + int r, pos = 0; + while (cur != end) { + 
cur->priv = priv; + *(piocb+pos) = &cur->iocb; + ++pos; + ++cur; + } + while (true) { + r = io_submit(ctx, pos, piocb); + if (r < 0) { + if (r == -EAGAIN && attempts-- > 0) { + usleep(delay); + delay *= 2; + (*retries)++; + continue; + } + } + break; + } + return r; } int aio_queue_t::get_next_completed(int timeout_ms, aio_t **paio, int max) diff --git a/src/os/fs/aio.h b/src/os/fs/aio.h index c4757158cc90..2517e5f64fbb 100644 --- a/src/os/fs/aio.h +++ b/src/os/fs/aio.h @@ -11,6 +11,7 @@ #include #include "include/buffer.h" +#include "include/types.h" struct aio_t { struct iocb iocb; // must be first element; see shenanigans in aio_queue_t @@ -55,6 +56,8 @@ struct aio_queue_t { int max_iodepth; io_context_t ctx; + typedef list::iterator aio_iter; + explicit aio_queue_t(unsigned max_iodepth) : max_iodepth(max_iodepth), ctx(0) { @@ -83,6 +86,8 @@ struct aio_queue_t { } int submit(aio_t &aio, int *retries); + int submit_batch(aio_iter begin, aio_iter end, uint16_t aios_size, + void *priv, int *retries); int get_next_completed(int timeout_ms, aio_t **paio, int max); };