From 653882c446504e0465fe08bf36fa5b88081d872e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 10 Dec 2015 16:17:53 -0500 Subject: [PATCH] os/bluestore/BlockDevice: move to simple mutex model Just for now, while we get the rest of this working. Signed-off-by: Sage Weil --- src/os/bluestore/BlockDevice.cc | 89 ++++++++++++++++++--------------- src/os/bluestore/BlockDevice.h | 34 +++++++------ 2 files changed, 68 insertions(+), 55 deletions(-) diff --git a/src/os/bluestore/BlockDevice.cc b/src/os/bluestore/BlockDevice.cc index abb45ffbe016c..e73c2feffa336 100644 --- a/src/os/bluestore/BlockDevice.cc +++ b/src/os/bluestore/BlockDevice.cc @@ -22,16 +22,18 @@ void IOContext::aio_wait() { - Mutex::Locker l(io_map.lock); + Mutex::Locker l(lock); _aio_wait(); } void IOContext::_aio_wait() { - while (num_aio.read()) { - dout(10) << __func__ << " " << this << " waiting for " << num_aio.read() - << " aios to complete" << dendl; - io_map.cond.Wait(io_map.lock); + // see _aio_thread for waker logic + while (num_running > 0 || num_reading > 0) { + dout(10) << __func__ << " " << this + << " waiting for " << num_running << " aios and/or " + << num_reading << " readers to complete" << dendl; + cond.Wait(lock); } dout(20) << __func__ << " " << this << " done" << dendl; } @@ -168,18 +170,16 @@ void BlockDevice::_aio_thread() dout(30) << __func__ << " got " << r << " completed aios" << dendl; for (int i = 0; i < r; ++i) { IOContext *ioc = static_cast(aio[i]->priv); - int left = ioc->num_aio.dec(); + Mutex::Locker l(ioc->lock); + --ioc->num_running; dout(10) << __func__ << " finished aio " << aio[i] << " ioc " << ioc - << left << " aios left" << dendl; - bool plug = ioc->plug; + << " with " << ioc->num_running << " aios left" << dendl; _aio_finish(ioc, aio[i]->offset, aio[i]->length); - if (left == 0) { - if (ioc->priv && !plug) { + if (ioc->num_running == 0) { + ioc->running_bl.clear(); + ioc->running_aios.clear(); + if (ioc->priv) { aio_callback(aio_callback_priv, ioc->priv); - } else { - dout(10) << __func__ << " signaling IOContext " << dendl; - Mutex::Locker l(ioc->io_map.lock); - ioc->io_map.cond.Signal(); } } } @@ -190,22 +190,19 @@ void BlockDevice::_aio_thread() void BlockDevice::_aio_prepare(IOContext *ioc, uint64_t offset, uint64_t length) { - Mutex::Locker l(ioc->io_map.lock); dout(20) << __func__ << " " << offset << "~" << length - << " (" << ioc->io_map.blocks << ")" << dendl; - while (ioc->io_map.blocks.intersects(offset, length)) { + << " (" << ioc->blocks << ")" << dendl; + while (ioc->blocks.intersects(offset, length)) { dout(20) << __func__ << " waiting for overlapping io on " << offset << "~" << length - << " (" << ioc->io_map.blocks << ")" << dendl; - // set the plug so that we do not trigger the ioc completion, and instead - // signal the ioc cond in _aio_thread. - ioc->plug = true; - aio_submit(ioc); + << " (" << ioc->blocks << ")" << dendl; + if (ioc->num_pending) { + aio_submit(ioc); + } ioc->_aio_wait(); - ioc->plug = false; dout(20) << __func__ << " done waiting" << dendl; } - ioc->io_map.blocks.insert(offset, length); + ioc->blocks.insert(offset, length); if (g_conf->bdev_debug_inflight_ios) { if (debug_inflight.intersects(offset, length)) { @@ -220,12 +217,12 @@ void BlockDevice::_aio_prepare(IOContext *ioc, uint64_t offset, uint64_t length) void BlockDevice::_aio_finish(IOContext *ioc, uint64_t offset, uint64_t length) { - Mutex::Locker l(ioc->io_map.lock); + assert(ioc->lock.is_locked()); dout(20) << __func__ << " " << aio << " " << offset << "~" << length - << " (" << ioc->io_map.blocks << ")" + << " (" << ioc->blocks << ")" << dendl; - ioc->io_map.blocks.erase(offset, length); - ioc->io_map.cond.Signal(); + ioc->blocks.erase(offset, length); + ioc->cond.Signal(); if (g_conf->bdev_debug_inflight_ios) { debug_inflight.erase(offset, length); @@ -234,18 +231,22 @@ void BlockDevice::_aio_finish(IOContext *ioc, uint64_t offset, uint64_t length) void BlockDevice::aio_submit(IOContext *ioc) { - int num = ioc->pending_aios.size(); - dout(20) << __func__ << " ioc " << ioc << " submitting " << num << dendl; - assert(num > 0); - ioc->num_aio.set(num); + Mutex::Locker l(ioc->lock); + dout(20) << __func__ << " ioc " << ioc + << " pending " << ioc->num_pending + << " running " << ioc->num_running + << dendl; +#warning fixme can we make this avoid a mutex? // move these aside, and get our end iterator position now, as the // aios might complete as soon as they are submitted and queue more // wal aio's. - list::iterator e = ioc->submitted_aios.begin(); - ioc->submitted_aios.splice(e, ioc->pending_aios); - list::iterator p = ioc->submitted_aios.begin(); - assert(p != e); + list::iterator e = ioc->running_aios.begin(); + ioc->running_aios.splice(e, ioc->pending_aios); + list::iterator p = ioc->running_aios.begin(); + ioc->num_running += ioc->num_pending; + ioc->num_pending = 0; + ioc->running_bl.claim_append(ioc->pending_bl); bool done = false; while (!done) { FS::aio_t& aio = *p; @@ -298,14 +299,18 @@ int BlockDevice::aio_write( bl.hexdump(*_dout); *_dout << dendl; - _aio_prepare(ioc, off, bl.length()); + { + Mutex::Locker l(ioc->lock); + _aio_prepare(ioc, off, bl.length()); + } #ifdef HAVE_LIBAIO if (aio && dio) { ioc->pending_aios.push_back(FS::aio_t(ioc, fd)); + ++ioc->num_pending; FS::aio_t& aio = ioc->pending_aios.back(); bl.prepare_iov(&aio.iov); - ioc->aio_bl.append(bl); + ioc->pending_bl.append(bl); aio.pwritev(off); dout(2) << __func__ << " prepared aio " << &aio << dendl; } else @@ -359,7 +364,11 @@ int BlockDevice::read(uint64_t off, uint64_t len, bufferlist *pbl, IOContext *io assert(off < size); assert(off + len <= size); - _aio_prepare(ioc, off, len); + { + Mutex::Locker l(ioc->lock); + _aio_prepare(ioc, off, len); + ++ioc->num_reading; + } bufferptr p = buffer::create_page_aligned(len); int r = ::pread(fd, p.c_str(), len, off); @@ -370,6 +379,8 @@ int BlockDevice::read(uint64_t off, uint64_t len, bufferlist *pbl, IOContext *io pbl->clear(); pbl->push_back(p); out: + Mutex::Locker l(ioc->lock); + --ioc->num_reading; _aio_finish(ioc, off, len); return r < 0 ? r : 0; } diff --git a/src/os/bluestore/BlockDevice.h b/src/os/bluestore/BlockDevice.h index 2c867727627a9..4e8c090a867be 100644 --- a/src/os/bluestore/BlockDevice.h +++ b/src/os/bluestore/BlockDevice.h @@ -8,30 +8,32 @@ #include "include/interval_set.h" /// track in-flight io -struct IOMap { +struct IOContext { + void *priv; + Mutex lock; Cond cond; interval_set blocks; ///< blocks with aio in flight - IOMap() : lock("IOMap::lock") {} -}; - -struct IOContext { - void *priv; - IOMap io_map; - list pending_aios; ///< not yet submitted - list submitted_aios; ///< submitting or submitted - bufferlist aio_bl; // just a pile of refs - atomic_t num_aio; - bool plug; + list running_aios; ///< submitting or submitted + int num_pending; + int num_running; + int num_reading; + + bufferlist pending_bl; // just a pile of refs + bufferlist running_bl; // just a pile of refs - IOContext(void *p) : priv(p), plug(false) {} + IOContext(void *p) + : priv(p), + lock("IOContext::lock"), + num_pending(0), + num_running(0), + num_reading(0) {} bool has_aios() { - return - !pending_aios.empty() || - !submitted_aios.empty(); + Mutex::Locker l(lock); + return num_pending + num_running; } void aio_wait(); -- 2.39.5